]> Git Repo - buildroot-mgba.git/blob - utils/scanpypi
scanpypi: fix code style
[buildroot-mgba.git] / utils / scanpypi
1 #!/usr/bin/env python2
2 """
3
4 Utility for building Buildroot packages for existing PyPI packages
5
6 Any package built by scanpypi should be manually checked for
7 errors.
8 """
9 from __future__ import print_function
10 import argparse
11 import json
12 import urllib2
13 import sys
14 import os
15 import shutil
16 import StringIO
17 import tarfile
18 import zipfile
19 import errno
20 import hashlib
21 import re
22 import textwrap
23 import tempfile
24 import imp
25 from functools import wraps
26
# Chunk size, in bytes, used when hashing license files.
BUF_SIZE = 64 * 1024

# spdx_lookup is an optional dependency: when it cannot be imported the name
# stays bound to None and the license is guessed from the PyPI classifiers.
liclookup = None
try:
    import spdx_lookup as liclookup
except ImportError:
    print('spdx_lookup module is not installed. This can lead to an '
          'inaccurate licence detection. Please install it via\n'
          'pip install spdx_lookup')
37
38
def setup_decorator(func, method):
    """
    Build a stand-in for distutils.core.setup or setuptools.setup.

    The stand-in never calls the wrapped function: it only stores the
    keyword arguments it receives in BuildrootPackage.setup_args, indexed
    by the value of the 'name' argument, and adds a 'method' entry to them.

    Keyword arguments:
    func -- either setuptools.setup or distutils.core.setup
    method -- either 'setuptools' or 'distutils'
    """

    @wraps(func)
    def closure(*args, **kwargs):
        # Every python package calls setup() to get installed, and the
        # 'name' argument of that call is the name of the package.
        package_name = kwargs['name']
        kwargs['method'] = method
        BuildrootPackage.setup_args[package_name] = kwargs
    return closure
57
# monkey patch
# Replace both setup() entry points so that executing a package's setup.py
# only records the arguments it passes instead of installing anything.
import setuptools  # noqa E402
setuptools.setup = setup_decorator(setuptools.setup, 'setuptools')
# Import the submodule explicitly: a bare 'import distutils' only exposes
# distutils.core when some other module happens to have imported it already.
import distutils.core  # noqa E402
# Wrap the distutils function itself, not the already patched setuptools one.
distutils.core.setup = setup_decorator(distutils.core.setup, 'distutils')
63
64
def find_file_upper_case(filenames, path='./'):
    """
    Generator:
    Walk the directory tree rooted at path and yield every file whose
    upper-cased name is one of the given filenames.
    Each yielded path starts with the path argument.

    Keyword arguments:
    filenames -- Upper-case filenames to look for
    path -- Path to the directory to search
    """
    for dirpath, _subdirs, entries in os.walk(path):
        matching = (entry for entry in entries if entry.upper() in filenames)
        for entry in matching:
            yield os.path.join(dirpath, entry)
79
80
def pkg_buildroot_name(pkg_name):
    """
    Returns the Buildroot package name for the PyPI package pkg_name.
    Lowers the name, removes every character that is neither a word
    character (letter, digit, underscore) nor '-', and adds the 'python-'
    prefix unless the name already starts with it.
    A name left empty by the clean-up is returned as is.

    Keyword arguments:
    pkg_name -- String to rename
    """
    # Raw string: '\w' is not a valid escape in a plain string literal.
    name = re.sub(r'[^\w-]', '', pkg_name.lower())
    prefix = 'python-'
    if name and not name.startswith(prefix):
        name = prefix + name
    return name
95
96
class DownloadFailed(Exception):
    """Raised when none of the download URLs of a package gave a usable file."""
99
100
class BuildrootPackage():
    """This class's methods are not meant to be used individually please
    use them in the correct order:

    __init__

    fetch_package_info

    download_package

    extract_package

    load_setup

    get_requirements

    create_package_mk

    create_hash_file

    create_config_in

    """
    # Arguments recorded by the patched setup() functions, indexed by the
    # 'name' argument each setup.py passed. Shared by all instances.
    setup_args = {}

    def __init__(self, real_name, pkg_folder):
        """
        Keyword arguments:
        real_name -- name of the package on PyPI
        pkg_folder -- directory in which the Buildroot package is created
        """
        self.real_name = real_name
        self.buildroot_name = pkg_buildroot_name(self.real_name)
        self.pkg_dir = os.path.join(pkg_folder, self.buildroot_name)
        self.mk_name = self.buildroot_name.upper().replace('-', '_')
        self.as_string = None
        self.md5_sum = None
        self.metadata = None
        self.metadata_name = None
        self.metadata_url = None
        self.pkg_req = None
        self.setup_metadata = None
        self.tmp_extract = None
        self.used_url = None
        self.filename = None
        self.url = None
        self.version = None
        self.license_files = []

    def fetch_package_info(self):
        """
        Fetch a package's metadata from the python package index

        Sets self.metadata, self.version and self.metadata_name.
        Re-raises urllib2.URLError (urllib2.HTTPError included) after
        printing an error message.
        """
        self.metadata_url = 'https://pypi.python.org/pypi/{pkg}/json'.format(
            pkg=self.real_name)
        try:
            pkg_json = urllib2.urlopen(self.metadata_url).read().decode()
        except urllib2.URLError as error:
            # HTTPError is a subclass of URLError that also carries a
            # status code worth reporting.
            if isinstance(error, urllib2.HTTPError):
                print('ERROR:', error.getcode(), error.msg, file=sys.stderr)
            print('ERROR: Could not find package {pkg}.\n'
                  'Check syntax inside the python package index:\n'
                  'https://pypi.python.org/pypi/ '
                  .format(pkg=self.real_name))
            raise
        self.metadata = json.loads(pkg_json)
        self.version = self.metadata['info']['version']
        self.metadata_name = self.metadata['info']['name']

    def download_package(self):
        """
        Download a package using metadata from pypi

        Sets self.as_string (raw archive), self.used_url, self.filename and
        self.url. Raises the last urllib2.HTTPError met, or DownloadFailed
        when no URL gave a file matching its md5 digest.
        """
        if not self.metadata['urls']:
            print(
                'Non-conventional package, ',
                'please check carefully after creation')
            fallback_url = self.metadata['info']['download_url']
            # In this case, we can't get the name of the downloaded file
            # from the pypi api, so we need to find it, this should work
            # urlparse().path give something like
            # /path/to/file-version.tar.gz
            # We use basename to remove /path/to
            urlpath = urllib2.urlparse.urlparse(fallback_url).path
            self.metadata['urls'] = [{
                'packagetype': 'sdist',
                'url': fallback_url,
                'md5_digest': None,
                'filename': os.path.basename(urlpath)}]

        # Stays None when every URL is skipped (only binary distributions),
        # so the failure path below never reads an unbound name.
        download = None
        for download_url in self.metadata['urls']:
            if 'bdist' in download_url['packagetype']:
                continue
            try:
                print('Downloading package {pkg} from {url}...'.format(
                      pkg=self.real_name, url=download_url['url']))
                download = urllib2.urlopen(download_url['url'])
            except urllib2.HTTPError as http_error:
                download = http_error
            else:
                self.used_url = download_url
                self.as_string = download.read()
                if not download_url['md5_digest']:
                    # Nothing to check the download against
                    break
                self.md5_sum = hashlib.md5(self.as_string).hexdigest()
                if self.md5_sum == download_url['md5_digest']:
                    break
        else:
            # No URL produced a usable archive
            if isinstance(download, urllib2.HTTPError):
                raise download
            raise DownloadFailed('Failed to download package {pkg}'
                                 .format(pkg=self.real_name))
        self.filename = self.used_url['filename']
        self.url = self.used_url['url']

    @staticmethod
    def _reset_directory(path):
        """Create an empty directory at path, replacing an existing one."""
        try:
            os.makedirs(path)
        except OSError as exception:
            if exception.errno != errno.EEXIST:
                print("ERROR: ", exception, file=sys.stderr)
                raise
            print('WARNING:', exception, file=sys.stderr)
            print('Removing {pkg}...'.format(pkg=path))
            shutil.rmtree(path)
            os.makedirs(path)

    def extract_package(self, tmp_path):
        """
        Extract the package contents into a directory

        Sets self.tmp_extract to the directory the sources are expected in
        (<tmp_path>/<buildroot name>/<metadata name>-<version>).
        Raises OSError when the extraction directory cannot be created.

        Keyword arguments:
        tmp_path -- directory where you want the package to be extracted
        """
        as_file = StringIO.StringIO(self.as_string)
        tmp_pkg = os.path.join(tmp_path, self.buildroot_name)
        # The archive is opened before touching the file system so that a
        # corrupt download fails without leaving a directory behind.
        if self.filename[-3:] == 'zip':
            archive = zipfile.ZipFile(as_file)
        else:
            archive = tarfile.open(fileobj=as_file)
        with archive:
            self._reset_directory(tmp_pkg)
            # NOTE(review): extractall() trusts the member paths of a
            # downloaded archive; a malicious one can write outside tmp_pkg.
            archive.extractall(tmp_pkg)

        tmp_extract = '{folder}/{name}-{version}'
        self.tmp_extract = tmp_extract.format(
            folder=tmp_pkg,
            name=self.metadata_name,
            version=self.version)

    def load_setup(self):
        """
        Loads the corresponding setup and store its metadata

        The working directory, sys.path and sys.modules are restored even
        when the setup script raises, so that a failing package does not
        disturb the processing of the next ones.
        """
        # NOTE(review): this executes the setup.py of a downloaded package,
        # i.e. arbitrary code from the package index.
        current_dir = os.getcwd()
        os.chdir(self.tmp_extract)
        sys.path.append(self.tmp_extract)
        try:
            s_file, s_path, s_desc = imp.find_module('setup',
                                                     [self.tmp_extract])
            try:
                setup = imp.load_module('setup', s_file, s_path, s_desc)
            finally:
                # imp leaves closing the file to the caller
                if s_file:
                    s_file.close()
            try:
                self.setup_metadata = self.setup_args[self.metadata_name]
            except KeyError:
                # This means setup was not called which most likely mean
                # that it is called through the if __name__ == '__main__'
                # directive.
                # In this case, we can only pray that it is called through a
                # function called main() in setup.py.
                setup.main()  # Will raise AttributeError if not found
                self.setup_metadata = self.setup_args[self.metadata_name]
        finally:
            # Here we must remove the module the hard way.
            # We must do this because of a very specific case: if a package
            # calls setup from the __main__ but does not come with a
            # 'main()' function, for some reason setup.main() will
            # successfully call the main function of a previous package...
            sys.modules.pop('setup', None)
            os.chdir(current_dir)
            sys.path.remove(self.tmp_extract)

    def get_requirements(self, pkg_folder):
        """
        Retrieve dependencies from the metadata found in the setup.py script of
        a pypi package.

        Sets self.pkg_req to the Buildroot names of the dependencies (None
        when setup.py declares none) and returns the set of PyPI names of
        the dependencies that have no directory in pkg_folder yet.

        Keyword Arguments:
        pkg_folder -- location of the already created packages
        """
        if 'install_requires' not in self.setup_metadata:
            self.pkg_req = None
            return set()
        # Keep only the leading name of each requirement (drops version
        # specifiers, extras and markers)
        requirements = [re.sub(r'([-.\w]+).*', r'\1', req)
                        for req in self.setup_metadata['install_requires']]

        # get rid of commented lines and also strip the package strings
        requirements = [item.strip() for item in requirements
                        if len(item) > 0 and item[0] != '#']

        self.pkg_req = [pkg_buildroot_name(req) for req in requirements]
        # Both names are needed: the Buildroot one to look for an existing
        # package directory, the PyPI one to add to the download list.
        return set(
            real_name
            for real_name, br_name in zip(requirements, self.pkg_req)
            if not os.path.isdir(os.path.join(pkg_folder, br_name)))

    def __create_mk_header(self):
        """
        Create the header of the <package_name>.mk file
        """
        return [
            '#' * 80 + '\n',
            '#\n',
            '# {name}\n'.format(name=self.buildroot_name),
            '#\n',
            '#' * 80 + '\n',
            '\n',
        ]

    def __create_mk_download_info(self):
        """
        Create the lines referring to the download information of the
        <package_name>.mk file
        """
        lines = []
        version_line = '{name}_VERSION = {version}\n'.format(
            name=self.mk_name,
            version=self.version)
        lines.append(version_line)

        targz = self.filename.replace(
            self.version,
            '$({name}_VERSION)'.format(name=self.mk_name))
        targz_line = '{name}_SOURCE = {filename}\n'.format(
            name=self.mk_name,
            filename=targz)
        lines.append(targz_line)

        if self.filename not in self.url:
            # Sometimes the filename is in the url, sometimes it's not
            site_url = self.url
        else:
            site_url = self.url[:self.url.find(self.filename)]
        site_line = '{name}_SITE = {url}'.format(name=self.mk_name,
                                                 url=site_url)
        site_line = site_line.rstrip('/') + '\n'
        lines.append(site_line)
        return lines

    def __create_mk_setup(self):
        """
        Create the line referring to the setup method of the package of the
        <package_name>.mk file

        There are two things you can use to make an installer
        for a python package: distutils or setuptools
        distutils comes with python but does not support dependencies.
        distutils is mostly still there for backward support.
        setuptools is what smart people use,
        but it is not shipped with python :(
        """
        setup_type_line = '{name}_SETUP_TYPE = {method}\n'.format(
            name=self.mk_name,
            method=self.setup_metadata['method'])
        return [setup_type_line]

    def __get_license_names(self, license_files):
        """
        Try to determine the related license name.

        There are two possibilities. Either the scripts tries to
        get license name from package's metadata or, if spdx_lookup
        package is available, the script compares license files with
        SPDX database.

        Returns the <PKG>_LICENSE line, or an empty string when spdx_lookup
        is available but recognised none of the license files.

        Keyword arguments:
        license_files -- paths of the license files found in the sources
        """
        license_line = ''
        if liclookup is None:
            license_dict = {
                'Apache Software License': 'Apache-2.0',
                'BSD License': 'BSD',
                'European Union Public Licence 1.0': 'EUPL-1.0',
                'European Union Public Licence 1.1': 'EUPL-1.1',
                "GNU General Public License": "GPL",
                "GNU General Public License v2": "GPL-2.0",
                "GNU General Public License v2 or later": "GPL-2.0+",
                "GNU General Public License v3": "GPL-3.0",
                "GNU General Public License v3 or later": "GPL-3.0+",
                "GNU Lesser General Public License v2": "LGPL-2.1",
                "GNU Lesser General Public License v2 or later": "LGPL-2.1+",
                "GNU Lesser General Public License v3": "LGPL-3.0",
                "GNU Lesser General Public License v3 or later": "LGPL-3.0+",
                "GNU Library or Lesser General Public License": "LGPL-2.0",
                "ISC License": "ISC",
                "MIT License": "MIT",
                "Mozilla Public License 1.0": "MPL-1.0",
                "Mozilla Public License 1.1": "MPL-1.1",
                "Mozilla Public License 2.0": "MPL-2.0",
                "Zope Public License": "ZPL"
                }
            regexp = re.compile(r'^License :* *.* *:+ (.*)( \(.*\))?$')
            classifiers_licenses = [
                regexp.sub(r"\1", lic)
                for lic in self.metadata['info']['classifiers']
                if regexp.match(lic)]
            # Classifiers without a known short name are kept verbatim
            licenses = [license_dict.get(lic, lic)
                        for lic in classifiers_licenses]
            if not licenses:
                # No license classifier: fall back on the free-form field.
                # The fallback is assigned before the warning so that the
                # message shows the value actually used; a missing field is
                # reported as 'unknown' instead of failing in join().
                licenses = [self.metadata['info'].get('license') or 'unknown']
                print('WARNING: License has been set to "{license}". It is most'
                      ' likely wrong, please change it if need be'.format(
                          license=', '.join(licenses)))
            license_line = '{name}_LICENSE = {license}\n'.format(
                name=self.mk_name,
                license=', '.join(licenses))
        else:
            license_names = []
            for license_file in license_files:
                with open(license_file) as lic_file:
                    match = liclookup.match(lic_file.read())
                # match is None when the text resembles no known license
                if match is not None and match.confidence >= 90.0:
                    license_names.append(match.license.id)

            if len(license_names) > 0:
                license_line = ('{name}_LICENSE ='
                                ' {names}\n'.format(
                                    name=self.mk_name,
                                    names=', '.join(license_names)))

        return license_line

    def _license_relpath(self, license_file):
        """Return the path of license_file relative to the package sources."""
        return os.path.relpath(license_file, self.tmp_extract)

    def __create_mk_license(self):
        """
        Create the lines referring to the package's license informations of the
        <package_name>.mk file

        The license's files are found by searching the package (case insensitive)
        for files named license, license.txt etc. All the files found are
        listed; when there are several, their names are printed so that the
        user can review them.
        """
        lines = []

        filenames = ['LICENCE', 'LICENSE', 'LICENSE.RST', 'LICENSE.TXT',
                     'COPYING', 'COPYING.TXT']
        self.license_files = list(find_file_upper_case(filenames, self.tmp_extract))

        lines.append(self.__get_license_names(self.license_files))

        license_files = [self._license_relpath(license_file)
                         for license_file in self.license_files]
        if license_files:
            if len(license_files) > 1:
                print('More than one file found for license:',
                      ', '.join(license_files))
            license_file_line = ('{name}_LICENSE_FILES ='
                                 ' {files}\n'.format(
                                     name=self.mk_name,
                                     files=' '.join(license_files)))
            lines.append(license_file_line)
        else:
            # Nothing is written to the .mk file in this case
            print('WARNING: No license file found,'
                  ' please specify it manually afterwards')

        return lines

    def __create_mk_requirements(self):
        """
        Create the lines referring to the dependencies of the
        <package_name>.mk file

        Reads the dependencies from self.pkg_req; a package without
        dependencies gets an empty list.
        """
        dependencies_line = ('{name}_DEPENDENCIES ='
                             ' {reqs}\n'.format(
                                 name=self.mk_name,
                                 reqs=' '.join(self.pkg_req or [])))
        return [dependencies_line]

    def create_package_mk(self):
        """
        Create the lines corresponding to the <package_name>.mk file
        """
        pkg_mk = '{name}.mk'.format(name=self.buildroot_name)
        path_to_mk = os.path.join(self.pkg_dir, pkg_mk)
        print('Creating {file}...'.format(file=path_to_mk))
        lines = self.__create_mk_header()
        lines += self.__create_mk_download_info()
        lines += self.__create_mk_setup()
        lines += self.__create_mk_license()

        lines.append('\n')
        lines.append('$(eval $(python-package))')
        lines.append('\n')
        with open(path_to_mk, 'w') as mk_file:
            mk_file.writelines(lines)

    def create_hash_file(self):
        """
        Create the lines corresponding to the <package_name>.hash files
        """
        pkg_hash = '{name}.hash'.format(name=self.buildroot_name)
        path_to_hash = os.path.join(self.pkg_dir, pkg_hash)
        print('Creating {filename}...'.format(filename=path_to_hash))
        lines = []
        if self.used_url['md5_digest']:
            md5_comment = '# md5 from {url}, sha256 locally computed\n'.format(
                url=self.metadata_url)
            lines.append(md5_comment)
            hash_line = '{method}\t{digest}  {filename}\n'.format(
                method='md5',
                digest=self.used_url['md5_digest'],
                filename=self.filename)
            lines.append(hash_line)
        digest = hashlib.sha256(self.as_string).hexdigest()
        hash_line = '{method}\t{digest}  {filename}\n'.format(
            method='sha256',
            digest=digest,
            filename=self.filename)
        lines.append(hash_line)

        for license_file in self.license_files:
            sha256 = hashlib.sha256()
            with open(license_file, 'rb') as lic_f:
                # Hash by chunks to bound the memory used
                for data in iter(lambda: lic_f.read(BUF_SIZE), b''):
                    sha256.update(data)
            # Same relative path as in <PKG>_LICENSE_FILES, so that a
            # license stored in a sub-directory is identified correctly
            hash_line = '{method}\t{digest}  {filename}\n'.format(
                method='sha256',
                digest=sha256.hexdigest(),
                filename=self._license_relpath(license_file))
            lines.append(hash_line)

        with open(path_to_hash, 'w') as hash_file:
            hash_file.writelines(lines)

    def create_config_in(self):
        """
        Creates the Config.in file of a package

        The help text is the PyPI summary followed by the home page; either
        is left out when the metadata does not provide it.
        """
        path_to_config = os.path.join(self.pkg_dir, 'Config.in')
        print('Creating {file}...'.format(file=path_to_config))
        lines = []
        config_line = 'config BR2_PACKAGE_{name}\n'.format(
            name=self.mk_name)
        lines.append(config_line)

        bool_line = '\tbool "{name}"\n'.format(name=self.buildroot_name)
        lines.append(bool_line)
        if self.pkg_req:
            for dep in self.pkg_req:
                dep_line = '\tselect BR2_PACKAGE_{req} # runtime\n'.format(
                    req=dep.upper().replace('-', '_'))
                lines.append(dep_line)

        lines.append('\thelp\n')

        info = self.metadata['info']
        # \t + two spaces is 3 char long
        help_lines = textwrap.wrap(info.get('summary') or '',
                                   initial_indent='\t  ',
                                   subsequent_indent='\t  ')

        # make sure a help text is terminated with a full stop
        # (wrap() returns an empty list for an empty summary)
        if help_lines and help_lines[-1][-1] != '.':
            help_lines[-1] += '.'

        home_page = info.get('home_page')
        if home_page:
            help_lines.append('')
            help_lines.append('\t  ' + home_page)
        lines += [help_line + '\n' for help_line in help_lines]

        with open(path_to_config, 'w') as config_file:
            config_file.writelines(lines)
591
592
def main():
    """
    Command line entry point.

    Creates a Buildroot package (.mk, .hash and Config.in files) in the
    output directory for each PyPI package named on the command line. The
    dependencies found in a package's setup.py that are not already in the
    list are added to it and processed in turn. A package that cannot be
    fetched, downloaded, extracted or loaded is reported and skipped.
    """
    # Building the parser
    parser = argparse.ArgumentParser(
        description="Creates buildroot packages from the metadata of "
                    "an existing PyPI packages and include it "
                    "in menuconfig")
    parser.add_argument("packages",
                        help="list of packages to be created",
                        nargs='+')
    parser.add_argument("-o", "--output",
                        help="""
                        Output directory for packages.
                        Default is ./package
                        """,
                        default='./package')

    args = parser.parse_args()
    packages = list(set(args.packages))

    # tmp_path is where we'll extract the files later
    tmp_prefix = 'scanpypi-'
    pkg_folder = args.output
    tmp_path = tempfile.mkdtemp(prefix=tmp_prefix)
    try:
        # The list grows while it is iterated: dependencies appended below
        # are picked up by this same loop.
        for real_pkg_name in packages:
            package = BuildrootPackage(real_pkg_name, pkg_folder)
            print('buildroot package name for {}:'.format(package.real_name),
                  package.buildroot_name)
            # First we download the package
            # Most of the info we need can only be found inside the package
            print('Package:', package.buildroot_name)
            print('Fetching package', package.real_name)
            try:
                package.fetch_package_info()
            except (urllib2.URLError, urllib2.HTTPError):
                continue
            if package.metadata_name.lower() == 'setuptools':
                # setuptools imports itself, that does not work very well
                # with the monkey patch at the beginning
                print('Error: setuptools cannot be built using scanPyPI')
                continue

            try:
                package.download_package()
            except urllib2.HTTPError as error:
                print('Error: {code} {reason}'.format(code=error.code,
                                                      reason=error.reason))
                print('Error downloading package :', package.buildroot_name)
                print()
                continue
            except DownloadFailed as error:
                # Raised when no URL gave a usable archive; skip this
                # package instead of aborting the whole run
                print('Error: {error}'.format(error=error))
                print('Error downloading package :', package.buildroot_name)
                print()
                continue

            # extract the tarball
            try:
                package.extract_package(tmp_path)
            except (tarfile.ReadError, zipfile.BadZipfile):
                print('Error extracting package {}'.format(package.real_name))
                print()
                continue
            except OSError as error:
                # The extraction directory could not be (re)created
                print('Error extracting package {}: {}'.format(
                    package.real_name, error))
                print()
                continue

            # Loading the package install info from the package
            try:
                package.load_setup()
            except ImportError as err:
                if 'buildutils' in str(err):
                    print('This package needs buildutils')
                else:
                    raise
                continue
            except AttributeError as error:
                print('Error: Could not install package {pkg}: {error}'.format(
                    pkg=package.real_name, error=error))
                continue

            # Package requirement are an argument of the setup function
            req_not_found = package.get_requirements(pkg_folder)
            req_not_found = req_not_found.difference(packages)

            # Sorted so that dependencies are processed in a stable order
            packages.extend(sorted(req_not_found))
            if req_not_found:
                print('Added packages \'{pkgs}\' as dependencies of {pkg}'
                      .format(pkgs=", ".join(req_not_found),
                              pkg=package.buildroot_name))
            print('Checking if package {name} already exists...'.format(
                name=package.pkg_dir))
            try:
                os.makedirs(package.pkg_dir)
            except OSError as exception:
                if exception.errno != errno.EEXIST:
                    # str(exception) carries errno, message and path;
                    # exception.message is empty for OSError
                    print("ERROR: ", exception, file=sys.stderr)
                    continue
                print('Error: Package {name} already exists'
                      .format(name=package.pkg_dir))
                del_pkg = raw_input(
                    'Do you want to delete existing package ? [y/N]')
                if del_pkg.lower() == 'y':
                    shutil.rmtree(package.pkg_dir)
                    os.makedirs(package.pkg_dir)
                else:
                    continue
            package.create_package_mk()

            package.create_hash_file()

            package.create_config_in()
            print()
            # printing an empty line for visual comfort
    finally:
        # Always remove the extraction directory, even on error
        shutil.rmtree(tmp_path)


if __name__ == "__main__":
    main()
This page took 0.071684 seconds and 4 git commands to generate.