"""
Utility for building Buildroot packages for existing PyPI packages

Any package built by scanpypi should be manually checked for
errors.
"""
9 from __future__ import print_function
10 from __future__ import absolute_import
24 from functools import wraps
25 import six.moves.urllib.request
26 import six.moves.urllib.error
27 import six.moves.urllib.parse
28 from six.moves import map
29 from six.moves import zip
30 from six.moves import input
# spdx_lookup is an optional dependency used to identify licenses from the
# content of the license files.
try:
    import spdx_lookup as liclookup
except ImportError:
    # spdx_lookup is not installed
    print('spdx_lookup module is not installed. This can lead to an '
          'inaccurate licence detection. Please install it via\n'
          'pip install spdx_lookup')
    # The license detection code tests "liclookup is None" to decide whether
    # it has to fall back to the PyPI classifiers.
    liclookup = None
def setup_decorator(func, method):
    """
    Decorator for distutils.core.setup and setuptools.setup.

    The returned wrapper does not run func. It stores the keyword
    arguments with which setup is called as a dict in
    BuildrootPackage.setup_args, keyed by the 'name' argument, and adds a
    key 'method' which should be either 'setuptools' or 'distutils'.

    Keyword arguments:
    func -- either setuptools.setup or distutils.core.setup
    method -- either 'setuptools' or 'distutils'

    Returns the wrapper. Calling it raises KeyError when setup() is called
    without a 'name' keyword argument.
    """

    @wraps(func)
    def closure(*args, **kwargs):
        # Any python packages calls its setup function to be installed.
        # Argument 'name' of this setup function is the package's name
        recorded = dict(kwargs, method=method)
        BuildrootPackage.setup_args[kwargs['name']] = recorded
    return closure
# Monkey patch: replace both setup() entry points so that importing the
# setup.py of a package records its arguments through setup_decorator.
import setuptools  # noqa E402
setuptools.setup = setup_decorator(setuptools.setup, 'setuptools')
# "core" is a submodule of distutils: it has to be imported explicitly, a
# bare "import distutils" does not guarantee that the attribute exists.
import distutils.core  # noqa E402
distutils.core.setup = setup_decorator(distutils.core.setup, 'distutils')
def find_file_upper_case(filenames, path='./'):
    """
    Generator:
    Recursively find files that matches one of the specified filenames.
    The comparison ignores case: both the requested names and the names
    found on disk are upper-cased before being compared.
    Yields paths starting with the path argument.

    Keyword arguments:
    filenames -- List of filenames to be found
    path -- Path to the directory to search
    """
    # Built once so that every directory entry costs a single set lookup.
    wanted = set(name.upper() for name in filenames)
    for root, _dirs, files in os.walk(path):
        for file in files:
            if file.upper() in wanted:
                yield os.path.join(root, file)
def pkg_buildroot_name(pkg_name):
    """
    Returns the Buildroot package name for the PyPI package pkg_name.
    Remove all non alphanumeric characters except - and _, then turn
    every _ into -.
    Also lowers the name and adds the 'python-' prefix unless the name
    already starts with it. An empty result is returned unchanged.

    Keyword arguments:
    pkg_name -- String to rename
    """
    prefix = 'python-'
    # Raw string: '\w' in a normal literal is an invalid escape sequence.
    name = re.sub(r'[^\w-]', '', pkg_name.lower())
    name = name.replace('_', '-')
    if name and not name.startswith(prefix):
        name = prefix + name
    return name
class DownloadFailed(Exception):
    """Raised when no source archive could be downloaded for a package."""
class BuildrootPackage():
    """Collect the data of one PyPI package and write its Buildroot files.

    This class's methods are not meant to be used individually, please
    use them in the correct order:

    __init__

    fetch_package_info

    download_package

    extract_package

    load_setup

    get_requirements

    create_package_mk

    create_hash_file

    create_config_in

    """
    # Keyword arguments captured by the patched setup() functions, keyed by
    # the 'name' given to setup(). Shared by every instance.
    setup_args = {}

    def __init__(self, real_name, pkg_folder):
        """
        Keyword arguments:
        real_name -- name of the package on PyPI
        pkg_folder -- directory in which the package directory is created
        """
        self.real_name = real_name
        self.buildroot_name = pkg_buildroot_name(self.real_name)
        self.pkg_dir = os.path.join(pkg_folder, self.buildroot_name)
        # Prefix of the make variables, e.g. python-foo -> PYTHON_FOO
        self.mk_name = self.buildroot_name.upper().replace('-', '_')
        # Everything below is filled in by the other methods.
        self.as_string = None
        self.md5_sum = None
        self.metadata = None
        self.metadata_name = None
        self.metadata_url = None
        self.pkg_req = None
        self.setup_metadata = None
        self.tmp_extract = None
        self.used_url = None
        self.filename = None
        self.url = None
        self.version = None
        self.license_files = []

    def fetch_package_info(self):
        """
        Fetch a package's metadata from the python package index

        Sets metadata_url, metadata, version and metadata_name.
        A failing request is reported on the console and the URLError
        (or HTTPError) is re-raised.
        """
        self.metadata_url = 'https://pypi.org/pypi/{pkg}/json'.format(
            pkg=self.real_name)
        try:
            response = six.moves.urllib.request.urlopen(self.metadata_url)
            try:
                pkg_json = response.read().decode()
            finally:
                response.close()
        except six.moves.urllib.error.URLError as error:
            # HTTPError is a subclass of URLError which additionally carries
            # a status worth showing.
            if isinstance(error, six.moves.urllib.error.HTTPError):
                print('ERROR:', error.getcode(), error.msg, file=sys.stderr)
            print('ERROR: Could not find package {pkg}.\n'
                  'Check syntax inside the python package index:\n'
                  'https://pypi.python.org/pypi/ '
                  .format(pkg=self.real_name))
            raise
        self.metadata = json.loads(pkg_json)
        self.version = self.metadata['info']['version']
        self.metadata_name = self.metadata['info']['name']

    def download_package(self):
        """
        Download a package using metadata from pypi

        Every non-bdist entry of metadata['urls'] is tried in turn until
        one is downloaded whose md5 matches the advertised digest (or
        which advertises no md5 at all). Sets as_string, md5_sum,
        used_url, filename and url.

        Raises DownloadFailed when there is no source archive to try, or
        the HTTPError of the last attempt when that attempt failed.
        """
        download = None
        try:
            self.metadata['urls'][0]['filename']
        except IndexError:
            print(
                'Non-conventional package, ',
                'please check carefully after creation')
            self.metadata['urls'] = [{
                'packagetype': 'sdist',
                'url': self.metadata['info']['download_url'],
                'digests': None}]
            # In this case, we can't get the name of the downloaded file
            # from the pypi api, so we need to find it, this should work
            urlpath = six.moves.urllib.parse.urlparse(
                self.metadata['info']['download_url']).path
            # urlparse().path give something like
            # /path/to/file-version.tar.gz
            # We use basename to remove /path/to
            self.metadata['urls'][0]['filename'] = os.path.basename(urlpath)
        for download_url in self.metadata['urls']:
            if 'bdist' in download_url['packagetype']:
                continue
            try:
                print('Downloading package {pkg} from {url}...'.format(
                    pkg=self.real_name, url=download_url['url']))
                download = six.moves.urllib.request.urlopen(download_url['url'])
            except six.moves.urllib.error.HTTPError as http_error:
                download = http_error
            else:
                self.used_url = download_url
                self.as_string = download.read()
                # 'digests' is None for the non-conventional entry built
                # above, so it cannot be indexed blindly.
                expected_md5 = (download_url.get('digests') or {}).get('md5')
                if not expected_md5:
                    break
                self.md5_sum = hashlib.md5(self.as_string).hexdigest()
                if self.md5_sum == expected_md5:
                    break

        if download is None:
            raise DownloadFailed('Failed to download package {pkg}: '
                                 'No source archive available'
                                 .format(pkg=self.real_name))
        elif isinstance(download, six.moves.urllib.error.HTTPError):
            raise download

        self.filename = self.used_url['filename']
        self.url = self.used_url['url']

    def check_archive(self, members):
        """
        Check archive content before extracting

        Exits the program with status 1 when a member would be extracted
        outside of the current directory.

        Keyword arguments:
        members -- list of archive members
        """
        # Protect against https://github.com/snyk/zip-slip-vulnerability
        # Older python versions do not validate that the extracted files are
        # inside the target directory. Detect and error out on evil paths
        evil = [e for e in members if os.path.relpath(e).startswith(('/', '..'))]
        if evil:
            print('ERROR: Refusing to extract {} with suspicious members {}'.format(
                self.filename, evil))
            sys.exit(1)

    def _prepare_extract_dir(self, tmp_pkg):
        """Create tmp_pkg empty; return False when it cannot be created."""
        try:
            os.makedirs(tmp_pkg)
        except OSError as exception:
            if exception.errno != errno.EEXIST:
                print("ERROR: ", exception.strerror, file=sys.stderr)
                return False
            # Leftover of a previous extraction: start from scratch.
            print('WARNING:', exception.strerror, file=sys.stderr)
            print('Removing {pkg}...'.format(pkg=tmp_pkg))
            shutil.rmtree(tmp_pkg)
            os.makedirs(tmp_pkg)
        return True

    def extract_package(self, tmp_path):
        """
        Extract the package contents into a directory

        The archive held in as_string is unpacked into
        <tmp_path>/<buildroot_name> and tmp_extract is set to the directory
        named after the archive inside of it. When the target directory
        cannot be created the error is printed and tmp_extract is left
        untouched.

        Keyword arguments:
        tmp_path -- directory where you want the package to be extracted
        """
        # io.BytesIO handles the downloaded bytes on Python 2 and 3 alike.
        import io
        as_file = io.BytesIO(self.as_string)
        tmp_pkg = os.path.join(tmp_path, self.buildroot_name)
        if self.filename[-3:] == 'zip':
            with zipfile.ZipFile(as_file) as as_zipfile:
                if not self._prepare_extract_dir(tmp_pkg):
                    return
                self.check_archive(as_zipfile.namelist())
                as_zipfile.extractall(tmp_pkg)
                pkg_filename = self.filename.split(".zip")[0]
        else:
            with tarfile.open(fileobj=as_file) as as_tarfile:
                if not self._prepare_extract_dir(tmp_pkg):
                    return
                self.check_archive(as_tarfile.getnames())
                as_tarfile.extractall(tmp_pkg)
                pkg_filename = self.filename.split(".tar")[0]

        tmp_extract = '{folder}/{name}'
        self.tmp_extract = tmp_extract.format(
            folder=tmp_pkg,
            name=pkg_filename)

    def load_setup(self):
        """
        Loads the corresponding setup and store its metadata

        The setup.py found in tmp_extract is imported from within that
        directory; the arguments recorded in setup_args for this package
        are stored in setup_metadata. The working directory, sys.path and
        sys.modules are restored even when setup.py raises.

        Raises AttributeError when setup() was not called on import and
        setup.py has no main() function.
        """
        current_dir = os.getcwd()
        os.chdir(self.tmp_extract)
        sys.path.append(self.tmp_extract)
        s_file = None
        try:
            s_file, s_path, s_desc = imp.find_module('setup', [self.tmp_extract])
            setup = imp.load_module('setup', s_file, s_path, s_desc)
            # The name given to setup() may use '-' where PyPI uses '_' or
            # the other way round.
            candidates = (self.metadata_name,
                          self.metadata_name.replace('_', '-'),
                          self.metadata_name.replace('-', '_'))
            for candidate in candidates:
                if candidate in self.setup_args:
                    self.metadata_name = candidate
                    break
            try:
                self.setup_metadata = self.setup_args[self.metadata_name]
            except KeyError:
                # This means setup was not called which most likely mean that
                # it is called through the if __name__ == '__main__' directive.
                # In this case, we can only pray that it is called through a
                # function called main() in setup.py.
                setup.main()  # Will raise AttributeError if not found
                self.setup_metadata = self.setup_args[self.metadata_name]
        finally:
            if s_file is not None:
                s_file.close()
            # Here we must remove the module the hard way.
            # We must do this because of a very specific case: if a package
            # calls setup from the __main__ but does not come with a 'main()'
            # function, for some reason setup.main() will successfully call
            # the main function of a previous package...
            sys.modules.pop('setup', None)
            os.chdir(current_dir)
            sys.path.remove(self.tmp_extract)

    def get_requirements(self, pkg_folder):
        """
        Retrieve dependencies from the metadata found in the setup.py script of
        a pypi package.

        Sets pkg_req to the Buildroot names of the dependencies (None when
        setup() got no 'install_requires') and returns the set of PyPI
        names whose package directory does not exist in pkg_folder yet.

        Keyword Arguments:
        pkg_folder -- location of the already created packages
        """
        if 'install_requires' not in self.setup_metadata:
            self.pkg_req = None
            return set()
        # Keep only the leading name of every requirement specifier
        requirements = [re.sub(r'([-.\w]+).*', r'\1', req)
                        for req in self.setup_metadata['install_requires']]

        # get rid of commented lines and also strip the package strings
        pypi_names = [item.strip() for item in requirements
                      if len(item) > 0 and item[0] != '#']

        self.pkg_req = list(map(pkg_buildroot_name, pypi_names))
        # Both names are needed: the PyPI one is what gets downloaded, the
        # Buildroot one, e.g. ('werkzeug', 'python-werkzeug'), is the name
        # of the directory an already created package lives in.
        return set(
            pypi_name
            for pypi_name, br_name in zip(pypi_names, self.pkg_req)
            if not os.path.isdir(os.path.join(pkg_folder, br_name)))

    def __create_mk_header(self):
        """
        Create the header of the <package_name>.mk file
        """
        ruler = '#' * 80 + '\n'
        return [
            ruler,
            '#\n',
            '# {name}\n'.format(name=self.buildroot_name),
            '#\n',
            ruler,
            '\n',
        ]

    def __create_mk_download_info(self):
        """
        Create the lines referring to the download information of the
        <package_name>.mk file
        """
        lines = []
        version_line = '{name}_VERSION = {version}\n'.format(
            name=self.mk_name,
            version=self.version)
        lines.append(version_line)

        if self.buildroot_name != self.real_name:
            targz = self.filename.replace(
                self.version,
                '$({name}_VERSION)'.format(name=self.mk_name))
            targz_line = '{name}_SOURCE = {filename}\n'.format(
                name=self.mk_name,
                filename=targz)
            lines.append(targz_line)

        if self.filename not in self.url:
            # Sometimes the filename is in the url, sometimes it's not
            site_url = self.url
        else:
            site_url = self.url[:self.url.find(self.filename)]
        site_line = '{name}_SITE = {url}'.format(name=self.mk_name,
                                                 url=site_url)
        site_line = site_line.rstrip('/') + '\n'
        lines.append(site_line)
        return lines

    def __create_mk_setup(self):
        """
        Create the line referring to the setup method of the package of the
        <package_name>.mk file

        There are two things you can use to make an installer
        for a python package: distutils or setuptools
        distutils comes with python but does not support dependencies.
        distutils is mostly still there for backward support.
        setuptools is what smart people use,
        but it is not shipped with python :(
        """
        lines = []
        setup_type_line = '{name}_SETUP_TYPE = {method}\n'.format(
            name=self.mk_name,
            method=self.setup_metadata['method'])
        lines.append(setup_type_line)
        return lines

    @staticmethod
    def _unique(items):
        """Return items without duplicates, keeping the first occurrences."""
        seen = set()
        result = []
        for item in items:
            if item not in seen:
                seen.add(item)
                result.append(item)
        return result

    @staticmethod
    def _licenses_from_classifiers(classifiers):
        """Return the licenses named by the 'License' PyPI classifiers."""
        license_dict = {
            'Apache Software License': 'Apache-2.0',
            'BSD License': 'FIXME: please specify the exact BSD version',
            'European Union Public Licence 1.0': 'EUPL-1.0',
            'European Union Public Licence 1.1': 'EUPL-1.1',
            "GNU General Public License": "GPL",
            "GNU General Public License v2": "GPL-2.0",
            "GNU General Public License v2 or later": "GPL-2.0+",
            "GNU General Public License v3": "GPL-3.0",
            "GNU General Public License v3 or later": "GPL-3.0+",
            "GNU Lesser General Public License v2": "LGPL-2.1",
            "GNU Lesser General Public License v2 or later": "LGPL-2.1+",
            "GNU Lesser General Public License v3": "LGPL-3.0",
            "GNU Lesser General Public License v3 or later": "LGPL-3.0+",
            "GNU Library or Lesser General Public License": "LGPL-2.0",
            "ISC License": "ISC",
            "MIT License": "MIT",
            "Mozilla Public License 1.0": "MPL-1.0",
            "Mozilla Public License 1.1": "MPL-1.1",
            "Mozilla Public License 2.0": "MPL-2.0",
            "Zope Public License": "ZPL"
            }
        # The name group is lazy so that a trailing " (GPLv2)" style
        # abbreviation is left out of it: the keys above do not have one.
        regexp = re.compile(r'^License :* *.* *:+ (.*?)( \(.*\))?$')
        licenses = []
        for classifier in classifiers:
            found = regexp.match(classifier)
            if found:
                name = found.group(1)
                licenses.append(license_dict.get(name, name))
        return licenses

    def __get_license_names(self, license_files):
        """
        Try to determine the related license name.

        There are two possibilities. Either the script tries to
        get license name from package's metadata or, if spdx_lookup
        package is available, the script compares license files with
        SPDX database.

        Returns the <NAME>_LICENSE line, or an empty string when
        spdx_lookup is available but there is no license file.
        """
        license_line = ''
        if liclookup is None:
            licenses = self._licenses_from_classifiers(
                self.metadata['info']['classifiers'])
            if not licenses:
                # Fall back to the free-form license field, which may be
                # missing or empty.
                fallback = self.metadata['info'].get('license')
                licenses = [fallback or "FIXME: license id couldn't be detected"]
                print('WARNING: License has been set to "{license}". It is most'
                      ' likely wrong, please change it if need be'.format(
                          license=', '.join(licenses)))
            license_line = '{name}_LICENSE = {license}\n'.format(
                name=self.mk_name,
                license=', '.join(self._unique(licenses)))
        else:
            license_names = []
            for license_file in license_files:
                with open(license_file) as lic_file:
                    match = liclookup.match(lic_file.read())
                if match is not None and match.confidence >= 90.0:
                    license_names.append(match.license.id)
                else:
                    license_names.append("FIXME: license id couldn't be detected")
            license_names = self._unique(license_names)

            if len(license_names) > 0:
                license_line = ('{name}_LICENSE ='
                                ' {names}\n'.format(
                                    name=self.mk_name,
                                    names=', '.join(license_names)))

        return license_line

    def __create_mk_license(self):
        """
        Create the lines referring to the package's license information of the
        <package_name>.mk file

        The license's files are found by searching the package (case insensitive)
        for files named license, license.txt etc. All the files found are
        listed; when there is more than one, their names are printed so that
        the user can review the result.
        """
        lines = []

        filenames = ['LICENCE', 'LICENSE', 'LICENSE.RST', 'LICENSE.TXT',
                     'COPYING', 'COPYING.TXT']
        self.license_files = list(find_file_upper_case(filenames, self.tmp_extract))

        lines.append(self.__get_license_names(self.license_files))

        # The .mk file wants paths relative to the top of the sources
        license_files = [os.path.relpath(license_file, self.tmp_extract)
                         for license_file in self.license_files]
        if len(license_files) > 0:
            if len(license_files) > 1:
                print('More than one file found for license:',
                      ', '.join(license_files))
            license_file_line = ('{name}_LICENSE_FILES ='
                                 ' {files}\n'.format(
                                     name=self.mk_name,
                                     files=' '.join(license_files)))
            lines.append(license_file_line)
        else:
            print('WARNING: No license file found,'
                  ' please specify it manually afterwards')

        return lines

    def __create_mk_requirements(self):
        """
        Create the lines referring to the dependencies of the
        <package_name>.mk file

        The dependencies are read from pkg_req, which must have been set
        to a list by get_requirements.
        """
        lines = []
        dependencies_line = ('{name}_DEPENDENCIES ='
                             ' {reqs}\n'.format(
                                 name=self.mk_name,
                                 reqs=' '.join(self.pkg_req)))
        lines.append(dependencies_line)
        return lines

    def create_package_mk(self):
        """
        Create the lines corresponding to the <package_name>.mk file
        and write them to <pkg_dir>/<package_name>.mk
        """
        pkg_mk = '{name}.mk'.format(name=self.buildroot_name)
        path_to_mk = os.path.join(self.pkg_dir, pkg_mk)
        print('Creating {file}...'.format(file=path_to_mk))
        lines = self.__create_mk_header()
        lines += self.__create_mk_download_info()
        lines += self.__create_mk_setup()
        lines += self.__create_mk_license()

        lines.append('\n')
        lines.append('$(eval $(python-package))')
        lines.append('\n')
        with open(path_to_mk, 'w') as mk_file:
            mk_file.writelines(lines)

    def create_hash_file(self):
        """
        Create the lines corresponding to the <package_name>.hash files
        and write them to <pkg_dir>/<package_name>.hash

        The archive hashes are only written when PyPI provided both the
        md5 and the sha256 digests; the sha256 of every license file is
        computed locally.
        """
        pkg_hash = '{name}.hash'.format(name=self.buildroot_name)
        path_to_hash = os.path.join(self.pkg_dir, pkg_hash)
        print('Creating {filename}...'.format(filename=path_to_hash))
        lines = []
        # 'digests' is None for non-conventional packages
        digests = self.used_url.get('digests') or {}
        if digests.get('md5') and digests.get('sha256'):
            hash_header = '# md5, sha256 from {url}\n'.format(
                url=self.metadata_url)
            lines.append(hash_header)
            for method in ('md5', 'sha256'):
                hash_line = '{method}\t{digest} {filename}\n'.format(
                    method=method,
                    digest=digests[method],
                    filename=self.filename)
                lines.append(hash_line)

        if self.license_files:
            lines.append('# Locally computed sha256 checksums\n')
        for license_file in self.license_files:
            sha256 = hashlib.sha256()
            with open(license_file, 'rb') as lic_f:
                # Hash by chunks of BUF_SIZE bytes
                for data in iter(lambda: lic_f.read(BUF_SIZE), b''):
                    sha256.update(data)
            hash_line = '{method}\t{digest} {filename}\n'.format(
                method='sha256',
                digest=sha256.hexdigest(),
                filename=os.path.relpath(license_file, self.tmp_extract))
            lines.append(hash_line)

        with open(path_to_hash, 'w') as hash_file:
            hash_file.writelines(lines)

    def create_config_in(self):
        """
        Creates the Config.in file of a package

        The runtime dependencies of pkg_req are selected in sorted order
        (pkg_req is sorted in place). The help text is the PyPI summary
        followed by the home page; each of them is left out when PyPI
        does not provide it.
        """
        path_to_config = os.path.join(self.pkg_dir, 'Config.in')
        print('Creating {file}...'.format(file=path_to_config))
        lines = []
        config_line = 'config BR2_PACKAGE_{name}\n'.format(
            name=self.mk_name)
        lines.append(config_line)

        bool_line = '\tbool "{name}"\n'.format(name=self.buildroot_name)
        lines.append(bool_line)
        if self.pkg_req:
            self.pkg_req.sort()
            for dep in self.pkg_req:
                dep_line = '\tselect BR2_PACKAGE_{req} # runtime\n'.format(
                    req=dep.upper().replace('-', '_'))
                lines.append(dep_line)

        lines.append('\thelp\n')

        # \t + two spaces is 3 char long
        help_indent = '\t  '
        info = self.metadata['info']
        help_lines = textwrap.wrap(info.get('summary') or '', 62,
                                   initial_indent=help_indent,
                                   subsequent_indent=help_indent)

        # make sure a help text is terminated with a full stop
        if help_lines and help_lines[-1][-1] != '.':
            help_lines[-1] += '.'

        home_page = info.get('home_page')
        if home_page:
            help_lines.append('')
            help_lines.append(help_indent + home_page)
        lines += [x + '\n' for x in help_lines]

        with open(path_to_config, 'w') as config_file:
            config_file.writelines(lines)
def main():
    """
    Command line entry point.

    Creates a Buildroot package for every PyPI package named on the
    command line, and for the dependencies found along the way. A
    package which cannot be fetched, downloaded, extracted or loaded is
    reported and skipped.
    """
    # Building the parser
    parser = argparse.ArgumentParser(
        description="Creates buildroot packages from the metadata of "
                    "an existing PyPI packages and include it "
                    "in menuconfig")
    parser.add_argument("packages",
                        help="list of packages to be created",
                        nargs='+')
    parser.add_argument("-o", "--output",
                        help="""
                        Output directory for packages.
                        Default is ./package
                        """,
                        default='./package')

    args = parser.parse_args()
    # Drop duplicates but keep the order given on the command line
    packages = []
    for name in args.packages:
        if name not in packages:
            packages.append(name)

    # tmp_path is where we'll extract the files later
    tmp_prefix = 'scanpypi-'
    pkg_folder = args.output
    tmp_path = tempfile.mkdtemp(prefix=tmp_prefix)
    try:
        # The list grows while it is iterated over: the dependencies found
        # for a package are appended and handled by later iterations.
        for real_pkg_name in packages:
            package = BuildrootPackage(real_pkg_name, pkg_folder)
            print('buildroot package name for {}:'.format(package.real_name),
                  package.buildroot_name)
            # First we download the package
            # Most of the info we need can only be found inside the package
            print('Package:', package.buildroot_name)
            print('Fetching package', package.real_name)
            try:
                package.fetch_package_info()
            except (six.moves.urllib.error.URLError, six.moves.urllib.error.HTTPError):
                continue
            if package.metadata_name.lower() == 'setuptools':
                # setuptools imports itself, that does not work very well
                # with the monkey patch at the beginning
                print('Error: setuptools cannot be built using scanPyPI')
                continue

            try:
                package.download_package()
            except six.moves.urllib.error.HTTPError as error:
                print('Error: {code} {reason}'.format(code=error.code,
                                                      reason=error.reason))
                print('Error downloading package :', package.buildroot_name)
                print()
                continue
            except DownloadFailed as error:
                print('Error: {error}'.format(error=error))
                print('Error downloading package :', package.buildroot_name)
                print()
                continue

            # extract the tarball
            try:
                package.extract_package(tmp_path)
            except (tarfile.ReadError, zipfile.BadZipfile):
                print('Error extracting package {}'.format(package.real_name))
                print()
                continue

            # Loading the package install info from the package
            try:
                package.load_setup()
            except ImportError as err:
                # Exceptions have no 'message' attribute on Python 3
                if 'buildutils' not in str(err):
                    raise
                print('This package needs buildutils')
                continue
            except AttributeError as error:
                print('Error: Could not install package {pkg}: {error}'.format(
                    pkg=package.real_name, error=error))
                continue

            # Package requirement are an argument of the setup function
            req_not_found = package.get_requirements(pkg_folder)
            req_not_found = sorted(req_not_found.difference(packages))

            packages += req_not_found
            if req_not_found:
                print('Added packages \'{pkgs}\' as dependencies of {pkg}'
                      .format(pkgs=", ".join(req_not_found),
                              pkg=package.buildroot_name))
            print('Checking if package {name} already exists...'.format(
                name=package.pkg_dir))
            try:
                os.makedirs(package.pkg_dir)
            except OSError as exception:
                if exception.errno != errno.EEXIST:
                    print("ERROR: ", exception.strerror, file=sys.stderr)
                    continue
                print('Error: Package {name} already exists'
                      .format(name=package.pkg_dir))
                del_pkg = input(
                    'Do you want to delete existing package ? [y/N]')
                if del_pkg.lower() != 'y':
                    continue
                shutil.rmtree(package.pkg_dir)
                os.makedirs(package.pkg_dir)
            package.create_package_mk()

            package.create_hash_file()

            package.create_config_in()
            # printing an empty line for visual comfort
            print()
    finally:
        shutil.rmtree(tmp_path)


if __name__ == "__main__":
    main()