4 Utility for building Buildroot packages for existing PyPI packages
6 Any package built by scanpypi should be manually checked for
9 from __future__ import print_function
25 from functools import wraps
30 import spdx_lookup as liclookup
32 # spdx_lookup is not installed
33 print('spdx_lookup module is not installed. This can lead to an '
34 'inaccurate licence detection. Please install it via\n'
35 'pip install spdx_lookup')
# Build a stand-in for a setup() entry point that records the keyword
# arguments it is called with instead of installing anything.
#   func   -- the setup function being replaced
#   method -- label stored next to the recorded arguments
# NOTE(review): this excerpt skips some original lines (the embedded numbers
# jump from 47 to 51 and stop at 55), so the docstring delimiters, any
# decorator applied to closure and the return statement are not visible here.
39 def setup_decorator(func, method):
41 Decorator for distutils.core.setup and setuptools.setup.
42 Puts the arguments with which setup is called as a dict
43 Add key 'method' which should be either 'setuptools' or 'distutils'.
46 func -- either setuptools.setup or distutils.core.setup
47 method -- either 'setuptools' or 'distutils'
51 def closure(*args, **kwargs):
52 # Any Python package calls its setup function to be installed.
53 # Argument 'name' of this setup function is the package's name
# The kwargs dict itself is stored (keyed by kwargs['name']) and then gains a
# 'method' entry; a call without a 'name' keyword raises KeyError here.
54 BuildrootPackage.setup_args[kwargs['name']] = kwargs
55 BuildrootPackage.setup_args[kwargs['name']]['method'] = method
# Replace the setup() entry points of setuptools and distutils with the
# recording wrappers built by setup_decorator, tagged with the name of the
# packaging method they stand for.
59 import setuptools # noqa E402
60 setuptools.setup = setup_decorator(setuptools.setup, 'setuptools')
61 import distutils # noqa E402
# NOTE(review): the distutils wrapper is built from setuptools.setup (already
# replaced on the line above), not from distutils.core.setup -- confirm this
# is deliberate. Only `distutils` is imported here; distutils.core is
# presumably already loaded as a side effect of importing setuptools.
62 distutils.core.setup = setup_decorator(setuptools.setup, 'distutils')
def find_file_upper_case(filenames, path='./'):
    """
    Recursively find files whose upper-cased name is one of the specified
    filenames.

    This is a generator: it yields the path of every match, built by joining
    the directory being walked (which starts with the path argument) and the
    file name as it is spelled on disk.

    Keyword arguments:
    filenames -- List of upper-case filenames to be found
    path -- Path to the directory to search
    """
    # Build the lookup set once instead of scanning the list for every file.
    wanted = set(filenames)
    for root, _dirs, files in os.walk(path):
        # 'name' rather than 'file', which shadows a builtin on Python 2.
        for name in files:
            if name.upper() in wanted:
                yield os.path.join(root, name)
def pkg_buildroot_name(pkg_name):
    """
    Return the Buildroot package name for the PyPI package pkg_name.

    The name is lower-cased, every character that is neither a word
    character (letter, digit, underscore) nor '-' is removed, and the
    'python-' prefix is added unless the name already starts with it.
    An empty result is returned unchanged (no prefix is added).

    Keyword arguments:
    pkg_name -- String to rename
    """
    prefix = 'python-'
    # Raw string: '\w' in a plain literal is an invalid escape sequence.
    name = re.sub(r'[^\w-]', '', pkg_name.lower())
    # Same effect as substituting '^(?!python-)(.+?)$' with r'python-\1':
    # only non-empty names that lack the prefix are changed.
    if name and not name.startswith(prefix):
        name = prefix + name
    return name
# Error raised by BuildrootPackage.download_package when it gives up on a
# package (presumably when none of the candidate URLs could be used).
# NOTE(review): the class body is not visible in this excerpt.
97 class DownloadFailed(Exception):
# Holds the state for turning one PyPI package into a Buildroot package and
# the step-by-step methods that fill it in and write the output files.
# setup_args is accessed through the class by setup_decorator's closure and
# through self in load_setup. NOTE(review): its definition and the rest of
# this docstring (the ordered list of methods) are not visible in this excerpt.
101 class BuildrootPackage():
102 """This class's methods are not meant to be used individually please
103 use them in the correct order:
# Initialise the state for one package.
#   real_name  -- package name as supplied by the caller
#   pkg_folder -- directory under which the package directory is placed
# Derived here: buildroot_name (via pkg_buildroot_name), pkg_dir
# (pkg_folder joined with buildroot_name) and mk_name (buildroot_name
# upper-cased with '-' replaced by '_'). The attributes set to None or []
# are filled in by the later steps.
# NOTE(review): some original lines are skipped in this excerpt, so further
# attributes may be initialised here.
124 def __init__(self, real_name, pkg_folder):
125 self.real_name = real_name
126 self.buildroot_name = pkg_buildroot_name(self.real_name)
127 self.pkg_dir = os.path.join(pkg_folder, self.buildroot_name)
128 self.mk_name = self.buildroot_name.upper().replace('-', '_')
129 self.as_string = None
132 self.metadata_name = None
133 self.metadata_url = None
135 self.setup_metadata = None
136 self.tmp_extract = None
141 self.license_files = []
# Fetch the package's JSON metadata from PyPI and cache the parts used later.
# Sets self.metadata_url, self.metadata (the parsed JSON document),
# self.version and self.metadata_name (both read from metadata['info']).
# NOTE(review): this excerpt skips some original lines: the `try:` matching
# the handlers below and whatever ends each handler (presumably a re-raise,
# since main() catches both exception types) are not visible.
143 def fetch_package_info(self):
145 Fetch a package's metadata from the python package index
147 self.metadata_url = 'https://pypi.python.org/pypi/{pkg}/json'.format(
150 pkg_json = urllib2.urlopen(self.metadata_url).read().decode()
# HTTP failure: the status line goes to stderr, while the hint that follows
# is printed without file=, i.e. to stdout.
151 except urllib2.HTTPError as error:
152 print('ERROR:', error.getcode(), error.msg, file=sys.stderr)
153 print('ERROR: Could not find package {pkg}.\n'
154 'Check syntax inside the python package index:\n'
155 'https://pypi.python.org/pypi/ '
156 .format(pkg=self.real_name))
# Any other URL error gets the same hint, again on stdout.
158 except urllib2.URLError:
159 print('ERROR: Could not find package {pkg}.\n'
160 'Check syntax inside the python package index:\n'
161 'https://pypi.python.org/pypi/ '
162 .format(pkg=self.real_name))
164 self.metadata = json.loads(pkg_json)
165 self.version = self.metadata['info']['version']
166 self.metadata_name = self.metadata['info']['name']
# Download the package archive described by self.metadata.
# When a download is accepted, self.used_url (the chosen 'urls' entry),
# self.as_string (the downloaded bytes), self.filename and self.url are set.
# Raises DownloadFailed near the end of the method; the guarding condition
# is on lines not shown.
# NOTE(review): several original lines are skipped in this excerpt (the
# try/except/else scaffolding and the statements that leave the loop), so the
# exact control flow cannot be read from here.
168 def download_package(self):
170 Download a package using metadata from pypi
# Probe for a first 'urls' entry carrying a filename; the lines below build a
# substitute 'sdist' entry from info['download_url'] (presumably when the
# probe fails).
173 self.metadata['urls'][0]['filename']
176 'Non-conventional package, ',
177 'please check carefully after creation')
178 self.metadata['urls'] = [{
179 'packagetype': 'sdist',
180 'url': self.metadata['info']['download_url'],
182 # In this case, we can't get the name of the downloaded file
183 # from the pypi api, so we need to find it, this should work
184 urlpath = urllib2.urlparse.urlparse(
185 self.metadata['info']['download_url']).path
186 # urlparse().path give something like
187 # /path/to/file-version.tar.gz
188 # We use basename to remove /path/to
189 self.metadata['urls'][0]['filename'] = os.path.basename(urlpath)
# Try the candidate URLs in order. Entries whose packagetype contains 'bdist'
# are tested for below (presumably to skip binary distributions).
190 for download_url in self.metadata['urls']:
191 if 'bdist' in download_url['packagetype']:
194 print('Downloading package {pkg} from {url}...'.format(
195 pkg=self.real_name, url=download_url['url']))
196 download = urllib2.urlopen(download_url['url'])
# The HTTP error object is kept in `download` so it can be inspected after
# the loop.
197 except urllib2.HTTPError as http_error:
198 download = http_error
200 self.used_url = download_url
201 self.as_string = download.read()
# The md5 of the downloaded bytes is compared with the digest published in
# the metadata when one is present.
202 if not download_url['md5_digest']:
204 self.md5_sum = hashlib.md5(self.as_string).hexdigest()
205 if self.md5_sum == download_url['md5_digest']:
# NOTE(review): exact class comparison, so a subclass of urllib2.HTTPError
# would not match; isinstance() may be what is wanted -- confirm.
208 if download.__class__ == urllib2.HTTPError:
# NOTE(review): 'downloas' in the message below looks like a typo for
# 'download'; it is a runtime string and is left untouched here.
210 raise DownloadFailed('Failed to downloas package {pkg}'
211 .format(pkg=self.real_name))
212 self.filename = self.used_url['filename']
213 self.url = self.used_url['url']
# Unpack the downloaded archive (self.as_string) below tmp_path.
#   tmp_path -- directory under which <buildroot_name>/ is created
# The archive is opened from memory: as a zip file when self.filename ends in
# 'zip', otherwise (presumably in an else branch not shown) through
# tarfile.open. Sets self.tmp_extract to '<folder>/<metadata_name>-<version>'.
# NOTE(review): this excerpt skips some original lines (the `try:` blocks, the
# directory creation they guard and the `folder=` argument are not visible).
215 def extract_package(self, tmp_path):
217 Extract the package contents into a directrory
220 tmp_path -- directory where you want the package to be extracted
222 as_file = StringIO.StringIO(self.as_string)
223 if self.filename[-3:] == 'zip':
224 with zipfile.ZipFile(as_file) as as_zipfile:
225 tmp_pkg = os.path.join(tmp_path, self.buildroot_name)
# An OSError other than EEXIST is reported as an error; for EEXIST a warning
# is printed and the existing directory tree is removed.
228 except OSError as exception:
229 if exception.errno != errno.EEXIST:
230 print("ERROR: ", exception.message, file=sys.stderr)
232 print('WARNING:', exception.message, file=sys.stderr)
233 print('Removing {pkg}...'.format(pkg=tmp_pkg))
234 shutil.rmtree(tmp_pkg)
236 as_zipfile.extractall(tmp_pkg)
# Same sequence for tar archives.
238 with tarfile.open(fileobj=as_file) as as_tarfile:
239 tmp_pkg = os.path.join(tmp_path, self.buildroot_name)
242 except OSError as exception:
243 if exception.errno != errno.EEXIST:
244 print("ERROR: ", exception.message, file=sys.stderr)
246 print('WARNING:', exception.message, file=sys.stderr)
247 print('Removing {pkg}...'.format(pkg=tmp_pkg))
248 shutil.rmtree(tmp_pkg)
250 as_tarfile.extractall(tmp_pkg)
# The extracted tree is expected at <folder>/<name>-<version>, using the
# name and version reported by the PyPI metadata.
252 tmp_extract = '{folder}/{name}-{version}'
253 self.tmp_extract = tmp_extract.format(
255 name=self.metadata_name,
256 version=self.version)
# Import the extracted package's setup.py and keep the arguments it passed
# to setup() in self.setup_metadata (looked up in self.setup_args under
# self.metadata_name).
# The working directory and sys.path are pointed at self.tmp_extract for the
# import and restored at the end of the method.
# NOTE(review): this excerpt skips some original lines (the try/except around
# the first lookup and any finally clause are not visible), so whether the
# restore also runs on failure cannot be told from here.
258 def load_setup(self):
260 Loads the corresponding setup and store its metadata
262 current_dir = os.getcwd()
263 os.chdir(self.tmp_extract)
264 sys.path.append(self.tmp_extract)
# Importing the module runs setup.py's top-level code.
265 s_file, s_path, s_desc = imp.find_module('setup', [self.tmp_extract])
266 setup = imp.load_module('setup', s_file, s_path, s_desc)
268 self.setup_metadata = self.setup_args[self.metadata_name]
270 # This means setup was not called which most likely mean that it is
271 # called through the if __name__ == '__main__' directive.
272 # In this case, we can only pray that it is called through a
273 # function called main() in setup.py.
274 setup.main() # Will raise AttributeError if not found
275 self.setup_metadata = self.setup_args[self.metadata_name]
276 # Here we must remove the module the hard way.
277 # We must do this because of a very specific case: if a package calls
278 # setup from the __main__ but does not come with a 'main()' function,
279 # for some reason setup.main() will successfully call the main
280 # function of a previous package...
281 sys.modules.pop('setup', None)
283 os.chdir(current_dir)
284 sys.path.remove(self.tmp_extract)
# Work out the package's dependencies from setup()'s 'install_requires'.
#   pkg_folder -- location of the already created packages
# Sets self.pkg_req to the Buildroot names of the requirements and collects
# the original names of those requirements for which os.path.isdir() on the
# Buildroot name is false.
# NOTE(review): this excerpt skips some original lines: what happens when
# 'install_requires' is absent, the container built by the final expression
# and the return statement are not visible. main() calls .difference() on the
# result, so it is presumably a set.
286 def get_requirements(self, pkg_folder):
288 Retrieve dependencies from the metadata found in the setup.py script of
292 pkg_folder -- location of the already created packages
294 if 'install_requires' not in self.setup_metadata:
297 self.pkg_req = self.setup_metadata['install_requires']
# Keep only the leading run of '-', '.' and word characters of each
# requirement, which drops version specifiers and extras.
298 self.pkg_req = [re.sub('([-.\w]+).*', r'\1', req)
299 for req in self.pkg_req]
301 # get rid of commented lines and also strip the package strings
302 self.pkg_req = [item.strip() for item in self.pkg_req
303 if len(item) > 0 and item[0] != '#']
305 req_not_found = self.pkg_req
# NOTE(review): on Python 3, map() and zip() return one-shot iterators rather
# than lists; later code iterates self.pkg_req more than once -- confirm the
# target interpreter.
306 self.pkg_req = map(pkg_buildroot_name, self.pkg_req)
307 pkg_tuples = zip(req_not_found, self.pkg_req)
308 # pkg_tuples is a list of tuples that looks like
309 # ('werkzeug','python-werkzeug') because I need both when checking if
310 # dependencies already exist or are already in the download list
# NOTE(review): the isdir() test below uses the bare Buildroot name, i.e. it
# is resolved against the current directory; pkg_folder is not used in the
# visible lines -- confirm.
312 pkg[0] for pkg in pkg_tuples
313 if not os.path.isdir(pkg[1])
# Build the comment banner that opens the .mk file: rows of 80 '#'
# characters around a '# <buildroot_name>' line.
# NOTE(review): this excerpt skips some original lines (further header rows
# and the return statement are not visible); create_package_mk uses the
# result as the initial list of lines.
317 def __create_mk_header(self):
319 Create the header of the <package_name>.mk file
321 header = ['#' * 80 + '\n']
323 header.append('# {name}\n'.format(name=self.buildroot_name))
325 header.append('#' * 80 + '\n')
# Build the <NAME>_VERSION, <NAME>_SOURCE and <NAME>_SITE lines of the .mk
# file from self.version, self.filename and self.url.
# NOTE(review): this excerpt skips some original lines (the initialisation of
# `lines`, several format arguments, the branch bodies around the SITE
# computation and the return statement are not visible).
329 def __create_mk_download_info(self):
331 Create the lines refering to the download information of the
332 <package_name>.mk file
335 version_line = '{name}_VERSION = {version}\n'.format(
337 version=self.version)
338 lines.append(version_line)
# Part of the file name (the replace() target is on a line not shown,
# presumably the version) is swapped for a reference to $(<NAME>_VERSION).
340 targz = self.filename.replace(
342 '$({name}_VERSION)'.format(name=self.mk_name))
343 targz_line = '{name}_SOURCE = {filename}\n'.format(
346 lines.append(targz_line)
348 if self.filename not in self.url:
349 # Sometimes the filename is in the url, sometimes it's not
# When the file name is part of the URL, the site is everything before it.
352 site_url = self.url[:self.url.find(self.filename)]
353 site_line = '{name}_SITE = {url}'.format(name=self.mk_name,
# Trailing slashes are stripped before the newline is added.
355 site_line = site_line.rstrip('/') + '\n'
356 lines.append(site_line)
# Build the <NAME>_SETUP_TYPE line of the .mk file from the 'method' entry
# recorded in self.setup_metadata.
# NOTE(review): this excerpt skips some original lines (the initialisation of
# `lines`, the name= argument and the return statement are not visible).
359 def __create_mk_setup(self):
361 Create the line refering to the setup method of the package of the
362 <package_name>.mk file
364 There are two things you can use to make an installer
365 for a python package: distutils or setuptools
366 distutils comes with python but does not support dependencies.
367 distutils is mostly still there for backward support.
368 setuptools is what smart people use,
369 but it is not shipped with python :(
372 setup_type_line = '{name}_SETUP_TYPE = {method}\n'.format(
374 method=self.setup_metadata['method'])
375 lines.append(setup_type_line)
# Build the <NAME>_LICENSE line of the .mk file.
#   license_files -- paths of the license files found in the package
# Two strategies: when liclookup is None, the license names come from the
# 'License ::' trove classifiers in the PyPI metadata (mapped through a table
# of known names); the loop further down feeds each license file to
# liclookup.match() (presumably the else branch).
# NOTE(review): this excerpt skips some original lines (the dict assignment,
# the else branch header, the initialisation of license_names, some format
# arguments and the return statement are not visible).
378 def __get_license_names(self, license_files):
380 Try to determine the related license name.
382 There are two possibilities. Either the scripts tries to
383 get license name from package's metadata or, if spdx_lookup
384 package is available, the script compares license files with
388 if liclookup is None:
390 'Apache Software License': 'Apache-2.0',
391 'BSD License': 'BSD',
392 'European Union Public Licence 1.0': 'EUPL-1.0',
393 'European Union Public Licence 1.1': 'EUPL-1.1',
394 "GNU General Public License": "GPL",
395 "GNU General Public License v2": "GPL-2.0",
396 "GNU General Public License v2 or later": "GPL-2.0+",
397 "GNU General Public License v3": "GPL-3.0",
398 "GNU General Public License v3 or later": "GPL-3.0+",
399 "GNU Lesser General Public License v2": "LGPL-2.1",
400 "GNU Lesser General Public License v2 or later": "LGPL-2.1+",
401 "GNU Lesser General Public License v3": "LGPL-3.0",
402 "GNU Lesser General Public License v3 or later": "LGPL-3.0+",
403 "GNU Library or Lesser General Public License": "LGPL-2.0",
404 "ISC License": "ISC",
405 "MIT License": "MIT",
406 "Mozilla Public License 1.0": "MPL-1.0",
407 "Mozilla Public License 1.1": "MPL-1.1",
408 "Mozilla Public License 2.0": "MPL-2.0",
409 "Zope Public License": "ZPL"
# Capture the last ' :: '-separated field of each 'License ::' classifier.
411 regexp = re.compile('^License :* *.* *:+ (.*)( \(.*\))?$')
412 classifiers_licenses = [regexp.sub(r"\1", lic)
413 for lic in self.metadata['info']['classifiers']
414 if regexp.match(lic)]
# Known classifier names are translated; unknown ones are kept as they are.
415 licenses = map(lambda x: license_dict[x] if x in license_dict else x,
416 classifiers_licenses)
# NOTE(review): len() on the result of map() relies on map() returning a list
# (Python 2 behaviour). The warning below formats `licenses` while it is
# still empty, before the fallback from metadata['info']['license'] is
# assigned, so it appears to always print an empty name -- confirm.
417 if not len(licenses):
418 print('WARNING: License has been set to "{license}". It is most'
419 ' likely wrong, please change it if need be'.format(
420 license=', '.join(licenses)))
421 licenses = [self.metadata['info']['license']]
422 license_line = '{name}_LICENSE = {license}\n'.format(
424 license=', '.join(licenses))
# File-based detection: only matches with a confidence of at least 90 are
# kept.
# NOTE(review): match.confidence is read without a None check; confirm that
# liclookup.match() cannot return None for an unrecognised text.
427 for license_file in license_files:
428 with open(license_file) as lic_file:
429 match = liclookup.match(lic_file.read())
430 if match.confidence >= 90.0:
431 license_names.append(match.license.id)
433 if len(license_names) > 0:
434 license_line = ('{name}_LICENSE ='
437 names=', '.join(license_names)))
# Build the license-related lines of the .mk file: the line returned by
# __get_license_names followed by a <NAME>_LICENSE_FILES line, or a
# '# No license file found' placeholder.
# Sets self.license_files to the paths found under self.tmp_extract whose
# upper-cased name is in the list below.
# NOTE(review): this excerpt skips some original lines (the initialisation of
# `lines`, some format arguments, the else header and the return statement
# are not visible).
441 def __create_mk_license(self):
443 Create the lines referring to the package's license informations of the
444 <package_name>.mk file
446 The license's files are found by searching the package (case insensitive)
447 for files named license, license.txt etc. If more than one license file
448 is found, the user is asked to select which ones he wants to use.
452 filenames = ['LICENCE', 'LICENSE', 'LICENSE.RST', 'LICENSE.TXT',
453 'COPYING', 'COPYING.TXT']
454 self.license_files = list(find_file_upper_case(filenames, self.tmp_extract))
456 lines.append(self.__get_license_names(self.license_files))
# Make the paths relative to the extracted tree: drop the tmp_extract prefix
# and the separator that follows it.
458 license_files = [license.replace(self.tmp_extract, '')[1:]
459 for license in self.license_files]
460 if len(license_files) > 0:
461 if len(license_files) > 1:
462 print('More than one file found for license:',
463 ', '.join(license_files))
# NOTE(review): the comprehension below keeps every file name unchanged, so in
# the visible code all license files are used and nothing is asked of the
# user, unlike what the docstring says.
464 license_files = [filename
465 for index, filename in enumerate(license_files)]
466 license_file_line = ('{name}_LICENSE_FILES ='
469 files=' '.join(license_files)))
470 lines.append(license_file_line)
472 print('WARNING: No license file found,'
473 ' please specify it manually afterwards')
474 license_file_line = '# No license file found\n'
# Build the <NAME>_DEPENDENCIES line of the .mk file from self.pkg_req,
# joined with single spaces.
# NOTE(review): this excerpt skips some original lines (the initialisation of
# `lines`, part of the format call and the return statement are not visible).
# The docstring names pkg_name and pkg_req, but the visible signature takes
# only self.
478 def __create_mk_requirements(self):
480 Create the lines referring to the dependencies of the of the
481 <package_name>.mk file
484 pkg_name -- name of the package
485 pkg_req -- dependencies of the package
488 dependencies_line = ('{name}_DEPENDENCIES ='
491 reqs=' '.join(self.pkg_req)))
492 lines.append(dependencies_line)
# Write <pkg_dir>/<buildroot_name>.mk.
# The content is assembled in order from the header, download-info, setup and
# license helpers, and closed with the '$(eval $(python-package))' line.
# NOTE(review): this excerpt skips some original lines between the license
# helper and the final append, so further content may be added there.
495 def create_package_mk(self):
497 Create the lines corresponding to the <package_name>.mk file
499 pkg_mk = '{name}.mk'.format(name=self.buildroot_name)
500 path_to_mk = os.path.join(self.pkg_dir, pkg_mk)
501 print('Creating {file}...'.format(file=path_to_mk))
502 lines = self.__create_mk_header()
503 lines += self.__create_mk_download_info()
504 lines += self.__create_mk_setup()
505 lines += self.__create_mk_license()
508 lines.append('$(eval $(python-package))')
# The file is opened with 'w', so an existing .mk file is overwritten.
510 with open(path_to_mk, 'w') as mk_file:
511 mk_file.writelines(lines)
# Write <pkg_dir>/<buildroot_name>.hash.
# Contents: when the chosen download entry has an md5_digest, a comment line
# and a hash line carrying that digest; a sha256 computed locally over
# self.as_string; and one sha256 line per file in self.license_files.
# NOTE(review): this excerpt skips some original lines (the initialisation of
# `lines`, the method= arguments, the read loop and BUF_SIZE's definition are
# not visible).
513 def create_hash_file(self):
515 Create the lines corresponding to the <package_name>.hash files
517 pkg_hash = '{name}.hash'.format(name=self.buildroot_name)
518 path_to_hash = os.path.join(self.pkg_dir, pkg_hash)
519 print('Creating {filename}...'.format(filename=path_to_hash))
521 if self.used_url['md5_digest']:
522 md5_comment = '# md5 from {url}, sha256 locally computed\n'.format(
523 url=self.metadata_url)
524 lines.append(md5_comment)
525 hash_line = '{method}\t{digest} {filename}\n'.format(
527 digest=self.used_url['md5_digest'],
528 filename=self.filename)
529 lines.append(hash_line)
# sha256 of the downloaded archive bytes.
530 digest = hashlib.sha256(self.as_string).hexdigest()
531 hash_line = '{method}\t{digest} {filename}\n'.format(
534 filename=self.filename)
535 lines.append(hash_line)
# License files are opened in binary mode and read BUF_SIZE bytes at a time.
537 for license_file in self.license_files:
538 sha256 = hashlib.sha256()
539 with open(license_file, 'rb') as lic_f:
541 data = lic_f.read(BUF_SIZE)
# NOTE(review): the entry uses only the base name of the license file, while
# __create_mk_license lists paths relative to the extracted tree -- confirm
# the two agree for license files in subdirectories.
545 hash_line = '{method}\t{digest} {filename}\n'.format(
547 digest=sha256.hexdigest(),
548 filename=os.path.basename(license_file))
549 lines.append(hash_line)
551 with open(path_to_hash, 'w') as hash_file:
552 hash_file.writelines(lines)
# Write <pkg_dir>/Config.in.
# Contents: the 'config BR2_PACKAGE_<...>' line, a bool prompt with the
# Buildroot name, one 'select' line per entry of self.pkg_req, and a help
# section built from the PyPI summary (wrapped) followed by the home page.
# NOTE(review): this excerpt skips some original lines (the initialisation of
# `lines`, the name= argument and the statement that adds help_lines to
# `lines` are not visible).
554 def create_config_in(self):
556 Creates the Config.in file of a package
558 path_to_config = os.path.join(self.pkg_dir, 'Config.in')
559 print('Creating {file}...'.format(file=path_to_config))
561 config_line = 'config BR2_PACKAGE_{name}\n'.format(
563 lines.append(config_line)
565 bool_line = '\tbool "{name}"\n'.format(name=self.buildroot_name)
566 lines.append(bool_line)
# Dependency names become config symbols: upper-cased, '-' replaced by '_'.
568 for dep in self.pkg_req:
569 dep_line = '\tselect BR2_PACKAGE_{req} # runtime\n'.format(
570 req=dep.upper().replace('-', '_'))
571 lines.append(dep_line)
573 lines.append('\thelp\n')
575 help_lines = textwrap.wrap(self.metadata['info']['summary'],
576 initial_indent='\t ',
577 subsequent_indent='\t ')
# NOTE(review): textwrap.wrap() returns an empty list for an empty summary,
# in which case help_lines[-1] below raises IndexError -- confirm whether an
# empty summary can occur.
579 # make sure a help text is terminated with a full stop
580 if help_lines[-1][-1] != '.':
581 help_lines[-1] += '.'
583 # \t + two spaces is 3 char long
584 help_lines.append('')
585 help_lines.append('\t ' + self.metadata['info']['home_page'])
586 help_lines = map(lambda x: x + '\n', help_lines)
589 with open(path_to_config, 'w') as config_file:
590 config_file.writelines(lines)
# Command-line driver: parse the arguments, then for each requested package
# fetch its metadata, download and extract it, load its setup.py, resolve its
# requirements and write the .mk, .hash and Config.in files.
# NOTE(review): the enclosing `def` line and many statements (try: headers,
# the statements ending the error branches, the input prompt call) are not
# part of this excerpt, so the control flow can only be read in outline.
594 # Building the parser
595 parser = argparse.ArgumentParser(
596 description="Creates buildroot packages from the metadata of "
597 "an existing PyPI packages and include it "
599 parser.add_argument("packages",
600 help="list of packages to be created",
602 parser.add_argument("-o", "--output",
604 Output directory for packages.
609 args = parser.parse_args()
# Duplicated package names on the command line are collapsed.
610 packages = list(set(args.packages))
612 # tmp_path is where we'll extract the files later
613 tmp_prefix = 'scanpypi-'
614 pkg_folder = args.output
615 tmp_path = tempfile.mkdtemp(prefix=tmp_prefix)
# `packages` is extended further down with requirements that were not found,
# so (assuming that statement sits inside this loop) newly discovered
# dependencies are processed by the same loop.
617 for real_pkg_name in packages:
618 package = BuildrootPackage(real_pkg_name, pkg_folder)
619 print('buildroot package name for {}:'.format(package.real_name),
620 package.buildroot_name)
621 # First we download the package
622 # Most of the info we need can only be found inside the package
623 print('Package:', package.buildroot_name)
624 print('Fetching package', package.real_name)
626 package.fetch_package_info()
627 except (urllib2.URLError, urllib2.HTTPError):
629 if package.metadata_name.lower() == 'setuptools':
630 # setuptools imports itself, that does not work very well
631 # with the monkey patch at the beginning
632 print('Error: setuptools cannot be built using scanPyPI')
636 package.download_package()
637 except urllib2.HTTPError as error:
638 print('Error: {code} {reason}'.format(code=error.code,
639 reason=error.reason))
640 print('Error downloading package :', package.buildroot_name)
644 # extract the tarball
646 package.extract_package(tmp_path)
647 except (tarfile.ReadError, zipfile.BadZipfile):
648 print('Error extracting package {}'.format(package.real_name))
652 # Loading the package install info from the package
655 except ImportError as err:
656 if 'buildutils' in err.message:
657 print('This package needs buildutils')
661 except AttributeError as error:
662 print('Error: Could not install package {pkg}: {error}'.format(
663 pkg=package.real_name, error=error))
666 # Package requirement are an argument of the setup function
667 req_not_found = package.get_requirements(pkg_folder)
# Requirements that are already queued are dropped before the rest is
# appended to the work list.
668 req_not_found = req_not_found.difference(packages)
670 packages += req_not_found
672 print('Added packages \'{pkgs}\' as dependencies of {pkg}'
673 .format(pkgs=", ".join(req_not_found),
674 pkg=package.buildroot_name))
675 print('Checking if package {name} already exists...'.format(
676 name=package.pkg_dir))
678 os.makedirs(package.pkg_dir)
# An existing package directory is only replaced after the user answers 'y'
# (case-insensitive) to the prompt.
679 except OSError as exception:
680 if exception.errno != errno.EEXIST:
681 print("ERROR: ", exception.message, file=sys.stderr)
683 print('Error: Package {name} already exists'
684 .format(name=package.pkg_dir))
686 'Do you want to delete existing package ? [y/N]')
687 if del_pkg.lower() == 'y':
688 shutil.rmtree(package.pkg_dir)
689 os.makedirs(package.pkg_dir)
692 package.create_package_mk()
694 package.create_hash_file()
696 package.create_config_in()
698 # printing an empty line for visual comfort
# The temporary extraction directory created above is removed.
700 shutil.rmtree(tmp_path)
# Script entry guard.
# NOTE(review): the guarded statement is not visible in this excerpt;
# presumably it calls main().
703 if __name__ == "__main__":