# wheelfile.py
  1. from __future__ import print_function
  2. import csv
  3. import hashlib
  4. import os.path
  5. import re
  6. import stat
  7. import sys
  8. import time
  9. from collections import OrderedDict
  10. from distutils import log as logger
  11. from zipfile import ZIP_DEFLATED, ZipInfo, ZipFile
  12. from wheel.cli import WheelError
  13. from wheel.util import urlsafe_b64decode, as_unicode, native, urlsafe_b64encode, as_bytes, StringIO
  14. if sys.version_info >= (3,):
  15. from io import TextIOWrapper
  16. def read_csv(fp):
  17. return csv.reader(TextIOWrapper(fp, newline='', encoding='utf-8'))
  18. else:
  19. def read_csv(fp):
  20. for line in csv.reader(fp):
  21. yield [column.decode('utf-8') for column in line]
# Parses wheel filenames of the form
# {name}-{version}(-{build})?-{python tag}-{abi tag}-{platform tag}.whl
# into named groups (namever, name, ver, build, pyver, abi, plat).
# Non-greedy matching of an optional build number may be too clever (more
# invalid wheel filenames will match). Separate regex for .dist-info?
WHEEL_INFO_RE = re.compile(
    r"""^(?P<namever>(?P<name>.+?)-(?P<ver>.+?))(-(?P<build>\d[^-]*))?
     -(?P<pyver>.+?)-(?P<abi>.+?)-(?P<plat>.+?)\.whl$""",
    re.VERBOSE)
  28. def get_zipinfo_datetime(timestamp=None):
  29. # Some applications need reproducible .whl files, but they can't do this without forcing
  30. # the timestamp of the individual ZipInfo objects. See issue #143.
  31. timestamp = int(os.environ.get('SOURCE_DATE_EPOCH', timestamp or time.time()))
  32. return time.gmtime(timestamp)[0:6]
class WheelFile(ZipFile):
    """A ZipFile derivative class that also reads SHA-256 hashes from
    .dist-info/RECORD and checks any read files against those.
    """

    # Hash constructor used for files added in write mode; PEP 427 forbids
    # md5/sha1, and reads below reject them too.
    _default_algorithm = hashlib.sha256

    def __init__(self, file, mode='r', compression=ZIP_DEFLATED):
        # *file* must be a path whose basename is a valid wheel filename;
        # the parsed name locates the .dist-info directory inside the archive.
        basename = os.path.basename(file)
        self.parsed_filename = WHEEL_INFO_RE.match(basename)
        if not basename.endswith('.whl') or self.parsed_filename is None:
            raise WheelError("Bad wheel filename {!r}".format(basename))

        ZipFile.__init__(self, file, mode, compression=compression, allowZip64=True)

        self.dist_info_path = '{}.dist-info'.format(self.parsed_filename.group('namever'))
        self.record_path = self.dist_info_path + '/RECORD'
        # Maps archive name -> (algorithm name, raw digest bytes); a value of
        # (None, None) means "present but exempt from verification".
        self._file_hashes = OrderedDict()
        self._file_sizes = {}
        if mode == 'r':
            # Ignore RECORD and any embedded wheel signatures
            self._file_hashes[self.record_path] = None, None
            self._file_hashes[self.record_path + '.jws'] = None, None
            self._file_hashes[self.record_path + '.p7s'] = None, None

            # Fill in the expected hashes by reading them from RECORD
            try:
                record = self.open(self.record_path)
            except KeyError:
                raise WheelError('Missing {} file'.format(self.record_path))

            with record:
                for line in read_csv(record):
                    path, hash_sum, size = line
                    if not hash_sum:
                        # RECORD rows without a hash are simply not verified
                        continue

                    algorithm, hash_sum = hash_sum.split(u'=')
                    try:
                        hashlib.new(algorithm)
                    except ValueError:
                        raise WheelError('Unsupported hash algorithm: {}'.format(algorithm))

                    if algorithm.lower() in {'md5', 'sha1'}:
                        raise WheelError(
                            'Weak hash algorithm ({}) is not permitted by PEP 427'
                            .format(algorithm))

                    # RECORD stores urlsafe-base64 (unpadded) digests; decode
                    # to raw bytes so open() can compare digest() directly.
                    self._file_hashes[path] = (
                        algorithm, urlsafe_b64decode(hash_sum.encode('ascii')))

    def open(self, name_or_info, mode="r", pwd=None):
        """Open an archive member, verifying its hash against RECORD on read.

        Raises WheelError when a read member has no RECORD entry, or when
        its content does not match the recorded digest at EOF.
        """
        def _update_crc(newdata, eof=None):
            # Replacement for ZipExtFile._update_crc: feeds every decompressed
            # chunk into running_hash and, once EOF is reached, compares the
            # digest with the value taken from RECORD.
            if eof is None:
                # Python 3 signature: EOF state lives on the ZipExtFile
                eof = ef._eof
                update_crc_orig(newdata)
            else:  # Python 2
                update_crc_orig(newdata, eof)

            running_hash.update(newdata)
            if eof and running_hash.digest() != expected_hash:
                raise WheelError("Hash mismatch for file '{}'".format(native(ef_name)))

        ef_name = as_unicode(name_or_info.filename if isinstance(name_or_info, ZipInfo)
                             else name_or_info)
        # Directories (trailing '/') carry no hash; anything else must be listed
        if mode == 'r' and not ef_name.endswith('/') and ef_name not in self._file_hashes:
            raise WheelError("No hash found for file '{}'".format(native(ef_name)))

        ef = ZipFile.open(self, name_or_info, mode, pwd)
        if mode == 'r' and not ef_name.endswith('/'):
            algorithm, expected_hash = self._file_hashes[ef_name]
            if expected_hash is not None:
                # Monkey patch the _update_crc method to also check for the hash from RECORD
                running_hash = hashlib.new(algorithm)
                update_crc_orig, ef._update_crc = ef._update_crc, _update_crc

        return ef

    def write_files(self, base_dir):
        """Add all files under *base_dir* to the archive, deterministically
        ordered, writing .dist-info contents last (RECORD itself excluded)."""
        logger.info("creating '%s' and adding '%s' to it", self.filename, base_dir)
        deferred = []
        for root, dirnames, filenames in os.walk(base_dir):
            # Sort the directory names so that `os.walk` will walk them in a
            # defined order on the next iteration.
            dirnames.sort()
            for name in sorted(filenames):
                path = os.path.normpath(os.path.join(root, name))
                if os.path.isfile(path):
                    arcname = os.path.relpath(path, base_dir).replace(os.path.sep, '/')
                    if arcname == self.record_path:
                        # RECORD is regenerated in close(); skip any stale copy
                        pass
                    elif root.endswith('.dist-info'):
                        deferred.append((path, arcname))
                    else:
                        self.write(path, arcname)

        deferred.sort()
        for path, arcname in deferred:
            self.write(path, arcname)

    def write(self, filename, arcname=None, compress_type=None):
        """Add *filename* to the archive, preserving its permission and file
        type bits and its mtime (subject to SOURCE_DATE_EPOCH)."""
        with open(filename, 'rb') as f:
            st = os.fstat(f.fileno())
            data = f.read()

        zinfo = ZipInfo(arcname or filename, date_time=get_zipinfo_datetime(st.st_mtime))
        # Unix mode/type bits live in the high 16 bits of external_attr
        zinfo.external_attr = (stat.S_IMODE(st.st_mode) | stat.S_IFMT(st.st_mode)) << 16
        zinfo.compress_type = compress_type or self.compression
        self.writestr(zinfo, data, compress_type)

    def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
        """Write *bytes* to the archive and record its hash and size so that
        close() can emit a matching RECORD entry."""
        ZipFile.writestr(self, zinfo_or_arcname, bytes, compress_type)
        fname = (zinfo_or_arcname.filename if isinstance(zinfo_or_arcname, ZipInfo)
                 else zinfo_or_arcname)
        logger.info("adding '%s'", fname)
        if fname != self.record_path:
            hash_ = self._default_algorithm(bytes)
            self._file_hashes[fname] = hash_.name, native(urlsafe_b64encode(hash_.digest()))
            self._file_sizes[fname] = len(bytes)

    def close(self):
        # Write RECORD
        if self.fp is not None and self.mode == 'w' and self._file_hashes:
            data = StringIO()
            writer = csv.writer(data, delimiter=',', quotechar='"', lineterminator='\n')
            writer.writerows((
                (
                    fname,
                    algorithm + "=" + hash_,
                    self._file_sizes[fname]
                )
                for fname, (algorithm, hash_) in self._file_hashes.items()
            ))
            # RECORD lists itself with empty hash and size fields
            writer.writerow((format(self.record_path), "", ""))
            zinfo = ZipInfo(native(self.record_path), date_time=get_zipinfo_datetime())
            zinfo.compress_type = self.compression
            zinfo.external_attr = 0o664 << 16
            self.writestr(zinfo, as_bytes(data.getvalue()))

        ZipFile.close(self)