_collections.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355
  1. from __future__ import absolute_import
  2. try:
  3. from collections.abc import Mapping, MutableMapping
  4. except ImportError:
  5. from collections import Mapping, MutableMapping
  try:
      from threading import RLock
  except ImportError:  # Platform-specific: No threads available

      class RLock:
          """Do-nothing stand-in used when ``threading`` cannot be imported.

          It only implements the context-manager protocol, so ``with RLock():``
          keeps working; entering and leaving the block perform no locking.
          """

          def __enter__(self):
              # Nothing to acquire.
              return None

          def __exit__(self, exc_type, exc_value, traceback):
              # Nothing to release; returning None lets exceptions propagate.
              return None
  14. from collections import OrderedDict
  15. from .exceptions import InvalidHeader
  16. from .packages import six
  17. from .packages.six import iterkeys, itervalues
  18. __all__ = ["RecentlyUsedContainer", "HTTPHeaderDict"]
  # Sentinel meaning "nothing was evicted"; distinct from every stored value,
  # including None.
  _Null = object()


  class RecentlyUsedContainer(MutableMapping):
      """
      Provides a thread-safe dict-like container which maintains up to
      ``maxsize`` keys while throwing away the least-recently-used keys beyond
      ``maxsize``.

      Both reading a key (``container[key]``) and writing a key
      (``container[key] = value``) mark that key as the most recently used.

      :param maxsize:
          Maximum number of recent elements to retain.

      :param dispose_func:
          Optional callback. Every time a value leaves the container (it is
          evicted, overwritten, deleted or removed by ``clear()``),
          ``dispose_func(value)`` is called with that value.
      """

      ContainerCls = OrderedDict

      def __init__(self, maxsize=10, dispose_func=None):
          self._maxsize = maxsize
          self.dispose_func = dispose_func

          self._container = self.ContainerCls()
          self.lock = RLock()

      def __getitem__(self, key):
          """Return the value for ``key`` and mark it as most recently used.

          Raises ``KeyError`` if ``key`` is not present.
          """
          # Re-insert the item, moving it to the end of the eviction line.
          with self.lock:
              item = self._container.pop(key)
              self._container[key] = item
              return item

      def __setitem__(self, key, value):
          """Store ``value`` under ``key`` and mark it as most recently used.

          If ``key`` was already present, its previous value is disposed of.
          Otherwise, if the insertion pushes the container past ``maxsize``,
          the least recently used entry is evicted and disposed of.
          """
          evicted_value = _Null
          with self.lock:
              try:
                  # Assigning to an existing OrderedDict key keeps its old
                  # position, so pop it first; re-inserting below then places
                  # it at the most-recently-used end, just like __getitem__.
                  evicted_value = self._container.pop(key)
              except KeyError:
                  # New key: insert first so eviction also works when
                  # maxsize is 0, then drop the oldest entry if over capacity.
                  self._container[key] = value
                  if len(self._container) > self._maxsize:
                      _key, evicted_value = self._container.popitem(last=False)
              else:
                  self._container[key] = value

          # Run the callback only after the lock has been released.
          if self.dispose_func and evicted_value is not _Null:
              self.dispose_func(evicted_value)

      def __delitem__(self, key):
          """Remove ``key`` and dispose of its value.

          Raises ``KeyError`` if ``key`` is not present.
          """
          with self.lock:
              value = self._container.pop(key)

          if self.dispose_func:
              self.dispose_func(value)

      def __len__(self):
          """Return the number of entries currently stored."""
          with self.lock:
              return len(self._container)

      def __iter__(self):
          """Iteration is deliberately unsupported; use ``keys()`` instead."""
          raise NotImplementedError(
              "Iteration over this class is unlikely to be threadsafe."
          )

      def clear(self):
          """Remove every entry, then dispose of each removed value."""
          with self.lock:
              # Copy pointers to all values, then wipe the mapping
              values = list(self._container.values())
              self._container.clear()

          if self.dispose_func:
              for value in values:
                  self.dispose_func(value)

      def keys(self):
          """Return a list snapshot of the keys, least recently used first."""
          with self.lock:
              return list(self._container)
  class HTTPHeaderDict(MutableMapping):
      """
      A ``dict`` like container for storing HTTP Headers.

      Field names are stored and compared case-insensitively in compliance with
      RFC 7230. Iteration provides the first case-sensitive key seen for each
      case-insensitive pair.

      Using ``__setitem__`` syntax overwrites fields that compare equal
      case-insensitively in order to maintain ``dict``'s api. For fields that
      compare equal, instead create a new ``HTTPHeaderDict`` and use ``.add``
      in a loop.

      If multiple fields that are equal case-insensitively are passed to the
      constructor or ``.update``, the behavior is undefined and some will be
      lost.

      :param headers:
          An iterable of field-value pairs. Must not contain multiple field names
          when compared case-insensitively.

      :param kwargs:
          Additional field-value pairs to pass in to ``dict.update``.

      >>> headers = HTTPHeaderDict()
      >>> headers.add('Set-Cookie', 'foo=bar')
      >>> headers.add('set-cookie', 'baz=quxx')
      >>> headers['content-length'] = '7'
      >>> headers['SET-cookie']
      'foo=bar, baz=quxx'
      >>> headers['Content-Length']
      '7'
      """

      def __init__(self, headers=None, **kwargs):
          super(HTTPHeaderDict, self).__init__()
          # Maps lower-cased field name -> [original name, value1, value2, ...]
          self._container = OrderedDict()
          if isinstance(headers, HTTPHeaderDict):
              self._copy_from(headers)
          elif headers is not None:
              self.extend(headers)
          if kwargs:
              self.extend(kwargs)

      def __setitem__(self, key, val):
          """Replace every value stored for ``key`` with the single ``val``."""
          folded = key.lower()
          self._container[folded] = [key, val]
          return self._container[folded]

      def __getitem__(self, key):
          """Return all values stored for ``key`` joined with ``", "``."""
          entry = self._container[key.lower()]
          return ", ".join(entry[1:])

      def __delitem__(self, key):
          """Remove the field ``key`` (case-insensitive)."""
          del self._container[key.lower()]

      def __contains__(self, key):
          """Report whether a field named ``key`` is stored (case-insensitive)."""
          folded = key.lower()
          return folded in self._container

      def __eq__(self, other):
          """Compare merged headers, ignoring the case of field names."""
          if not (isinstance(other, Mapping) or hasattr(other, "keys")):
              return False
          if not isinstance(other, type(self)):
              # Wrap foreign mappings so both sides expose itermerged().
              other = type(self)(other)
          mine = dict((name.lower(), value) for name, value in self.itermerged())
          theirs = dict((name.lower(), value) for name, value in other.itermerged())
          return mine == theirs

      def __ne__(self, other):
          return not self.__eq__(other)

      if six.PY2:  # Python 2
          iterkeys = MutableMapping.iterkeys
          itervalues = MutableMapping.itervalues

      # Private "argument not supplied" sentinel for pop() and getlist().
      __marker = object()

      def __len__(self):
          """Return the number of distinct (case-insensitive) field names."""
          return len(self._container)

      def __iter__(self):
          """Yield each field name using the casing it was first stored with."""
          # Only provide the originally cased names
          for entry in self._container.values():
              yield entry[0]

      def pop(self, key, default=__marker):
          """D.pop(k[,d]) -> v, remove specified key and return the corresponding value.
          If key is not found, d is returned if given, otherwise KeyError is raised.
          """
          # Using the MutableMapping function directly fails due to the private marker.
          # Using ordinary dict.pop would expose the internal structures.
          # So let's reinvent the wheel.
          try:
              value = self[key]
          except KeyError:
              if default is not self.__marker:
                  return default
              raise
          del self[key]
          return value

      def discard(self, key):
          """Remove ``key`` if it is present; do nothing otherwise."""
          try:
              del self[key]
          except KeyError:
              return

      def add(self, key, val):
          """Adds a (name, value) pair, doesn't overwrite the value if it already
          exists.

          >>> headers = HTTPHeaderDict(foo='bar')
          >>> headers.add('Foo', 'baz')
          >>> headers['foo']
          'bar, baz'
          """
          folded = key.lower()
          fresh_entry = [key, val]
          # Keep the common case aka no item present as fast as possible
          stored_entry = self._container.setdefault(folded, fresh_entry)
          if stored_entry is not fresh_entry:
              # The field already existed: append to its value list.
              stored_entry.append(val)

      def extend(self, *args, **kwargs):
          """Generic import function for any type of header-like object.
          Adapted version of MutableMapping.update in order to insert items
          with self.add instead of self.__setitem__
          """
          if len(args) > 1:
              raise TypeError(
                  "extend() takes at most 1 positional "
                  "arguments ({0} given)".format(len(args))
              )
          other = args[0] if args else ()

          # Normalise every supported input shape to a stream of (name, value).
          if isinstance(other, HTTPHeaderDict):
              pairs = other.iteritems()
          elif isinstance(other, Mapping):
              pairs = ((name, other[name]) for name in other)
          elif hasattr(other, "keys"):
              pairs = ((name, other[name]) for name in other.keys())
          else:
              pairs = other

          for name, value in pairs:
              self.add(name, value)

          for name, value in kwargs.items():
              self.add(name, value)

      def getlist(self, key, default=__marker):
          """Returns a list of all the values for the named field. Returns an
          empty list if the key doesn't exist, or ``default`` when one is given."""
          try:
              entry = self._container[key.lower()]
          except KeyError:
              return [] if default is self.__marker else default
          return entry[1:]

      def _prepare_for_method_change(self):
          """
          Remove content-specific header fields before changing the request
          method to GET or HEAD according to RFC 9110, Section 15.4.

          Returns ``self``.
          """
          for name in (
              "Content-Encoding",
              "Content-Language",
              "Content-Location",
              "Content-Type",
              "Content-Length",
              "Digest",
              "Last-Modified",
          ):
              self.discard(name)
          return self

      # Backwards compatibility for httplib
      getheaders = getlist
      getallmatchingheaders = getlist
      iget = getlist

      # Backwards compatibility for http.cookiejar
      get_all = getlist

      def __repr__(self):
          merged = dict(self.itermerged())
          return "%s(%s)" % (type(self).__name__, merged)

      def _copy_from(self, other):
          """Copy every field of ``other`` into this instance's storage."""
          for name in other:
              values = other.getlist(name)
              if isinstance(values, list):
                  # Don't need to convert tuples
                  values = list(values)
              self._container[name.lower()] = [name] + values

      def copy(self):
          """Return a new instance of the same type holding the same fields."""
          duplicate = type(self)()
          duplicate._copy_from(self)
          return duplicate

      def iteritems(self):
          """Iterate over all header lines, including duplicate ones."""
          for name in self:
              entry = self._container[name.lower()]
              for value in entry[1:]:
                  yield entry[0], value

      def itermerged(self):
          """Iterate over all headers, merging duplicate ones together."""
          for name in self:
              entry = self._container[name.lower()]
              yield entry[0], ", ".join(entry[1:])

      def items(self):
          """Return ``iteritems()`` materialised as a list."""
          return list(self.iteritems())

      @classmethod
      def from_httplib(cls, message):  # Python 2
          """Read headers from a Python 2 httplib message object."""
          # python2.7 does not expose a proper API for exporting multiheaders
          # efficiently. This function re-reads raw lines from the message
          # object and extracts the multiheaders properly.
          obs_fold_continued_leaders = (" ", "\t")
          parsed = []

          for line in message.headers:
              if not line.startswith(obs_fold_continued_leaders):
                  name, value = line.split(":", 1)
                  parsed.append((name, value.strip()))
                  continue

              if not parsed:
                  # We received a header line that starts with OWS as described
                  # in RFC-7230 S3.2.4. This indicates a multiline header, but
                  # there exists no previous header to which we can attach it.
                  raise InvalidHeader(
                      "Header continuation with no previous header: %s" % line
                  )

              # Fold the continuation line into the previous header's value.
              name, value = parsed[-1]
              parsed[-1] = (name, value + " " + line.strip())

          return cls(parsed)