__init__.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305
  1. import string
  2. from collections import OrderedDict
  3. from typing import Dict, Optional
  4. from .. import BaseProvider, ElementsType
  5. class Provider(BaseProvider):
  6. """Implement default file provider for Faker."""
  7. application_mime_types: ElementsType = (
  8. "application/atom+xml", # Atom feeds
  9. "application/ecmascript",
  10. # ECMAScript/JavaScript; Defined in RFC 4329 (equivalent to
  11. # application/javascript but with stricter processing rules)
  12. "application/EDI-X12", # EDI X12 data; Defined in RFC 1767
  13. "application/EDIFACT", # EDI EDIFACT data; Defined in RFC 1767
  14. "application/json", # JavaScript Object Notation JSON; Defined in RFC 4627
  15. # ECMAScript/JavaScript; Defined in RFC 4329 (equivalent to
  16. # application/ecmascript
  17. "application/javascript",
  18. # but with looser processing rules) It is not accepted in IE 8
  19. # or earlier - text/javascript is accepted but it is defined as obsolete in RFC 4329.
  20. # The "type" attribute of the <script> tag in HTML5 is optional and in practice
  21. # omitting the media type of JavaScript programs is the most interoperable
  22. # solution since all browsers have always assumed the correct
  23. # default even before HTML5.
  24. "application/octet-stream",
  25. # Arbitrary binary data.[6] Generally speaking this type identifies files that are not associated with
  26. # a specific application. Contrary to past assumptions by software packages such as Apache this is not
  27. # a type that should be applied to unknown files. In such a case, a server or application should not indicate
  28. # a content type, as it may be incorrect, but rather, should omit the type in order to allow the recipient
  29. # to guess the type.[7]
  30. "application/ogg", # Ogg, a multimedia bitstream container format; Defined in RFC 5334
  31. "application/pdf", # Portable Document Format, PDF has been in use for document exchange
  32. # on the Internet since 1993; Defined in RFC 3778
  33. "application/postscript", # PostScript; Defined in RFC 2046
  34. "application/rdf+xml", # Resource Description Framework; Defined by RFC 3870
  35. "application/rss+xml", # RSS feeds
  36. "application/soap+xml", # SOAP; Defined by RFC 3902
  37. # Web Open Font Format; (candidate recommendation; use application/x-font-woff
  38. "application/font-woff",
  39. # until standard is official)
  40. "application/xhtml+xml", # XHTML; Defined by RFC 3236
  41. "application/xml-dtd", # DTD files; Defined by RFC 3023
  42. "application/xop+xml", # XOP
  43. "application/zip", # ZIP archive files; Registered[8]
  44. "application/gzip", # Gzip, Defined in RFC 6713
  45. )
  46. audio_mime_types: ElementsType = (
  47. "audio/basic", # mulaw audio at 8 kHz, 1 channel; Defined in RFC 2046
  48. "audio/L24", # 24bit Linear PCM audio at 8-48 kHz, 1-N channels; Defined in RFC 3190
  49. "audio/mp4", # MP4 audio
  50. "audio/mpeg", # MP3 or other MPEG audio; Defined in RFC 3003
  51. "audio/ogg", # Ogg Vorbis, Speex, Flac and other audio; Defined in RFC 5334
  52. "audio/vorbis", # Vorbis encoded audio; Defined in RFC 5215
  53. # RealAudio; Documented in RealPlayer Help[9]
  54. "audio/vnd.rn-realaudio",
  55. "audio/vnd.wave", # WAV audio; Defined in RFC 2361
  56. "audio/webm", # WebM open media format
  57. )
  58. image_mime_types: ElementsType = (
  59. "image/gif", # GIF image; Defined in RFC 2045 and RFC 2046
  60. "image/jpeg", # JPEG JFIF image; Defined in RFC 2045 and RFC 2046
  61. "image/pjpeg",
  62. # JPEG JFIF image; Associated with Internet Explorer; Listed in ms775147(v=vs.85) - Progressive JPEG,
  63. # initiated before global browser support for progressive JPEGs (Microsoft and Firefox).
  64. # Portable Network Graphics; Registered,[10] Defined in RFC 2083
  65. "image/png",
  66. "image/svg+xml", # SVG vector image; Defined in SVG Tiny 1.2 Specification Appendix M
  67. # Tag Image File Format (only for Baseline TIFF); Defined in RFC 3302
  68. "image/tiff",
  69. "image/vnd.microsoft.icon", # ICO image; Registered[11]
  70. )
  71. message_mime_types: ElementsType = (
  72. "message/http", # Defined in RFC 2616
  73. "message/imdn+xml", # IMDN Instant Message Disposition Notification; Defined in RFC 5438
  74. "message/partial", # Email; Defined in RFC 2045 and RFC 2046
  75. # Email; EML files, MIME files, MHT files, MHTML files; Defined in RFC
  76. # 2045 and RFC 2046
  77. "message/rfc822",
  78. )
  79. model_mime_types: ElementsType = (
  80. "model/example", # Defined in RFC 4735
  81. "model/iges", # IGS files, IGES files; Defined in RFC 2077
  82. "model/mesh", # MSH files, MESH files; Defined in RFC 2077, SILO files
  83. "model/vrml", # WRL files, VRML files; Defined in RFC 2077
  84. # X3D ISO standard for representing 3D computer graphics, X3DB binary
  85. # files
  86. "model/x3d+binary",
  87. "model/x3d+vrml", # X3D ISO standard for representing 3D computer graphics, X3DV VRML files
  88. "model/x3d+xml", # X3D ISO standard for representing 3D computer graphics, X3D XML files
  89. )
  90. multipart_mime_types: ElementsType = (
  91. "multipart/mixed", # MIME Email; Defined in RFC 2045 and RFC 2046
  92. "multipart/alternative", # MIME Email; Defined in RFC 2045 and RFC 2046
  93. # MIME Email; Defined in RFC 2387 and used by MHTML (HTML mail)
  94. "multipart/related",
  95. "multipart/form-data", # MIME Webform; Defined in RFC 2388
  96. "multipart/signed", # Defined in RFC 1847
  97. "multipart/encrypted", # Defined in RFC 1847
  98. )
  99. text_mime_types: ElementsType = (
  100. "text/cmd", # commands; subtype resident in Gecko browsers like Firefox 3.5
  101. "text/css", # Cascading Style Sheets; Defined in RFC 2318
  102. "text/csv", # Comma-separated values; Defined in RFC 4180
  103. "text/html", # HTML; Defined in RFC 2854
  104. "text/javascript",
  105. # (Obsolete): JavaScript; Defined in and obsoleted by RFC 4329 in order to discourage its usage in favor of
  106. # application/javascript. However, text/javascript is allowed in HTML 4 and 5 and, unlike
  107. # application/javascript, has cross-browser support. The "type" attribute of the <script> tag in HTML5 is
  108. # optional and there is no need to use it at all since all browsers have always assumed the correct default
  109. # (even in HTML 4 where it was required by the specification).
  110. "text/plain", # Textual data; Defined in RFC 2046 and RFC 3676
  111. "text/vcard", # vCard (contact information); Defined in RFC 6350
  112. "text/xml", # Extensible Markup Language; Defined in RFC 3023
  113. )
  114. video_mime_types: ElementsType = (
  115. "video/mpeg", # MPEG-1 video with multiplexed audio; Defined in RFC 2045 and RFC 2046
  116. "video/mp4", # MP4 video; Defined in RFC 4337
  117. # Ogg Theora or other video (with audio); Defined in RFC 5334
  118. "video/ogg",
  119. "video/quicktime", # QuickTime video; Registered[12]
  120. "video/webm", # WebM Matroska-based open media format
  121. "video/x-matroska", # Matroska open media format
  122. "video/x-ms-wmv", # Windows Media Video; Documented in Microsoft KB 288102
  123. "video/x-flv", # Flash video (FLV files)
  124. )
  125. mime_types: Dict[str, ElementsType] = OrderedDict(
  126. (
  127. ("application", application_mime_types),
  128. ("audio", audio_mime_types),
  129. ("image", image_mime_types),
  130. ("message", message_mime_types),
  131. ("model", model_mime_types),
  132. ("multipart", multipart_mime_types),
  133. ("text", text_mime_types),
  134. ("video", video_mime_types),
  135. )
  136. )
  137. audio_file_extensions: ElementsType = (
  138. "flac",
  139. "mp3",
  140. "wav",
  141. )
  142. image_file_extensions: ElementsType = (
  143. "bmp",
  144. "gif",
  145. "jpeg",
  146. "jpg",
  147. "png",
  148. "tiff",
  149. )
  150. text_file_extensions: ElementsType = (
  151. "css",
  152. "csv",
  153. "html",
  154. "js",
  155. "json",
  156. "txt",
  157. )
  158. video_file_extensions: ElementsType = (
  159. "mp4",
  160. "avi",
  161. "mov",
  162. "webm",
  163. )
  164. office_file_extensions: ElementsType = (
  165. "doc", # legacy MS Word
  166. "docx", # MS Word
  167. "xls", # legacy MS Excel
  168. "xlsx", # MS Excel
  169. "ppt", # legacy MS PowerPoint
  170. "pptx", # MS PowerPoint
  171. "odt", # LibreOffice document
  172. "ods", # LibreOffice spreadsheet
  173. "odp", # LibreOffice presentation
  174. "pages", # Apple Pages
  175. "numbers", # Apple Numbers
  176. "key", # Apple Keynote
  177. "pdf", # Portable Document Format
  178. )
  179. file_extensions: Dict[str, ElementsType] = OrderedDict(
  180. (
  181. ("audio", audio_file_extensions),
  182. ("image", image_file_extensions),
  183. ("office", office_file_extensions),
  184. ("text", text_file_extensions),
  185. ("video", video_file_extensions),
  186. )
  187. )
  188. unix_device_prefixes: ElementsType = ("sd", "vd", "xvd")
  189. def mime_type(self, category: Optional[str] = None) -> str:
  190. """Generate a mime type under the specified ``category``.
  191. If ``category`` is ``None``, a random category will be used. The list of
  192. valid categories include ``'application'``, ``'audio'``, ``'image'``,
  193. ``'message'``, ``'model'``, ``'multipart'``, ``'text'``, and
  194. ``'video'``.
  195. :sample:
  196. :sample: category='application'
  197. """
  198. category = category if category else self.random_element(list(self.mime_types.keys()))
  199. return self.random_element(self.mime_types[category])
  200. def file_name(self, category: Optional[str] = None, extension: Optional[str] = None) -> str:
  201. """Generate a random file name with extension.
  202. If ``extension`` is ``None``, a random extension will be created under
  203. the hood using |file_extension| with the specified ``category``. If a
  204. value for ``extension`` is provided, the value will be used instead,
  205. and ``category`` will be ignored. The actual name part itself is
  206. generated using |word|.
  207. :sample size=10:
  208. :sample: category='audio'
  209. :sample: extension='abcdef'
  210. :sample: category='audio', extension='abcdef'
  211. """
  212. if extension is None:
  213. extension = self.file_extension(category)
  214. filename: str = self.generator.word()
  215. return f"{filename}.{extension}"
  216. def file_extension(self, category: Optional[str] = None) -> str:
  217. """Generate a file extension under the specified ``category``.
  218. If ``category`` is ``None``, a random category will be used. The list of
  219. valid categories include: ``'audio'``, ``'image'``, ``'office'``,
  220. ``'text'``, and ``'video'``.
  221. :sample:
  222. :sample: category='image'
  223. """
  224. if category is None:
  225. category = self.random_element(list(self.file_extensions.keys()))
  226. return self.random_element(self.file_extensions[category])
  227. def file_path(
  228. self,
  229. depth: int = 1,
  230. category: Optional[str] = None,
  231. extension: Optional[str] = None,
  232. ) -> str:
  233. """Generate an absolute pathname to a file.
  234. This method uses |file_name| under the hood to generate the file name
  235. itself, and ``depth`` controls the depth of the directory path, and
  236. |word| is used under the hood to generate the different directory names.
  237. :sample size=10:
  238. :sample: depth=3
  239. :sample: depth=5, category='video'
  240. :sample: depth=5, category='video', extension='abcdef'
  241. """
  242. file: str = self.file_name(category, extension)
  243. path: str = f"/{file}"
  244. for _ in range(0, depth):
  245. path = f"/{self.generator.word()}{path}"
  246. return path
  247. def unix_device(self, prefix: Optional[str] = None) -> str:
  248. """Generate a Unix device file name.
  249. If ``prefix`` is ``None``, a random prefix will be used. The list of
  250. valid prefixes include: ``'sd'``, ``'vd'``, and ``'xvd'``.
  251. :sample:
  252. :sample: prefix='mmcblk'
  253. """
  254. if prefix is None:
  255. prefix = self.random_element(self.unix_device_prefixes)
  256. suffix: str = self.random_element(string.ascii_lowercase)
  257. path = "/dev/%s%s" % (prefix, suffix)
  258. return path
  259. def unix_partition(self, prefix: Optional[str] = None) -> str:
  260. """Generate a Unix partition name.
  261. This method uses |unix_device| under the hood to create a device file
  262. name with the specified ``prefix``.
  263. :sample:
  264. :sample: prefix='mmcblk'
  265. """
  266. path: str = self.unix_device(prefix=prefix)
  267. path += str(self.random_digit())
  268. return path