You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

mimetypes.py 7.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242
  1. """Guess the MIME type of a file.
  2. This module defines two useful functions:
  3. guess_type(url) -- guess the MIME type and encoding of a URL.
  4. guess_extension(type) -- guess the extension for a given MIME type.
  5. It also contains the following, for tuning the behavior:
  6. Data:
  7. knownfiles -- list of files to parse
  8. inited -- flag set when init() has been called
  9. suffix_map -- dictionary mapping suffixes to suffixes
  10. encodings_map -- dictionary mapping suffixes to encodings
  11. types_map -- dictionary mapping suffixes to types
  12. Functions:
  13. init([files]) -- parse a list of files, default knownfiles
  14. read_mime_types(file) -- parse one file, return a dictionary or None
  15. """
  16. import posixpath
  17. import urllib
  18. __all__ = ["guess_type","guess_extension","read_mime_types","init"]
  19. knownfiles = [
  20. "/usr/local/etc/httpd/conf/mime.types",
  21. "/usr/local/lib/netscape/mime.types",
  22. "/usr/local/etc/httpd/conf/mime.types", # Apache 1.2
  23. "/usr/local/etc/mime.types", # Apache 1.3
  24. ]
  25. inited = 0
  26. def guess_type(url):
  27. """Guess the type of a file based on its URL.
  28. Return value is a tuple (type, encoding) where type is None if the
  29. type can't be guessed (no or unknown suffix) or a string of the
  30. form type/subtype, usable for a MIME Content-type header; and
  31. encoding is None for no encoding or the name of the program used
  32. to encode (e.g. compress or gzip). The mappings are table
  33. driven. Encoding suffixes are case sensitive; type suffixes are
  34. first tried case sensitive, then case insensitive.
  35. The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
  36. to ".tar.gz". (This is table-driven too, using the dictionary
  37. suffix_map).
  38. """
  39. if not inited:
  40. init()
  41. scheme, url = urllib.splittype(url)
  42. if scheme == 'data':
  43. # syntax of data URLs:
  44. # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
  45. # mediatype := [ type "/" subtype ] *( ";" parameter )
  46. # data := *urlchar
  47. # parameter := attribute "=" value
  48. # type/subtype defaults to "text/plain"
  49. comma = url.find(',')
  50. if comma < 0:
  51. # bad data URL
  52. return None, None
  53. semi = url.find(';', 0, comma)
  54. if semi >= 0:
  55. type = url[:semi]
  56. else:
  57. type = url[:comma]
  58. if '=' in type or '/' not in type:
  59. type = 'text/plain'
  60. return type, None # never compressed, so encoding is None
  61. base, ext = posixpath.splitext(url)
  62. while suffix_map.has_key(ext):
  63. base, ext = posixpath.splitext(base + suffix_map[ext])
  64. if encodings_map.has_key(ext):
  65. encoding = encodings_map[ext]
  66. base, ext = posixpath.splitext(base)
  67. else:
  68. encoding = None
  69. if types_map.has_key(ext):
  70. return types_map[ext], encoding
  71. elif types_map.has_key(ext.lower()):
  72. return types_map[ext.lower()], encoding
  73. else:
  74. return None, encoding
  75. def guess_extension(type):
  76. """Guess the extension for a file based on its MIME type.
  77. Return value is a string giving a filename extension, including the
  78. leading dot ('.'). The extension is not guaranteed to have been
  79. associated with any particular data stream, but would be mapped to the
  80. MIME type `type' by guess_type(). If no extension can be guessed for
  81. `type', None is returned.
  82. """
  83. global inited
  84. if not inited:
  85. init()
  86. type = type.lower()
  87. for ext, stype in types_map.items():
  88. if type == stype:
  89. return ext
  90. return None
  91. def init(files=None):
  92. global inited
  93. for file in files or knownfiles:
  94. s = read_mime_types(file)
  95. if s:
  96. for key, value in s.items():
  97. types_map[key] = value
  98. inited = 1
  99. def read_mime_types(file):
  100. try:
  101. f = open(file)
  102. except IOError:
  103. return None
  104. map = {}
  105. while 1:
  106. line = f.readline()
  107. if not line: break
  108. words = line.split()
  109. for i in range(len(words)):
  110. if words[i][0] == '#':
  111. del words[i:]
  112. break
  113. if not words: continue
  114. type, suffixes = words[0], words[1:]
  115. for suff in suffixes:
  116. map['.'+suff] = type
  117. f.close()
  118. return map
  119. suffix_map = {
  120. '.tgz': '.tar.gz',
  121. '.taz': '.tar.gz',
  122. '.tz': '.tar.gz',
  123. }
  124. encodings_map = {
  125. '.gz': 'gzip',
  126. '.Z': 'compress',
  127. }
  128. types_map = {
  129. '.a': 'application/octet-stream',
  130. '.ai': 'application/postscript',
  131. '.aif': 'audio/x-aiff',
  132. '.aifc': 'audio/x-aiff',
  133. '.aiff': 'audio/x-aiff',
  134. '.au': 'audio/basic',
  135. '.avi': 'video/x-msvideo',
  136. '.bcpio': 'application/x-bcpio',
  137. '.bin': 'application/octet-stream',
  138. '.cdf': 'application/x-netcdf',
  139. '.cpio': 'application/x-cpio',
  140. '.csh': 'application/x-csh',
  141. '.dll': 'application/octet-stream',
  142. '.dvi': 'application/x-dvi',
  143. '.exe': 'application/octet-stream',
  144. '.eps': 'application/postscript',
  145. '.etx': 'text/x-setext',
  146. '.gif': 'image/gif',
  147. '.gtar': 'application/x-gtar',
  148. '.hdf': 'application/x-hdf',
  149. '.htm': 'text/html',
  150. '.html': 'text/html',
  151. '.ief': 'image/ief',
  152. '.jpe': 'image/jpeg',
  153. '.jpeg': 'image/jpeg',
  154. '.jpg': 'image/jpeg',
  155. '.js': 'application/x-javascript',
  156. '.latex': 'application/x-latex',
  157. '.man': 'application/x-troff-man',
  158. '.me': 'application/x-troff-me',
  159. '.mif': 'application/x-mif',
  160. '.mov': 'video/quicktime',
  161. '.movie': 'video/x-sgi-movie',
  162. '.mpe': 'video/mpeg',
  163. '.mpeg': 'video/mpeg',
  164. '.mpg': 'video/mpeg',
  165. '.ms': 'application/x-troff-ms',
  166. '.nc': 'application/x-netcdf',
  167. '.o': 'application/octet-stream',
  168. '.obj': 'application/octet-stream',
  169. '.oda': 'application/oda',
  170. '.pbm': 'image/x-portable-bitmap',
  171. '.pdf': 'application/pdf',
  172. '.pgm': 'image/x-portable-graymap',
  173. '.pnm': 'image/x-portable-anymap',
  174. '.png': 'image/png',
  175. '.ppm': 'image/x-portable-pixmap',
  176. '.py': 'text/x-python',
  177. '.pyc': 'application/x-python-code',
  178. '.ps': 'application/postscript',
  179. '.qt': 'video/quicktime',
  180. '.ras': 'image/x-cmu-raster',
  181. '.rgb': 'image/x-rgb',
  182. '.rdf': 'application/xml',
  183. '.roff': 'application/x-troff',
  184. '.rtf': 'application/rtf',
  185. '.rtx': 'text/richtext',
  186. '.sgm': 'text/x-sgml',
  187. '.sgml': 'text/x-sgml',
  188. '.sh': 'application/x-sh',
  189. '.shar': 'application/x-shar',
  190. '.snd': 'audio/basic',
  191. '.so': 'application/octet-stream',
  192. '.src': 'application/x-wais-source',
  193. '.sv4cpio': 'application/x-sv4cpio',
  194. '.sv4crc': 'application/x-sv4crc',
  195. '.t': 'application/x-troff',
  196. '.tar': 'application/x-tar',
  197. '.tcl': 'application/x-tcl',
  198. '.tex': 'application/x-tex',
  199. '.texi': 'application/x-texinfo',
  200. '.texinfo': 'application/x-texinfo',
  201. '.tif': 'image/tiff',
  202. '.tiff': 'image/tiff',
  203. '.tr': 'application/x-troff',
  204. '.tsv': 'text/tab-separated-values',
  205. '.txt': 'text/plain',
  206. '.ustar': 'application/x-ustar',
  207. '.wav': 'audio/x-wav',
  208. '.xbm': 'image/x-xbitmap',
  209. '.xml': 'text/xml',
  210. '.xsl': 'application/xml',
  211. '.xpm': 'image/x-xpixmap',
  212. '.xwd': 'image/x-xwindowdump',
  213. '.zip': 'application/zip',
  214. }
  215. if __name__ == '__main__':
  216. import sys
  217. print guess_type(sys.argv[1])