我正在寻找一种简单的方法来获取文件扩展名不正确或没有给出的mime类型,类似于.Net中的这个问题.
我最后使用了urlmon.dll.我认为会有一种更简单的方法,但这有效.我包含了帮助其他人的代码,并允许我在需要时再次找到它.
using System.Runtime.InteropServices;
...
[DllImport(@"urlmon.dll", CharSet = CharSet.Auto)] private extern static System.UInt32 FindMimeFromData( System.UInt32 pBC, [MarshalAs(UnmanagedType.LPStr)] System.String pwzUrl, [MarshalAs(UnmanagedType.LPArray)] byte[] pBuffer, System.UInt32 cbSize, [MarshalAs(UnmanagedType.LPStr)] System.String pwzMimeProposed, System.UInt32 dwMimeFlags, out System.UInt32 ppwzMimeOut, System.UInt32 dwReserverd ); public static string getMimeFromFile(string filename) { if (!File.Exists(filename)) throw new FileNotFoundException(filename + " not found"); byte[] buffer = new byte[256]; using (FileStream fs = new FileStream(filename, FileMode.Open)) { if (fs.Length >= 256) fs.Read(buffer, 0, 256); else fs.Read(buffer, 0, (int)fs.Length); } try { System.UInt32 mimetype; FindMimeFromData(0, null, buffer, 256, null, 0, out mimetype, 0); System.IntPtr mimeTypePtr = new IntPtr(mimetype); string mime = Marshal.PtrToStringUni(mimeTypePtr); Marshal.FreeCoTaskMem(mimeTypePtr); return mime; } catch (Exception e) { return "unknown/unknown"; } }
我找到了一个硬编码的解决方案,我希望我会帮助某个人:
public static class MIMEAssistant { private static readonly DictionaryMIMETypesDictionary = new Dictionary { {"ai", "application/postscript"}, {"aif", "audio/x-aiff"}, {"aifc", "audio/x-aiff"}, {"aiff", "audio/x-aiff"}, {"asc", "text/plain"}, {"atom", "application/atom+xml"}, {"au", "audio/basic"}, {"avi", "video/x-msvideo"}, {"bcpio", "application/x-bcpio"}, {"bin", "application/octet-stream"}, {"bmp", "image/bmp"}, {"cdf", "application/x-netcdf"}, {"cgm", "image/cgm"}, {"class", "application/octet-stream"}, {"cpio", "application/x-cpio"}, {"cpt", "application/mac-compactpro"}, {"csh", "application/x-csh"}, {"css", "text/css"}, {"dcr", "application/x-director"}, {"dif", "video/x-dv"}, {"dir", "application/x-director"}, {"djv", "image/vnd.djvu"}, {"djvu", "image/vnd.djvu"}, {"dll", "application/octet-stream"}, {"dmg", "application/octet-stream"}, {"dms", "application/octet-stream"}, {"doc", "application/msword"}, {"docx","application/vnd.openxmlformats-officedocument.wordprocessingml.document"}, {"dotx", "application/vnd.openxmlformats-officedocument.wordprocessingml.template"}, {"docm","application/vnd.ms-word.document.macroEnabled.12"}, {"dotm","application/vnd.ms-word.template.macroEnabled.12"}, {"dtd", "application/xml-dtd"}, {"dv", "video/x-dv"}, {"dvi", "application/x-dvi"}, {"dxr", "application/x-director"}, {"eps", "application/postscript"}, {"etx", "text/x-setext"}, {"exe", "application/octet-stream"}, {"ez", "application/andrew-inset"}, {"gif", "image/gif"}, {"gram", "application/srgs"}, {"grxml", "application/srgs+xml"}, {"gtar", "application/x-gtar"}, {"hdf", "application/x-hdf"}, {"hqx", "application/mac-binhex40"}, {"htm", "text/html"}, {"html", "text/html"}, {"ice", "x-conference/x-cooltalk"}, {"ico", "image/x-icon"}, {"ics", "text/calendar"}, {"ief", "image/ief"}, {"ifb", "text/calendar"}, {"iges", "model/iges"}, {"igs", "model/iges"}, {"jnlp", "application/x-java-jnlp-file"}, {"jp2", "image/jp2"}, {"jpe", "image/jpeg"}, {"jpeg", "image/jpeg"}, {"jpg", "image/jpeg"}, {"js", "application/x-javascript"}, {"kar", "audio/midi"}, {"latex", "application/x-latex"}, {"lha", "application/octet-stream"}, {"lzh", "application/octet-stream"}, {"m3u", "audio/x-mpegurl"}, {"m4a", "audio/mp4a-latm"}, {"m4b", "audio/mp4a-latm"}, {"m4p", "audio/mp4a-latm"}, {"m4u", "video/vnd.mpegurl"}, {"m4v", "video/x-m4v"}, {"mac", "image/x-macpaint"}, {"man", "application/x-troff-man"}, {"mathml", "application/mathml+xml"}, {"me", "application/x-troff-me"}, {"mesh", "model/mesh"}, {"mid", "audio/midi"}, {"midi", "audio/midi"}, {"mif", "application/vnd.mif"}, {"mov", "video/quicktime"}, {"movie", "video/x-sgi-movie"}, {"mp2", "audio/mpeg"}, {"mp3", "audio/mpeg"}, {"mp4", "video/mp4"}, {"mpe", "video/mpeg"}, {"mpeg", "video/mpeg"}, {"mpg", "video/mpeg"}, {"mpga", "audio/mpeg"}, {"ms", "application/x-troff-ms"}, {"msh", "model/mesh"}, {"mxu", "video/vnd.mpegurl"}, {"nc", "application/x-netcdf"}, {"oda", "application/oda"}, {"ogg", "application/ogg"}, {"pbm", "image/x-portable-bitmap"}, {"pct", "image/pict"}, {"pdb", "chemical/x-pdb"}, {"pdf", "application/pdf"}, {"pgm", "image/x-portable-graymap"}, {"pgn", "application/x-chess-pgn"}, {"pic", "image/pict"}, {"pict", "image/pict"}, {"png", "image/png"}, {"pnm", "image/x-portable-anymap"}, {"pnt", "image/x-macpaint"}, {"pntg", "image/x-macpaint"}, {"ppm", "image/x-portable-pixmap"}, {"ppt", "application/vnd.ms-powerpoint"}, {"pptx","application/vnd.openxmlformats-officedocument.presentationml.presentation"}, {"potx","application/vnd.openxmlformats-officedocument.presentationml.template"}, {"ppsx","application/vnd.openxmlformats-officedocument.presentationml.slideshow"}, {"ppam","application/vnd.ms-powerpoint.addin.macroEnabled.12"}, {"pptm","application/vnd.ms-powerpoint.presentation.macroEnabled.12"}, {"potm","application/vnd.ms-powerpoint.template.macroEnabled.12"}, {"ppsm","application/vnd.ms-powerpoint.slideshow.macroEnabled.12"}, {"ps", "application/postscript"}, {"qt", "video/quicktime"}, {"qti", "image/x-quicktime"}, {"qtif", "image/x-quicktime"}, {"ra", "audio/x-pn-realaudio"}, {"ram", "audio/x-pn-realaudio"}, {"ras", "image/x-cmu-raster"}, {"rdf", "application/rdf+xml"}, {"rgb", "image/x-rgb"}, {"rm", "application/vnd.rn-realmedia"}, {"roff", "application/x-troff"}, {"rtf", "text/rtf"}, {"rtx", "text/richtext"}, {"sgm", "text/sgml"}, {"sgml", "text/sgml"}, {"sh", "application/x-sh"}, {"shar", "application/x-shar"}, {"silo", "model/mesh"}, {"sit", "application/x-stuffit"}, {"skd", "application/x-koan"}, {"skm", "application/x-koan"}, {"skp", "application/x-koan"}, {"skt", "application/x-koan"}, {"smi", "application/smil"}, {"smil", "application/smil"}, {"snd", "audio/basic"}, {"so", "application/octet-stream"}, {"spl", "application/x-futuresplash"}, {"src", "application/x-wais-source"}, {"sv4cpio", "application/x-sv4cpio"}, {"sv4crc", "application/x-sv4crc"}, {"svg", "image/svg+xml"}, {"swf", "application/x-shockwave-flash"}, {"t", "application/x-troff"}, {"tar", "application/x-tar"}, {"tcl", "application/x-tcl"}, {"tex", "application/x-tex"}, {"texi", "application/x-texinfo"}, {"texinfo", "application/x-texinfo"}, {"tif", "image/tiff"}, {"tiff", "image/tiff"}, {"tr", "application/x-troff"}, {"tsv", "text/tab-separated-values"}, {"txt", "text/plain"}, {"ustar", "application/x-ustar"}, {"vcd", "application/x-cdlink"}, {"vrml", "model/vrml"}, {"vxml", "application/voicexml+xml"}, {"wav", "audio/x-wav"}, {"wbmp", "image/vnd.wap.wbmp"}, {"wbmxl", "application/vnd.wap.wbxml"}, {"wml", "text/vnd.wap.wml"}, {"wmlc", "application/vnd.wap.wmlc"}, {"wmls", "text/vnd.wap.wmlscript"}, {"wmlsc", "application/vnd.wap.wmlscriptc"}, {"wrl", "model/vrml"}, {"xbm", "image/x-xbitmap"}, {"xht", "application/xhtml+xml"}, {"xhtml", "application/xhtml+xml"}, {"xls", "application/vnd.ms-excel"}, {"xml", "application/xml"}, {"xpm", "image/x-xpixmap"}, {"xsl", "application/xml"}, {"xlsx","application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"}, {"xltx","application/vnd.openxmlformats-officedocument.spreadsheetml.template"}, {"xlsm","application/vnd.ms-excel.sheet.macroEnabled.12"}, {"xltm","application/vnd.ms-excel.template.macroEnabled.12"}, {"xlam","application/vnd.ms-excel.addin.macroEnabled.12"}, {"xlsb","application/vnd.ms-excel.sheet.binary.macroEnabled.12"}, {"xslt", "application/xslt+xml"}, {"xul", "application/vnd.mozilla.xul+xml"}, {"xwd", "image/x-xwindowdump"}, {"xyz", "chemical/x-xyz"}, {"zip", "application/zip"} }; public static string GetMIMEType(string fileName) { //get file extension string extension = Path.GetExtension(fileName).ToLowerInvariant(); if (extension.Length > 0 && MIMETypesDictionary.ContainsKey(extension.Remove(0, 1))) { return MIMETypesDictionary[extension.Remove(0, 1)]; } return "unknown/unknown"; } }
编辑:只需使用Mime Detective
我使用字节数组序列来确定给定文件的正确MIME类型.仅仅查看文件名的文件扩展名的优点是,如果用户要重命名文件以绕过某些文件类型上载限制,则文件扩展名将无法捕获此信息.另一方面,通过字节数组获取文件签名将阻止这种恶作剧行为的发生.
这是C#中的一个例子:
public class MimeType { private static readonly byte[] BMP = { 66, 77 }; private static readonly byte[] DOC = { 208, 207, 17, 224, 161, 177, 26, 225 }; private static readonly byte[] EXE_DLL = { 77, 90 }; private static readonly byte[] GIF = { 71, 73, 70, 56 }; private static readonly byte[] ICO = { 0, 0, 1, 0 }; private static readonly byte[] JPG = { 255, 216, 255 }; private static readonly byte[] MP3 = { 255, 251, 48 }; private static readonly byte[] OGG = { 79, 103, 103, 83, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0 }; private static readonly byte[] PDF = { 37, 80, 68, 70, 45, 49, 46 }; private static readonly byte[] PNG = { 137, 80, 78, 71, 13, 10, 26, 10, 0, 0, 0, 13, 73, 72, 68, 82 }; private static readonly byte[] RAR = { 82, 97, 114, 33, 26, 7, 0 }; private static readonly byte[] SWF = { 70, 87, 83 }; private static readonly byte[] TIFF = { 73, 73, 42, 0 }; private static readonly byte[] TORRENT = { 100, 56, 58, 97, 110, 110, 111, 117, 110, 99, 101 }; private static readonly byte[] TTF = { 0, 1, 0, 0, 0 }; private static readonly byte[] WAV_AVI = { 82, 73, 70, 70 }; private static readonly byte[] WMV_WMA = { 48, 38, 178, 117, 142, 102, 207, 17, 166, 217, 0, 170, 0, 98, 206, 108 }; private static readonly byte[] ZIP_DOCX = { 80, 75, 3, 4 }; public static string GetMimeType(byte[] file, string fileName) { string mime = "application/octet-stream"; //DEFAULT UNKNOWN MIME TYPE //Ensure that the filename isn't empty or null if (string.IsNullOrWhiteSpace(fileName)) { return mime; } //Get the file extension string extension = Path.GetExtension(fileName) == null ? string.Empty : Path.GetExtension(fileName).ToUpper(); //Get the MIME Type if (file.Take(2).SequenceEqual(BMP)) { mime = "image/bmp"; } else if (file.Take(8).SequenceEqual(DOC)) { mime = "application/msword"; } else if (file.Take(2).SequenceEqual(EXE_DLL)) { mime = "application/x-msdownload"; //both use same mime type } else if (file.Take(4).SequenceEqual(GIF)) { mime = "image/gif"; } else if (file.Take(4).SequenceEqual(ICO)) { mime = "image/x-icon"; } else if (file.Take(3).SequenceEqual(JPG)) { mime = "image/jpeg"; } else if (file.Take(3).SequenceEqual(MP3)) { mime = "audio/mpeg"; } else if (file.Take(14).SequenceEqual(OGG)) { if (extension == ".OGX") { mime = "application/ogg"; } else if (extension == ".OGA") { mime = "audio/ogg"; } else { mime = "video/ogg"; } } else if (file.Take(7).SequenceEqual(PDF)) { mime = "application/pdf"; } else if (file.Take(16).SequenceEqual(PNG)) { mime = "image/png"; } else if (file.Take(7).SequenceEqual(RAR)) { mime = "application/x-rar-compressed"; } else if (file.Take(3).SequenceEqual(SWF)) { mime = "application/x-shockwave-flash"; } else if (file.Take(4).SequenceEqual(TIFF)) { mime = "image/tiff"; } else if (file.Take(11).SequenceEqual(TORRENT)) { mime = "application/x-bittorrent"; } else if (file.Take(5).SequenceEqual(TTF)) { mime = "application/x-font-ttf"; } else if (file.Take(4).SequenceEqual(WAV_AVI)) { mime = extension == ".AVI" ? "video/x-msvideo" : "audio/x-wav"; } else if (file.Take(16).SequenceEqual(WMV_WMA)) { mime = extension == ".WMA" ? "audio/x-ms-wma" : "video/x-ms-wmv"; } else if (file.Take(4).SequenceEqual(ZIP_DOCX)) { mime = extension == ".DOCX" ? "application/vnd.openxmlformats-officedocument.wordprocessingml.document" : "application/x-zip-compressed"; } return mime; } }
注意我处理DOCX文件类型的方式不同,因为DOCX实际上只是一个ZIP文件.在这种情况下,我只是在验证它具有该序列后检查文件扩展名.对于某些人来说,这个例子还远未完成,但您可以轻松添加自己的例子.
如果要添加更多MIME类型,可以从此处获取许多不同文件类型的字节数组序列.此外,这是另一个关于文件签名的好资源.
如果所有其他方法都失败了,我会做很多次的步骤通过我正在寻找的特定类型的几个文件,并在文件的字节序列中查找模式.最后,这仍然是基本验证,不能用于确定文件类型的100%证明.
在Urlmon.dll中,有一个名为的函数FindMimeFromData
.
从文档中
MIME类型检测或"数据嗅探"是指从二进制数据中确定适当的MIME类型的过程.最终结果取决于服务器提供的MIME类型标头,文件扩展名和/或数据本身的组合.通常,只有前256字节的数据是重要的.
因此,从文件中读取第一个(最多)256个字节并将其传递给FindMimeFromData
.
如果您使用的是.NET Framework 4.5或更高版本,则现在有一个MimeMapping.GetMimeMapping(filename)方法,该方法将返回一个字符串,其中包含传递文件名的正确Mime映射.请注意,这使用文件扩展名,而不是文件本身中的数据.
文档位于http://msdn.microsoft.com/en-us/library/system.web.mimemapping.getmimemapping
您还可以查看注册表.
using System.IO; using Microsoft.Win32; string GetMimeType(FileInfo fileInfo) { string mimeType = "application/unknown"; RegistryKey regKey = Registry.ClassesRoot.OpenSubKey( fileInfo.Extension.ToLower() ); if(regKey != null) { object contentType = regKey.GetValue("Content Type"); if(contentType != null) mimeType = contentType.ToString(); } return mimeType; }
无论如何,你将不得不进入一个MIME数据库 - 无论它们是从扩展或魔术数字映射还是有点微不足道 - Windows注册表就是这样一个地方.对于独立于平台的解决方案,尽管必须使用代码(或作为独立库)运送此DB.
我使用混合解决方案:
using System.Runtime.InteropServices; [DllImport (@"urlmon.dll", CharSet = CharSet.Auto)] private extern static System.UInt32 FindMimeFromData( System.UInt32 pBC, [MarshalAs(UnmanagedType.LPStr)] System.String pwzUrl, [MarshalAs(UnmanagedType.LPArray)] byte[] pBuffer, System.UInt32 cbSize, [MarshalAs(UnmanagedType.LPStr)] System.String pwzMimeProposed, System.UInt32 dwMimeFlags, out System.UInt32 ppwzMimeOut, System.UInt32 dwReserverd ); private string GetMimeFromRegistry (string Filename) { string mime = "application/octetstream"; string ext = System.IO.Path.GetExtension(Filename).ToLower(); Microsoft.Win32.RegistryKey rk = Microsoft.Win32.Registry.ClassesRoot.OpenSubKey(ext); if (rk != null && rk.GetValue("Content Type") != null) mime = rk.GetValue("Content Type").ToString(); return mime; } public string GetMimeTypeFromFileAndRegistry (string filename) { if (!File.Exists(filename)) { return GetMimeFromRegistry (filename); } byte[] buffer = new byte[256]; using (FileStream fs = new FileStream(filename, FileMode.Open)) { if (fs.Length >= 256) fs.Read(buffer, 0, 256); else fs.Read(buffer, 0, (int)fs.Length); } try { System.UInt32 mimetype; FindMimeFromData(0, null, buffer, 256, null, 0, out mimetype, 0); System.IntPtr mimeTypePtr = new IntPtr(mimetype); string mime = Marshal.PtrToStringUni(mimeTypePtr); Marshal.FreeCoTaskMem(mimeTypePtr); if (string.IsNullOrWhiteSpace (mime) || mime =="text/plain" || mime == "application/octet-stream") { return GetMimeFromRegistry (filename); } return mime; } catch (Exception e) { return GetMimeFromRegistry (filename); } }