/// <summary>
/// Guesses the text encoding of a stream from its leading byte order mark (BOM) and,
/// when no BOM is present, from whether its entire content is well-formed UTF-8.
/// </summary>
/// <param name="fs">
/// Readable stream positioned at the start of the text.
/// The stream is closed before this method returns.
/// </param>
/// <returns>
/// <see cref="System.Text.Encoding.UTF8"/> for a UTF-8 BOM or BOM-less content that validates as UTF-8
/// (this includes pure ASCII and empty input);
/// <see cref="System.Text.Encoding.Unicode"/> for an FF FE BOM;
/// <see cref="System.Text.Encoding.BigEndianUnicode"/> for an FE FF BOM;
/// otherwise <see cref="System.Text.Encoding.Default"/>.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="fs"/> is null.</exception>
/// <exception cref="FormatException">
/// BOM-less content ends in the middle of a multi-byte UTF-8 sequence.
/// </exception>
public static System.Text.Encoding GetEncoding(Stream fs)
{
    if (fs == null)
    {
        throw new ArgumentNullException("fs");
    }

    // Disposing the reader also closes fs (as the original r.Close() did),
    // and now does so even when an exception is thrown.
    using (BinaryReader reader = new BinaryReader(fs, System.Text.Encoding.Default))
    {
        // ReadBytes returns fewer bytes only at end of stream, so every index
        // below is guarded by an explicit length check.
        byte[] head = reader.ReadBytes(3);

        if (head.Length >= 3 && head[0] == 0xEF && head[1] == 0xBB && head[2] == 0xBF)
        {
            return Encoding.UTF8;
        }

        // The UTF-16 BOM is exactly two bytes; the third byte is already text.
        if (head.Length >= 2 && head[0] == 0xFE && head[1] == 0xFF)
        {
            return Encoding.BigEndianUnicode;
        }

        if (head.Length >= 2 && head[0] == 0xFF && head[1] == 0xFE)
        {
            return Encoding.Unicode;
        }

        // No BOM: validate the whole content, including the bytes already read.
        // Dropping them could start validation in the middle of a character.
        byte[] content;
        using (MemoryStream buffer = new MemoryStream())
        {
            buffer.Write(head, 0, head.Length);

            byte[] chunk;
            while ((chunk = reader.ReadBytes(4096)).Length > 0)
            {
                buffer.Write(chunk, 0, chunk.Length);
            }

            content = buffer.ToArray();
        }

        return IsUTF8Bytes(content) ? Encoding.UTF8 : Encoding.Default;
    }
}

/// <summary>
/// Checks whether a byte sequence is structurally valid UTF-8 (no BOM expected).
/// </summary>
/// <param name="data">Bytes to validate.</param>
/// <returns>
/// <c>true</c> when every lead byte announces 2 to 6 bytes and is followed by the
/// matching number of 10xxxxxx continuation bytes (plain ASCII and empty input pass);
/// <c>false</c> on the first byte that breaks this structure.
/// </returns>
/// <exception cref="FormatException">
/// The data ends before the last multi-byte sequence is complete.
/// </exception>
private static bool IsUTF8Bytes(byte[] data)
{
    // Bytes that make up the character currently being analysed, counting the
    // current byte itself; 1 means "expecting the start of a new character".
    int charByteCounter = 1;

    for (int i = 0; i < data.Length; i++)
    {
        byte curByte = data[i];

        if (charByteCounter == 1)
        {
            if (curByte >= 0x80)
            {
                // Count the leading 1 bits of the lead byte: 110xxxxx -> 2 ... 1111110x -> 6.
                while (((curByte <<= 1) & 0x80) != 0)
                {
                    charByteCounter++;
                }

                // A lone 10xxxxxx (count stays 1) or more than six leading ones is invalid.
                if (charByteCounter == 1 || charByteCounter > 6)
                {
                    return false;
                }
            }
        }
        else
        {
            // Continuation bytes must have the form 10xxxxxx.
            if ((curByte & 0xC0) != 0x80)
            {
                return false;
            }

            charByteCounter--;
        }
    }

    if (charByteCounter > 1)
    {
        // Input stopped in the middle of a multi-byte sequence.
        throw new FormatException("非预期的byte格式!");
    }

    return true;
}