# Generated: Tue Feb 2 17:54:59 2010 from utf_16_BOM.pl 2010/01/27 1.6 KB.
#!/usr/bin/perl -W
# Report whether a file starts with a UTF-16 byte-order mark (BOM).
#
# Usage: utf_16_BOM.pl [file]
# With no argument, the built-in default path in $in_file is checked.
#
# Known BOM signatures, for reference (only the two UTF-16 ones are tested):
#
# BOM Type      Hex                                   = Decimal
# UTF-8         EF BB BF                              = 239 187 191
# UTF-16 (BE)   FE FF                                 = 254 255
# UTF-16 (LE)   FF FE                                 = 255 254
# UTF-32 (BE)   00 00 FE FF                           = 0 0 254 255
# UTF-32 (LE)   FF FE 00 00                           = 255 254 0 0
# UTF-7         2B 2F 76, and one of: [38|39|2B|2F]
#               43 47 118, and one of: [56|57|43|47]
#               +/v, and one of 8 9 + /
# UTF-1         F7 64 4C                              = 247 100 76
# UTF-EBCDIC    DD 73 66 73                           = 221 115 102 115
# SCSU          0E FE FF                              = 14 254 255
# BOCU-1        FB EE 28 +optional FF
#               251 238 40 +optional 255
# GB-18030      84 31 95 33                           = 132 49 149 51

use strict;
use warnings;

# Default file to inspect when no command-line argument is given.
my $in_file = 'C:\DTEMP\libxml2-2.6.30\result\slashdot16.xml';

# has_utf_16_BOM($file)
#
# Inspect the first two bytes of $file for a UTF-16 byte-order mark.
#
# Returns:
#   0 - the file could not be opened or read
#   1 - the file was read but does not start with a UTF-16 BOM
#       (this includes files shorter than two bytes)
#   2 - UTF-16 little-endian BOM (FF FE)
#   3 - UTF-16 big-endian BOM (FE FF)
#
# NOTE: only two bytes are examined, so a UTF-32 (LE) file, whose BOM
# begins with the same FF FE pair, is reported as UTF-16 (LE).
sub has_utf_16_BOM {
    my ($fil) = @_;
    return 0 unless defined $fil;

    # Three-argument open with a lexical handle: the name is always taken
    # literally, so names such as "&STDIN" or ones with leading/trailing
    # whitespace cannot be reinterpreted as a mode or a handle dup.
    open my $inf, '<', $fil or return 0;
    binmode $inf;    # compare raw bytes, no newline/encoding translation

    my $buf = '';
    my $got = read $inf, $buf, 2;
    close $inf;

    return 0 unless defined $got;    # read error (e.g. a directory)
    return 1 if $got < 2;            # too short to hold a BOM: not UTF-16

    return 2 if $buf eq "\xFF\xFE";  # LittleEndians (windows)
    return 3 if $buf eq "\xFE\xFF";  # BigEndians (unix)
    return 1;
}

# A file name on the command line overrides the built-in default.
if (@ARGV) {
    $in_file = $ARGV[0];
}

my $bom = has_utf_16_BOM($in_file);
print "File [$in_file]\n";
if ($bom == 2) {
    print " *** IS *** UTF-16 (LE) LittleEndians 0xFFFE";
}
elsif ($bom == 3) {
    print " *** IS *** UTF-16 (BE) BigEndians 0xFEFF";
}
elsif ($bom == 1) {
    print "is *** NOT *** UTF-16...";
}
else {
    print "Can not open file!";
}
print "\n";