utf_16_BOM.pl to HTML.

index -|- end

Generated: Tue Feb 2 17:54:59 2010 from utf_16_BOM.pl 2010/01/27 1.6 KB.

#!/usr/bin/perl -W
# BOM Type    Hex          = Decimal
# UTF-8       EF BB BF     = 239 187 191
# UTF-16 (BE) FE FF        = 254 255
# UTF-16 (LE) FF FE        = 255 254 
# UTF-32 (BE) 00 00 FE FF  = 0 0 254 255 
# UTF-32 (LE) FF FE 00 00  = 255 254 0 0 
# UTF-7       2B 2F 76, and one of: [38|39|2B|2F] = 43 47 118, and one of: [56|57|43|47] = "+/v", and one of: 8 9 + /
# UTF-1       F7 64 4C     = 247 100 76
# UTF-EBCDIC  DD 73 66 73  = 221 115 102 115
# SCSU        0E FE FF     = 14 254 255 
# BOCU-1      FB EE 28 +optional FF = 251 238 40 +optional 255
# GB-18030    84 31 95 33  = 132 49 149 51
# Default file to inspect. The script replaces this value with the first
# command-line argument when one is supplied.
my $in_file = 'C:\DTEMP\libxml2-2.6.30\result\slashdot16.xml';
# Detect a UTF-16 byte-order mark (BOM) at the start of a file.
#
# Parameter:
#   $fil - path of the file to inspect (opened read-only, in binary mode).
#
# Returns:
#   0 - the file could not be opened or read
#   1 - the file is readable but does not start with a UTF-16 BOM
#       (this includes files shorter than two bytes)
#   2 - the file starts with FF FE (UTF-16 little-endian)
#   3 - the file starts with FE FF (UTF-16 big-endian)
#
# NOTE: only the first two bytes are examined, so a file that starts with
# the UTF-32 (LE) BOM (FF FE 00 00) is also reported as UTF-16 (LE).
sub has_utf_16_BOM($) {
   my ($fil) = @_;

   # Nothing to open: report it the same way as an unopenable file.
   return 0 unless defined $fil && length $fil;

   # Three-argument open with a lexical handle: the path is taken literally,
   # so whitespace or characters such as '&' and '|' in the name are never
   # interpreted as part of the open mode, and no global handle is clobbered.
   open(my $inf, '<', $fil) or return 0;
   binmode $inf;

   my $buf = "";
   my $got = read($inf, $buf, 2);
   close $inf;

   return 0 unless defined $got;    # read error
   return 1 if $got < 2;            # too short to hold a two-byte BOM

   return 2 if $buf eq "\xFF\xFE";  # LittleEndians (windows)
   return 3 if $buf eq "\xFE\xFF";  # BigEndians (unix)
   return 1;
}
# A path given as the first command-line argument takes precedence over
# the built-in default.
$in_file = $ARGV[0] if @ARGV;

# Report line for each result code of has_utf_16_BOM(); any code that is
# not listed here falls through to the "Can not open file!" message.
my %report_for = (
   2 => " *** IS *** UTF-16 (LE) LittleEndians 0xFFFE",
   3 => " *** IS *** UTF-16 (BE) BigEndians 0xFEFF",
   1 => "is *** NOT *** UTF-16...",
);

my $bom = has_utf_16_BOM($in_file);
print "File [$in_file]\n";
print exists $report_for{$bom} ? $report_for{$bom} : "Can not open file!";
print "\n";

index -|- top

checked by tidy  Valid HTML 4.01 Transitional