relinehtm.pl to HTML.

index -|- end

Generated: Tue Feb 2 17:54:54 2010 from relinehtm.pl 2009/03/28 2.9 KB.

#!/Perl
# AIM: To reline HTML, just per tags
# Reads $in_file, has process_array2() cut it into pieces (collected in
# @newarr) and writes those pieces to $out_file.  Progress is logged with
# prt(); open_log(), prt(), mydie() and close_log() are not defined in this
# file - presumably they come from logfile.pl.
use warnings;
use File::Basename qw(basename);
my ($LF, $HF, $IF);
# Use only the file-name part of $0: when the script is started with a
# directory in its path, the raw $0 would put path separators in the middle
# of the log file name.
my $outfile = 'temp'.basename($0).'.txt';
require "logfile.pl" or die "Missing logfile.pl ...\n";
open_log($outfile);
my $in_file = 'C:\Projects\Tidy\tests\tempmem.htm';
#my $in_file = 'search[1].txt';
my $out_file = 'temphtm2.htm';
my $nlbrace = 0;     # true: break the line after '{' and put '}' on its own line
my $in_script = 0;   # true while between <script ...> and </script>
my $snlbrace = 0;    # value of $nlbrace saved while inside a script
my $in_table = 0;    # count of <table> tags currently open
my $line;
prt( "$0 ...Hello, World...\n" );
# 3-argument open: characters in the file name can not be taken for a mode.
open $IF, '<', $in_file or mydie( "ERROR: Can not open $in_file ... $!\n" );
open $HF, '>', $out_file or mydie( "ERROR: Can not create $out_file ... $!\n" );
my @filearr = <$IF>; # slurp whole file into an array
close($IF);
my $lc = scalar @filearr;
my @newarr = ();
prt( "$in_file contain $lc line ...\n" );
process_array2();
$lc = scalar @newarr;
prt( "Got $lc line in new array ...\n" );
foreach $line (@newarr) {
   print $HF $line or mydie( "ERROR: Can not write $out_file ... $!\n" );
}
# buffered write errors only show up at close, so check it
close($HF) or mydie( "ERROR: Can not write $out_file ... $!\n" );
###system($out_file);
close_log($outfile,1);
exit(0);
# Cut every line of the file-level @filearr into pieces and append them,
# each terminated by "\n", to the file-level @newarr.
#
# A new piece starts at every '<'.  A piece ends after every '}', after
# every ';' while inside a <script> element and - while $nlbrace is set -
# after every '{' (a '}' is then put on a line of its own).  Whatever is
# pending at the end of an input line is emitted as well, so a piece never
# spans two input lines.
#
# Takes no arguments and returns nothing.  Reads and updates the file-level
# state $nlbrace, $snlbrace, $in_script and $in_table, logs every piece
# with prt(), and normalises the elements of @filearr in place (line ending
# and CRs removed, tabs turned into spaces).
sub process_array2 {
   my $bit = '';   # piece currently being assembled
   my $cnt = 0;    # 1-based input line number, only used in the log output
   foreach my $line (@filearr) {
      chomp $line;
      $line =~ tr/\r//d;    # drop every CR, not only the first one
      $line =~ tr/\t/ /;    # each tab becomes one space
      $cnt++;
      foreach my $ch (split //, $line) {
         if ($ch eq '<') {
            # a tag starts: emit the text collected in front of it
            _emit_bit($cnt, $bit) if length($bit);
            $bit = $ch;
         } elsif ($nlbrace && ($ch eq '{')) {
            _emit_bit($cnt, $bit . $ch);
            $bit = '';
         } elsif ($ch eq '}') {
            if ($nlbrace) {
               # pending text first, then the brace on its own line
               _emit_bit($cnt, $bit) if length($bit);
               _emit_bit($cnt, $ch);
            } else {
               _emit_bit($cnt, $bit . $ch);
            }
            $bit = '';
         } elsif ($ch eq '>') {
            # closing item - the piece is kept open so that text following
            # the tag stays on the same output line
            $bit .= $ch;
            if ($bit =~ /<script/i) {
               # $bit still holds the opening tag when script code with a
               # '>' follows it on the same line; entering a second time
               # would overwrite the saved $nlbrace with 1 and it would
               # never be restored at </script>.
               if (!$in_script) {
                  $in_script = 1;
                  prt( "Entering SCRIPT\n" );
                  $snlbrace = $nlbrace;
                  $nlbrace = 1;
               }
            } elsif ($bit =~ /<\/script/i) {
               $in_script = 0;
               prt( "Exit SCRIPT\n" );
               $nlbrace = $snlbrace;
            } elsif ($bit =~ /<table/i) {
               $in_table++;
               prt( "Entering TABLE $in_table\n" );
            } elsif ($bit =~ /<\/table/i) {
               prt( "Exit TABLE $in_table\n" );
               $in_table-- if $in_table;
            }
         } elsif ($in_script && ($ch eq ';')) {
            _emit_bit($cnt, $bit . $ch);
            $bit = '';
         } else {
            $bit .= $ch;
         }
      }
      # end of the input line: flush what is left
      _emit_bit($cnt, $bit) if length($bit);
      $bit = '';
   }
   return;
}

# Log one finished piece with prt() and append it, newline-terminated, to
# the file-level @newarr.  $cnt is the input line number shown in the log.
sub _emit_bit {
   my ($cnt, $bit) = @_;
   prt( "$cnt: [$bit]\n" );
   push(@newarr, $bit . "\n");
   return;
}
### eof - relinehtm.pl

index -|- top

checked by tidy  Valid HTML 4.01 Transitional