txt2htm02.pl to HTML.

index -|- end

Generated: Sun Apr 15 11:46:49 2012 from txt2htm02.pl 2012/02/16 13.2 KB.

#!perl -w
# Module: txt2htm02.pl
# coded using EditPlus v2.12 (76)
# 16/02/2012 - Turn off $debug_on, and other tidying...
# 06/08/2011 - Some BIG updates ;=))
# 2010/04/24 - more fixes
# 2010/03/21 - some fixes, and improvements
# March, 2005 geoff mclane
# Sep 2006 - update
# Orignal output was the whole file as one paragraph, using <br> to divide lines
# Add option to use <pre [class="cd"]>...</pre> formating

use strict;
use warnings;
use Cwd;
use File::Basename;  # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] )
use File::Spec; # File::Spec->rel2abs($rel); # we are IN the SLN directory, get ABSOLUTE from RELATIVE

my $perl_dir = 'C:\GTools\perl';
unshift(@INC, $perl_dir);
require 'logfile.pl' or die "Unable to load logfile.pl ...\n";
# log file stuff
my ($LF);
my $pgmname = $0;
if ($pgmname =~ /(\\|\/)/) {
    my @tmpsp = split(/(\\|\/)/,$pgmname);
    $pgmname = $tmpsp[-1];
}
my $outfile = $perl_dir."\\temp.$pgmname.txt";
open_log($outfile);

my $out_file = $perl_dir."\\tempt2h.htm";

# user variables
my $load_log = 0;
my $in_file = '';

my $debug_on = 0;
my $def_file = 'C:\Documents and Settings\Geoff McLane\My Documents\unix\Ubuntu-11-nautilus-fails.txt';

my $M_VERSION = "0.3";
my $start_time = time();
my $verbose = 0;
my $verb2 = 0;
my @input_files = ();
my $file_lines = 0;
my @files = ();
my @file_list = ();
my $WHITE_PATTERN = "^[ \t]*\$";
my $tab_stg = ' &nbsp;&nbsp;&nbsp;';
my $check_out = 0;
my $use_pre = 0;   # default to ON
my $doc_type = '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"'."\n".
'"http://www.w3.org/TR/html4/loose.dtd">'."\n";
my $add_links = 1;
my $use_fixed_font = 1;

### program variables
my @warnings = ();
my $cwd = cwd();
my $os = $^O;

sub show_warnings($) {
    my ($val) = @_;
    if (@warnings) {
        prt( "\nGot ".scalar @warnings." WARNINGS...\n" );
        foreach my $itm (@warnings) {
           prt("$itm\n");
        }
        prt("\n");
    } else {
        ###prt( "\nNo warnings issued.\n\n" );
    }
}

sub pgm_exit($$) {
    my ($val,$msg) = @_;
    if (length($msg)) {
        $msg .= "\n" if (!($msg =~ /\n$/));
        prt($msg);
    }
    show_warnings($val);
    close_log($outfile,$load_log);
    exit($val);
}


sub prtw($) {
   my ($tx) = shift;
   $tx =~ s/\n$//;
   prt("$tx\n");
   push(@warnings,$tx);
}

sub init_out_file {
    my ($out_name, $in_name) = @_;
    prt("Creating $out_name\n");
    open(DSP, ">$out_name") || die "Can not create $out_name: $!\n";
    prt("Writing to $out_name ...\n") if $verbose;
    $file_lines++;
   print DSP $doc_type;
    print DSP <<"EOF";

<html>

<head>
<title>$in_name to HTML</title>
<style type="text/css">
<!-- /* some style */
body { 
   background-image:url('http://geoffair.org/images/clds3.jpg');
   margin: 0cm 1cm 0cm 1cm; }
h1 {
 background:#efefef;
 border-style: solid solid solid solid;
 border-color:#d9e2e2;
 border-width:1px;
 padding:2px 2px 2px 2px;
 font-size:200%;
 text-align:center;
}
.ctr { text-align: center; }
.a { color:red; }
.b { color:#006666; }
.c { color:blue; }
.d { color:#a52a2a; }
.e { color:#9400d3; }
.f { color:#666666; }
.o { color:#008b8b; }
.v { color:#a52a2a; }
.t { color:#006600; }
.cd {
  /* top, right, bottom, left */
  padding: 0px 10px 0px 10px;
  margin: 1px 10px 1px 10px;
  background: #f0f8ff;
  border-width: 1px;
  border-style: solid solid solid solid;
  border-color: #cccccc;
  width: 90%;
  font-family:"Courier New";
}
.cn { font-family:"Courier New"; } 
// -->
</style>
</head>

<body>

<h1>$in_name to HTML</h1>

<p class="ctr"><a href="index.htm">index</a></p>

EOF

    print DSP "<div class=\"cn\">\n" if ($use_fixed_font);
    prt("Closing $out_name.\n") if $verbose;
    close(DSP);
}

sub end_out_file {
    my ($out_name, $in_name) = @_;
    prt("Appending to $out_name\n") if $verbose;
    open(DSP, ">>$out_name") || die "Can not append to $out_name: $!\n";
    prt("Writing to $out_name ...\n") if $verbose;
    print DSP "</div>\n" if ($use_fixed_font);
   print DSP "<p class=\"ctr\"><a href=\"index.htm\">index</a></p>\n";
   print DSP '<p>Generated ';
   print DSP scalar localtime(time());
   print DSP ", from <b>$in_name</b>, by <b>$pgmname</b>, my Perl text-to-html \n";
   print DSP "<a href=\"http://geoffmclane.com/mperl/samples/index.htm\">'converter'</a></p>\n";
   print DSP '<!-- generated ';
   print DSP scalar localtime(time());
   print DSP ", from $in_name, by $pgmname, my text-to-html converter - geoffmclane.com -->\n";
    print DSP <<"EOF";
</body>
</html>
EOF

    prt("Closing $out_name.\n");
    close(DSP);
}



sub local_dirname($) { # passed a path, './dir1/dir2/file.name' returns './dir1/dir2/
    my ($file) = @_;
    my ($sub);
    ($sub = $file) =~ s,/+[^/]+$,,g;
    $sub = '.' if $sub eq $file;
    return $sub;
}

sub do_this_file($$) {
    my ($out_name,$mfile) = @_;
    prt("Opening, for append $out_name\n") if $verbose;
    open(DSP, ">>$out_name") || die "Can't append to $out_name: $!\n";
    prt("Writing to $out_name ...\n") if $verbose;
    $file_lines++;
    dsp_add_src(\*DSP, $mfile);
    close(DSP);
    prt("Closed $out_name.\n") if $verbose;
}

sub convert_links($) {
    my ($tx) = shift;
    my $len = length($tx);
    my ($i,$ch,$tag);
    my $ntx = '';
    for ($i = 0; $i < $len; $i++) {
        $ch = substr($tx,$i,1);
        if ( !($ch =~ /\s/) ) {
            $tag = $ch; # start a tag
            $i++;   # and go to next
            for (; $i < $len; $i++) {
                $ch = substr($tx,$i,1);
                if ($ch =~ /\s/) { # exit on space (or end of line)
                    $i--;   # back up to collect this char later
                    last;
                }
                $tag .= $ch;    # accumuate tag
            }
            if ($tag =~ /^\w+:{1}\/{1}\/{1}\w+(.+)$/) {
                $tag = "<a target=\"_blank\" href=\"$tag\">$tag</a>";
            }
            $ntx .= $tag;
            next;
        }
        $ntx .= $ch;
    }
    return $ntx;
}

sub line_to_html($) {
    my ($tx) = @_;
   $tx =~ s/&/&amp;/g; # convert '&' to '&amp;'
   $tx =~ s/\t/$tab_stg/g; # substitute TAB characters
   $tx =~ s/"/&quot;/g; # sub double quotes
   $tx =~ s/\</&lt;/g; # sub less than tag beginning
   $tx =~ s/\>/&gt;/g; # and html/xml tag ending
    my ($ch,$len,$i);
    my $nline = '';
    $ch = '';
    $len = length($tx);
    for ($i = 0; $i < $len; $i++) {
        $ch = substr($tx,$i,1);
        if ($ch =~ /\s/) {
            if (($nline =~ /\s$/) || ($nline =~ /&nbsp;$/)) {
                $ch = "&nbsp;";
            }
        }
        $nline .= $ch;
    }
    return $nline;
}

sub contains_a_link($) {
    my ($tx) = shift;
    return 1 if ($tx =~ /\w+:{1}\/{1}\/{1}\w+\./);
    return 0;
}

sub dsp_add_src($$) {
    my ($fh,$file) = @_;
   my $line_num = 0;
   my $dn_para = 0;
    my (@lines,$tx,$ln);
    my $need_br = 0;
   if (-f $file) {
      open(INF, $file) || die "Unable to open $file!\n";
      if ($use_pre) {
         print $fh '<pre class="cd">'."\n";
      }
        @lines = <INF>;
        close INF;
      $line_num = scalar @lines;
      prt("Reading $file ... $line_num lines...\n");
        $line_num = 0;
        foreach $tx (@lines) {
         $line_num++;
         chomp $tx; # clear end of line
         $ln = length($tx);
         if ( $tx =~ /$WHITE_PATTERN/o ) {
            prt("white [$tx]$ln\n") if $verb2;
                if ($use_pre) {
               print $fh "\n"; # 2010-04-24 - add this blank line
                } else {    # if ( !$use_pre )
               print $fh "\</p\>\n" if $dn_para;
               $dn_para = 0;
                    $need_br = 0;
            }
         } else {
            if (!$use_pre) {
               print $fh "\<p\>\n" if ! $dn_para;
               $dn_para = 1;
            }
                $tx = line_to_html($tx);
            $ln = length($tx); # get the final length
            if ($use_pre) {
                    #if ($add_links && ($tx =~ /\s+\w+:{1}\/{1}\/{1}\w+/)) {
                    if ( $add_links && contains_a_link($tx) ) {
                        $tx = convert_links($tx);
                    }
               print $fh "$tx\n"; # out the line
            } else# if ( !$use_pre ) {
               if (substr ($tx, 0, 2) eq '  ') { # if starts with 2 spaces
                  my $sps = 0;
                  my $nbs = ' &nbsp;';
                  for ($sps = 2; $sps < $ln; $sps++) {
                     if (substr ($tx, $sps, 1) ne ' ') {
                        last;
                     }
                     $nbs .= '&nbsp;' if $sps > 1;
                  }
                  $sps-- if $sps > 1; # back off last space, if more than 1
                  prt("Replacing $sps with [$nbs] ...\n") if $verb2;
                  $tx =~ s/ {$sps}/$nbs/; # replace (N) spaces with '&nbsp; x N
                  if ($verb2) {
                     my @vals = split(/\s/,$tx);
                     while (@vals) {
                        my $vc = shift (@vals);
                        prt("[$vc] ");
                     }
                     prt("\n");
                  }
               } # if it was space beginning
                    #if ($add_links && ($tx =~ /\s+\w+:{1}\/{1}\/{1}\w+/)) {
                    if ( $add_links && contains_a_link($tx) ) {
                        $tx = convert_links($tx);
                    }
                    if ($need_br) {
                   print $fh "\<br\>\n"; # out a line separator
                    }
               print $fh "$tx\n"; # out the line
                    $need_br = 1;
            }
            prt("sig [$tx]$ln\n") if $verb2;
         }
      }
        $need_br = 0;
      if ($use_pre) {
         print $fh '</pre>'."\n";
      } else {
         print $fh "\</p\>\n" if $dn_para;
      }
      prt("Done $file ... $line_num lines ...\n");
   } else {
      print $fh "WARNING: Missed SOURCE [$file]\n";
      prtw("WARNING: Missed SOURCE [$file]\n");
   }
}


sub get_dir_list
{
    my $name = shift;
   # put all files in the current directory in @files:
   # opendir(THEDIR, ".") || die("Couldn't open current directory\n");
   opendir(THEDIR, $name) || die("Couldn't open current directory\n");
   @files = readdir(THEDIR);
   closedir(THEDIR);
   my $f_cnt = 0;
   my $d_cnt = 0;
   prt("Found " . $#files . " files and folders ...\n");
   foreach my $dfile (@files) {
    if ( -d $dfile ) {
       # if ($dfile eq '.' || $dfile eq '..') or
       if ($dfile =~ '^\.$' || $dfile =~ '^\.\.$') {
          # do nothing with DOT and DOUBLE DOT
       } else {
        $d_cnt++;
        prt("$dfile <DIR>\n") if $verb2;
       }
    } else {
     $f_cnt++;
     my $ff = $name . '\\' . $dfile;
     # $ff =~ s/\//\\/g; # set DOS path separators ...
     $ff =~ s/\\/\//g; # set *nix path separators ...
     my $sb = dirname($ff);
     $ff =~ s/\//\\/g; # set DOS path separators ...
     $sb =~ s/\//\\/g; # set DOS path separators ...
     prt("$dfile dos [$ff] [$sb] ") if $verb2;
     if ($f_cnt == 1) {
        $sb =~ s/\\/\//g; # set *nix path separators ...
        prt("[$sb]") if $verb2;
     }
     prt("\n") if $verb2;
    }
   }

   prt("Found " . $#files . " - folders = $d_cnt, files = $f_cnt ...\n");
}


############################################
### MAIN ###

parse_arguments(@ARGV);

pgm_exit(1,"ERROR: no input files found or specified\n") if ! @input_files;

# pre-process
foreach my $inf (@input_files) {
   if (-f $inf) {
      prt("File: $inf ok\n");
   } else {
      pgm_exit(1,"ERROR: Can not locate file [$inf] ... check command ...\n");
   }
}

$in_file = $input_files[0];   # get the FIRST
my ($nm,$dr) = fileparse($in_file);
init_out_file($out_file, $nm); # abort, if no create ...

# show count in the array ...
prt("Adding $#input_files lines to file $out_file.\n") if $verbose;

foreach $in_file (@input_files) {
 do_this_file($out_file, $in_file);
}

end_out_file($out_file,$in_file);

prt("Done $out_file on " . localtime(time()) . ".\n");

system($out_file);

pgm_exit(0,"");

###################################################

sub parse_arguments {
 my @av = @_; # take it off the passed stack
 while (@av) {
    my $a = shift @av; # get and move to next
    if ($a eq '--version') {
       prt("$M_VERSION\n");
    } elsif ($a eq '--help' || $a eq '--h' || $a eq '-h' || $a eq '-?') {
       pgm_exit(1,"No help available! ;=))\nexcept reading the code here!\nTry -v, -o name, in_file etc ...");
    } elsif ($a eq '--verbose' || $a eq '-v') {
       prt("Setting verbose.\n");
       $verbose = 1;
    } elsif ($a eq '-v2') {
       prt("Setting verbose 2.\n");
       $verbose = 1;
       $verb2 = 1;
    } elsif ($a eq '--output' || $a eq '-o') {
       pgm_exit(1,"ERROR no argument given for option \`$a'\n") if ! @av; # require_argument(@av);
       my $tmp = shift @av; # take next argument
       if ($tmp ne $out_file) {
          $check_out = 1;
          $out_file = $tmp;
       }
    } elsif ((length($a) >= 2) && (substr($a,0,2) eq '-p') ) {
       if (length($a) > 2) {
          my $pa = substr($a,2);
          if ($pa eq '-') {
             $use_pre = 0; # off the feature
          } elsif ($pa eq '+') {
             $use_pre = 1; # ON the feature
          } else {
             pgm_exit(1,"ERROR: Errant -p command. Only -p+ for ON, and -p- for OFF allowed...\n");
          }
       } else {
         # just -p toggles the feature
         if ($use_pre) {
            $use_pre = 0;
         } else {
            $use_pre = 1;
         }
       }
    } elsif ($a =~ /^-/) {
       pgm_exit(1,"ERROR: unrecognised option -- '$a'\nTry $pgmname --help for more information.\n");
    } else {
       prt("Storing argument [$a].\n");
       push(@input_files, $a);
    }
 } # while arguments

 if (! @input_files && $debug_on && (-f $def_file)) {
    push(@input_files, $def_file);
 }
}


# eof - txt2htm02.pl

index -|- top

checked by tidy  Valid HTML 4.01 Transitional