htmlize.pl


SUBROUTINES

  1. convert_tags


#!/usr/bin/env perl

#####################################################################################
#
# File:		htmlilze.pl
# Author:	Curtis Smith
# Date:		January 19, 2000
# DLM:		February 5, 2004 (Date last modified)
#
# Purpose:	This program converts a file containing Perl code, given as 
#		an argument on the command line, to HTML so that it may be 
#		viewed through a browser.  Any calls to subroutines that are
#		made in the file are re-written as links that point to that 
#		subroutine.  A link to the top of the page (where a list of any 
#		subroutines in the file is kept) is placed 
#		above each subroutine definition.  This allows the user to 
#		navigate easily throughout the file (especially useful with
#		large modules that contain several subroutines).
#
#		The file that is produced will have the name of the original
#		file with a '.html' extension.  Any periods (.) that existed
#		in the original file name are converted to underscores (_).
#
# Usage:	htmlize.pl filename
#
#####################################################################################
#
# To Do:
# - Improve performance
# - determine whether or not converting tags is necessary
#
#####################################################################################
#
# Revision History
#
# 6/7/02	- first stab at displaying comments as green text in html file
#
# 1/16/03	- change font color of comments to grey and italicize the comments
#		- add XHTML DTD
#
# 8/03		- actually make XHTML (1.0 transitional) compliant
#
# 2/04		- include copyright/license info
#
# 6/04		- changed regex not to match "subroutine" when looking for subroutines
#		- added ampersand as one of the tags that gets converted in convert_tags()
#
#####################################################################################
#
# Copyright (c) 2004
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
#####################################################################################

use File::Basename; 
use strict;

my ($file, $title, $base_name, $html_file, $out, $i, @subs);
my $DEBUG = 0;

$file = $ARGV[0];

# used for HTML file
$title = basename($file);

if(! $ARGV[0])
{
	print "\nUSAGE :  htmlize.pl <filename.ext>\n";
	print "Output:  filename_ext.html (Ex: Module.pm will become Module_pm.html)\n\n";
	exit 0;
}

if(!(-e $file))
{
	print "File $file not found.\n";
	exit 0;
}

# convert less than(<), greater than(>), and quotes(") to their HTML 
# equivalent so that they aren't interpreted as HTML code
$file = convert_tags($file);

$file =~ /(.*)\.tagtemp/o;

$base_name = $1;
$base_name =~ s#\.#\_#go;

$html_file = $base_name . ".html";

# $file.tagtemp is a temporary file that contains a copy of the original code
# with HTML tags converted
system("mv $file $html_file");

# open file for update
open(HTML_FILE, "+< $html_file") or die "Can't open file $html_file: $!\n";

$out = "";
$i=0;

print "\nDetermining method names...\n";

while(<HTML_FILE>)
{
	my ($sub_name, $anchor, $link_to_top, $leftover);

	my $rest = "";
	
	if (/^sub\s+(.*)$/o)
	{
		$sub_name = $1;

		# parse out opening brace from subroutine name if it exists
		if ($sub_name =~ /\{/)
		{
			$sub_name =~ s/(\{.*)$//;
			$rest = $1;
		}

		$sub_name =~ s/\s+//go;

		# array to hold names of all subroutines
		# used to write list of subs at top of HTML file
		$subs[$i++] = $sub_name;
	
		$anchor = "<a name=\"$sub_name\"></a>\n"; 

		# link to top of page to place before each subroutine definition
		$link_to_top = "<a href=\"\#Top\" title=\"Go to top of page\">Top Of Page</a>\n";

		$leftover = "sub $sub_name" . $rest . "\n"; 

		# updated lines
		$_ = $anchor . $link_to_top . $leftover;
	}
	$out .= $_;
}

print "Finished.\n";
print "Updating file with links to top of page...\n";

# rewind to beginning of file and write updated lines to file
seek(HTML_FILE, 0, 0);
print HTML_FILE $out;
#truncate(HTML_FILE, tell(HTML_FILE));
close(HTML_FILE);

print "Finished.\n";
print "Creating list of method names...\n";

$out = "";

$out = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11-transitional.dtd\">\n";

$out .= "<html>\n<head>\n<title>$title</title>\n";

$out .= "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-1\" />\n";
$out .= "<style type=\"text/css\">\n";
$out .= "<!--\n";
$out .= "body {background-color: #FFFFFF; color: #000000;}\n" .
	".comment {color: #A1A1A1;}\n" .
	"a:link {text-decoration: none; color: blue;}\n" .
	"a:visited {text-decoration: none; color: blue;}\n" .
	"a:active {text-decoration: underline; color: blue;}\n" .
	"a:hover {text-decoration: underline; color: black;}\n";
$out .= "-->\n";
$out .= "</style>\n";
$out .= "</head>\n";

$out .= "<body>\n";                          
$out .= "<a name=\"Top\"></a>\n";  #destination anchor for "Top of Page" links
$out .= "<center><h1>$title</h1></center>\n";

# write list of links to all subroutines in the file
# would be nice to have links in an HTML TABLE...
if(@subs)
{
	$out .= "<hr />\n";

	$out .= "<center><h3><span style=\"font-family: verdana;\">SUBROUTINES</span></h3></center>\n";
	$out .= "<ol>\n";

	foreach my $sub(@subs)
	{
		$out .= "<li><a href=\"\#$sub\" title=\"sub $sub\">$sub</a></li>\n";
	}
	$out .= "</ol><br />\n";
}

$out .= "<hr /><pre>\n";

print "Finished.\n";
print "Linking method names and updating file (may take a while)...\n";

# open file for update
open(HTML_FILE, "+< $html_file") or die "Can't open file $html_file:  $!\n";

while(<HTML_FILE>)
{
	my ($line, $sub_name, $link);

	$line = $_;

	if($line =~ /#/o
	   and $line !~ /\\#/o
	   and $line !~ /\$\#/o			# syntax for last array index, i.e. $#array
	   and $line !~ /a\s+href\s*=.*?#/o	# on-page link, i.e. <a href="#download">
	   and $line !~ /(!|=)~\s*s\s*#/o)	# regular expression, i.e. s#this#that#
	{
		$line =~ s/^(.*?)(#.*)$/$1<span class=\"comment\"><i>$2<\/i><\/span>/m;
	}

	my $stime = time();

	foreach my $sub_name (@subs)
	{
                next if $line =~ /^sub|\<a name\=.*$sub_name.*/i;
		
		# if a variable has the same name as a sub name don't place a link behind it
                next if ($line =~ /\$$sub_name/);
                next if ($line =~ /\$\$$sub_name/);
                next if ($line =~ /\@$sub_name/);
                next if ($line =~ /\%$sub_name/);

		# if a sub name is contained within comment line don't place a link behind it
                next if ($line =~ /#.*$sub_name/);

		# don't link subroutines from superclasses
                next if ($line =~ /SUPER::$sub_name/);

		# don't link subroutine names that appear in use/require statements
                next if ($line =~ /^use\s+$sub_name/);
                next if ($line =~ /^require\s+$sub_name/);

		# don't link package names that have the name as a subroutine
                next if ($line =~ /^package\s+$sub_name/);

		# check that the subroutine name is not contained within a word
                next if ($line =~ /$sub_name[A-Za-z0-9_]/);
                next if ($line =~ /[A-Za-z0-9_]$sub_name/);

		# don't link sub names surrounded by quotes
                next if ($line =~ /\'|\"$sub_name\'|\"/);

                if ( $line =~ /(^$sub_name\W|\W$sub_name\W)/g )
                {
                	# place link tags around each call to a subroutine
			$link = "<a href=\"\#$sub_name\" title=\"sub $sub_name\">$sub_name</a>";                        
			$line =~ s/$sub_name/$link/g;
			# $line =~ s/([^\$]|^)$sub_name/$link/g;
			# $line =~ s/([^\$]|\#|\&|^)$sub_name/$link/g;
		}
	}

	my $etime = time();

	$out .= $line;

	my $time_per_line = $etime - $stime;

	print "Time taken per line: $time_per_line secs.\n" if $DEBUG;
}

# link to top of page at end of file
$out .= "-----------\n";
$out .= "<a href=\"\#Top\" title=\"Go to top of page\">Top Of Page</a>\n";
$out .= "-----------\n";

$out .= "</pre></body></html>\n";

print "Updating.\n";

# rewind to beginning of file and write updated lines to file
seek(HTML_FILE, 0, 0);
print HTML_FILE $out;
#truncate(HTML_FILE, tell(HTML_FILE));    
close(HTML_FILE);

print "Done.\n\n";


Top Of Page
sub convert_tags
{
	my($file) = @_;
	my ($root, $tmp_file, $out);

	$root = basename($file);

	$tmp_file = $root.".tagtemp";

	system("cp $file $tmp_file");
 
	# open file for update
	open(TMP_FILE, "+< $tmp_file") or die "Can't open file $tmp_file:  $!\n";

	$out = "";

	while(<TMP_FILE>)
	{
		if(/\</o)
		{
			$_ =~ s/\</\<\;/go; # $a < $b becomes $a < $b
		}
		if(/\>/o)
		{
			$_ =~ s/\>/\>\;/go; # $a > $b becomes $a > $b 
		}
		if(/\"/o)
		{
			$_ =~ s/\"/\"\;/go; # $str = "hello" becomes
					       # $str = &qout;hello&qout;
		}
		if(/\&/o)
		{
			if($_ !~ /\&lt\;|\&gt\;|\&quot\;/) {
				$_ =~ s/\&/\&amp\;/go;
			}
		}
		$out .= $_;
	}

	# rewind to beginning of file and write updated lines to file
	seek(TMP_FILE, 0, 0);
	print TMP_FILE $out;
	#truncate(TMP_FILE, tell(TMP_FILE));
	close(TMP_FILE);

	return($tmp_file);
}
-----------
Top Of Page
-----------