#! /usr/bin/perl

# cgi-bin access counter program
# Version 3.2.5
#
# The program is placed under the GNU Public License (GPL)
# gburgyan@cybercon.com

########################################################################
#
#   CHANGE THESE TO SUIT YOUR SITE
#

# The default language option (english, french)
$default_lang = "english";

# The name of the file to use.  You should probably give this an absolute path
$FileName = "/usr/local/etc/httpd/access";

# Replace with a list of regular expression IP addresses that we
# are supposed to ignore.  If you don't know what this means, just use
# "\." instead of periods.  Comment out entirely to ignore nothing.
@IgnoreIP = ("199\.18\.203\..*", # Change Me!
	     "199\.18\.159\.1",  # Change Me!
	     );

# Aliases: Set this up so that diffent pages will all yield the same
# count.  For instance, if you have a link like "index.html -> home.html"
# set it up like ("/index.html", "/home.html").  Make sure you give a full
# path to it. This will treat "/index.html" as if it were "/home.html".
%Aliases = ("/index.html", "/home.html",
	    "/webtools/counter/index.html", "/~gburgyan/counter/index.html",
	    "/counter/index.html", "/~gburgyan/counter/index.html",
	    );


# counter  or  counterbanner  or  counterfiglet
#
# Outputs the number of times a specific page has been accessed.
# The output depends on which page 'called' it, and what the program
# is named:
#
# The counter can "take arguments" via its name.  That is, if you tack
# -arg to the end of the program name, -arg is taken to be an argument.
# For example, if you call the counter 'counter-ord', '-ord' is considered
# an argument, and an ordinal count (1st, 2nd, 3rd, ...) will be printed
# instead of (1, 2, 3, ...).  Note that counterord does the same thing as
# counter-ord for backward compatibility.
#
# Currently recognized arguments:
#
#  -f=font	sets "font" to be the font for figlet
#  -lang=lang   sets the language used to lang
#  -nc		no count; don't to write the incremented count back to the file
#  -nl		no link; don't automatically generate a link
#  -ord		make an ordinal count instead of regular
#  -doc=document override the DOCUMENT_URI environment variable
#
# Example:  counterfiglet-ord-f=bigfont-nc
#
# This will cause the counter to call figlet as the output routine, printing
# in a big font an ordinal count, without updating the access count file.
# Note that the order of arguments is irrelevant so long as you spell the
# file name correctly.  It is generally assumed that the ability to take
# different arguments/use different output routines is done with symlinks:
# i.e. ln -s counter counterfiglet-ord-f=bigfont-nc

# Ok, so what if you want to use the font "banner3-D"?  You have to quote
# the "-" as either "\-" or "%2D" (where 2D is hex for the ASCII code for
# "-").  Since you can use \ and % to quote both must be quoted if you want
# them to show up by themselves.  "%" can be either "\%" or "%25" and
# "\" can be either "\\" or "%5C".  Also, remember that each "\" must be
# quoted in HTML or in your shell.  This means you will have to type:
# 'ln -s counter counterfiglet-f=banner3\\-D' or use
# 'counterfigler-f=banner3%2DD' in order to make the link.

# Advantages:
#  Does not require any graphics for the client.
#
#  Allows anyone on a server (after the program has been installed) to
#  very easily put an access counter on any of their pages.
#
#  Only one *TEXT* file is used for the whole server.
#
#  Fits in well with the rest of the formatting of the document.
#
#  Can Selectively ignore accesses from certain hosts.
#
#  Easily customizable with symlinks so only one actual copy of the
#  counter is ever needed.
#
# Possible flaws:
#  It requires server-side includes to be turned on unless you use a
#  CGI script to handle the parsing.  Such a script is available from
#  http://www.webtools.org/ssis/ssis
#
#  Requires one of the most ridiculous methods possible for passing arguments.
#  Unless you are using the ssis script which allows you to pass arguments
#  with "/" instead of "-" such that the environment variable PATH_INFO is
#  used to pass arguments in any order and without symlinks. 
#
#
# File format:
#  On each line of the "access_count" file, there is one record:
#      'document' 0000000000
#  The document name in single quotes followed by a space, followed by
#  the number of accesses of the document (must be 10 digits).  Any
#  line which does not fit this format is ignored for safety's sake.
#  
# 
#  Please send your comments to me!  :)
#  gburgyan@cybercon.com
#
########################################################################
#
#  Version 3.2.5-Added -lang= command-line option to change output
#                language (thanks Chris Polewczuk <chris@hexonx.com>) - GEB
#  Version 3.2.4-Added -doc= command-line option to work around netsite's
#                brokenness (thanks Steve Manes <manes@magpie.com>) - GEB
#  Version 3.2.3-Fixed command-line options for NCSA httpd 1.4 - GNS
#  Version 3.2.2-Clean up some warnings if used with `perl -w`
#  Version 3.2.1-Allow .cgi and .pl extentions to filenames - DE
#  Version 3.2 - Now it can take arguments via the filename - GNS
#  Version 3.1 - Link now depends on the printing module.
#              - Removed dependence on cgi-lib.pl - GNS
#  Version 3.0 - File locking made non-necessary. (This is not as bad
#                as it seems.
#              - Made the output format more easily user-definable
#  Version 2.3 - Locking problems *REALLY* fixed.  Again, this is
#                Phil that cought my bug.  It really works now!  :)
#  Version 2.2 - Locking problem fixed.  Thank you Philip Greenspun
#                <philg@medg.lcs.mit.edu>
#  Version 2.1 - Support for aliases of pages (to support many links to
#                one page)
#  Version 2.0 - No longer depends on a dbm file; instead uses a very
#                simple text file. It also supports locking. 
#  Version 1.0 - First public release.  Uses DBM files to count accesses
#
########################################################################
#
# Thing that shouldn't really need changing, but are configurable anyway.
#

# Maximum number of times to try to lock the file.
# Each try is .1 second.  Try for 1 second.
$MaxTries = 10;

# Set this to point to something, or comment it out, and it
# won't be a link at all.
#$Link = "http://www.ezenet.com.org/counter/";

# Whether or not to use locking.  If perl complains that flock is not
# defined, change this to 0.  Not *really* necessary because we check
# to make sure it works properly.
$UseLocking = 1;
#
#########################################################################
#
# Misc documents to refer people to in case of errors.
#
$CreateFile = "<a href=\"http://www.webtools.org/counter/faq.html#create\">[Error Creating Counter File -- Click for more info]</a>";
$AccessRights = "<a href=\"http://www.webtools.org/counter/faq.html#rights\">[Error Opening Counter File -- Click for more info]</a>";
$TimeoutLock = "[Timeout locking counter file]";
$BadVersion = "<a href=\"http://www.webtools.org/counter/\">[Version access_count newer than this program.  Please upgrade.]</a>";

# Get arguments from program name.  Argh...what a horrible way to do it!
$prog = $0;
$prog =~ s/(\.cgi|\.pl)//;      #strip .cgi|.pl name extension
$prog =~ s!^(.*/)!!;		# separate program name
$prog =~ s/\\(.)/sprintf("%%%02x", ord($1))/ge;	# quote \c to %xx

($printer, @args) = split(/-/, $prog);	# args are separated by dashes
$printer =~ s/%(..)/pack("c", hex($1))/ge; # unquote printer function name

# This gets path info, which is only applicable if you are using our
# ssis script (see above).  This makes counter/ord the same as counter-ord
push(@args, split("/", $ENV{"PATH_INFO"})) if $ENV{"PATH_INFO"};

# put them in assoc array %arg
foreach (@args)	# means do this for each element in the array
{
    s/%(..)/pack("c", hex($1))/ge;	# unquote %xx
    /^([^=]*)=?(.*)$/;			# extract "=" part, if any
    $arg{$1} = $2 ? $2 : 1;
}

undef $Link if $arg{'nl'};	# make link?

# Print out the header
print "Content-type: text/html\n\n";

# Make sure the file exists:
if (!(-f $FileName)) {
    if (!open (COUNT,">$FileName")) {
	print $CreateFile;
	exit 1;
    }
} else {
    if (!((-r $FileName) && (-w $FileName))) {
	# Make sure that we can in fact read and write to the file in
	# question.  If not, direct them to the FAQ.
	print $AccessRights;
	exit 1;
    }

    if (!open (COUNT,"+<$FileName")) {	# Now make sure it *really* opens
	print $AccessRights;	        # ...just in case...
	exit 1;
    }

    $version = <COUNT>;
    if (!($version =~ /^\d+.\d+$/)) {
	$version = 1;
	seek(COUNT,0,0);
    }
}

# This is for the future: the access_count file will have a version number.
if ($version > 1) {
    print $BadVersion;
    exit 1;
}

if ($UseLocking) {
    # Try to get a lock on the file
    while ($MaxTries--) {
	
	# Try to use locking, if it doesn't use locking, the eval would
	# die.  Catch that, and don't use locking.

	# Try to grad the lock with a non-blocking (4) exclusive (2) lock.
	# (4 | 2 = 6)
	$lockresult = eval("flock(COUNT,6)");

	if ($@) {
	    $UseLocking = 0;
	    last;
	}

	if (!$lockresult) {
	    select(undef,undef,undef,0.1); # Wait for 1/10 sec.
	} else {
	    last;		# We have gotten the lock.
	}
    }
}

# You would figure that $MaxTries would equal 0 if it didn't work.  The
# post-decrement takes it to -1 when the loop finally exits.
if ($MaxTries == -1) {
    print $TimeoutLock;
    exit(0);
}

# Make sure perl doesn't spit out warnings...
if (defined $arg{'doc'}) {
    $doc_uri = $arg{'doc'};
} elsif (defined $ENV{'DOCUMENT_URI'}) {
    $doc_uri = $ENV{'DOCUMENT_URI'};
} else {
    $doc_uri = "";
}

$doc_uri = $Aliases{$doc_uri} if defined $Aliases{$doc_uri};

$location = tell COUNT;
while ($line = <COUNT>) {
    ($uri,$accesses) = ($line =~ /^'(\S*)' (\d\d\d\d\d\d\d\d\d\d)$/);
    last if ($uri eq $doc_uri);
    $location = tell COUNT;
    $accesses = 0;
}

$accesses += 1;	# *NOT* '++' because we don't want '++'s magic

if (defined $arg{'ord'}) {
    if (defined $arg{'lang'}) {
	$ord = eval("&ordinalize_$arg{lang}($accesses)");
    } else {
	$ord = &ordinalize($accesses);
    }
} else {
    $ord = "";
}

$num = $accesses . $ord;

($count, $nLink) = eval("&output_$printer('$num')");

if ($@) {
    ($count, $nLink) = &output_counter($num);
}

# Print out a link to something informative (if we were requested to)
print "<a href=\"$nLink\">" if $nLink;

print $count;

print "</a>" if $nLink;

# Make sure we are not ignoring the host:

$ignore = 0;
$ignore = grep($ENV{"REMOTE_ADDR"} =~ /$_/, @IgnoreIP) if defined ($ENV{"REMOTE_ADDR"});

if (!$ignore && !$arg{"nc"}) # if we aren't ignored and requested to not count
{
    # Now update the counter file
    seek(COUNT, $location, 0);
    $longaccesses = sprintf("%010d", $accesses);
    print COUNT "'$doc_uri' $longaccesses\n";
}

if ($UseLocking) {
    flock(COUNT,8);			# Unlock the file.
}

close COUNT;


# output_translate
#
# Quote any special characters with HTML quoting.

sub translate_output {
    local($string) = @_;

    $_ = $string;
  
    s//&egrave;/g;

    return $_;
}

# ordinalize
#
# Call the appropriate ordinalize function for the default language

sub ordinalize
{
    local($count) = @_;

    if (defined $arg{'lang'}) {
	return eval("&ordinalize_$arg{lang}($count)");
    } else {
	return eval("&ordinalize_$default_lang($count)");
    }
}


# ordinalize_english
#
# Figure out what suffix (st, nd, rd, th) a number would have in ordinal
# form and return that extension.

sub ordinalize_english {
    local($count) = @_;
    local($last, $last2);

    $last2 = $count % 100;
    $last = $count % 10;

    if ($last2 < 10 || $last2 > 13) {
	return "st" if $last == 1;
	return "nd" if $last == 2;
	return "rd" if $last == 3;
    }

    return "th";		# Catch "eleventh, twelveth, thirteenth" etc.
}

# ordinalize_french
#
# Trivial...  Return the extension for french.  The only exception is 1.
# Thank you Chris Polewczuk <chris@hexonx.com>

sub ordinalize_french {
    local ($count) = @_;

    if ($count == 1) {
	return "'ire";
    } else {
	return "ime";
    }
}

# The following are the routines that actually convert the number
# of accesses into something that we print out.
#
# The name of each function is "output_" followed by the program's name.
# For instance, is the program is called "counter" then "output_counter"
# will be called; a program called "counterbanner" will call
# "output_counterbanner" to get the output.
#
# If the function is not defined, then "output_counter" will be called.


# output_counter
#
# The simplest function: just returns the number of accesses and the link.

sub output_counter {
    local($count) = @_;

    return &translate_output($count), $Link; # we return the count and the link
}


# output_counterord
#
# Return the number of accesses as an ordinal number.  (ie. 1st, 2nd, 3rd, 4th)

sub output_counterord {
    local($count) = @_;

    return &translate_output($count . &ordinalize($count)), $Link;
}


# output_counterbanner
#
# A somewhat silly one that uses the "banner" command to print out the
# count.  :)  You might need to change the path to make it work.

sub output_counterbanner {
    local($count) = @_;
    
    $banner = `banner $count`;

    return "<pre>$banner</pre>"; # return no link here (it would be annoying)
}


# output_counterfiglet
#
# An even sillier one than counterbanner.  :)

sub output_counterfiglet {
    local($count) = @_;

    $fig = "echo $count | /usr/games/figlet";	# setup command line
    $fig .= " -f $arg{'f'}" if $arg{"f"};	# use a different font?
    $fig = `$fig`;
    $fig =~ s!&!&amp;!;
    $fig =~ s!<!&lt;!;
    return "<br><pre>" . $fig . "</pre>";	# note no link here, either
}

