#!/usr/bin/perl

# AXS Script Set, Logging Module
# Copyright 1997-2000 by Fluid Dynamics
#
# Please adhere to the copyright notice and conditions of use as described at
# the URL below.  For latest version and help files, visit:
#	http://www.xav.com/scripts/axs/
# ___________________________________________________________________________

$VERSION = '2.3.0.0012';

# Enter the location of your log file relative to this script.  This is path
# and file name, not a web address.  Leave as-is for a default install.

$LogFile = 'log.txt';

# Logging can be disabled after the log exceeds a certain size.  To use this
# feature, enter a non-zero number for the maximum byte size for your log
# file.  Leave it at zero to always log, without size restriction.

$MaxLogSize = 0;

# AXS supports extended error reporting and remote debugging.  These features
# are useful during install and when writing for help.  To view debug output:
#
#	http://[server]/[path]/ax.pl?debugme
#
# After AXS is working, disable these features by setting $AllowDebug = 0.

$AllowDebug = 1;

# This script will not log visits from users with hostnames or IP addresses
# listed below.  Use all lowercase names.  Empty the array to log everyone:

@IgnoreHosts = ('.ezenet.com', '207.107.181.');

# Example:
#
# @IgnoreHosts = ('.foobar.org', 'host.example.co.uk', '250.245.240.');

# This maps hostnames to a consistent format; for example, if your site can
# be addressed as http://xav.com/ and http://www.xav.com/ then this set of
# mappings can convert all URL's to a consistent format.
#
# Format is:
#	Original-String, Final-String,
#
# The To and From web addresses will have a find-and-replace operation done
# on them with each name-value pair in the %Maps hash.  The operation will be
# done as a case insensitive substring match.

%Maps = (
	'http://deepdisco.com/',      'http://www.deepdisco.com/',
	'http://ftp.deepdisco.com/',  'http://www.deepdisco.com/',
	);


# __________________________________________________________________
#
# The following shouldn't need to be changed:

$domain = 'http://www.deepdisco.com';

# If your webserver doesn't support SERVER_NAME, then set this variable
# as the top-level URL to your server without a trailing slash, e.g.:
# $domain = 'http://www.xav.com';

$header = "Content-type: text/html\r\n\r\n";

# This should be deleted if the content-type header is being echoed out
# to your SSI output, otherwise leave as is.


# The above variable allows you to correct for a different time zone if
# your ISP is somewhere else.  This is an integer of +/- a certain number
# of hours.  i.e., ISP is in Pennsylvania and owner is in Seattle:
#	$TimeOffsetInHours = -3;
# ISP in Australia, owner in London:
#	$TimeOffsetInHours = +12;

$TimeOffsetInHours = 0;

# If you use image redirects and the image appears broken, you may enter the
# path to a real 1x1 pixel transparent GIF image in the $TransURL variable.
# This real image will be used by ax.pl instead of a synthetic one if this
# variable is set (meaning you will have to remove the # comment in front of
# it as well):
#
####	$TransURL = 'http://www.xav.com/scripts/axs/trans.gif';
#

# If every visitor is being logged twice, try setting the following variable
# to 1:

$NoLogHead = 0;

# ___________________________________________________________________________


$IIS = 1 if ($ENV{'SERVER_SOFTWARE'} =~ m!IIS!i);
chdir($1) if (($IIS) && ($0 =~ m!(.*)(\\|\/)!));

# These conditionals return output to the browser, and determine which
# URLs go in the $From and $To variables:

if ($ENV{'DOCUMENT_URI'}) {
	# print nothing (SSI call):
	$To = $domain.$ENV{'DOCUMENT_URI'};
	$From = $ENV{'HTTP_REFERER'};
	print "$header\n  \n";
	}

# Alternate SSI call (via REQUEST_URI not DOCUMENT_URI)
elsif ($ENV{'REQUEST_URI'} && ($ENV{'QUERY_STRING'} eq '')) {
	$To = $domain.$ENV{'REQUEST_URI'};
	$From = $ENV{'HTTP_REFERER'};
	print "$header\n  \n";
	}

elsif (($IIS) && ($ENV{'PATH_INFO'} ne $ENV{'SCRIPT_NAME'})) {
	# win/iis ssi call:
	$To = $domain.$ENV{'SCRIPT_NAME'};
	$From = $ENV{'HTTP_REFERER'};
	print ' ';
	}
elsif ($ENV{'QUERY_STRING'} =~ m!^(\w+)\.gif(\&ref=)?(.*)$!i) {
	# print transparent image:
	$To = $ENV{'HTTP_REFERER'};
	$From = $3 ? $3 : $To;
	&Print_Image;
	}
elsif (($ENV{'QUERY_STRING'}) && ($ENV{'QUERY_STRING'} ne 'debugme')) {
	# redirect user to another URL:
	$To = $ENV{'QUERY_STRING'};
	$From = $ENV{'HTTP_REFERER'};
	$Export = 1;
	print "HTTP/1.0 302 Moved\n" if ($IIS);
	print "Location: $ENV{'QUERY_STRING'}\n\n";
	}
elsif (($ENV{'QUERY_STRING'} =~ m!^debugme$!i) && ($AllowDebug)) {
	&SpawnDebugger;
	exit;
	}
else {
	# we should never get here, this is just a valid HTTP response
	# in case of mis-configuration or whatever:
	print "HTTP/1.0 200 OK\n" if ($IIS);
	print "$header\n<BR>\n";
	}

if ($MaxLogSize) {
	$LogSize = -s $LogFile;
	exit if ($MaxLogSize < $LogSize);
	}

# [optional:] this was used a few builds back, but doesn't seem necessary with new
# cache-control headers.  It should be un-commented if you see a lot of double
# requests for files from the same people at about the same time:

exit if (($NoLogHead) && ($ENV{'REQUEST_METHOD'} eq 'HEAD'));


# This code converts un-resolved hostnames to their text versions, then makes
# the names lowercase, and then aborts logging if this hostname is forbidden:

$RemoteHost = $ENV{'REMOTE_HOST'};
if ((!$RemoteHost) || ($RemoteHost =~ m!^\d+\.\d+\.\d+\.\d+$!)) {
	if ($ENV{'REMOTE_ADDR'} =~ m!^(\d+)\.(\d+)\.(\d+)\.(\d+)$!) {
		$RemoteHost = (gethostbyaddr(pack('C4',$1,$2,$3,$4),2))[0] || $ENV{'REMOTE_ADDR'};
		}
	}
$RemoteHost = lc($RemoteHost);

foreach (@IgnoreHosts) {
	next unless $_;
	exit if ($RemoteHost =~ m!$_!);
	exit if ($ENV{'REMOTE_ADDR'} =~ m!$_!);
	}

# Note: you can filter on other things as well.  If you want to ignore people
# arriving from a certain site, like Yahoo, you can write the following (note
# that HTTP_REFERER is used instead of REMOTE_HOST):
#
#	@ignore = ('yahoo.com', 'av.yahoo.com');
#	foreach (@ignore) {
#		exit if ($ENV{'HTTP_REFERER'} =~ m!$_!);
#		}


# END CUSTOM SECTION.


($clean_url, $host, $port, $path, $is_valid) = &parse_url($From);
if ($is_valid) {
	$From = $clean_url;
	}

($clean_url, $host, $port, $path, $is_valid) = &parse_url($To);
if ($is_valid) {
	$To = $clean_url;
	}

# Apply the mappings:
foreach (keys %Maps) {
	$To =~ s!$_!$Maps{$_}!ig;
	$From =~ s!$_!$Maps{$_}!ig;
	}


$logline = '|';
foreach ($RemoteHost, $ENV{'REMOTE_ADDR'}, $From, $To, $ENV{'HTTP_USER_AGENT'}) {
	# strip the pipe delimiter, or \r\n record separators, from the data:
	s/\||\r|\n//g;
	s!\015!!g;
	s!\012!!g;
	$logline .= $_.'|';
	}
foreach ((localtime(time + (3600*$TimeOffsetInHours)))[0..7]) {
	$logline .= $_.'|';
	}

$logline .= 'export|' if $Export;
$logline .= "\n";

# If we're watching filesize...
if ($MaxLogSize) {
	# then exit if this record is larger than the remaining allowed
	# space:
	exit if (($MaxLogSize - $LogSize) < length($logline));
	}

# Make sure the record is strictly valid before writing to the log:
exit unless ($logline =~ m!^\|([^\|]+)\|([^\|]+)\|([^\|]*)\|([^\|]*)\|([^\|]*)\|\d+\|\d+\|\d+\|\d+\|\d+\|\d+\|\d+\|\d+\|(export\|)?$!);



if (open(LOG,">>$LogFile")) {
	binmode(LOG);
	print LOG $logline;
	close(LOG);
	}

sub Print_Image {
	print "HTTP/1.0 200 OK\r\n" if ($IIS);
	print "Pragma: no-cache\r\n";
	print "Expires: Saturday, February 15, 1997 10:10:10 GMT\r\n";
	if ($TransURL) {
		print "Location: $TransURL\r\n\r\n";
		}
	else {
		print "Content-Type: image/gif\r\n\r\n";
		binmode(STDOUT);
		foreach (71,73,70,56,57,97,1,0,1,0,128,255,0,192,192,192,0,0,0,33,249,4,1,0,0,0,0,44,0,0,0,0,1,0,1,0,0,1,1,50,0,59) {
			print pack('C',$_);
			}
		}
	}

# ___________________________________________________________________________

# This runs a filesystem test against $LogFile and dumps a ton of (hopefully)
# useful information to the screen:

sub SpawnDebugger {
print "HTTP/1.0 200 OK\n" if ($IIS);
print <<"EOM";
Content-Type: text/html

<HTML>
<HEAD><TITLE>AXS Remote Debugger</TITLE></HEAD>
<BODY BGCOLOR="#ffffff" LINK="#aa0000" ALINK="#cccccc" VLINK="#aa0000">
<P>Once this script works to your satisfaction, edit this file
(at <TT>$0</TT>) and turn debugging off by setting <TT>\$AllowDebug = 0;</TT>.
Read the <A HREF="http://www.xav.com/scripts/axs/" TARGET="_blank">trouble-shooting
guide</A> if you need more help.</P>
<DL>
<DT><B><U>Standard Debugging Information:</U></B></DT>
<DD>
<P>This is AXS Logging Module version $VERSION in debug mode.<BR>
The file name of this script is <TT>$0</TT>.<BR>
This script is executing under Perl version $].<BR>
The critical file system variable is <TT>\$LogFile = "$LogFile";</TT>.
EOM
if ($MaxLogSize) {
	print "MaxLogSize has been initialized to $MaxLogSize bytes.";
	}
else {
	print 'MaxLogSize is not set.';
	}
print <<"EOM";
</P></DD>

<DT><B><U>Filesystem Test:</U></B></DT>
<DD>
EOM

TEST: {

if (-e $LogFile) {
	($LogSize,$LastModT) = (stat($LogFile))[7,9];
	$LastModT = scalar localtime($LastModT);
	print "<P>The log file, <TT>$LogFile</TT>, exists with size $LogSize bytes. It was last modified at $LastModT. ";
	if (open(FILE,">>$LogFile")) {
		binmode(FILE);
		close(FILE);
		print "The log file is writable. <FONT COLOR=\"#008811\"><B>The filesystem test passed!</B></FONT></P>";
		}
	else {
print <<"EOM";
However, the log file is not writable. The filesystem returned <TT>"$!"</TT>
when this script tried to write to it. You need to change the file
permissions to make it script writable. <FONT COLOR="#ff0000"><B>The filesystem test failed.</B></FONT></P>
EOM
		last TEST;
		}
	}
elsif (open(FILE,">>$LogFile")) {
	binmode(FILE);
	close(FILE);
print <<"EOM";
<P>The log file, <TT>$LogFile</TT>, did not exist when this script started.
However, this script attempted to create it for you, and the server
responded that this was successful. So everything <I>should</I> be fine now.
Reload this web page, and hopefully you'll see a message that the file system
test has passed. If it doesn't pass, and instead you get an error or you get
this message again, then you'll have to manually create the log file and
set it's permissions. <FONT COLOR="#ff0000"><B>The filesystem test needs to be run again.</B></FONT></P>
EOM
	last TEST;
	}
else {
print <<"EOM";
<P>The log file, <TT>$LogFile</TT>, doesn't exist. You need to create one and
give it writable permissions. Alternately, the log file may exist but the
<TT>\$LogFile</TT> variable might not point to the correct location, in which
case you will need to change your variable. <FONT COLOR="#ff0000"><B>The filesystem test
failed.</B></FONT></P>
EOM
	last TEST;
	}
print <<"EOM";
</DD>
<DT><B><U>Usage Instructions:</U></B></DT>
<DD>
<P>These instructions apply <I>only</I> if your file system test passed.</P>
<P>Code your off-site links like this:</P>
<PRE>\t&lt;A HREF="$ENV{'SCRIPT_NAME'}?http://yahoo.com/"&gt;http://yahoo.com/&lt;/A&gt;</PRE>
<P>If you are using server-side includes (SSI), cut and paste the following tag into all
of your pages. You might have to name the pages with a <TT>.shtml</TT> or <TT>.stm</TT> extension in
order for this to work:</P>
<PRE>\t&lt;!--#exec cgi="$ENV{'SCRIPT_NAME'}" --&gt;</PRE>
<P>Otherwise, if you are not using SSI, choose one of the two image redirect
methods. The first one, below, doesn't log the referring URL. It works on all browsers
that display images (and most of them do). To use it, cut and paste the following code
into all of your pages:</P>
<PRE>\t&lt;IMG SRC="$ENV{'SCRIPT_NAME'}?trans.gif" HEIGHT="1" WIDTH="1"&gt;</PRE>
<P>The other image redirect method, below, works on JavaScript- and image-enabled browsers.
It logs the referring URL. To use it, cut and paste the following code into all of your pages:</P>
<PRE>\t&lt;SCRIPT LANGUAGE="JavaScript"&gt;
\t&lt;!--
\t\tdocument.write("&lt;IMG SRC=\\"$ENV{'SCRIPT_NAME'}?trans.gif&ref=");
\t\tdocument.write(document.referrer);
\t\tdocument.write("\\" HEIGHT=1 WIDTH=1&gt;");
\t// -->
\t&lt;/SCRIPT&gt;</PRE>
<P>The debugger doesn't know whether SSI will work on this particular web server.
You need to experiment. Use SSI if you can, otherwise choose an image method.</P>
EOM
	} # End of block TEST
print '</DD><DT><B><U>Environment Variables:</U></B></DT><DD><PRE>';
foreach (sort keys %ENV) {
	print "$_: $ENV{$_}\n";
	}
print <<'FOOT';
</PRE></DD></DL><HR WIDTH="50%" SIZE="1"><H5 ALIGN="center">Visit <A
HREF="http://www.xav.com/scripts/axs/" TARGET="_blank">the AXS help page</A>
for more information.  AXS is copyright 1997-2000 by Fluid Dynamics.</H5><BR>
</BODY></HTML>
FOOT
} # End SpawnDebugger.



sub Trim {
	local $_ = $_[0] ? $_[0] : '';
	s!^[\r\n\s]+!!o;
	s![\r\n\s]+$!!o;
	return $_;
	}



sub clean_path {
	local $_ = $_[0] || '';

	# trim whitespace:
	$_ = Trim($_);

	# strip pound signs and all that follows (links internal to a page)
	s!\#.*$!!;

	# map "/./" to "/"
	s!/+\./+!/!g;

	# map trailing "/." to "/"
	s!/+\.$!/!g;

	# map "/folder/../" => "/"
	while (s!([^/]+)/+\.\./+!/!) {}

	# map /../foo => /foo
	while (s!^/+\.\./+!/!) {}

	s!^/+\.\.$!/!;

	# collapse back-to-back slashes:
	s!/+!/!g;

	return $_;
	}


sub parse_url {
	local $_ = $_[0] || '';
	my ($clean_url, $host, $port, $path, $is_valid) = ('', '', 80, '/', 0);

	# add trailing slash if none present
	$_ .= '/' if (m!^http://([^/]+)$!i);

	if (m!^http://([\w|\.|\-]+)\:?(\d*)/(.*)$!i) {
		($host, $port, $path, $is_valid) = ($1, $2, clean_path("/$3"), 1);
		$port = 80 unless $port;

		if ($port == 80) {
			$clean_url = "http://$host$path";
			}
		else {
			$clean_url = "http://$host:$port$path";
			}
		}
	return ($clean_url, $host, $port, $path, $is_valid);
	}


