#!/usr/bin/perl -w

#file name: mpsearch.cgi
##Last Updated November 10, 1997

##Search script by David Turley <dturley@pobox.com>

##built from original script written by Eric D. Belsley 
##for the Mac Resource Page, edb@macresource.com (3/10/97)

##Code adapted to work with my mailing list archive and
##new features added:
## user selects how far back in archives to search
## search all text, only subject lines, or author only
## time and number of items searched in feedback
## optional logging of search strings

#####################################################################################

#Mailing list archives are formatted with each month's letters in a single text file, 
#one month per file. All files in a single directory.
#Each item (letter) is separated by the string stored in $item_separator

#See the search form at www.binary.net/dturley/macperl/search.html
#for a sample interface to this script

#Some changes to code may be necessary to adapt this script to other formats

#####################################################################################

# main code block

## Three settings to adjust for your installation:
##   $basedir        - directory holding the monthly archive files
##   $html_file      - URL of the search form, used for the "try again" links
##   $item_separator - string that separates one letter from the next
$basedir        = '../macperl/archive';
$html_file      = 'http://www.binary.net/dturley/macperl/search.html';
$item_separator = '!!NEXT MESSAGE!!';

init();

# Only run the search when at least one search term was submitted.
if ($number_of_terms) {
    search_for_string();
}

print_results();

# Optional log: comment out the next two lines if no log is wanted.
$log_file = '/usr/www/docs/dturley/logs/search.txt';
log_it();

exit(0);

#################################################################################

##Read the form, get the archive file list and set the search parameters.
##
##Globals written here: %search_data, @string, $number_of_terms,
##@archive_list, $case, $boolean, $criteria, $limit and $where.  The three
##set_* helpers called at the end derive further globals from them.
sub init {

    parse_form_data(*search_data);    # load form data

    # split ' ' (unlike split /\s+/) discards leading whitespace, so a query
    # typed as " foo" does not yield an empty first term -- an empty term
    # would match every item in the archive.
    my $raw_terms = defined $search_data{'string'} ? $search_data{'string'} : '';
    @string          = split(' ', $raw_terms);    # load search strings
    $number_of_terms = @string;

    # Read the directory directly instead of parsing the output of `ls`:
    # no shell is involved and file names containing spaces stay intact.
    # If the archive directory cannot be entered the list stays empty, so
    # the search never falls back to whatever directory the script runs in.
    @archive_list = ();
    if (chdir($basedir)) {
        if (opendir(ARCHIVE_DIR, '.')) {
            # Skip dot files (as ls does) and anything that is not a plain
            # file.  NOTE(review): sort orders by byte value; confirm the
            # archive file names sort chronologically that way.
            @archive_list = sort grep { !/^\./ && -f $_ } readdir(ARCHIVE_DIR);
            closedir(ARCHIVE_DIR);
        }
        else {
            warn "mpsearch: cannot read directory $basedir: $!\n";
        }
    }
    else {
        warn "mpsearch: cannot chdir to $basedir: $!\n";
    }

    # Missing form fields become '' so later string comparisons do not
    # trip "uninitialized value" warnings under -w.
    #   case     - 'i' or ''
    #   boolean  - 'and' or 'or'
    #   criteria - 'beg', 'whole', or 'sub'
    #   limit    - how many months back to search
    #   where    - search entire message, author, or subject only
    ($case, $boolean, $criteria, $limit, $where) =
        map { defined $search_data{$_} ? $search_data{$_} : '' }
            ('case', 'boolean', 'criteria', 'limit', 'where');

    set_search();
    set_start();
    set_title_string();

}

##Build the search statement.
##
##Reads the globals $where, $criteria and $case and stores a one-line Perl
##substitution in $search.  search_for_string later runs that text through
##eval once per item, with $item holding the letter and $string holding the
##pattern; a successful substitution both signals a match and wraps the hit
##in <B>...</B>.
##
##  $where    'entire' searches the whole letter, 'author' only the From:
##            line, anything else only the Subject: line
##  $criteria 'beg' anchors the term at the start of a word, 'whole' at both
##            ends, anything else matches substrings
##  $case     'i' for a case-insensitive search
sub set_search {

    # $case arrives straight from the form and becomes part of code that is
    # eval'ed, so only the single flag we expect is ever let through.
    my $flag = (defined $case && $case eq 'i') ? 'i' : '';

    my $how   = defined $criteria ? $criteria : '';
    my $field = defined $where    ? $where    : '';

    # Word-boundary anchors placed before/after the captured term.
    my $lead = ($how eq 'beg' || $how eq 'whole') ? '\b' : '';
    my $tail = ($how eq 'whole')                  ? '\b' : '';

    if ($field eq 'entire') {
        # Highlight every occurrence anywhere in the letter.
        $search = '$item =~ s/' . $lead . '($string)' . $tail
                . '/<B>$1<\/B>/mg' . $flag . ';';
    }
    else {
        # Restrict the match to a single header line.
        my $header = ($field eq 'author') ? 'From:' : 'Subject:';
        $search = '$item =~ s/^(' . $header . '.*)' . $lead . '($string)' . $tail
                . '/$1<B>$2<\/B>/m' . $flag . ';';
    }

}

##Determine how far back the user wants to search.
##
##Reads $limit and @archive_list; sets $end to the index of the last archive
##file and $start to the index of the first file to search, so that
##@archive_list[$start..$end] covers the requested number of months (one
##file per month).  Any unrecognised $limit searches the whole archive.
##Add entries to %months_for to offer other ranges on the search form.
sub set_start {

    my %months_for = (
        'last month'     => 1,
        'last 3 months'  => 3,
        'last 6 months'  => 6,
        'last 9 months'  => 9,
        'last 12 months' => 12,
    );

    $end = @archive_list - 1;

    my $wanted = defined $limit ? $limit : '';

    if (exists $months_for{$wanted}) {
        # N months means the last N files: indexes $end-N+1 .. $end.
        $start = $end - $months_for{$wanted} + 1;

        # With fewer files than months requested, start at the first file.
        # A negative index would wrap round to the end of the list and make
        # the slice search some files twice.
        $start = 0 if $start < 0;
    }
    else {
        $start = 0;
    }

}

##Set the feedback page header.
##
##Stores in $title_string a readable summary of the search terms held in
##@string.  The wording depends on $number_of_terms: nothing entered, a
##single term, two terms joined by $boolean, or a comma-separated list with
##$boolean placed before the final term.
sub set_title_string {

    my $last = $#string;

    $title_string =
          $number_of_terms == 0 ? 'no string entered'
        : $number_of_terms == 1 ? $string[0]
        : $number_of_terms == 2 ? "$string[0] $boolean $string[1]"
        :   join(', ', @string[0 .. $last - 1]) . ", $boolean $string[$last]";
}

##Do the search.
##
##Walks @archive_list[$start..$end], loads each file with load_archive,
##splits it into letters on $item_separator and evals the statement in
##$search against every letter.  Matching letters are collected through
##load_match.
##
##Globals written: $count (matching items), $total (items searched),
##$begin_time and $total_time (CPU seconds from times), plus the loop
##variables $archive, $item and $string that $search, load_archive and
##load_match read.
sub search_for_string {

    # load_archive reads each file in one gulp; local restores the normal
    # line separator when this sub returns.
    local $/;

    $begin_time = (times)[0];
    $count = 0;    # number of matching items
    $total = 0;    # total items searched

    # Search terms are text typed by a visitor, not regular expressions.
    # Escaping them makes terms such as "c++" or "(foo" match literally
    # instead of being read as regex syntax or breaking the pattern.
    my @patterns = map { quotemeta($_) } @string;

    # 'or' is a single alternation of all terms; 'and' requires each term
    # to match in turn.  Either way every pattern in @patterns must hit.
    if (defined $boolean && $boolean eq 'or') {
        @patterns = (join('|', @patterns));
    }

    my $separator = quotemeta($item_separator);

    foreach $archive (@archive_list[$start..$end]) {

        # An out-of-range index in the slice yields undef; nothing to read.
        next unless defined $archive;

        # Start from empty so a file that fails to load cannot cause the
        # previous file's letters to be searched and counted again.
        $all = '';
        load_archive();

        @items = split(/$separator/, $all);
        $total += @items;

        ITEM: foreach $item (@items) {
            foreach $string (@patterns) {
                # The substitution in $search returns true only when it
                # found (and highlighted) the pattern in $item.
                next ITEM unless eval $search;
            }
            load_match();
        }
    }

    $total_time = (times)[0] - $begin_time;

}

##Load one archive file.
##
##Reads the whole file named by $archive into $all, with &, < and > turned
##into HTML entities because some posts contain markup that must be shown
##as text rather than rendered.  $all is left empty when the file cannot be
##opened or read.
sub load_archive {

    # Never leave the previous archive's text behind: the caller would
    # search and count those letters a second time.
    $all = '';

    return unless defined $archive;

    # Read the file in one gulp no matter what the caller did with $/.
    local $/;

    unless (open(ARCHIVE, "<$archive")) {
        warn "mpsearch: cannot open archive $archive: $!\n";
        return;
    }
    lock_file(\*ARCHIVE);
    my $text = <ARCHIVE>;
    close(ARCHIVE);

    return unless defined $text;

    # & has to go first, otherwise the entities produced by the next two
    # substitutions would be escaped a second time.
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;

    $all = $text;
}

##Load a matching letter into the results array.
##
##Stores the current $item in @match at position $count, then advances
##$count to the next free slot.
sub load_match {

    my $slot = $count;
    $match[$slot] = $item;
    return $count++;
}

##Feedback: send the complete results page.
##
##Emits, in order, the CGI header, the top of the page, the body and the
##closing tags.
sub print_results {

    print_header();
    print_top();
    print_body();
    return print_footer();
}

##Print the CGI response header: the content type followed by the blank
##line that ends the header section.
sub print_header {

    my $cgi_header = "Content-type: text/html\n\n";
    return print $cgi_header;

}

##Print the top of the results page: the HTML head, the opening BODY tag
##and a centred line reporting how many items were searched ($total) and
##how long that took ($total_time, shown with two decimals).
sub print_top {

    # When no search terms were entered the search never runs and both
    # counters are still undefined; report zeros rather than a blank count.
    my $searched = defined $total      ? $total      : 0;
    my $elapsed  = defined $total_time ? $total_time : 0;

    print "<HTML><HEAD><TITLE>Search Results</TITLE></HEAD>";
    print "<BODY BGCOLOR=\"#FFFFFF\"><CENTER>";
    print "<B>Search Results</B>&nbsp;&nbsp;";
    print "($searched items searched in ";
    printf "%.2f seconds)", $elapsed;
    print "</CENTER>";

}

##Print the body of the results page.
##
##With no search terms the "nothing entered" message is shown; with terms
##but no hits the "no match" message; otherwise a summary line followed by
##every letter in @match, each inside <PRE> and followed by a rule, with a
##link back to the search form both above and below the list.
sub print_body {

    # Nothing was typed into the form.
    unless ($number_of_terms) {
        return print_no_search_html();
    }

    # A search ran but nothing matched.
    unless ($count) {
        return print_no_match_html();
    }

    my $plural = ($count > 1) ? "s" : "";

    # Link back to the form; printed before and after the list of hits.
    my $again = "<CENTER><A HREF=\"$html_file\">Another Search</A>?<P>"
              . "Use your browser\'s back button to recover your previous search strings."
              . "</CENTER>";

    print "<CENTER>"
        . "<B>$count item" . $plural . " found containing $title_string</B><BR>"
        . "</CENTER><P>";

    print $again . "<HR>";

    $i = 0;
    while ($i < $count) {
        print "<PRE>" . $match[$i] . "</PRE>" . "<HR>";
        $i++;
    }

    print $again;

}

##Print the message shown when a search ran but no item matched, with a
##link back to the search form at $html_file.
sub print_no_match_html {

    my @lines = (
        "\t\t<CENTER>",
        "\t\t\tNo items matched your search criteria: ",
        "\t\t\t<A HREF=\"$html_file\">try again</A>?",
        "\t\t\t<P>",
        "\t\t\tUse your browser's back button to recover your previous search strings.",
        "\t\t</CENTER>",
    );

    # Every line, including the last, ends with a newline.
    print join("\n", @lines) . "\n";

}

##Print the message shown when the form was submitted without any search
##terms, with a link back to the search form at $html_file.
sub print_no_search_html {

    my @lines = (
        "\t\t<CENTER>",
        "\t\t\tNothing comes of nothing!",
        "\t\t\t<A HREF=\"$html_file\">try again</A>?",
        "\t\t\t<P>",
        "\t\t\tYou can also return to the search page via your browser's back button.",
        "\t\t</CENTER>",
    );

    # Every line, including the last, ends with a newline.
    print join("\n", @lines) . "\n";

}

##Print the closing BODY and HTML tags that end the results page.
sub print_footer {

    print "\t</BODY>\n" . "\t</HTML>\n";
}


##Append the search description in $title_string to the file named by
##$log_file, one search per line.  Dies if the log cannot be opened.
sub log_it {

    # $title_string is assembled from form fields.  Flatten control
    # characters so a newline submitted in a field cannot start a second,
    # forged entry in the log.
    my $entry = defined $title_string ? $title_string : '';
    $entry =~ s/[\x00-\x1f\x7f]+/ /g;

    open(LOG, ">>$log_file") || die "cannot append to $log_file: $!";
    lock_file(\*LOG);
    print LOG "$entry\n";

    # Buffered write errors only surface when the handle is closed.
    close(LOG) || warn "mpsearch: error closing $log_file: $!\n";
}
        

##A typical form parsing routine.
##
##Takes a typeglob (e.g. *search_data) and fills the hash of that name with
##the decoded fields of the current request.  The body is read from STDIN
##(CONTENT_LENGTH bytes) unless REQUEST_METHOD is GET or HEAD, in which
##case QUERY_STRING is used.  The raw, undecoded data is left in the global
##$query_string.
sub parse_form_data {

    local (*FORM_DATA) = @_;

    my $method = defined $ENV{'REQUEST_METHOD'} ? uc($ENV{'REQUEST_METHOD'}) : 'POST';

    if ($method eq 'GET' || $method eq 'HEAD') {
        $query_string = defined $ENV{'QUERY_STRING'} ? $ENV{'QUERY_STRING'} : '';
    }
    else {
        # Only read when the server supplied a usable byte count.
        my $length = defined $ENV{'CONTENT_LENGTH'} ? $ENV{'CONTENT_LENGTH'} : '';
        $query_string = '';
        read(STDIN, $query_string, $length) if $length =~ /^\d+$/;
    }

    foreach my $pair (split(/&/, $query_string)) {

        # Limit of 2: an '=' inside the value must stay part of the value.
        my ($key, $value) = split(/=/, $pair, 2);
        next unless defined $key;                 # empty pair, e.g. "a=1&&b=2"
        $value = '' unless defined $value;        # "flag" sent without "="

        # Undo URL encoding on both halves.  'C' is an unsigned byte, so
        # escapes above %7F decode without wrap-around.
        foreach ($key, $value) {
            tr/+/ /;
            s/%([\dA-Fa-f][\dA-Fa-f])/pack('C', hex($1))/eg;
        }

        $value =~ s/[;><&\*`\|]//g;    #removes dangerous characters
        $FORM_DATA{$key} = $value;
    }
}

##Lock the filehandle passed as the first argument with flock and return
##flock's result.  The lock mode is kept in the global $LOCK; 2 is the
##traditional numeric value of LOCK_EX (exclusive lock).
sub lock_file {
    my ($handle) = @_;

    $LOCK = 2;
    return flock($handle, $LOCK);
}

