ncsa-httpd/support/WebReport
2013-03-13 02:28:49 -04:00

856 lines
26 KiB
Perl

#!/usr/local/bin/perl
# Written by: Eric Katz
# email: ekatz@ncsa.uiuc.edu
#########################################################################
# Permission is granted to anyone to use this software for any purpose on
# any computer, and to alter it for your personal use. Please do not
# distribute any alterations.
#
# 0. This software is provided "as is" and without any express or implied
# warranties, including, without limitation, the implied warranties of
# merchantability and fitness for a particular purpose.
#
# 1. The author is not responsible for the consequences of the use of this
# software, no matter how deleterious, even if they arise from flaws in it.
#
# 2. The origin of this software must not be misrepresented, either by
# explicit claim or by omission.
#
# 3. This software may not be sold.
#
# 4. This notice may not be removed or altered.
#
#########################################################################
# To install - Please remove everything above the #! line.
# Change the perl path to reflect the location of your perl installation.
# Then answer the following questions.
#
# Where is your web log?  (May be overridden at run time with -l.)
$WebLOG = "/web1/http/logs/access_log";
# Where is your HTML Report Menu Document?  With -M this file is created
# if it does not exist and is rewritten to add links to the new reports;
# the server root is stripped from this name to build its hyperlink.
$MenuDoc = "/General/WebReports.html";
# What is the name of your Web Server?  Used to build the http:// links
# in the reports and stripped from the front of request paths in the log.
$WebServer = "www.ncsa.uiuc.edu";
# Where is your httpd.conf file?  It is read to find the document root.
$HTTPD_CONF = "/web1/http/conf/httpd.conf";
# In which directory do you want the output files placed?  (May be
# overridden at run time with -o.)
$Output_Path = "/tmp";
#$Output_Path = "WebLogs/Reports";
######################################################################
# DO NOT CHANGE ANYTHING BELOW THIS LINE FOR DISTRIBUTION.
# You may make changes for your personal use but DO NOT DISTRIBUTE YOUR
# CHANGES.
#
# REVISION HISTORY
# V1.0 Released 10/2/93
# V1.1 Release 09/29/93
# Hyperlinks now relative to the Root of the Web Server.
# Thanks to Mark Scott.
# V1.2 Added -V: Verbose output of progress through log during run.
######################################################################
# USAGE - print the command-line help on standard output and exit.
# Takes no arguments and never returns to the caller.
#
# The text is kept in step with the option parser: -V is listed because
# Getopts accepts it, the first example carries a report flag (without
# one the script only prints this help) and gives its hours with the
# trailing ':' that marks a key as a time rather than a date.
sub USAGE {
    print "USAGE: WebReport [-adtfhe] [-D] [-H] [-M] [-s] [-V] [-vN] [-l logname]
[-o output directory] [keys]
a All
d Daily
t Time
f Files
h Host
e Errors
s Summary
l Name of the log file
o Location of the output file
H Print in html format
M Update the html Menu Document
V Show progress while the log is processed
vN Verbosity
D Current Day only\n";
    print " keys To limit the report to specific days, times, files, and/or
hosts. Any combination of keys may be specified. See
examples below.
Try to use the following conventions for the best results:
Search for a date: 'nn' ie: 10
Search for a time: 'nn:' ie: 10:
Search for a Day: 'Xxx' ie: Sun or Sunday
Search for a file: 'xxx' ie: software
or 'xx/' you may need a leading
or '/xx' or trailing '/'
Search for a domain: '.xxx' ie: .edu or ncsa\n";
    print " WebReport will try to 'do the right thing' but it is safer
to limit the type of search by using one of the -d, -t,
-f, or -h flags.
EXAMPLES:
WebReport -f Thu Friday 10: 11: 14: 15: Software
Will report on the number of people who
accessed the Software files on Thursday
and Friday during the 10am, 11am, 2pm,
and 3pm hours.
WebReport -f edu
Will report on file accesses that have
the edu substring in them.
WebReport -h edu
Will report on hosts with edu in the name.
WebReport -fh edu
Will report on both hosts and files with
edu in them.\n";
    print " WebReport -HMeshfl /var/log/access_log
Will generate an error report, a summary
report, a host access report, and a file
access report -- all in html format, and
will automatically update an html Menu
document with links to these reports.\n\n";
    exit;
}
require 'getopts.pl';
require 'ctime.pl';
# Only the third field of the current date (the day of the month) is
# needed for now, whether or not this is a BSD system, but the full
# split is kept in case more of it is wanted later.
# The presence of /vmunix is taken as the sign of a BSD system.
$BSD = (-e "/vmunix") ? "true" : "false";
# Get the current time and break it into its fields.  The non-BSD
# layout is expected to carry a time zone between the time and the year.
$DATE = &ctime(time);
if ($BSD ne "true") {
    ($day, $month, $date, $time, $TZ, $year) = split (" ",$DATE);
}
else {
    ($day, $month, $date, $time, $year) = split (" ",$DATE);
}
# Parse the command-line switches, then sort the remaining arguments
# (the search keys) into one 'or' pattern per category:
#   $SearchDay    three-letter weekday names
#   $SearchTime   hours, given as 'nn:'
#   $SearchHost   keys containing a '.'
#   $SearchDate   day-of-month numbers (wrapped in \b ... \b)
#   $SearchOthers anything else (matched against both path and host)
# For every category without a key the matching $NO_..._KEY flag is set.
&Getopts('adDtfhesHMVl:o:v:');
# Print Usage if no report type was requested on the command line.
&USAGE unless ($opt_d || $opt_t || $opt_f || $opt_e || $opt_h || $opt_s || $opt_a || $opt_D);
# If an alternate logfile has been requested
$WebLOG = $opt_l if (defined $opt_l);
# If an alternate output directory has been requested
$Output_Path = $opt_o if (defined $opt_o);
# If you want a printout for today only
if ($opt_D) {
    $SearchDate = "\\b" . $date . "\\b" . "|";
}
# If there are no search key limiters.
if (($#ARGV < 0) && !($opt_D)) {
    $NO_KEY = 1;
    $NO_DAY_KEY = 1;
    $NO_TIME_KEY = 1;
    $NO_HOST_KEY = 1;
    $NO_OTHERS_KEY = 1;
}
# Else create the 'or' pattern matching strings
else {
    foreach $key (@ARGV) {
        if ($key =~ /(Mon|Tue|Wed|Thu|Fri|Sat|Sun)/) {
            # Keep only the three-letter form: the day taken from the log
            # is three letters long, so 'Friday' as typed could never match.
            $SearchDay = $SearchDay . $1 . "|";
        }
        elsif ($key =~ /(\d+):/) {
            # Keep the hour, drop the ':' marker.
            $SearchTime = $SearchTime . $1 . "|";
        }
        elsif ($key =~ /\./) {
            # The key is used whole; nothing needs trimming from it.
            $SearchHost = $SearchHost . $key . "|";
        }
        elsif ($key =~ /\d+/) {
            die "This does not make sense with the -D option\n" if ($opt_D);
            $SearchDate = $SearchDate . "\\b" . $key . "\\b" . "|";
        }
        else {
            $SearchOthers = $SearchOthers . $key . "|";
        }
    }
}
# Each pattern built above ends in a surplus '|'; remove it, or note that
# the category has no key at all.
if (defined $SearchDay) {
    chop ($SearchDay);
}
else {
    $NO_DAY_KEY = 1;
}
if (defined $SearchDate) {
    chop ($SearchDate);
}
else {
    $NO_DATE_KEY = 1;
}
if (defined $SearchTime) {
    chop ($SearchTime);
}
else {
    $NO_TIME_KEY = 1;
}
if (defined $SearchHost) {
    chop ($SearchHost);
    # We need to escape possible wildcard characters in this string, then
    # unescape the 'or' character
    $SearchHost =~ s/\W/\\$&/g;
    $SearchHost =~ s/\\\|/\|/g;
}
else {
    $NO_HOST_KEY = 1;
}
if (defined $SearchOthers) {
    chop ($SearchOthers);
    # We need to escape possible wildcard characters in this string, then
    # unescape the 'or' character
    $SearchOthers =~ s/\W/\\$&/g;
    $SearchOthers =~ s/\\\|/\|/g;
}
else {
    $NO_OTHERS_KEY = 1;
}
# Get a snapshot of the existing files in the Web Subtree.
#
# The document root is read from $HTTPD_CONF (either a line starting
# with '/:' or a DocumentRoot directive), then 'ls -sLR' is run on it
# and %SIZE is filled with one entry per file (its size as reported by
# ls -s) and one entry of 0 per directory, keyed by the path relative to
# the document root.  A request whose path is not in %SIZE is later
# counted as an error.
open (WEBROOT,"$HTTPD_CONF") || die "Couldn't open $HTTPD_CONF\n";
while (<WEBROOT>) {
    if ($_ =~ /^\/:/) {
        chop;
        ($root,$root_alias) = split (/:/,$_);
    }
    # Anchored so that commented-out directives are ignored, and tolerant
    # of any amount of blank space before the value.
    elsif ($_ =~ /^\s*DocumentRoot\s+(\S+)/) {
        ($root,$root_alias) = ("DocumentRoot",$1);
    }
}
close WEBROOT;
# Without a root the listing below would silently describe the wrong tree.
die "Couldn't find the document root in $HTTPD_CONF\n" if (!defined ($root_alias) || $root_alias eq "");
if ($opt_V) {
    print "Name of Server: $WebServer
Root of Server: $root_alias
Location of Log: $WebLOG
Output Directory: $Output_Path
";
    print "Checking existence of files\n";
}
# The root is stripped from directory names as a literal prefix, so
# escape anything in it that would be special in a pattern.
($root_pattern = $root_alias) =~ s/\W/\\$&/g;
# Unless running verbosely, hide the complaints of ls itself.  This is
# done in the command rather than by closing STDERR, which would also
# swallow this script's own later diagnostics.
$ls_command = "ls -sLR $root_alias";
$ls_command = $ls_command . " 2>/dev/null" if (!($opt_V));
open (LS,"$ls_command |") || die "Couldn't get a listing of existing files\n";
$counter = 0;
$subdir = "";
$listed = 0;
while (<LS>) {
    next if (/^\n/ || /^total/);
    $listed = 1;
    # Progress: one dot per entry, with a running count every 70 entries.
    if ($opt_V) {
        $counter ++;
        if ($counter % 70 != 0) {
            print STDERR ".";
        }
        else {
            print STDERR " $counter\n";
            print STDERR ".";
        }
    }
    # A line starting with '/' is a directory heading ("/full/path:").
    if ($_ =~ /^\//) {
        $_ =~ s/^$root_pattern//;
        # Remove the newline and the trailing ':'.
        chop;
        chop;
        $subdir = $_;
        # Register the directory itself as soon as it is seen, so that
        # the last directory of the listing is registered too.
        $path = $subdir . "/";
        $SIZE{$path} = 0;
        next;
    }
    ($size,$filename) = split (' ',$_);
    $path = $subdir . "/" . $filename;
    $SIZE{$path} = $size;
}
close LS;
# The root directory always exists when anything at all was listed, even
# if ls printed no heading line for it.
$SIZE{"/"} = 0 if ($listed);
print "\nDone Checking existence of files.\n$counter files exist within your Web Directory Structure.\n" if ($opt_V);
open (LOG,$WebLOG) || die "Couldn't open log file\n";
# Process the information in the log.
# Set the array information and determine the maximum value for
# each array. The maximum value will be used to set the scale for the
# the graph.
# For every record that passes the search keys, increment the total
# number of connections, the number of connections for that weekday,
# for that hour of the day, and for that host.  Records whose path was
# not seen in the file snapshot (%SIZE) are counted as errors only.
# Increment the record number for every record that is fully counted.
$RECORD_NUMBER = 1;
$FileMax=1;
# Literal pattern for stripping the server name from the front of a path.
($server_pattern = $WebServer) =~ s/\W/\\$&/g;
if ($opt_V) {
    print "\n\nProcessing log ...
0 10 20 30 40 50 60 70 80 90 100
| | | | | | | | | | |
";
    open (WC,"wc -l $WebLOG |");
    while (<WC>) {
        $LINES = $_;
    }
    close WC;
    # One star is printed for every $progress_step lines, 78 at the most.
    $progress_step = int ($LINES / 78);
    $lines_read = 0;
    $stars_printed = 0;
}
while (<LOG>) {
    # The progress display keeps counters of its own and never ends the
    # loop, so a verbose run counts exactly the same records as a quiet one.
    if ($opt_V) {
        $lines_read ++;
        if ($progress_step > 0 && $stars_printed < 78 && $lines_read % $progress_step == 0) {
            print STDERR "*";
            $stars_printed ++;
        }
    }
    ($host,$day,$mon,$date,$time,$year,$cmd,$path) = split (/\s+/,$_);
    ($hour,$minutes,$seconds) = split (/:/,$time);
    # remove leading [
    $day = substr($day,1,3);
    # remove trailing ]
    chop $year;
    # Try to correct for extraneous characters in 'GET'. Remove direct
    # references to the server and strip out double //'s
    $path =~ s/^$server_pattern//;
    $path =~ s/\/\//\//g;
    if ($path =~ /\/$/) {
        $file_type = "directory";
    }
    else {
        $file_type = "file";
    }
    # $RECORD_NUMBER stays at 1 until a record has been fully counted, so
    # the start date is refreshed on every line up to that point.
    if ($RECORD_NUMBER eq 1) {
        $start_date = "$mon $date, $year @ $time";
        $menu_start_date = "$mon $date";
    }
    # A record is wanted when there are no keys at all, or when it passes
    # every category of key that was given.  A key of the 'others' kind
    # may match either the path or the host.
    $wanted = ($NO_KEY)
        || (($NO_DAY_KEY || ($day =~ /$SearchDay/))
         && ($NO_DATE_KEY || ($date =~ /$SearchDate/))
         && ($NO_TIME_KEY || ($hour =~ /$SearchTime/))
         && ($NO_HOST_KEY || ($host =~ /$SearchHost/))
         && ($NO_OTHERS_KEY || ($path =~ /$SearchOthers/) || ($host =~ /$SearchOthers/)));
    next unless ($wanted);
    $connections ++;
    $Connect{$day} ++;
    $DayMax = $Connect{$day} if ($Connect{$day} > $DayMax);
    $Connect{$hour} ++;
    $HourMax = $Connect{$hour} if ($Connect{$hour} > $HourMax);
    $Machines{$host} ++;
    $HostMax = $Machines{$host} if ($Machines{$host} > $HostMax);
    # If this file wasn't seen when trying to get sizes for files then it
    # doesn't exist and is therefore an error.
    if (!(defined ($SIZE{$path}))) {
        $EXIST_ERRORS{$path} ++;
        $EXIST_ERRORS ++;
        next;
    }
    $KBYTES{$day} = $KBYTES{$day} + $SIZE{$path};
    $SizeDayMax = $KBYTES{$day} if ($KBYTES{$day} > $SizeDayMax);
    $KBYTES{$hour} = $KBYTES{$hour} + $SIZE{$path};
    $SizeHourMax = $KBYTES{$hour} if ($KBYTES{$hour} > $SizeHourMax);
    # Get subtotals for each directory in the path name of the file
    $ParentDir = '';
    @Tree = split (/\//,$path);
    for ($i=1;$i < $#Tree;$i++) {
        next if $Tree[$i] !~ /[a-zA-Z0-9\-]+/;
        $ParentDir = $ParentDir . "/" . $Tree[$i];
        $Directory{$ParentDir} ++;
        $KBYTES{$ParentDir} = $KBYTES{$ParentDir} + $SIZE{$path};
    }
    if ($file_type eq "file") {
        $TOTAL_KBYTES = $TOTAL_KBYTES + $SIZE{$path};
        $KBYTES{$path} = $KBYTES{$path} + $SIZE{$path};
        $FILE_ACCESSES ++;
        $Directory{$path} ++;
        #$FileMax = $Directory{$path} if ($Directory{$path} > $FileMax);
    }
    else {
        $DIRECTORY_ACCESSES ++;
        # Directories are recorded without their trailing '/'.
        chop $path;
        $Directory{$path} ++;
        #$FileMax = $Directory{$path} if ($Directory{$path} > $FileMax);
    }
    $RECORD_NUMBER ++;
}
close LOG;
print "\n" if ($opt_V);
# The date fields still hold the values of the last log entry that was
# split, which gives the end date of the report.
$end_date = $mon . " " . $date . ", " . $year . " @ " . $time;
$menu_end_date = $mon . " " . $date;
# Every output file name starts with this prefix.
$OutFile_Prefix = $Output_Path . "/" . $date . $mon . $year;
# Hyperlink for the report files: the prefix with the server root removed.
($HyperLink_tmp = $OutFile_Prefix) =~ s/$root_alias//;
$HyperLink_Prefix = "http://" . $WebServer . $HyperLink_tmp;
# Hyperlink for the Menu Document, built the same way.
($MenuLink = $MenuDoc) =~ s/$root_alias//;
$MenuDocLink = "http://" . $WebServer . $MenuLink;
# -vN gives either the number of subdirectory levels to report or the
# top N percent of host connections, depending on the report; without
# it the limit is 100.
$verbosity_limit = (defined $opt_v) ? $opt_v : 100;
# Write each requested report to its own file, named from the common
# prefix plus a suffix for the kind of report.
#
# The DO_DAILY, DO_HOURLY, DO_FILES and DO_HOSTS subroutines only ever
# append to $OutFile.  The summary and the full report start their file
# with PRINT_HEADING, which creates it afresh; the single-section
# reports have no heading, so their file is emptied here first.
# Otherwise running the same report twice would leave two copies of it
# in the file.
if ($opt_s) {
    $OutFile = $OutFile_Prefix . ".sum";
    print STDERR "Writing: $OutFile\n";
    &PRINT_HEADING;
    open (OUTPUT,">> $OutFile") || die "Can't create $OutFile\n";
    if ($opt_H) {
        print OUTPUT "
<TITLE>Access Report Summary</TITLE>
<H1>
Access Report Summary
</H1>
</pre>";
    }
    close OUTPUT;
    &DO_DAILY;
    &DO_HOURLY;
    &DO_FILES("summary");
}
if ($opt_e) {
    $OutFile = $OutFile_Prefix . ".error";
    print STDERR "Writing: $OutFile\n";
    &DO_ERRORS;
}
if ($opt_f) {
    $OutFile = $OutFile_Prefix . ".files";
    print STDERR "Writing: $OutFile\n";
    open (OUTPUT,"> $OutFile") || die "Can't create $OutFile\n";
    close OUTPUT;
    &DO_FILES("no_summary");
}
if ($opt_h) {
    $OutFile = $OutFile_Prefix . ".hosts";
    print STDERR "Writing: $OutFile\n";
    open (OUTPUT,"> $OutFile") || die "Can't create $OutFile\n";
    close OUTPUT;
    &DO_HOSTS;
}
if ($opt_d) {
    $OutFile = $OutFile_Prefix . ".daily";
    print STDERR "Writing: $OutFile\n";
    open (OUTPUT,"> $OutFile") || die "Can't create $OutFile\n";
    close OUTPUT;
    &DO_DAILY;
}
if ($opt_t) {
    $OutFile = $OutFile_Prefix . ".hourly";
    print STDERR "Writing: $OutFile\n";
    open (OUTPUT,"> $OutFile") || die "Can't create $OutFile\n";
    close OUTPUT;
    &DO_HOURLY;
}
if ($opt_a) {
    $OutFile = $OutFile_Prefix . ".long";
    print STDERR "Writing: $OutFile\n";
    &PRINT_HEADING;
    &DO_DAILY;
    &DO_HOURLY;
    &DO_HOSTS;
    &DO_FILES;
}
# With -M, add an entry for this run to the HTML Menu Document.
#
# The document is created first if it does not exist.  It is then
# copied line by line to a temporary file, with the new entry inserted
# after every line containing <UL>, and the temporary file is renamed
# over the original.  The entry links to each report requested in this
# run.
if ($opt_M) {
    if (!(-e "$MenuDoc")) {
        open (MENU_WRITE,"> $MenuDoc")|| die "Can't create Menu Document\n";
        # A complete skeleton: the entries are inserted after the <UL>
        # line, so the closing tags written here stay below them.
        print MENU_WRITE "<HEAD>
<TITLE>W3 Server Report Menu</TITLE>
</HEAD>
<BODY>
<P>
<H1>
World Wide Web server Activity Reports for:</H1>
<UL>
</UL>
</BODY>\n";
        close MENU_WRITE;
    }
    # Collect one link per requested report; joining them afterwards
    # avoids a dangling separator after the last one.
    @menu_links = ();
    push (@menu_links,"<A HREF=\"$HyperLink_Prefix.sum\">Summary</A>") if ($opt_s);
    push (@menu_links,"<A HREF=\"$HyperLink_Prefix.files\">File Accesses</A>") if ($opt_f);
    push (@menu_links,"<A HREF=\"$HyperLink_Prefix.hosts\">Host Accesses</A>") if ($opt_h);
    push (@menu_links,"<A HREF=\"$HyperLink_Prefix.error\">Errors</A>") if ($opt_e);
    push (@menu_links,"<A HREF=\"$HyperLink_Prefix.daily\">Daily Accesses</A>") if ($opt_d);
    push (@menu_links,"<A HREF=\"$HyperLink_Prefix.hourly\">Hourly Accesses</A>") if ($opt_t);
    push (@menu_links,"<A HREF=\"$HyperLink_Prefix.long\">Full Report</A>") if ($opt_a);
    $menu_entry = " <LI>$menu_start_date to $menu_end_date: " . join ("/",@menu_links) . "\n";
    $MenuDocTmp = $MenuDoc . ".tmp";
    open (MENU_READ,"$MenuDoc") || die "Can't read Menu Document\n";
    open (MENU_WRITE,"> $MenuDocTmp") || die "Can't create temporary Menu Document\n";
    while (<MENU_READ>) {
        if (/\<UL\>/) {
            print MENU_WRITE "$_\n";
            print MENU_WRITE $menu_entry;
        }
        else {
            print MENU_WRITE $_;
        }
    }
    close MENU_READ;
    close MENU_WRITE;
    # If the rename fails the menu has not been updated; say so instead of
    # carrying on as if it had.
    rename ("$MenuDocTmp","$MenuDoc") || die "Can't replace Menu Document\n";
    chmod (0644,$MenuDoc);
}
# Sort order for the error report: most attempts first.
sub by_attempts {
    $EXIST_ERRORS{$b} <=> $EXIST_ERRORS{$a};
}
# DO_ERRORS - write the missing-file report to $OutFile, replacing any
# previous contents.
#
# Lists every path recorded in %EXIST_ERRORS with the number of times it
# was requested, most requested first.  With -H the report is wrapped in
# <pre> and given an HTML title and links to the other reports.
# Takes no arguments; dies if the output file cannot be created.
sub DO_ERRORS {
    open (OUTPUT,"> $OutFile") || die "Can't create $OutFile\n";
    if ($opt_H) {
        print OUTPUT " <pre>
<TITLE>Missing File Report</TITLE>
<H1>Missing File Report</H1>
See Also: <A HREF=\"$MenuDocLink\">Report Menu</A> / <A HREF=\"$HyperLink_Prefix.sum\">Summary</A> / <A HREF=\"$HyperLink_Prefix.files\">File Accesses</A> / <A HREF=\"$HyperLink_Prefix.hosts\">Host Accesses</A>;
";
    }
    print OUTPUT "==================================================================\n";
    print OUTPUT " Web Error History\n";
    print OUTPUT " Beginning $start_date\n";
    print OUTPUT " Ending $end_date\n";
    print OUTPUT "==================================================================\n";
    print OUTPUT "\n";
    # Show 0 rather than nothing when no error was counted.
    $error_total = $EXIST_ERRORS + 0;
    print OUTPUT "NON-EXISTING FILES: $error_total Occurences\n";
    # Same column widths as the rows below, so the heading lines up.
    printf OUTPUT ("%-60s%9s\n","Filename","Attempts");
    print OUTPUT "_" x 70;
    print OUTPUT "\n\n";
    # Sort by number of attempted accesses.
    @ERRORS = keys(%EXIST_ERRORS);
    @sortedERRORS = sort by_attempts @ERRORS;
    foreach $missing_file (@sortedERRORS) {
        if ($missing_file !~ /^\//) {
            # Not an absolute path: there is nothing to split or wrap,
            # print the name as it was requested.
            $print_heading = $missing_file;
        }
        else {
            $print_heading = "/";
            # Split long pathnames on the / to wrap onto the next line of
            # the output.
            # Create an array consisting of the names in the full path
            # without the '/'
            @heading_format = split (/\//,$missing_file);
            # Iterate through the directory names of the path.  Once the
            # text collected so far is longer than 50 characters, print it
            # and continue on the next line.
            for ($i=1;$i < $#heading_format;$i++) {
                $print_heading = $print_heading . $heading_format[$i] . "/";
                $StringLength = length($print_heading);
                if ($StringLength > 50){
                    # $print_heading already carries its own '/'s.
                    printf OUTPUT ("%-50s\n","$print_heading");
                    $print_heading = " ";
                }
            }
            # The last name of the path shares a line with the count.
            $print_heading = $print_heading . $heading_format[$i];
        }
        printf OUTPUT ("%-60s%9s\n","$print_heading",$EXIST_ERRORS{$missing_file});
    }
    printf OUTPUT "</pre>\n" if ($opt_H);
    close OUTPUT;
}
# DO_DAILY - append the per-weekday section to $OutFile.
#
# For each weekday selected by the day keys (all seven when there is no
# day key) prints the number of connections and the size total from
# %KBYTES, each followed by a bar scaled so that the busiest day is 50
# characters long.  With -H the section gets an HTML heading and links
# to the other reports.
# Takes no arguments; dies if the output file cannot be opened.
sub DO_DAILY {
    # Set an array to convert 3 letter weekday abbreviations to full names.
    @weekdays = ('Sunday','Monday','Tuesday','Wednesday','Thursday','Friday','Saturday');
    open (OUTPUT,">> $OutFile") || die "Can't create $OutFile\n";
    $SubTotal=0;
    # Print the connections/weekday data
    print OUTPUT "\n";
    print OUTPUT "<H1>\n" if ($opt_H);
    print OUTPUT "Connections & Byte Count Per Week Day\n";
    print OUTPUT "</H1>\n" if ($opt_H);
    if ($opt_H) {
        print OUTPUT "<pre>
See Also: <A HREF=\"$MenuDocLink\">Report Menu</A>/<A HREF=\"$HyperLink_Prefix.files\">File Access Report</A>/<A HREF=\"$HyperLink_Prefix.hosts\">Host Accesses</A>/<A HREF=\"$HyperLink_Prefix.error\">Errors</A>
";
    }
    printf OUTPUT ("%-12s%-30s\n%s\n"," Day","Connections","---------------------------------------------------------------------");
    # Print the output for each record.
    foreach $heading (@weekdays) {
        # Convert the heading for each day (long form) into the short form for
        # accessing the array information.
        $index = substr($heading,0,3);
        # Calculate the number of dots to print for connections.
        if ($DayMax > 0 ){
            $Access_graph = '.' x ($Connect{$index} / $DayMax * 50);
        }
        else {
            # Clear the variable that is actually printed below, so no bar
            # left over from an earlier section can show up here.
            $Access_graph = "";
        }
        # Calculate the number of stars to print for file size.
        if ($SizeDayMax > 0){
            $Size_graph = '*' x ($KBYTES{$index} / $SizeDayMax * 50);
        }
        else {
            $Size_graph = "";
        }
        # Print them.
        if ($NO_DAY_KEY || ($index =~ /$SearchDay/)) {
            printf OUTPUT ("%-15s%9s %s\n%-15s%9s %s\n",$heading,$Connect{$index},$Access_graph," Kbytes",$KBYTES{$index},$Size_graph);
            $SubTotal = $SubTotal + $Connect{$index};
        }
    }
    print OUTPUT "\n";
    print OUTPUT "Total for this Section: $SubTotal\n\n" if !($NO_KEY);
    print OUTPUT "</pre>" if ($opt_H);
    close OUTPUT;
}
# DO_HOURLY - append the per-hour section to $OutFile.
#
# For each hour selected by the time keys (all 24 when there is no time
# key) prints the number of connections and the size total from %KBYTES,
# each followed by a bar scaled against the busiest hour.  Hour 00 is
# labelled Midnight, hour 12 Noon, the others by number with AM or PM.
# Takes no arguments; dies if the output file cannot be opened.
sub DO_HOURLY {
    open (OUTPUT,">> $OutFile") || die "Can't create $OutFile\n";
    $SubTotal=0;
    # The hours in report order.  The first ten are quoted to preserve
    # the leading 0 under which they were counted.
    @Hours = ('00','01','02','03','04','05','06','07','08','09',10 .. 23);
    print OUTPUT "\n";
    if ($opt_H) {
        print OUTPUT "<H1>\n";
        print OUTPUT "Connections & Byte Count Per Hour\n";
        print OUTPUT "</H1>\n";
        print OUTPUT "<pre>
See Also: <A HREF=\"$MenuDocLink\">Report Menu</A>/<A HREF=\"$HyperLink_Prefix.files\">File Access Report</A>/<A HREF=\"$HyperLink_Prefix.hosts\">Host Accesses</A>/<A HREF=\"$HyperLink_Prefix.error\">Errors</A>
";
    }
    else {
        print OUTPUT "Connections & Byte Count Per Hour\n";
    }
    printf OUTPUT ("%-12s%-30s\n%s\n"," Hour","Connections","---------------------------------------------------------------------");
    foreach $heading (@Hours) {
        # Dots for connections, stars for size; both bars are empty when
        # there is nothing to scale against.
        $Access_graph = ($HourMax > 0) ? '.' x ($Connect{$heading} / $HourMax * 50) : "";
        $Size_graph = ($SizeHourMax > 0) ? '*' x ($KBYTES{$heading} / $SizeHourMax * 50) : "";
        # Skip the hours that the time keys do not select.
        next unless (($heading =~ /$SearchTime/) || $NO_TIME_KEY);
        $SubTotal = $SubTotal + $Connect{$heading};
        if ($heading == 0) {
            # Hour 00 reads 'Midnight'
            printf OUTPUT ("%-15s%9s %s\n%-15s%9s %s\n","Midnight",$Connect{$heading},$Access_graph," Kbytes",$KBYTES{$heading},$Size_graph);
        }
        elsif ($heading == 12) {
            # Hour 12 reads 'Noon'
            printf OUTPUT ("%-15s%9s %s\n%-15s%9s %s\n","Noon",$Connect{$heading},$Access_graph," Kbytes",$KBYTES{$heading},$Size_graph);
        }
        else {
            # Any other hour is printed by number on the 12 hour clock.
            if ($heading >= 12) {
                $modifier = "PM";
                $print_heading = $heading - 12;
            }
            else {
                $modifier = "AM";
                $print_heading = $heading;
            }
            printf OUTPUT ("%3s %-11s%9s %s\n%-15s%9s %s\n",$print_heading,$modifier,$Connect{$heading},$Access_graph," Kbytes",$KBYTES{$heading},$Size_graph);
        }
    }
    print OUTPUT "\n";
    print OUTPUT "Total for this Section: $SubTotal\n\n" if !($NO_TIME_KEY);
    print OUTPUT "</pre>" if ($opt_H);
    close OUTPUT;
}
# DO_FILES - append the directory and file access section to $OutFile.
#
# Argument (optional): the string "summary" when the section is written
# as part of the summary report; anything else, or nothing, for the
# complete file report.  The argument selects the heading and, with -H,
# the set of links; -vN also selects the abbreviated heading and limits
# the depth of the paths that are listed.
#
# Every path in %Directory is listed in sorted order with its access
# count and its size total from %KBYTES.  Leading path components that
# equal the component at the same position in the previous path are
# shown as '.', to avoid repeating full pathnames.
# Dies if the output file cannot be opened.
sub DO_FILES {
    $arg = pop(@_);
    # Compare exactly: a pattern match for 'summary' would also accept the
    # "no_summary" that the file report passes in.
    $is_summary = ($arg eq "summary");
    # Start without a previous path, so that nothing carries over from an
    # earlier call into the first line of this section.
    @previous_heading = ();
    open (OUTPUT,">> $OutFile") || die "Can't create $OutFile\n";
    print OUTPUT "\n";
    print OUTPUT "<H1>\n" if ($opt_H);
    if ($is_summary || defined ($opt_v)){
        print OUTPUT "Abbreviated Access Report for Directories and Files\n";
    }
    else {
        print OUTPUT "Access Report for Directories and Files\n";
    }
    print OUTPUT "</H1>\n" if ($opt_H);
    if ($opt_H) {
        if ($is_summary) {
            print OUTPUT "<pre>
See Also: <A HREF=\"$MenuDocLink\">Report Menu</A> / <A HREF=\"$HyperLink_Prefix.files\">Complete File Access Report</A> / <A HREF=\"$HyperLink_Prefix.hosts\">Host Accesses</A> / <A HREF=\"$HyperLink_Prefix.error\">Errors</A>
";
        }
        else {
            print OUTPUT "<pre>
<TITLE>File Access Report</TITLE>
See Also: <A HREF=\"$MenuDocLink\">Report Menu</A> / <A HREF=\"$HyperLink_Prefix.sum\">Summary</A> / <A HREF=\"$HyperLink_Prefix.hosts\">Host Accesses</A> / <A HREF=\"$HyperLink_Prefix.error\">Errors</A>
";
        }
    }
    $SubTotal=0;
    print OUTPUT "\n";
    printf OUTPUT ("%s%32s %s\n%s\n","Directory or File Accessed","Files","Kb","-------------------------------------------------------------------------");
    # Set an array of full pathnames
    @DIRS = keys(%Directory);
    # Sort that array.
    @sortedDIRS = sort @DIRS;
    foreach $heading (@sortedDIRS) {
        $StringLength = 0;
        # Create an array consisting of the names in the full path without
        # the '/'
        @heading_format = split (/\//,$heading);
        $PRINT_ME = 1;
        #$PRINT_ME = 1 if ($heading =~ /$SearchOthers/);
        # Iterate through the directory names of the path.  Each name that
        # equals the name at the same position in the previously printed
        # path is replaced by a '.'.
        # This is all to prevent the duplication of full pathnames in the
        # output to make it more readable.
        for ($i=1;$i < $#heading_format;$i++) {
            if ($heading_format[$i] eq $previous_heading[$i]) {
                $heading_format[$i] = ".";
            }
        }
        # $i now indexes the last name of the path and also serves as the
        # depth of the path.  Shorten the name if the line would be too long.
        $StringLength = length ($heading_format[$i]) + $i;
        if ($StringLength > 50){
            $heading_format[$i] = substr ($heading_format[$i],0,(47 - $i)) . "...";
            $StringLength = 50;
        }
        # Insert a line of tildes from the filename to its data to make
        # viewing easier.
        $Spaces = "~" x (57 - ($StringLength + length ($Directory{$heading})));
        $heading_format[$i] = "$heading_format[$i] $Spaces ";
        # If this is the root menu then label it as such.
        if ($heading eq "") {
            @heading_format = "Main_Menu ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ";
        }
        # Print it, unless the path is deeper than the verbosity limit.  The
        # parts are joined with nothing between them, which leaves the
        # global list separator alone.
        if ($i <= $verbosity_limit) {
            if ($PRINT_ME || $NO_OTHERS_KEY) {
                $SubTotal = $SubTotal + $Directory{$heading};
                printf OUTPUT ("%s%s%9s Kb\n",join ("",@heading_format),$Directory{$heading},$KBYTES{$heading});
            }
        }
        # Set this to be the previously printed filename.
        @previous_heading = split (/\//,$heading);
        $PRINT_ME = 0;
    }
    print OUTPUT "\n";
    print OUTPUT "Total for this Section: $SubTotal\n\n" if !($NO_KEY);
    print OUTPUT "</pre>" if ($opt_H);
    close OUTPUT;
}
# Sort order for the host report: most connections first.
sub by_connections {
    $Machines{$b} <=> $Machines{$a};
}
# DO_HOSTS - append the connections-per-host section to $OutFile.
#
# Hosts are listed from the most to the least active.  A host is
# printed only if it passes the host keys (if any) and only while the
# running total is still below $verbosity_limit percent of all
# connections.
# Takes no arguments; dies if the output file cannot be opened.
sub DO_HOSTS {
    open (OUTPUT,">> $OutFile") || die "Can't create $OutFile\n";
    if ($opt_H) {
        print OUTPUT "<H1>\n";
        print OUTPUT "Connections per Host\n";
        print OUTPUT "</H1>\n";
        print OUTPUT "<pre>
<TITLE>Host Access Report</TITLE>
See Also: <A HREF=\"$MenuDocLink\">Report Menu</A> / <A HREF=\"$HyperLink_Prefix.sum\">Access Report Summary</A> / <A HREF=\"$HyperLink_Prefix.files\">File Accesses</A> / <A HREF=\"$HyperLink_Prefix.error\">Errors</A>
";
    }
    else {
        print OUTPUT "Connections per Host\n";
    }
    $SubTotal=0;
    # The names of all hosts, ordered by the number of times they connected.
    @HOSTS = keys(%Machines);
    @sortedHOSTS = sort by_connections @HOSTS;
    printf OUTPUT ("%-30s%s\n%s\n","Host ","Number of Connections","---------------------------------------------------------------------");
    foreach $heading (@sortedHOSTS) {
        # Hosts outside the host keys are not reported.
        next unless (($heading =~ /$SearchHost/) || $NO_HOST_KEY);
        # Nor is any host once the requested share of connections is reached.
        next unless ($SubTotal < ($connections * ($verbosity_limit)/100));
        $SubTotal = $SubTotal + $Machines{$heading};
        printf OUTPUT ("%-50s%s\n",$heading,$Machines{$heading});
    }
    print OUTPUT "\n";
    print OUTPUT "Total for this Section: $SubTotal\n\n" if !($NO_KEY);
    print OUTPUT "</pre>" if ($opt_H);
    close OUTPUT;
}
# PRINT_HEADING - start $OutFile afresh with the report banner.
#
# Writes the title, the period covered by the log and the four overall
# totals, replacing whatever the file held before, then makes the file
# readable by everyone (mode 0644).  With -H the banner opens a <pre>.
# Takes no arguments; dies if the output file cannot be created.
sub PRINT_HEADING {
    local (@totals,$label,$value);
    open (OUTPUT,"> $OutFile") || die "Can't create $OutFile\n";
    print OUTPUT "<pre>" if ($opt_H);
    print OUTPUT "\n\n",
        "==================================================================\n",
        " Web Usage Report\n",
        " Beginning $start_date\n",
        " Ending $end_date\n",
        "==================================================================\n",
        "\n";
    # Label and value pairs, printed one pair to a line.
    @totals = ("Total Number of Connections:",$connections,
        "Total Number of KiloBytes Retrieved:",$TOTAL_KBYTES,
        "Total Number of Directories Browsed:",$DIRECTORY_ACCESSES,
        "Total Number of Nonexistent Files:",$EXIST_ERRORS);
    while (@totals) {
        $label = shift (@totals);
        $value = shift (@totals);
        printf OUTPUT ("%-40s%8s\n",$label,$value);
    }
    print OUTPUT "\n";
    close OUTPUT;
    chmod 0644,$OutFile;
}