#!/usr/bin/perl
# ==========================================================================
$Version = 'wwwstat-2.01';
#
# Copyright (c) 1994, 1996 Regents of the University of California.
#
# This software has been developed by Roy Fielding as
# part of the WebSoft project at the University of California, Irvine.
#
# See the file LICENSE for licensing and redistribution information.
# See the file INSTALL for installation information.
# See the file Changes for known problems and version information.
# See the file README for more information.
# See the wwwstat.1 man page for options and usage information.
#
# usage: write the option summary (interpolating $Pname, $Version, $Zhandle
# and $DefaultLog) to STDERR and exit via die.
# NOTE(review): the recovered source had the shebang, comments and heredoc
# collapsed onto one physical line, and the "<section>" placeholders had been
# stripped by tag-removal; both have been restored here.
sub usage
{
    die <<"EndUsage";
usage: $Pname [-F system_config] [-f user_config] [-helLoOuUrRvx]
       [-a IP_address] [-c code] [-d date] [-t hour] [-n archive_name]
       [-A IP_address] [-C code] [-D date] [-T hour] [-N archive_name]
       [-m method] [-M method] [-H heading_title] [-X lastsummary]
       [-noescape] [-trunc N] [-files] [-nofiles] [-link] [-nolink]
       [-cgi] [-nocgi] [-dns] [-nodns] [-cache filename]
       [-daily] [-hourly] [-domain] [-subdomain] [-archive] [-ident] [-all]
       [-sort (key|byte|req)] [-top N] [-both]
       [-no (daily|hourly|domain|subdomain|archive|ident|all)]
       [--] [ logfile | summary | + | - ]...

$Version

Process a sequence of httpd Common Logfile Format access_log files and/or
prior wwwstat output summary files (compressed if extension $Zhandle)
and output an HTML summary of all the input.

Configuration options:
   -F   Get system configuration defaults from the given file.
   -f   Get user configuration defaults from the given file.
   --   Last option (all later arguments are treated as filenames).

Diagnostic Options:
   -h   Help -- just display this message to STDERR and quit.
   -v   Verbose display to STDERR of each log entry processed.
   -x   Display to STDERR all requests of nonexistant URLs.
   -e   Display to STDERR all invalid log entries.

Display Options:
   -H   Use the following string as the heading and title for output.
   -X   Use the following string as the URL for last summary link.
   -R   Display daily stats sorted in reverse.
   -l   Do display full IP address of clients in my domain.
   -L   Don't (i.e. strip the machine name from local addresses).
   -o   Do display full IP address of clients from other domains.
   -O   Don't (i.e. strip the machine name from non-local addresses).
   -u   Do display IP address from unresolved domain names.
   -U   Don't (i.e. group all "unresolved" addresses under that name).
   -dns     Use DNS to lookup unresolved IP addresses (may be slow).
   -nodns   Do not lookup unresolved IP addresses.
   -cache   Use the given dbm file to read/write DNS cache.
   -trunc   Truncate archive URL after Nth hierarchy level.
   -files   Show filename in archive URL.
   -nofiles Truncate filename (if any) from archive URL.
   -link    Add a hypertext link around each archive URL.
   -nolink  Do not add a hypertext link around each archive URL.
   -cgi     Check HTTP method and output like a CGI script.
   -nocgi   Do not produce CGI output.

Section Options, <section>=(all|daily|hourly|domain|subdomain|archive|ident):
   -no<section>   Exclude the given section from the output.
   -<section>     Include the given section and set scope for -sort and -top.
   -sort (key|byte|req)  Sort this section by key, bytes, or requests.
   -top N   Include only the top N entries for this section.
   -both    Do the top N as sorted and then do full by key.

Search Options (include in summary only those log entries ...):
   -a   Containing a hostname/IP address matching the given perl regexp.
   -A   Not containing  "     "        "        "      "    "     "
   -c   Containing a server response code matching the given perl regexp.
   -C   Not containing  "     "        "        "      "    "     "
   -d   Containing a date ("Feb 2 1994") matching the given perl regexp.
   -D   Not containing  "     "        "        "      "    "     "
   -t   Containing an hour ("00" -- "23") matching the given perl regexp.
   -T   Not containing  "     "        "        "      "    "     "
   -n   Containing an archive (URL) name matching perl regexp (except +.).
   -N   Not containing  "     "        "        "      "    "     "
   -m   Using an HTTP method name matching the given perl regexp.
   -M   Not using the HTTP method  "     "        "      "    ""
   -noescape  Do not escape "." and "+" in remaining search options.

Filenames (none implies "+"):
   -    Read standard input (STDIN).
   +    Read the default logfile $DefaultLog.
   ...  Anything else is treated as the name of a file to be read,
        with the first line indicating whether it is a logfile or a summary.
EndUsage
}
# ==========================================================================
# The main program is really quite simple ...
setpriority(PRIO_PROCESS,0,19);     # Run at lowest priority (bareword
                                    # PRIO_PROCESS numifies to 0 == PRIO_PROCESS)
$Pname = $0;
if ($Pname =~ s#^(.*)/##) { push(@INC, $1); }   # Modify include path for bin,
                                                # current dir, and home dir
unshift(@INC, '.', ($ENV{'HOME'} || $ENV{'LOGDIR'}));

$StartTime  = time;                     # Get the current date-time stamp
$Updated    = &wtime($StartTime,'');    # Format it as local time
$UpdatedGMT = &wtime($StartTime,'GMT'); # and also as GMT time

&init_defaults;             # Set the default configuration
&get_configuration;         # Get system and user configuration
&get_commandline;           # Get command-line options
&init_summary;              # Initialize the accumulators
&output_cgi if $Do_CGI;     # Output CGI headers (if desired)
&init_DNS   if $LookupDNS;  # Initialize the DNS cache

if ($Verbose) { print STDERR "$Version: $Updated\n"; }

if ($ARGV[0])               # Check for explicit filenames
{
    foreach $filename (@ARGV) { &process_file($filename); }
}
else                        # if none, just read the default
{
    &process_file($DefaultLog);
}
if ($Verbose) { print STDERR "Done processing files, now doing summary\n"; }

&output_summary;            # Give us the dirty details
&close_DNS if $LookupDNS;   # Close the DNS cache

exit(0);                    # and we are finished.
# ==========================================================================

# ==========================================================================
# Initialize default configuration options.  NOTE that all of these options
# can be overridden in either the system or user configuration files and
# many can be overridden on the command-line.
#
sub init_defaults
{
    @ArchiveMap = ();
    require "domains.pl";       # Get the domain mapping (%DomainMap)

    # Specify the URL of the previous summary period (for use only as a
    # hypertext link).  Set it = "" if link is unwanted.
    # The three-letter abbrev for last month is substituted at any "%M".
    # The four-number year of last month is substituted at any "%Y".
    # Note that this is the month prior to the earliest one in this summary.
    $LastSummary = "stats-%Y/%M.wwwstats.html.gz";

    # Specify the perl regex pattern for directory index files.  This will be
    # dependent on the server's DirectoryIndex config (usually in srm.conf).
    #   'index.html?'
    #       matches "index.html" and "index.htm"
    #   'index(.(html?|cgi))?'
    #       matches "index", "index.html", "index.htm", and "index.cgi"
    #   '(([Oo]verview)|(index)).html?'
    #       matches "Overview.html", "Overview.htm", "index.html",
    #       "index.htm", "overview.html", and "overview.htm"
    $DirectoryIndex = 'index(.(html?|cgi))?';

    # Specify the default location of your access log
    $DefaultLog = '/usr/local/etc/httpd/logs/access_log';

    # Specify the command for displaying compressed files to STDOUT
    $Zcat = 'gunzip -c';    # specify as null string if none are available
    $Zhandle = '(gz|Z|z)';  # list file extensions that indicate compressed

    # If address in log entry is one word (a local host), append what?
    # Specify an appropriate '.sub.dom.ain'
    $AppendToLocalhost = '.no_where.com';

    # Specify whether (1) or not (0) you want to display the IP address
    # on reversed subdomain listings as follows:
    $LocalFullAddress  = 0;     # Show full address for local hosts?
    $OthersFullAddress = 0;     # Show full address for non-local hosts?
    $ShowUnresolved    = 0;     # Show all unresolved addresses?

    # Specify whether (1) or not (0) you want to insert a hypertext
    # anchor for each URL in the archive section (-link option).
    $InsertLink = 0;

    # Specify whether (1) or not (0) you want to add CGI output headers
    # before the HTML summary.
    $Do_CGI = 0;

    # Specify whether (1) or not (0) you want the Archive URL to be
    # truncated (this saves output space and runtime memory, but loses info).
    $TruncateLevel = 0;         # Truncate after Nth hierarchy level
    $TruncateFile  = 0;         # Truncate any trailing filename

    # Specify whether (1) or not (0) you want to lookup unresolved
    # IP addresses via DNS.  Note that this could be *very* slow!
    $LookupDNS    = 0;
    $DNScachefile = 'dnscache'; # DBM files for persistent cache
    $DNSexpires   = 5356800;    # Cache for two months (in seconds)

    # The following lines define the visible header fields for HTML output.
    # They may be changed/nationalized, but changing some may affect the
    # parsing algorithm for reading old summary files.  Test all changes!
    $OutputTitle = 'World Wide Web Access Statistics for www'
                 . $AppendToLocalhost;
    $UpdateHeader  = 'Last updated: ';
    $LastSumHeader = 'Previous Full Summary Period';
    $TotalsHeader  = 'Totals for Summary Period: ';
    $ReqRcvHeader  = 'Requests Received During Summary Period ';
    $BtransHeader  = 'Bytes Transmitted During Summary Period ';
    $AvgReqHeader  = 'Average Requests Received Daily ';
    $AvgByteHeader = 'Average Bytes Transmitted Daily ';
    $TotalsFormat  = "%s %14.0f\n";
    # NOTE(review): interior run-length of spaces in the column-layout
    # strings below may have been collapsed during source recovery; verify
    # alignment against an existing summary file.
    $StatsHeader = '%Reqs %Byte Bytes Sent Requests ';
    $StatsRule   = '----- ----- ------------ -------- |';
    $StatsFormat = '%s %s %12.0f %8d |';
    $PrefixTop   = 'Top';
    $PrefixTotal = 'Total';
    $DailyHeader     = 'Transfers by Request Date';
    $HourlyHeader    = 'Transfers by Request Hour';
    $DomainHeader    = 'Transfers by Client Domain';
    $SubdomainHeader = 'Transfers by Reversed Subdomain';
    $ArchiveHeader   = 'Transfers by URL/Archive Section';
    $IdentHeader     = 'Transfers by Remote Identity';

    # These Old headers are for reading old summary files
    $OldDailyHeader     = 'Daily Transmission Statistics';
    $OldHourlyHeader    = 'Hourly Transmission Statistics';
    $OldDomainHeader    = 'Total Transfers by Client Domain';
    $OldSubdomainHeader = 'Total Transfers by Reversed Subdomain';
    $OldArchiveHeader   = 'Total Transfers from each Archive Section';
    $OldIdentHeader     = 'Total Transfers to each Remote Identifier';

    # The following sets the default ordering for the daily stats.
    # Change this to 1 if you always want gwstat-style output.
    $ReverseDateSort = 0;       # Display daily stats in reverse order.

    # The following sets the default sort key for each section,
    # where 0 means sort by the primary key value (day, hour, domain, ...)
    #       1 means sort by the number of requests
    #       2 means sort by the number of bytes transferred
    $SortDaily     = 0;
    $SortHourly    = 0;
    $SortDomain    = 0;
    $SortSubdomain = 0;
    $SortArchive   = 0;
    $SortIdent     = 0;

    # If the output of a section is sorted, you may also want to restrict
    # the output to only the N best in that section.
    $TopDaily     = 0;
    $TopHourly    = 0;
    $TopDomain    = 0;
    $TopSubdomain = 0;
    $TopArchive   = 0;
    $TopIdent     = 0;

    # On the other hand, you may want to exclude (0) an entire section.
    # If set = 2, the top N is done first and then followed by normal section.
    $Do_Daily     = 1;  # Display the Daily Statistics
    $Do_Hourly    = 1;  # Display the Hourly Statistics
    $Do_Domain    = 1;  # Display the Domain (Country) Statistics
    $Do_Subdomain = 1;  # Display the Subdomain Statistics
    $Do_Archive   = 1;  # Display the Archive Statistics

    # The following option is only useful if the server is running with
    # rfc931/ident support (i.e. "IdentityCheck on" appears in httpd.conf)
    # or for resources which require user authentication.
    # NOTE: For security reasons, you should not publish to the web any
    # report that lists the Remote Identities.  This option is intended for
    # server maintenance only.  Use the -r or -ident command-line option
    # instead.
    $Do_Ident = 0;      # Set 1 or 2 ONLY if Ident is ALWAYS desired.

    # The rest of these options are normally only changed on the command-line
    $Verbose       = 0;   # Display valid log entries on STDERR?
    $PrintInvalids = 0;   # Display invalid log entries on STDERR?
    $PrintNonexist = 0;   # Display nonexistant file requests on STDERR?
    $SearchAddress = '';  # Pattern to look for in hostname/IP addresses.
    $SearchCode    = '';  # Pattern to look for in Code.
    $SearchDate    = '';  # Pattern to look for in Date.
    $SearchTime    = '';  # Pattern to look for in Hour.
    $SearchArchive = '';  # Pattern to look for in Archive names.
    $SearchMethod  = '';  # Pattern to look for in Method.
    $NotAddress    = '';  # Pattern to reject entry if in IP addresses.
    $NotCode       = '';  # Pattern to reject entry if in Code.
    $NotDate       = '';  # Pattern to reject entry if in Date.
    $NotTime       = '';  # Pattern to reject entry if in Hour.
    $NotArchive    = '';  # Pattern to reject entry if in Archive names.
    $NotMethod     = '';  # Pattern to reject entry if in Method.
    $EscapeSpecials = '[+.]';   # Most users forget about regexp syntax

    # The default system and user configuration filenames should only
    # be changed if your filesystem can't handle these names.
    $SconfigFile = 'wwwstat.rc';    # System file for overriding defaults
    $UconfigFile = '.wwwstatrc';    # User file for overriding defaults
}
# ==========================================================================
# Get the system configuration and user configuration.  The -F and -f
# options must be the first argument(s) so the config can be loaded before
# the remaining command-line is parsed; a "forced" (explicitly named) config
# file that fails to load is fatal, while a missing default is ignored.
#
sub get_configuration
{
    local($forced) = 0;

    if (defined($ARGV[0]) && ($ARGV[0] eq '-F'))
    {
        shift @ARGV;
        $SconfigFile = shift @ARGV;
        $forced = 1;
    }
    if ($SconfigFile)
    {
        eval 'require $SconfigFile;';   # string eval so a failure is trapped
        if ($@ && $forced) { die "Unable to read $SconfigFile: $!\n"; }
        else               { $! = 0; undef $@; }
    }
    if (defined($ARGV[0]) && ($ARGV[0] eq '-f'))
    {
        shift @ARGV;
        $UconfigFile = shift @ARGV;
        $forced = 1;
    }
    else { $forced = 0; }
    if ($UconfigFile)
    {
        eval 'require $UconfigFile;';
        if ($@ && $forced) { die "Unable to read $UconfigFile: $!\n"; }
        else               { $! = 0; undef $@; }
    }
}
# ==========================================================================
# Get the command-line options.
#
# Parse the command-line in @ARGV: full-word options first, then clustered
# single-letter options (dispatched through &set_option via $letteropts,
# where a trailing ':' marks an option that takes a value).  $scope selects
# which section subsequent -sort/-top/-both apply to (0 == all sections).
# Stops at '--', '-' (STDIN), or the first non-option argument.
#
sub get_commandline
{
    local($_, $first, $rest, $pos);
    local($scope) = 0;
    local($letteropts) = 'helLoOuUrRvxis:H:X:f:F:m:M:c:C:t:T:a:A:n:N:d:D:';
    local(@args) = split(//, $letteropts);

    while (defined($_ = $ARGV[0]))
    {
        if ($_ eq '--') { shift @ARGV; last; }  # Last option indicator
        if ($_ eq '-')  { last; }               # STDIN file indicator
        if (!s/^-//)    { last; }               # Not an option

        if (/^no(.*)/)              # Exclude some option
        {                           # indicated by suffix
            unless ($_ = $1)        # or next argument
            {
                shift @ARGV;
                &badarg('-no requires value') unless ($_ = $ARGV[0]);
            }
            if    (/^escape$/)       { $EscapeSpecials = ''; }
            elsif (/^link$/)         { $InsertLink     = 0; }
            elsif (/^files$/)        { $TruncateFile   = 1; }
            elsif (/^dns$/)          { $LookupDNS      = 0; }
            elsif (/^cgi$/)          { $Do_CGI         = 0; }
            elsif (/^da(y|ily|te)$/) { $Do_Daily       = 0; }
            elsif (/^hour(ly)?$/)    { $Do_Hourly      = 0; }
            elsif (/^dom(ain)?$/)    { $Do_Domain      = 0; }
            elsif (/^sub(domain)?$/) { $Do_Subdomain   = 0; }
            elsif (/^arc(hive)?$/)   { $Do_Archive     = 0; }
            elsif (/^id(ent)?$/)     { $Do_Ident       = 0; }
            elsif (/^all$/)
            {
                $Do_Daily = $Do_Hourly = $Do_Domain = $Do_Subdomain
                          = $Do_Archive = $Do_Ident = 0;
            }
            else { &badarg('-no',$_); }
        }
        elsif (/^all$/)     # Scope determines what sections are active
        {                   # and how to apply the -top and -sort options
            $scope = 0;
            $Do_Daily = $Do_Hourly = $Do_Domain = $Do_Subdomain
                      = $Do_Archive = $Do_Ident = 1;
        }
        elsif (/^da(y|ily|te)$/) { $scope = 1; $Do_Daily     = 1; }
        elsif (/^hour(ly)?$/)    { $scope = 2; $Do_Hourly    = 1; }
        elsif (/^dom(ain)?$/)    { $scope = 3; $Do_Domain    = 1; }
        elsif (/^sub(domain)?$/) { $scope = 4; $Do_Subdomain = 1; }
        elsif (/^arc(hive)?$/)   { $scope = 5; $Do_Archive   = 1; }
        elsif (/^id(ent)?$/)     { $scope = 6; $Do_Ident     = 1; }
        elsif (/^link$/)
        {
            $InsertLink = 1;    # Add anchors to archive
        }
        elsif (/^files$/)       # Show filename in URL
        {
            $TruncateFile = 0;
        }
        elsif (/^trunc(.*)/)    # Truncate Archive URL
        {                       # after Nth level
            unless ($_ = $1)
            {
                shift @ARGV;
                &badarg('-trunc requires value') unless ($_ = $ARGV[0]);
            }
            &badarg('-trunc', $_) unless (/^\d+$/);
            $TruncateLevel = $_;
        }
        elsif (/^cgi$/)         # Output CGI headers
        {
            $Do_CGI = 1;
        }
        elsif (/^dns$/)         # Resolve IP addresses
        {
            $LookupDNS = 1;
        }
        elsif (/^cache(.*)/)    # Change cache filename
        {
            unless ($_ = $1)
            {
                shift @ARGV;
                &badarg('-cache requires value') unless ($_ = $ARGV[0]);
            }
            $DNScachefile = $_;
        }
        elsif (/^top(.*)/)      # Limit to top N
        {
            unless ($_ = $1)
            {
                shift @ARGV;
                &badarg('-top requires value') unless ($_ = $ARGV[0]);
            }
            &badarg('-top', $_) unless (/^\d+$/);
            if    ($scope == 1) { $TopDaily     = $_; }
            elsif ($scope == 2) { $TopHourly    = $_; }
            elsif ($scope == 3) { $TopDomain    = $_; }
            elsif ($scope == 4) { $TopSubdomain = $_; }
            elsif ($scope == 5) { $TopArchive   = $_; }
            elsif ($scope == 6) { $TopIdent     = $_; }
            else
            {
                $TopDaily = $TopHourly = $TopDomain = $TopSubdomain
                          = $TopArchive = $TopIdent = $_;
            }
        }
        elsif (/^sort(.*)/)     # Change sort method
        {
            unless ($_ = $1)
            {
                shift @ARGV;
                &badarg('-sort requires value') unless ($_ = $ARGV[0]);
            }
            if    (/^key/) { $sortkey = 0; }    # Sort by primary key
            elsif (/^req/) { $sortkey = 1; }    # Sort by requests rcvd
            elsif (/^byt/) { $sortkey = 2; }    # Sort by bytes transmitted
            else           { &badarg('-sort',$_); }
            if    ($scope == 1) { $SortDaily     = $sortkey; }
            elsif ($scope == 2) { $SortHourly    = $sortkey; }
            elsif ($scope == 3) { $SortDomain    = $sortkey; }
            elsif ($scope == 4) { $SortSubdomain = $sortkey; }
            elsif ($scope == 5) { $SortArchive   = $sortkey; }
            elsif ($scope == 6) { $SortIdent     = $sortkey; }
            else
            {
                $SortDaily = $SortHourly = $SortDomain = $SortSubdomain
                           = $SortArchive = $SortIdent = $sortkey;
            }
        }
        elsif (/^both$/)        # Do both top N and normal
        {
            if ($scope == 1)
            {
                $Do_Daily = 2;
                if (!$TopDaily)  { $TopDaily  = 10; }
                if (!$SortDaily) { $SortDaily = 1; }
            }
            elsif ($scope == 2)
            {
                $Do_Hourly = 2;
                if (!$TopHourly)  { $TopHourly  = 10; }
                if (!$SortHourly) { $SortHourly = 1; }
            }
            elsif ($scope == 3)
            {
                $Do_Domain = 2;
                if (!$TopDomain)  { $TopDomain  = 10; }
                if (!$SortDomain) { $SortDomain = 1; }
            }
            elsif ($scope == 4)
            {
                $Do_Subdomain = 2;
                if (!$TopSubdomain)  { $TopSubdomain  = 10; }
                if (!$SortSubdomain) { $SortSubdomain = 1; }
            }
            elsif ($scope == 5)
            {
                $Do_Archive = 2;
                if (!$TopArchive)  { $TopArchive  = 10; }
                if (!$SortArchive) { $SortArchive = 1; }
            }
            elsif ($scope == 6)
            {
                $Do_Ident = 2;
                if (!$TopIdent)  { $TopIdent  = 10; }
                if (!$SortIdent) { $SortIdent = 1; }
            }
            else
            {
                $Do_Daily = $Do_Hourly = $Do_Domain = $Do_Subdomain
                          = $Do_Archive = $Do_Ident = 2;
                $TopDaily = $TopHourly = $TopDomain = $TopSubdomain
                          = $TopArchive = $TopIdent = 10;
                $SortDaily = $SortHourly = $SortDomain = $SortSubdomain
                           = $SortArchive = $SortIdent = 1;
            }
        }
        else    # End of full-word option arguments
        {
            while (1)           # Loop by each character (exits via last;
            {                   # loop condition restored after recovery)
                ($first,$rest) = /^(.)(.*)/;
                if (($pos = index($letteropts,$first)) < 0)
                {
                    &badarg("Unknown option:",$first);
                }
                if ($args[$pos+1] eq ':')   # Option takes a value
                {
                    # FIX: only consume the next argument when the value is
                    # not attached (e.g. "-H title" vs "-Htitle"); the old
                    # code shifted unconditionally, so an attached value
                    # also swallowed the argument that followed it.
                    if ($rest eq '')
                    {
                        shift(@ARGV);
                        &badarg($first,"requires value") unless @ARGV;
                        $rest = $ARGV[0];
                    }
                    &set_option($first, $rest);
                    last;
                }
                else                        # Boolean option, may be clustered
                {
                    &set_option($first, 1);
                    $_ = $rest;
                    last unless $_;
                }
            }
        }
    }
    continue
    {
        shift @ARGV;
    }
}
# ==========================================================================
# Set the single-letter command-line option given.  Gee, this is fun.
#
# set_option: apply one single-letter option $opt with value $value.
# Boolean flags simply set a global; search-pattern options (-m/-M, -c/-C,
# -t/-T, -a/-A, -n/-N, -d/-D) accumulate, OR-ing each new value into any
# previously given pattern as "(old|new)".  Dies via &usage/&badarg on -h
# and unknown options.
sub set_option
{
    local($opt, $value) = @_;

    # Simple boolean / scalar flags
    if ($opt eq 'h') { &usage; }
    elsif ($opt eq 'e') { $PrintInvalids = 1; }
    elsif ($opt eq 'l') { $LocalFullAddress = 1; }
    elsif ($opt eq 'L') { $LocalFullAddress = 0; }
    elsif ($opt eq 'o') { $OthersFullAddress = 1; }
    elsif ($opt eq 'O') { $OthersFullAddress = 0; }
    elsif ($opt eq 'u') { $ShowUnresolved = 1; }
    elsif ($opt eq 'U') { $ShowUnresolved = 0; }
    elsif ($opt eq 'r') { $Do_Ident = 2; }
    elsif ($opt eq 'R') { $ReverseDateSort = 1; }
    elsif ($opt eq 'v') { $Verbose = 1; }
    elsif ($opt eq 'x') { $PrintNonexist = 1; }
    elsif ($opt eq 'X') { $LastSummary = $value; }
    elsif ($opt eq 'H') { $OutputTitle = $value; }
    # Accumulating search patterns (include / exclude pairs)
    elsif ($opt eq 'm')
    {
        if ($SearchMethod) { $SearchMethod = "($SearchMethod|$value)"; }
        else { $SearchMethod = $value; }
    }
    elsif ($opt eq 'M')
    {
        if ($NotMethod) { $NotMethod = "($NotMethod|$value)"; }
        else { $NotMethod = $value; }
    }
    elsif ($opt eq 'c')
    {
        if ($SearchCode) { $SearchCode = "($SearchCode|$value)"; }
        else { $SearchCode = $value; }
    }
    elsif ($opt eq 'C')
    {
        if ($NotCode) { $NotCode = "($NotCode|$value)"; }
        else { $NotCode = $value; }
    }
    elsif ($opt eq 't')
    {
        if ($SearchTime) { $SearchTime = "($SearchTime|$value)"; }
        else { $SearchTime = $value; }
    }
    elsif ($opt eq 'T')
    {
        if ($NotTime) { $NotTime = "($NotTime|$value)"; }
        else { $NotTime = $value; }
    }
    # Address/archive patterns: regexp metacharacters "." and "+" are
    # escaped first (unless -noescape cleared $EscapeSpecials)
    elsif ($opt eq 'a')
    {
        if ($EscapeSpecials) { $value =~ s/($EscapeSpecials)/\\$1/go; }
        if ($SearchAddress) { $SearchAddress = "($SearchAddress|$value)"; }
        else { $SearchAddress = $value; }
    }
    elsif ($opt eq 'A')
    {
        if ($EscapeSpecials) { $value =~ s/($EscapeSpecials)/\\$1/go; }
        if ($NotAddress) { $NotAddress = "($NotAddress|$value)"; }
        else { $NotAddress = $value; }
    }
    elsif ($opt eq 'n')
    {
        if ($EscapeSpecials) { $value =~ s/($EscapeSpecials)/\\$1/go; }
        if ($SearchArchive) { $SearchArchive = "($SearchArchive|$value)"; }
        else { $SearchArchive = $value; }
    }
    elsif ($opt eq 'N')
    {
        if ($EscapeSpecials) { $value =~ s/($EscapeSpecials)/\\$1/go; }
        if ($NotArchive) { $NotArchive = "($NotArchive|$value)"; }
        else { $NotArchive = $value; }
    }
    # Date patterns: "today" expands to the current "Mon DD YYYY".
    # NOTE(review): the substr offsets assume the &wtime format of $Updated;
    # confirm against &wtime (defined elsewhere in this file).
    elsif ($opt eq 'd')
    {
        if ($value eq 'today')
        {
            $value = substr($Updated, 8, 4) . substr($Updated, 5, 3)
                   . substr($Updated, 12, 4);
            $value =~ s/ 0(\d) / $1 /;    # strip leading zero from the day
        }
        if ($SearchDate) { $SearchDate = "($SearchDate|$value)"; }
        else { $SearchDate = $value; }
    }
    elsif ($opt eq 'D')
    {
        if ($value eq 'today')
        {
            $value = substr($Updated, 8, 4) . substr($Updated, 5, 3)
                   . substr($Updated, 12, 4);
            $value =~ s/ 0(\d) / $1 /;
        }
        if ($NotDate) { $NotDate = "($NotDate|$value)"; }
        else { $NotDate = $value; }
    }
    # Options that are only legal in a specific position, or obsolete
    elsif ($opt eq 'F')
    {
        die "The -F option MUST be first option on command-line\n";
    }
    elsif ($opt eq 'f')
    {
        die "The -f option MUST be first option after -F option (if any)\n";
    }
    elsif ($opt eq 'i')
    {
        warn "Option -i is no longer needed, ignoring it.\n";
    }
    elsif ($opt eq 's')
    {
        warn "Option -s is no longer used, ignoring it.\n";
    }
    else { &badarg("Unknown option:", $opt); }
}
# ==========================================================================
# badarg: report a bad command-line argument and die via &usage.
sub badarg
{
    local($dreck) = join(' ', @_);
    warn "Bad command option: $dreck\n";
    &usage;
}
# ==========================================================================
# Initialize summary accumulators and non-optional configuration.
# These settings may be customized, but are unlikely to need changing.
# sub init_summary { # Setup the table of response codes and (if not '') the name for archive %RespCodes = ( '100', 'Code 100 Continue', '101', 'Code 101 Switching Protocols', '200', '', # OK response '201', '', # Created response '202', '', # Accepted response '203', '', # Non-Authoritative response '204', '', # No Content response '205', '', # Reset Content response '206', '', # Partial Content response '300', 'Code 300 Multiple Choices', '301', 'Code 301 Moved Permanently', '302', 'Code 302 Moved Temporarily', '303', 'Code 303 See Other', '304', '', # Not Modified response '305', 'Code 305 Use Proxy', '400', 'Code 400 Bad Request', '401', 'Code 401 Unauthorized', '402', 'Code 402 Payment Required', '403', 'Code 403 Forbidden', '404', 'Code 404 Not Found', '405', 'Code 405 Method Not Allowed', '406', 'Code 406 Not Acceptable', '407', 'Code 407 Proxy Authentication Required', '408', 'Code 408 Request Time-out', '409', 'Code 409 Conflict', '410', 'Code 410 Gone', '411', 'Code 411 Length Required', '412', 'Code 412 Precondition Failed', '413', 'Code 413 Request Entity Too Large', '414', 'Code 414 Request-URI Too Large', '415', 'Code 415 Unsupported Media Type', '500', 'Code 500 Internal Server Error', '501', 'Code 501 Not Implemented', '502', 'Code 502 Bad Gateway', '503', 'Code 503 Service Unavailable', '504', 'Code 504 Gateway Time-out', '505', 'Code 505 HTTP Version Not Supported', ); # Estimate the size (in bytes) of typical responses not counted in logfile # For better accuracy, tailor to your particular site's server %RespEstimates = ( '100', 100, # Continue response (never happens) '101', 100, # Switching Protocols response (never happens) '200', 200, # OK response (headers only) '201', 200, # Created response (headers only) '202', 200, # Accepted response (headers only) '203', 200, # Non-Authoritative response (headers only) '204', 200, # No Content response (headers only) '205', 200, # Reset Content response (headers only) '206', 200, # Partial Content 
response (headers only) '300', 400, # Multiple choices message '301', 400, # Permanent redirect message '302', 400, # Temporary redirect message '303', 400, # See other URL redirect message '304', 100, # Not Modified response to a Conditional GET '305', 400, # Use Proxy redirect message '400', 300, # Bad Request response message '401', 300, # Unauthorized response message '402', 300, # Payment Required response message '403', 300, # Forbidden response message '404', 300, # Not Found message '405', 300, # Method Not Allowed message '406', 300, # Not Acceptable message '407', 300, # Proxy Authentication Required message '408', 300, # Request Time-out message '409', 300, # Conflict message '410', 300, # Gone message '411', 300, # Length Required message '412', 300, # Precondition Failed message '413', 300, # Request Entity Too Large message '414', 300, # Request-URI Too Large message '415', 300, # Unsupported Media Type message '500', 500, # Server Error response message '501', 500, # Not Implemented response message '502', 500, # Bad Gateway message '503', 500, # Service Unavailable message '504', 500, # Gateway Time-out message '505', 500, # HTTP Version Not Supported message ); $TotalRequests = 0; # total requests received %HourRequests = (); # per hour %DayRequests = (); # per day %ArchiveRequests = (); # per file %DomainRequests = (); # per domain %SubdomainRequests = (); # per subdomain %IdentRequests = (); # per ident $TotalBytes = 0; # total bytes sent %HourBytes = (); # per hour %DayBytes = (); # per day %ArchiveBytes = (); # per file %DomainBytes = (); # per domain %SubdomainBytes = (); # per subdomain %IdentBytes = (); # per ident $StartTag = "
\n";     # Note that these vars are used by both
    $EndTag   = "
\n"; # input and output of summaries. $TimeZone = ''; $AllMonths = 'JanFebMarAprMayJunJulAugSepOctNovDec'; } # ========================================================================== # process the given filename as FILE, based on the content of its first line. # sub output_cgi { local($method) = $ENV{'REQUEST_METHOD'}; local($crlf) = "\x0D\x0A"; $| = 1; # Unbuffer STDOUT if (!defined($method) || ($method eq 'GET') || ($method eq 'HEAD')) { print 'Content-Type: text/html', $crlf, $crlf; exit(0) if ($method eq 'HEAD'); } else { print 'Status: 405 Method Not Allowed', $crlf; print 'Allow: GET, HEAD', $crlf; print 'Content-Type: text/html', $crlf, $crlf; print '', $crlf; print '405 Method Not Allowed', $crlf; print '', $crlf; print '

405 Method Not Allowed

', $crlf; print 'Only GET and HEAD are allowed for this URL', $crlf; print '', $crlf; exit(0); } } # ========================================================================== # process the given filename as FILE, based on the content of its first line. # sub process_file { local($filename) = @_; if ($Verbose) { print STDERR "Processing file \"$filename\"\n"; } if ($filename eq '+') { $filename = $DefaultLog; } if ($Zhandle && ($filename =~ /\.$Zhandle$/o)) { if (!$Zcat) { warn "No zcat decompression command has been defined\n"; return; } $filename = "$Zcat $filename |"; } if (!open(FILE,$filename)) { warn "Error opening $filename: $!\n"; return; } if ($_ = ) { if (/^/) { &process_summary; } else { &process_log($_); } } close(FILE); } # ========================================================================== # Process the summary FILE by reading in our own output, section by section. # We must assume that the old output file was created with the same options # and that its content is disjunct from all other input files. # NOTE that using search options with inclusion cannot work unless the # included file was also created with those search options. # NOTE that Top N sections are skipped, since they cannot accumulate. # sub process_summary { local($_, $date, $hour, $domain, $subdomain, $pathkey, $ident); # This code depends on the content of the output being similar # to the default distribution. If you change the output content, # you must also check to ensure this code still works. local($heading_tag) = '

'; local($table_rule) = '-----'; # The first thing to do is read past the HEAD and crossreferences # until we get to the first preformatted section (the totals) do { return unless ($_ = ); } until ($_ eq $StartTag); # Next line should be "Files Transmitted ..." or "Requests Received ..." if (($_ = ) && (/\s(\d+)$/)) { return unless $1; $TotalRequests += $1; } else { warn "Invalid summary file, skipping"; return; } # Next line should be "Bytes Transmitted ..." if (($_ = ) && (/\s(\d+)$/)) { $TotalBytes += $1; } else { warn "Invalid summary file, skipping"; return; } # We don't care about the rest of the totals for (;;) # Loop through each section { do { return unless ($_ = ); } until (/^$heading_tag/o); next if (/>$PrefixTop /o); # Skip any Top N sections if (/$DailyHeader/o || /$OldDailyHeader/o) { do { return unless ($_ = ); } until (/^$table_rule/o); while (($_ = ) && ($_ ne $EndTag)) { if (/\s(\d+)\s+(\d+)\s+\|\s+(\S.*)/) { $date = $3; # the date after "|" $DayRequests{$date} += $2; # 1st number left of "|" $DayBytes{$date} += $1; # 2nd number left of "|" } else { warn "Invalid summary line"; } } } elsif (/$HourlyHeader/o || /$OldHourlyHeader/o) { next unless $Do_Hourly; do { return unless ($_ = ); } until (/^$table_rule/o); while (($_ = ) && ($_ ne $EndTag)) { if (/\s(\d+)\s+(\d+)\s+\|\s+(\S+)/) { $hour = $3; # the hour after "|" $HourRequests{$hour} += $2; # 1st number left of "|" $HourBytes{$hour} += $1; # 2nd number left of "|" } else { warn "Invalid summary line"; } } } elsif (/$DomainHeader/o || /$OldDomainHeader/o) { next unless $Do_Domain; do { return unless ($_ = ); } until (/^$table_rule/o); while (($_ = ) && ($_ ne $EndTag)) { if (/\s(\d+)\s+(\d+)\s+\|\s+(\S+)/) { $domain = $3; # the word after "|" $DomainRequests{$domain} += $2; # 1st number left of "|" $DomainBytes{$domain} += $1; # 2nd number left of "|" } else { warn "Invalid summary line"; } } } elsif (/$SubdomainHeader/o || /$OldSubdomainHeader/o) { next unless $Do_Subdomain; do { return unless 
($_ = ); } until (/^$table_rule/o); while (($_ = ) && ($_ ne $EndTag)) { if (/\s(\d+)\s+(\d+)\s+\|\s+(\S.*)/) { $subdomain = $3; $SubdomainRequests{$subdomain} += $2; $SubdomainBytes{$subdomain} += $1; } else { warn "Invalid summary line"; } } } elsif (/$ArchiveHeader/o || /$OldArchiveHeader/o) { next unless $Do_Archive; do { return unless ($_ = ); } until (/^$table_rule/o); while (($_ = ) && ($_ ne $EndTag)) { if (/\s(\d+)\s+(\d+)\s+\|\s+(\S.*)/) { $pathkey = &striplink($3); # the pathkey after "|" $ArchiveRequests{$pathkey} += $2; # 1st number left of "|" $ArchiveBytes{$pathkey} += $1; # 2nd number left of "|" } else { warn "Invalid summary line"; } } } elsif (/$IdentHeader/o || /$OldIdentHeader/o) { next unless $Do_Ident; do { return unless ($_ = ); } until (/^$table_rule/o); while (($_ = ) && ($_ ne $EndTag)) { if (/\s(\d+)\s+(\d+)\s+\|\s+(\S.*)/) { $ident = $3; # the identity after "|" $IdentRequests{$ident} += $2; # 1st number left of "|" $IdentBytes{$ident} += $1; # 2nd number left of "|" } else { warn "Invalid summary line"; } } } else { warn "Invalid summary file, skipping"; return; } } } # ========================================================================== # Strip the potential anchor tags from around an archive entry # sub striplink { local($_) = @_; s#^##; s#$##; return &truncated($_); } # ========================================================================== # If the options call for it, truncate the archive URL down to the # Nth hierarchy level and/or remove filename (if any) # sub truncated { local($_) = @_; local(@levels); return $_ unless m#^/#; if ($TruncateFile) { s#[^/]+$##; } if ($TruncateLevel > 0) { @levels = split(/\//); if ($#levels > $TruncateLevel) { $_ = join('/', @levels[0 .. 
$TruncateLevel], ''); } } return $_; } # ========================================================================== # Process the access_log FILE by reading each entry, validating and # categorizing the access, and then adding to that category's stats # sub process_log { local($_) = @_; # The first line has already been read. local($host, $rfc931, $authuser, $timestamp, $request, $status, $bytes); local($ident, $hour, $date, $method, $htv, $has_head, $idx); local($fname, $rname, $dvalue, $pathkey, @address, $domain, $subdomain); local($saveline); LINE: while() { s/^\0+//; # This is due to a bug in perl 5.002 s/\0//g; # because this line should be sufficient $saveline = $_; # # First, parse the logfile entry into its seven basic components # ($host, $rfc931, $authuser, $timestamp, $request, $status, $bytes) = /^(\S+) (\S+) (\S+) \[([^\]]*)\] \"([^"]*)\" (\S+) (\S+)/; #" Now, is this garbage or is it memorex? Note that $bytes can be 0 if (!($host && $rfc931 && $authuser && $timestamp && $request && $status)) { if ($PrintInvalids) { print STDERR "$.:$saveline"; } next LINE; } if ($status eq '-') # Test the response code { $status = "200"; } elsif (!defined($RespEstimates{$status})) { if ($PrintInvalids) { print STDERR "$.:$saveline"; } next LINE; } if ($SearchCode) { next LINE unless ($status =~ m#$SearchCode#o); } if ($NotCode) { next LINE unless ($status !~ m#$NotCode#o );} if ($bytes eq '-') { $bytes = '0'; } elsif ($bytes !~ /^\d+$/ ) # Test the bytes transferred { if ($PrintInvalids) { print STDERR "$.:$saveline"; } next LINE; } if ($rfc931 ne '-') # Test the remote ident { $ident = $rfc931; # Save ident for later use $ident =~ s/\[[^\]]*\]/COOKIE/g; # Replace all magic cookies } elsif ($authuser ne '-') { $ident = $authuser; # Jury rig support for authuser $ident =~ s/\[[^\]]*\]/COOKIE/g; # Replace all magic cookies } else { $ident = "unknown"; } # # Looks okay so far -- Now figure out when the request was made. 
        #
        $timestamp =~ s/^0/ /;          # Remove leading zero from day
        if ($timestamp =~ m#^([ 1-3]?\d)/([A-Za-z]+)/(\d{4}):(\d\d):\d\d:\d\d ([+ -]\d{1,4})#)
        {
            $date = "$2 $1 $3";         # e.g. "Feb  1 1994" (month day year)
            $hour = "$4";               # two-digit hour of day
            $TimeZone = "$5";           # signed GMT offset from the log
        }
        else
        {
            if ($PrintInvalids) { print STDERR "$.:$saveline"; }
            next LINE;
        }
        # Date/hour include/exclude filters (-d/-D and -t/-T options)
        if ($SearchDate) { next LINE unless ($date =~ m#$SearchDate#o); }
        if ($NotDate) { next LINE unless ($date !~ m#$NotDate#o); }
        if ($SearchTime) { next LINE unless ($hour =~ m#$SearchTime#o); }
        if ($NotTime) { next LINE unless ($hour !~ m#$NotTime#o); }
        #
        # Then parse the method and URL pathname from request
        #
        ($method, $fname, $htv) = split(' ',$request,3);
        if ($SearchMethod) { next LINE unless ($method =~ m#$SearchMethod#o); }
        if ($NotMethod) { next LINE unless ($method !~ m#$NotMethod#o); }
        $has_head = 0;                  # Check for HTTP/version tag
        if ($htv)
        {
            if ($htv !~ m#^HTTP/#i)
            {
                if ($PrintInvalids) { print STDERR "$.:$saveline"; }
            }
            else { $has_head = 1; }
        }
        #
        # Now we update the actual bytes transferred to include header/errors
        #
        if ($has_head || ($status ne '200')) { $bytes += $RespEstimates{$status}; }
        #
        # And then determine what the archive name should be
        #
        if ($rname = $RespCodes{$status})       # If not a successful status code
        {
            if ($PrintNonexist && ($status >= 400))
            {
                print STDERR "$status $date $hour $fname BY $host\n";
            }
            # Replace the URL with the response-code category name
            $fname = $rname unless ($SearchCode || $NotCode);
        }
        elsif (!$fname || ($fname =~ m#^HTTP/#i)) { $fname = '/'; }
        else
        {
            # Canonicalize the URL path before using it as a hash key
            $fname =~ s/\#.*$//;        # Remove any trailing anchor
            $fname =~ s/\?.*$//;        # Remove any trailing query
            $fname =~ s/%7E/~/i;        # Fix munging of URLs
            $fname =~ s/%3A/:/ig;
            $fname =~ s#/[^/]+/\.\./#/#;        # Fix usage of relative paths
            $fname =~ s#/\./#/#;                # and another one
            $fname =~ y#/#/#s;                  # Remove any extra slashes
            $fname =~ s#/$DirectoryIndex$#/#o;  # Remove trailing index name
        }
        if ($SearchArchive) { next LINE unless ($fname =~ m#$SearchArchive#o); }
        if ($NotArchive) { next LINE unless ($fname !~ m#$NotArchive#o); }
        if ($Verbose)
        {
            print STDERR "$date $hour $bytes $fname\n";
}
        # Map the URL through the configured ArchiveMap rewrite rules
        # (triples of flag, pattern, replacement), unless the entry was
        # already renamed to a response-code category or a code/archive
        # search filter is active.
        unless ($rname || $SearchArchive || $SearchCode || $NotCode)
        {
            for ($idx = 0; $idx < $#ArchiveMap; $idx += 3)
            {
                if ($ArchiveMap[$idx] eq 'i')   # 'i' flag: case-insensitive
                {
                    if ($fname =~ m/${ArchiveMap[$idx+1]}/i)
                    {
                        $fname = $ArchiveMap[$idx+2];
                        last;
                    }
                }
                elsif ($fname =~ m/${ArchiveMap[$idx+1]}/)
                {
                    $fname = $ArchiveMap[$idx+2];
                    last;
                }
            }
            next LINE unless $fname;    # No value indicates URI exclusion
        }
        $pathkey = &truncated($fname);  # Truncate parts if that is desired
        #
        # Get hostname/IP address and determine domain and reversed subdomain.
        #
        undef $domain;
        undef $subdomain;
        $host =~ tr/A-Z/a-z/;           # canonical lowercase
        $host =~ s/\.$//;               # drop trailing root dot
        if ($host =~ /^[^.]+$/)         # Unqualified hostname
        {
            if ($AppendToLocalhost) { $host .= $AppendToLocalhost; }
            else
            {
                if ($PrintInvalids) { print STDERR "$.:$saveline"; }
                $domain = 'localhost';
                $subdomain = $host;
            }
        }
        elsif ($host =~ /^\d+\.\d+\.\d+\.\d+$/)         # IP number
        {
            if ($LookupDNS && ($dvalue = &resolve($host)))
            {
                $host = $dvalue;
                if ($AppendToLocalhost && ($host =~ /^[^.]+$/))
                {
                    $host .= $AppendToLocalhost;
                }
            }
            else
            {
                # Could not (or chose not to) resolve: group accordingly
                $domain = 'unresolved';
                $subdomain = $ShowUnresolved ? $host : 'Unresolved';
            }
        }
        $host =~ s/\.[\d.]*in-addr\.arpa$//;    # Remove any DNS garbage
        $ident .= '@' . $host;
        if ($SearchAddress) { next LINE unless ($host =~ m#$SearchAddress#o); }
        if ($NotAddress) { next LINE unless ($host !~ m#$NotAddress#o); }
        #
        # If we haven't already set the subdomain and we don't want the
        # full subdomain address and this one has a machine name, strip
        # the machine name and store it reversed (for later sorting/display).
        #
        if ($Do_Subdomain && !defined($subdomain))
        {
            @address = split(/\./, $host);      # Split into component names
            if ($host =~ /$AppendToLocalhost$/o)
            {
                if (!$LocalFullAddress && ($#address > 1))
                {
                    shift(@address);            # clip off the machine name
                }
            }
            else
            {
                if (!$OthersFullAddress && ($#address > 1))
                {
                    shift(@address);            # clip off the machine name
                }
            }
            # Reversed so "com.example.host" groups/sorts by domain first
            $subdomain = join('.', reverse(@address));
        }
        #
        # If we haven't already set the domain, find it in %DomainMap
        # by looking for the longest match. If none, just use last component.
        # Note that this destroys the value of $host.
        #
        if (!defined($domain))
        {
            for (;;)
            {
                if (defined($dvalue = $DomainMap{$host}))
                {
                    next LINE if ($dvalue eq '');   # Indicates domain exclusion
                    last;
                }
                # Strip one leading component and retry; stop at last one
                last unless ($host =~ s/^[^.]*\.//);
            }
            $domain = $host;
        }
        #
        # Now that we have categorized it, add it to the corresponding counters
        #
        $TotalRequests++;               # Total requests received
        $TotalBytes += $bytes;          # Total bytes sent
        $DayRequests{$date}++;          # Add to Daily Table
        $DayBytes{$date} += $bytes;
        if ($Do_Hourly)                 # Add to Hourly Table?
        {
            $HourRequests{$hour}++;
            $HourBytes{$hour} += $bytes;
        }
        if ($Do_Domain)                 # Add to Domain Table?
        {
            $DomainRequests{$domain}++;
            $DomainBytes{$domain} += $bytes;
        }
        if ($Do_Subdomain)              # Add to Subdomain Table?
        {
            $SubdomainRequests{$subdomain}++;
            $SubdomainBytes{$subdomain} += $bytes;
        }
        if ($Do_Archive)                # Add to Archive Table?
        {
            $ArchiveRequests{$pathkey}++;
            $ArchiveBytes{$pathkey} += $bytes;
        }
        if ($Do_Ident)                  # Add to Ident Table?
        {
            $IdentRequests{$ident}++;
            $IdentBytes{$ident} += $bytes;
        }
    }
    continue
    {
        last if eof(FILE);
        # NOTE(review): "$_ = ;" lost its readline operand in extraction;
        # the eof(FILE) guard above implies the original was $_ = <FILE>;
        $_ = ;
    }
}

# ==========================================================================
# Initialize the DNS cache and remove entries that have expired.
#
sub init_DNS
{
    local($ipnum, $value, $host, $seen, @expired);

    if ($DNScachefile)
    {
        # Persistent cache: open (or create) the DBM file, then purge every
        # entry whose timestamp is older than $DNSexpires seconds.
        dbmopen(%DNScache, $DNScachefile, 0666)
            || die "Cannot open DBM files $DNScachefile: $!\n";
        while (($ipnum, $value) = each %DNScache)
        {
            ($host, $seen) = split(/\|/, $value);
            if ($StartTime > ($seen + $DNSexpires)) { push(@expired, $ipnum); }
        }
        # Deletions are deferred so the each() iteration above is not
        # disturbed by modifying the hash while scanning it.
        foreach $ipnum (@expired) { delete $DNScache{$ipnum}; }
    }
    else { %DNScache = (); }    # no cache file: in-memory cache only
}

# ==========================================================================
# Close the DNS cache
#
sub close_DNS
{
    if ($DNScachefile) { dbmclose(%DNScache); }
}

# ==========================================================================
# Resolve an IP address to its DNS hostname (if it has one) with caching.
#
sub resolve
{
    local($ip) = @_;
    local($ipnum, $value, $host, $seen, $aliases, $addrtype, $length, @addrs);

    $ipnum = pack('C4', split(/\./, $ip));      # dotted quad -> 4 raw bytes
    if (defined($value = $DNScache{$ipnum}))
    {
        ($host, $seen) = split(/\|/, $value);
        return $host;   # cache hit; may be '' meaning "known to have no name"
    }
    # The hard-coded 2 is the AF_INET address family.
    ($host, $aliases, $addrtype, $length, @addrs) = gethostbyaddr($ipnum, 2);
    if (!defined($host)) { $host = ''; }        # Many hosts have no DNS names
    $DNScache{$ipnum} = join('|', $host, time); # cache result with timestamp
    return $host;
}

# ==========================================================================
# Output the summary in HTML
#
sub output_summary
{
    local($prefix, $earliest, $latest, $section, $value, @dupes);
    #
    # We first need to combine slashless directory names with slashful ones
    #
    @dupes = ();
    while (($section, $value) = each %ArchiveRequests)
    {
        if (($section !~ m#/$#) && defined($ArchiveRequests{"$section/"}))
        {
            push(@dupes, $section);
        }
    }
    foreach $section (@dupes)
    {
        $ArchiveRequests{"$section/"} += $ArchiveRequests{$section};
        delete $ArchiveRequests{$section};
        $ArchiveBytes{"$section/"} += $ArchiveBytes{$section};
        delete $ArchiveBytes{$section};
    }
    undef @dupes;
    #
    # And create a sorted date array for later output
    #
    @SortedDates = sort datecompare keys %DayRequests;
    if ($#SortedDates < 0)      # Handle case of no data to summarize
    {
$SortedDates[0] = '[none]';
        $LastSummary = '';
    }
    if ($ReverseDateSort)
    {
        $earliest = $SortedDates[$#SortedDates];
        $latest = $SortedDates[0];
    }
    else
    {
        $earliest = $SortedDates[0];
        $latest = $SortedDates[$#SortedDates];
    }
    #
    # Finally, we can print out the resulting statistics by section.
    #
    # Note: if you have a heavily used server, you may need to increase
    # the length of the numeric output fields in the forms below.
    #
    # NOTE(review): the string literals in the print statements below lost
    # their HTML tags during text extraction — only the rendered text (line
    # breaks and bullet characters) survives.  Restore the markup from an
    # intact copy of wwwstat before use.
    print "\n";
    print "$OutputTitle\n";
    print "\n";
    print "

$OutputTitle

\n";
    if ($TimeZone) { $Updated .= "(GMT $TimeZone)"; }
    else { $Updated .= "($UpdatedGMT)"; }
    print "", $UpdateHeader, $Updated, "\n";
    print "
    \n";
    # Cross-reference list linking to each enabled section below
    &output_xref($Do_Daily, $TopDaily, 'Daily', $DailyHeader);
    &output_xref($Do_Hourly, $TopHourly, 'Hourly', $HourlyHeader);
    &output_xref($Do_Domain, $TopDomain, 'Domain', $DomainHeader);
    &output_xref($Do_Subdomain, $TopSubdomain, 'Subdomain', $SubdomainHeader);
    &output_xref($Do_Archive, $TopArchive, 'Archive', $ArchiveHeader);
    &output_xref($Do_Ident, $TopIdent, 'Ident', $IdentHeader);
    if ($LastSummary)
    {
        # Substitute previous month (%M) and year (%Y) into the URL template
        local($prevmonth, $prevyear) = &lastmonth($earliest);
        $LastSummary =~ s/%M/$prevmonth/g;
        $LastSummary =~ s/%Y/$prevyear/g;
        print "
  • $LastSumHeader\n";
    }
    print "
\n";
    print "

", $TotalsHeader, $earliest," to ", $latest, "

\n";
    print $StartTag;
    printf $TotalsFormat, $ReqRcvHeader, $TotalRequests;
    printf $TotalsFormat, $BtransHeader, $TotalBytes;
    printf $TotalsFormat, $AvgReqHeader, ($TotalRequests/($#SortedDates + 1));
    printf $TotalsFormat, $AvgByteHeader, ($TotalBytes/($#SortedDates + 1));
    print $EndTag;
    # For each section, "== 2" means print both the Top-N table and the
    # complete (unsorted-by-count) table.
    if ($Do_Daily)
    {
        &output_daily('Daily');
        if ($Do_Daily == 2)
        {
            $TopDaily = 0; $SortDaily = 0;
            &output_daily('AllDaily');
        }
    }
    if ($Do_Hourly)
    {
        &output_hourly('Hourly');
        if ($Do_Hourly == 2)
        {
            $TopHourly = 0; $SortHourly = 0;
            &output_hourly('AllHourly');
        }
    }
    if ($Do_Domain)
    {
        &output_domain('Domain');
        if ($Do_Domain == 2)
        {
            $TopDomain = 0; $SortDomain = 0;
            &output_domain('AllDomain');
        }
    }
    if ($Do_Subdomain)
    {
        &output_subdomain('Subdomain');
        if ($Do_Subdomain == 2)
        {
            $TopSubdomain = 0; $SortSubdomain = 0;
            &output_subdomain('AllSubdomain');
        }
    }
    if ($Do_Archive)
    {
        &output_archive('Archive');
        if ($Do_Archive == 2)
        {
            $TopArchive = 0; $SortArchive = 0;
            &output_archive('AllArchive');
        }
    }
    if ($Do_Ident)
    {
        &output_ident('Ident');
        if ($Do_Ident == 2)
        {
            $TopIdent = 0; $SortIdent = 0;
            &output_ident('AllIdent');
        }
    }
    print "
\n";
    print "
This summary was generated by \n";
    print "";
    print "$Version\n";
    print "
\n";
    print "\n";
}

# ==========================================================================
# Output the stats for each calendar day represented in the input file(s)
#
sub output_xref
{
    local($do_section, $topn, $frag, $header) = @_;

    if ($do_section)
    {
        local($prefix) = $topn ? "$PrefixTop $topn" : $PrefixTotal;
        print "
  • $prefix $header\n";
        if ($do_section == 2)
        {
            # Second entry links to the full (non-Top-N) table
            print "
  • $PrefixTotal $header\n";
        }
    }
}

# ==========================================================================
# Output the stats for each calendar day represented in the input file(s)
#
sub output_daily
{
    local($frag) = @_;
    local($rqsts, $bytes, $pctrqsts, $pctbytes);
    local($top) = $TopDaily;
    local($prefix) = $top ? "$PrefixTop $top" : $PrefixTotal;

    print "
    \n"; print "

    $prefix $DailyHeader

    \n"; print $StartTag; print $StatsHeader, " Date\n"; print $StatsRule, "------------\n"; local($fmt) = "$StatsFormat %s\n"; foreach $date (@SortedDates) { $rqsts = $DayRequests{$date} || 0; $bytes = $DayBytes{$date} || 0; if ($rqsts == $TotalRequests) { $pctrqsts = "100.0"; } else { $pctrqsts = sprintf("%5.2f", 100*$rqsts/$TotalRequests); } if ($bytes == $TotalBytes) { $pctbytes = "100.0"; } else { $pctbytes = sprintf("%5.2f", 100*$bytes/$TotalBytes); } printf $fmt, $pctrqsts, $pctbytes, $bytes, $rqsts, $date; last if ($top && (--$top == 0)); } print $EndTag; } # ========================================================================== # Output the stats for each hour of the day, accumulating over all days. # sub output_hourly { local($frag) = @_; local($rqsts, $bytes, $pctrqsts, $pctbytes); local($top) = $TopHourly; local($prefix) = $top ? "$PrefixTop $top" : $PrefixTotal; print "
    \n"; print "

    $prefix $HourlyHeader

    \n"; print $StartTag; print $StatsHeader, " Time\n"; print $StatsRule, "-----\n"; local($fmt) = "$StatsFormat %s\n"; foreach $hour (sort hourcompare keys %HourRequests) { $rqsts = $HourRequests{$hour}; $bytes = $HourBytes{$hour}; if ($rqsts == $TotalRequests) { $pctrqsts = "100.0"; } else { $pctrqsts = sprintf("%5.2f", 100*$rqsts/$TotalRequests); } if ($bytes == $TotalBytes) { $pctbytes = "100.0"; } else { $pctbytes = sprintf("%5.2f", 100*$bytes/$TotalBytes); } printf $fmt, $pctrqsts, $pctbytes, $bytes, $rqsts, $hour; last if ($top && (--$top == 0)); } print $EndTag; } # ========================================================================== # Output the stats for each requesting client's domain/country/organization # sub output_domain { local($frag) = @_; local($rqsts, $bytes, $pctrqsts, $pctbytes); local($top) = $TopDomain; local($prefix) = $top ? "$PrefixTop $top" : $PrefixTotal; print "
    \n"; print "

    $prefix $DomainHeader

    \n"; print $StartTag; print $StatsHeader, " Domain\n"; print $StatsRule, "------------------------------------\n"; local($fmt) = "$StatsFormat %-5s %s\n"; foreach $domain (sort domaincompare keys %DomainRequests) { $country = $DomainMap{$domain} || ''; $rqsts = $DomainRequests{$domain}; $bytes = $DomainBytes{$domain}; if ($rqsts == $TotalRequests) { $pctrqsts = "100.0"; } else { $pctrqsts = sprintf("%5.2f", 100*$rqsts/$TotalRequests); } if ($bytes == $TotalBytes) { $pctbytes = "100.0"; } else { $pctbytes = sprintf("%5.2f", 100*$bytes/$TotalBytes); } printf $fmt, $pctrqsts, $pctbytes, $bytes, $rqsts, $domain, $country; last if ($top && (--$top == 0)); } print $EndTag; } # ========================================================================== # Output the stats for each requesting client's DNS subdomain # sub output_subdomain { local($frag) = @_; local($rqsts, $bytes, $pctrqsts, $pctbytes); local($top) = $TopSubdomain; local($prefix) = $top ? "$PrefixTop $top" : $PrefixTotal; print "
    \n"; print "

    $prefix $SubdomainHeader

    \n"; print $StartTag; print $StatsHeader, " Reversed Subdomain\n"; print $StatsRule, "------------------------------------\n"; local($fmt) = "$StatsFormat %s\n"; foreach $subdomain (sort subdomcompare keys %SubdomainRequests) { $rqsts = $SubdomainRequests{$subdomain}; $bytes = $SubdomainBytes{$subdomain}; if ($rqsts == $TotalRequests) { $pctrqsts = "100.0"; } else { $pctrqsts = sprintf("%5.2f", 100*$rqsts/$TotalRequests); } if ($bytes == $TotalBytes) { $pctbytes = "100.0"; } else { $pctbytes = sprintf("%5.2f", 100*$bytes/$TotalBytes); } printf $fmt, $pctrqsts, $pctbytes, $bytes, $rqsts, $subdomain; last if ($top && (--$top == 0)); } print $EndTag; } # ========================================================================== # Output the stats for each archive (URL path or category) # sub output_archive { local($frag) = @_; local($rqsts, $bytes, $pctrqsts, $pctbytes, $asec); local($top) = $TopArchive; local($prefix) = $top ? "$PrefixTop $top" : $PrefixTotal; print "
    \n"; print "

    $prefix $ArchiveHeader

    \n"; print $StartTag; print $StatsHeader, " Archive Section\n"; print $StatsRule, "------------------------------------\n"; local($fmt) = "$StatsFormat %s\n"; foreach $section (sort archivecompare keys %ArchiveRequests) { $rqsts = $ArchiveRequests{$section}; $bytes = $ArchiveBytes{$section}; next unless $rqsts; if ($rqsts == $TotalRequests) { $pctrqsts = "100.0"; } else { $pctrqsts = sprintf("%5.2f", 100*$rqsts/$TotalRequests); } if ($bytes == $TotalBytes) { $pctbytes = "100.0"; } else { $pctbytes = sprintf("%5.2f", 100*$bytes/$TotalBytes); } $asec = $section; $asec =~ s/\&/\&/g; # Replace HTML specials $asec =~ s//\>/g; if ($InsertLink && ($asec =~ m:^/:)) { $asec = "$asec"; } printf $fmt, $pctrqsts, $pctbytes, $bytes, $rqsts, $asec; last if ($top && (--$top == 0)); } print $EndTag; } # ========================================================================== # Output the stats for each calendar day represented in the input file(s) # sub output_ident { local($frag) = @_; local($rqsts, $bytes, $pctrqsts, $pctbytes); local($top) = $TopIdent; local($prefix) = $top ? "$PrefixTop $top" : $PrefixTotal; print "
    \n"; print "

    $prefix $IdentHeader

    \n"; print $StartTag; print $StatsHeader, " Remote Identity\n"; print $StatsRule, "------------------------------------\n"; local($fmt) = "$StatsFormat %s\n"; foreach $ident (sort identcompare keys %IdentRequests) { $rqsts = $IdentRequests{$ident}; $bytes = $IdentBytes{$ident}; if ($rqsts == $TotalRequests) { $pctrqsts = "100.0"; } else { $pctrqsts = sprintf("%5.2f", 100*$rqsts/$TotalRequests); } if ($bytes == $TotalBytes) { $pctbytes = "100.0"; } else { $pctbytes = sprintf("%5.2f", 100*$bytes/$TotalBytes); } printf $fmt, $pctrqsts, $pctbytes, $bytes, $rqsts, $ident; last if ($top && (--$top == 0)); } print $EndTag; } # ========================================================================== # ========================================================================== # The following sort comparison functions take $a and $b as the two # arguments keys or values to compare. Speed is important here. sub datecompare { local($date1) = substr($a, 7, 4) * 512; # Years local($date2) = substr($b, 7, 4) * 512; $date1 += index($AllMonths, substr($a,0,3)) * 12; # Months $date2 += index($AllMonths, substr($b,0,3)) * 12; $date1 += substr($a, 4, 2); # Days $date2 += substr($b, 4, 2); if ($SortDaily == 0) { return ($ReverseDateSort ? $date2 <=> $date1 : $date1 <=> $date2); } if ($SortDaily == 1) { $sdiff = $DayRequests{$b} - $DayRequests{$a}; } else { $sdiff = $DayBytes{$b} - $DayBytes{$a}; } ($sdiff < 0) ? -1 : ($sdiff > 0) ? 1 : ($date1 <=> $date2); } sub hourcompare { if ($SortHourly == 0) { return ($a <=> $b); } if ($SortHourly == 1) { $sdiff = $HourRequests{$b} - $HourRequests{$a}; } else { $sdiff = $HourBytes{$b} - $HourBytes{$a}; } ($sdiff < 0) ? -1 : ($sdiff > 0) ? 1 : ($a <=> $b); } sub domaincompare { if ($SortDomain == 0) { $sdiff = length($a) - length($b); } elsif ($SortDomain == 1) { $sdiff = $DomainRequests{$b} - $DomainRequests{$a}; } else { $sdiff = $DomainBytes{$b} - $DomainBytes{$a}; } ($sdiff < 0) ? -1 : ($sdiff > 0) ? 
1 : ($a <=> $b);
}

sub domaincompare
{
    # key sort (0) orders by name length; 1 = by requests; else by bytes
    if ($SortDomain == 0) { $sdiff = length($a) - length($b); }
    elsif ($SortDomain == 1) { $sdiff = $DomainRequests{$b} - $DomainRequests{$a}; }
    else { $sdiff = $DomainBytes{$b} - $DomainBytes{$a}; }
    ($sdiff < 0) ? -1 : ($sdiff > 0) ? 1 : ($a cmp $b);
}

sub subdomcompare
{
    if ($SortSubdomain == 0) { return ($a cmp $b); }
    if ($SortSubdomain == 1) { $sdiff = $SubdomainRequests{$b} - $SubdomainRequests{$a}; }
    else { $sdiff = $SubdomainBytes{$b} - $SubdomainBytes{$a}; }
    ($sdiff < 0) ? -1 : ($sdiff > 0) ? 1 : ($a cmp $b);
}

sub archivecompare
{
    if ($SortArchive == 0) { return ($a cmp $b); }
    if ($SortArchive == 1) { $sdiff = $ArchiveRequests{$b} - $ArchiveRequests{$a}; }
    else { $sdiff = $ArchiveBytes{$b} - $ArchiveBytes{$a}; }
    ($sdiff < 0) ? -1 : ($sdiff > 0) ? 1 : ($a cmp $b);
}

sub identcompare
{
    if ($SortIdent == 0) { return ($a cmp $b); }
    if ($SortIdent == 1) { $sdiff = $IdentRequests{$b} - $IdentRequests{$a}; }
    else { $sdiff = $IdentBytes{$b} - $IdentBytes{$a}; }
    ($sdiff < 0) ? -1 : ($sdiff > 0) ? 1 : ($a cmp $b);
}

# ===========================================================================
# This is a modified (by Roy Fielding) version of Perl 4.036's ctime.pl
# library by Waldemar Kebsch and
# Marion Hakanson . It is distributed under the
# Artistic License (included with your Perl distribution files).
# NOTE(review): the authors' angle-bracketed e-mail addresses were lost
# during text extraction of this header comment.
#
#
# wtime returns a time string in the format "Wkd, Dy Mon Year HH:MM:SS Zone"
# with no newline appended.
#
# USAGE:
#
# wtime(time,'');    -- returns the local time with no timezone appended
#                       As in "Wed, 15 Dec 1993 23:59:59 "
#
# wtime(time,'GMT'); -- returns GMT time
#                       As in "Wed, 16 Dec 1993 07:59:59 GMT"
#
sub wtime
{
    local($time, $TZ) = @_;
    local($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst);
    local(@DoW) = ('Sun','Mon','Tue','Wed','Thu','Fri','Sat');
    local(@MoY) = ('Jan','Feb','Mar','Apr','May','Jun',
                   'Jul','Aug','Sep','Oct','Nov','Dec');

    # Determine what time zone is in effect. Use local time if
    # TZ is anything other than 'GMT'
    # There's no portable way to find the system default timezone.
    ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) =
        ($TZ eq 'GMT') ? gmtime($time) : localtime($time);
    $year += ($year < 70) ?
2000 : 1900;
    # localtime/gmtime return years since 1900; the "< 70" branch defends
    # against two-digit year values.
    sprintf("%s, %02d %s %4d %02d:%02d:%02d %s",
            $DoW[$wday], $mday, $MoY[$mon], $year, $hour, $min, $sec, $TZ);
}

# ===========================================================================
# This last routine returns the three letter abbreviation for the month
# before the one in the date that was passed as an argument, and its year.
#
sub lastmonth
{
    local($date) = @_;          # Must be in the format "Feb 01 1994"
    local($midx) = index($AllMonths, substr($date,0,3));
    local($year) = substr($date,7,4);

    if ($midx < 0) { return ('Err', ''); }                  # unknown month
    elsif ($midx == 0) { return ('Dec', ($year - 1)); }     # Jan -> prior Dec
    else { return (substr($AllMonths,($midx - 3),3), $year); }
}