Last Updated: 20 Oct 2023
|
Differences
This shows you the differences between two versions of the page.
Both sides previous revision Previous revision Next revision | Previous revision Last revision Both sides next revision | ||
server-tech:apache:importing-apache-httpd-logs-into-mysql [Feb 15, 2011 12:12 AM] dordal |
server-tech:apache:importing-apache-httpd-logs-into-mysql [Oct 20, 2023 12:20 PM] 111.225.149.133 removed |
||
---|---|---|---|
Line 1: | Line 1: | ||
+ | = Importing Apache (httpd) logs into MySQL = | ||
+ | It can be rather nice to use the full power of MySQL to analyze your apache (httpd) web logs. You can write SQL queries against the logs, and discover all sorts of relationships (and problems!) that you might not otherwise find. | ||
+ | |||
+ | This PHP script loads an apache combined-format log into a MySQL database. Download it, and then run it like this: | ||
+ | |||
+ | < | ||
+ | php mysql_httpd_log_import.php -h localhost -d logs -t logdata -u root -p foobar -c < website_access_log.020809 | ||
+ | </ | ||
+ | |||
+ | Or run it without arguments to see all the command line options. | ||
+ | < | ||
+ | php mysql_httpd_log_import.php | ||
+ | </ | ||
+ | |||
+ | **N.B.** If you improve this, please email me and I'll post the updated version. | ||
+ | |||
+ | <code php> | ||
+ | <?php | ||
+ | |||
+ | /** | ||
+ | * MySQL Apache Log Import | ||
+ | * | ||
+ | * This script imports apache combined log format logs into MySQL, so you can use standard SQL commands | ||
+ | * to query your logs. For usage, please run the script with no arguments. | ||
+ | | ||
+ | * Based in part on http:// | ||
+ | * | ||
+ | * @author David Ordal (david -at- ordal.com) | ||
+ | * @requires PHP 5.X | ||
+ | * @requires MySQL 5.X | ||
+ | * | ||
+ | */ | ||
+ | |||
+ | define(' | ||
+ | define(' | ||
+ | |||
+ | // | ||
+ | // STEP 1: GET CMD LINE ARGS | ||
+ | // | ||
+ | |||
+ | // command line arguments; check below for usage | ||
+ | $cmdArgs = getopt(' | ||
+ | |||
+ | // check args | ||
+ | if (!(isset($cmdArgs[' | ||
+ | displayUsage(); | ||
+ | |||
+ | // connect to mysql database | ||
+ | $dbHost = isset($cmdArgs[' | ||
+ | $dbUser = isset($cmdArgs[' | ||
+ | $dbPass = isset($cmdArgs[' | ||
+ | $dbTable = $cmdArgs[' | ||
+ | $dbName = $cmdArgs[' | ||
+ | $mysqli = dbConnect($dbHost, | ||
+ | |||
+ | // check to see if we need to drop and/or create the table | ||
+ | $quotedDbTable = dbQuoteIdentifier($mysqli, | ||
+ | if (isset($cmdArgs[' | ||
+ | $cmdArgs[' | ||
+ | $queryResult = dbQuery($mysqli, | ||
+ | } | ||
+ | $quotedDbTable = dbQuoteValue($mysqli, | ||
+ | $queryResult = dbQuery($mysqli, | ||
+ | if ($queryResult-> | ||
+ | if(isset($cmdArgs[' | ||
+ | dbCreateTable($mysqli, | ||
+ | } else { | ||
+ | die (" | ||
+ | } | ||
+ | } | ||
+ | |||
+ | // | ||
+ | // STEP 2: COPY DATA INTO TAB-DELIMITED FILE | ||
+ | // | ||
+ | |||
+ | // open the temp CSV file for copying data | ||
+ | $tmpFile = fopen(TMP_FILE, | ||
+ | |||
+ | // read each line of STDIN and process a log | ||
+ | $checkDb = isset($cmdArgs[' | ||
+ | while (!feof(STDIN)) { | ||
+ | $line = fgets(STDIN); | ||
+ | |||
+ | if (empty($line)) | ||
+ | continue; | ||
+ | |||
+ | $results = processLine($line); | ||
+ | |||
+ | // check the first and last entries; print an error if something went wrong | ||
+ | if (empty($results[' | ||
+ | echo " | ||
+ | continue; | ||
+ | } | ||
+ | |||
+ | // convert entries to database format. NOTE: doing the timestamp conversion this way converts | ||
+ | // each entry to the local timezone on the local box. Stupid MySQL doesn' | ||
+ | // with a timestamp, so we covert everything from the web server' | ||
+ | // and store that. | ||
+ | $results[' | ||
+ | $logTimestamp = strtotime(" | ||
+ | $sqlTimestamp = date(' | ||
+ | $results[' | ||
+ | |||
+ | // run a $mysqli-> | ||
+ | // dbQuoteValue(), | ||
+ | $remote_host = $mysqli-> | ||
+ | $ident_user = $mysqli-> | ||
+ | $auth_user = $mysqli-> | ||
+ | $timestamp = $mysqli-> | ||
+ | $method = $mysqli-> | ||
+ | $url = $mysqli-> | ||
+ | $protocol = $mysqli-> | ||
+ | $status = $mysqli-> | ||
+ | $bytes = $mysqli-> | ||
+ | $referrer = $mysqli-> | ||
+ | $user_agent = $mysqli-> | ||
+ | |||
+ | // figure out if we should check the database for the first entry. This helps prevent | ||
+ | // duplicates. Use -f to override | ||
+ | if ($checkDb) { | ||
+ | $quotedDbTable = dbQuoteIdentifier($mysqli, | ||
+ | $sql = <<< | ||
+ | SELECT TRUE FROM {$quotedDbTable} | ||
+ | WHERE | ||
+ | remote_host = ' | ||
+ | ident_user = ' | ||
+ | auth_user = ' | ||
+ | time_stamp = ' | ||
+ | request_method = ' | ||
+ | request_uri = ' | ||
+ | request_protocol = ' | ||
+ | status = ' | ||
+ | bytes = ' | ||
+ | referer = ' | ||
+ | user_agent = ' | ||
+ | QQ; | ||
+ | |||
+ | $queryResult = dbQuery($mysqli, | ||
+ | if ($queryResult-> | ||
+ | die(" | ||
+ | |||
+ | // check only the first row | ||
+ | $checkDb = false; | ||
+ | } | ||
+ | |||
+ | $logString = " | ||
+ | fwrite($tmpFile, | ||
+ | |||
+ | } | ||
+ | fclose($tmpFile); | ||
+ | |||
+ | // | ||
+ | // STEP 3: COPY TAB-DELIMITED FILE INTO DB | ||
+ | // | ||
+ | |||
+ | // load data into database | ||
+ | $quotedFile = dbQuoteValue($mysqli, | ||
+ | $quotedDbTable = dbQuoteIdentifier($mysqli, | ||
+ | $sql = <<< | ||
+ | LOAD DATA LOCAL INFILE {$quotedFile} INTO TABLE {$quotedDbTable} | ||
+ | FIELDS TERMINATED BY ' | ||
+ | LINES TERMINATED BY ' | ||
+ | QQ; | ||
+ | |||
+ | dbQuery($mysqli, | ||
+ | |||
+ | // delete the tmp file after importing | ||
+ | unlink(TMP_FILE); | ||
+ | |||
+ | $mysqli-> | ||
+ | |||
+ | / | ||
+ | | ||
+ | | ||
+ | |||
+ | /** | ||
+ | * processLine(): | ||
+ | * with the component parts | ||
+ | * | ||
+ | * @param string $line the line of the log | ||
+ | * @return array associative array of values from log file | ||
+ | * | ||
+ | */ | ||
+ | function processLine($line) { | ||
+ | $matches = array(); | ||
+ | |||
+ | // process the string. This regular expression was adapted from http:// | ||
+ | preg_match('/ | ||
+ | |||
+ | if (isset($matches[0])) { | ||
+ | return array(' | ||
+ | ' | ||
+ | ' | ||
+ | ' | ||
+ | ' | ||
+ | ' | ||
+ | ' | ||
+ | ' | ||
+ | ' | ||
+ | ' | ||
+ | ' | ||
+ | ' | ||
+ | ' | ||
+ | ' | ||
+ | ); | ||
+ | } else { | ||
+ | return array(); | ||
+ | } | ||
+ | } | ||
+ | |||
+ | /** | ||
+ | * displayUsage(): | ||
+ | * | ||
+ | */ | ||
+ | function displayUsage() { | ||
+ | $version = VERSION; | ||
+ | echo <<< | ||
+ | {$_SERVER[' | ||
+ | Usage: mysql_httpd_log_import -d < | ||
+ | -d < | ||
+ | -t <table name> | ||
+ | -h <host name> | ||
+ | -u < | ||
+ | -p < | ||
+ | | ||
+ | | ||
+ | | ||
+ | the first entry in a given file already exists in the database | ||
+ | |||
+ | QQ; | ||
+ | |||
+ | exit; | ||
+ | } | ||
+ | |||
+ | / | ||
+ | | ||
+ | | ||
+ | |||
+ | /** | ||
+ | * dbConnect(): | ||
+ | * | ||
+ | * @param string $dbHost | ||
+ | * @param string $dbUser | ||
+ | * @param string $dbPass | ||
+ | * @param string $dbName | ||
+ | * @return mysqli object | ||
+ | * | ||
+ | */ | ||
+ | function dbConnect($dbHost, | ||
+ | $mysqli = new mysqli($dbHost, | ||
+ | if ($mysqli-> | ||
+ | die(' | ||
+ | return $mysqli; | ||
+ | } | ||
+ | |||
+ | /** | ||
+ | * dbQuery(): queries the DB; exits on error | ||
+ | * | ||
+ | * @param mysqli $mysqli | ||
+ | * @param string $query | ||
+ | * @return result object | ||
+ | * | ||
+ | */ | ||
+ | function dbQuery($mysqli, | ||
+ | if (!$result = $mysqli-> | ||
+ | die(' | ||
+ | |||
+ | return $result; | ||
+ | } | ||
+ | |||
+ | /** | ||
+ | * dbQuoteValue(): | ||
+ | * the database. Note this function DOES include surrounding quotes, but | ||
+ | * only when necessary (e.g. O' | ||
+ | * 4.5 will come back as 4.5) | ||
+ | * | ||
+ | * @param | ||
+ | * @param | ||
+ | | ||
+ | * @return | ||
+ | */ | ||
+ | function dbQuoteValue($mysqli, | ||
+ | |||
+ | // check for magic quotes. these should just be off, so we throw an exception | ||
+ | if (get_magic_quotes_gpc()) | ||
+ | die(" | ||
+ | |||
+ | if(!$alwaysQuote) { | ||
+ | if (is_null($value)) { | ||
+ | return ' | ||
+ | } elseif (is_bool($value)) { | ||
+ | return $value ? ' | ||
+ | } elseif (is_numeric($value)) { | ||
+ | return $value; | ||
+ | } | ||
+ | } | ||
+ | return "'" | ||
+ | } | ||
+ | |||
+ | /** | ||
+ | * dbQuoteIdentifier(): | ||
+ | * column name | ||
+ | * | ||
+ | * @param mysqli $mysqli | ||
+ | * @param string $value | ||
+ | * @return string | ||
+ | * | ||
+ | */ | ||
+ | function dbQuoteIdentifier($mysqli, | ||
+ | return ' | ||
+ | } | ||
+ | |||
+ | /** | ||
+ | * dbCreateTable(): | ||
+ | * | ||
+ | * @param result $mysqli MySQLi result resource | ||
+ | * @param string $tableName | ||
+ | * | ||
+ | */ | ||
+ | function dbCreateTable($mysqli, | ||
+ | $quotedTableName = dbQuoteIdentifier($mysqli, | ||
+ | |||
+ | $sql = <<< | ||
+ | CREATE TABLE {$quotedTableName} ( | ||
+ | `remote_host` VARCHAR(50), | ||
+ | `ident_user` VARCHAR(50), | ||
+ | `auth_user` VARCHAR(50), | ||
+ | `time_stamp` TIMESTAMP, | ||
+ | `request_method` VARCHAR(10), | ||
+ | `request_uri` VARCHAR(1024), | ||
+ | `request_protocol` VARCHAR(10), | ||
+ | `status` INT, | ||
+ | `bytes` INT UNSIGNED, | ||
+ | `referer` VARCHAR(2048), | ||
+ | `user_agent` VARCHAR(2048), | ||
+ | `id` BIGINT auto_increment, | ||
+ | PRIMARY KEY (`id`) | ||
+ | ); | ||
+ | QQ; | ||
+ | |||
+ | dbQuery($mysqli, | ||
+ | |||
+ | } | ||
+ | |||
+ | ?> | ||
+ | |||
+ | </ |