module Hit (parseHit, Hit, ip, time, query, status, epoch, CalendarTime) where import Text.Regex import System.Time import Data.FiniteMap type IP = String data HTTPStatus = Ok | Error deriving Show data Hit = Hit { ip :: IP, time :: CalendarTime, query :: String, status :: HTTPStatus } instance Show Hit where show hit = "Hit " ++ (show $ ip hit) ++ " " ++ (calendarTimeToString $ time hit) ++ " " ++ (show $ query hit) -- Handling of log timestamps _months = listToFM [ ("Jan", January), ("Feb", February), ("Mar", March), ("Apr", April), ("May", May), ("Jun", June), ("Jul", July), ("Aug", August), ("Sep", September), ("Oct", October), ("Nov", November), ("Dec", December) ] _mkMonth month = case lookupFM _months month of Nothing -> error ("invalid month " ++ month) Just m -> m _mkTZ (sign:tz) = let (q, r) = divMod (read tz :: Int) 100 s = if (sign == '-') then 1 else -1 in s * (60 * q + r) * 60 _timeRe = mkRegex "([0-9]+)/([a-zA-Z]+)/([0-9]+):([0-9]+):([0-9]+):([0-9]+) (.[0-9]+)" _timeSplit t = ( read year :: Int, _mkMonth month, read day :: Int, read h :: Int, read m :: Int, read s :: Int, _mkTZ tz ) where [day, month, year, h, m, s, tz] = case matchRegex _timeRe t of Nothing -> error ("invalid time: " ++ t) Just parts -> parts -- Parse a log timestamp into a calendar time timeParse :: String -> CalendarTime timeParse t = let (year, month, day, hour, min, sec, tz) = _timeSplit t in toUTCTime $ toClockTime (CalendarTime { ctYear = year, ctMonth = month, ctDay = day, ctHour = hour, ctMin = min, ctSec = sec, ctTZ = tz, ctPicosec = 0, ctWDay = undefined, ctYDay = undefined, ctTZName = undefined, ctIsDST = False }) -- Convenience "minimal value" for the time stamps epoch = toUTCTime $ toClockTime (CalendarTime { ctYear = 1970, ctMonth = January, ctDay = 1, ctHour = 0, ctMin = 0, ctSec = 0, ctTZ = 0, ctPicosec = 0, ctWDay = undefined, ctYDay = undefined, ctTZName = undefined, ctIsDST = False }) _short = "^([0-9.]+) - - \\[([^]]+)\\] \"([^\"]+)\" ([0-9]+) ([0-9]+|-)" _lineRe = mkRegex (_short ++ " \"([^\"]+)\" \"([^\"]+)\"$") _shortRe = mkRegex (_short ++ "$") _mkStatus status = let v = read status :: Int in if (v == 200) then Ok else Error parseHit line = case (matchRegex _lineRe line) of Nothing -> case (matchRegex _shortRe line) of Nothing -> error ("invalid log: " ++ line) Just [client, stamp, query, status, size] -> Hit client (timeParse stamp) query (_mkStatus status) Just [client, stamp, query, status, size, referrer, agent] -> Hit client (timeParse stamp) query (_mkStatus status)