from difflib import SequenceMatcher
from collections import namedtuple
from hashlib import shake_128
from re import sub, search
import sys

#'t202-201705.log' 't3-201705.log'
# Base name of the log being analysed; saveIt() appends '-counted' to it to
# build the name of the result file.
filepath = 'node-2017.log'
# File holding the known event patterns, one per line (read by loadPatternList).
patternFilepath = '../patterns.list'
# Debug switch; not referenced anywhere in the visible code.
DEBUG = False

# Record layout of one parsed log line: a key, eight metadata fields, then the
# message text.
logEntry = namedtuple(typename="logEntry",
                      field_names="key epoch date time node facility severity pid daemon message")

# Maps each pattern string to the number of log lines that matched it.
eventClass = dict()

# Running totals, updated through logIsProcessed / logIsIgnored / logIsAccepted.
totalLogCount = ignoredLogCount = acceptedLogCount = 0

def logIsProcessed():
    """Count one more input line and checkpoint the results periodically.

    Increments the module-level ``totalLogCount``; every time the total
    reaches a multiple of 10000 the current ``eventClass`` table is written
    out through ``saveIt``.
    """
    global totalLogCount
    totalLogCount = totalLogCount + 1
    atCheckpoint = (totalLogCount % 10000) == 0
    if not atCheckpoint:
        return
    saveIt(eventClass)
    
def logIsIgnored():
    """Record that one log line was rejected by bumping ``ignoredLogCount``."""
    global ignoredLogCount
    ignoredLogCount = ignoredLogCount + 1
    
def logIsAccepted(log):
    """Record that one log line matched a pattern by bumping ``acceptedLogCount``.

    Args:
        log: The accepted message. It is not used by the function body; the
            parameter is kept for callers that pass it.
    """
    global acceptedLogCount
    acceptedLogCount = acceptedLogCount + 1
            
def saveIt(eventClass):
    """Write the running totals and per-pattern counts to ``filepath + '-counted'``.

    The file is opened in write mode, so each call replaces the previous
    report. The first line summarises the module-level counters
    (total - ignored = accepted) and the number of event classes; it is
    followed by a blank line and then one ``count | pattern`` line per entry.

    Args:
        eventClass: Mapping of pattern string to hit count. Patterns are
            stripped of surrounding whitespace before being written.
    """
    # The context manager closes the file even if one of the writes raises.
    with open(filepath+'-counted', 'w') as ft:
        ft.write("Logs: {} - {} = {} ({} Event Classes)\n\n".format(totalLogCount,
                                                                  ignoredLogCount,
                                                                  acceptedLogCount,
                                                                  len(eventClass)))
        for pattern, count in eventClass.items():
            ft.write("{} | {}\n".format(count, pattern.strip()))

def loadPatternList():
    """Register every line of the pattern file in ``eventClass`` with a count of 0.

    Each line of ``patternFilepath`` is used as a key exactly as read
    (including its line ending); a key that already exists is reset to 0.
    The printed total is ``len(eventClass) - 1``, i.e. it discounts one
    entry of the table.
    """
    with open(patternFilepath) as patternFile:
        eventClass.update(dict.fromkeys(patternFile, 0))
    loadedCount = len(eventClass)-1
    print("{} patterns loaded successfully.".format(loadedCount))
        
            
def _normalizeMessage(message):
    """Collapse a raw log message into its event-class template.

    The message is lower-cased, its variable parts (parenthesised text and
    every token containing a digit) are replaced by the ``#`` wildcard,
    ``=`` and other punctuation are dropped, and whitespace is squeezed.
    For example ``"Connection from 10.0.0.1 port=22"`` becomes
    ``"connection from # port #"``.

    Args:
        message: Raw message text (the part after the `` <> `` separator).

    Returns:
        The normalised template string.
    """
    message = message.strip().lower()                    # Case-insensitive matching
    message = sub(r'\((.*?)\)', '0', message)            # Parenthesised text -> placeholder 0
    message = sub(r'=+', ' ', message)                   # Replace '=' with ' '
    message = sub(r'[^a-z0-9_\ ]+', '', message)         # Drop all other special characters
    message = sub(r'(\w*\d\w*)+', '0', message)          # Any token containing a digit -> placeholder 0
    message = sub(r'0+', '# ', message)                  # Merge consecutive 0s into a single '#'
    message = sub(r'\s+', ' ', message)                  # Merge consecutive spaces into one
    return message.strip()


def _findPattern(tokens, tokenizedPatterns):
    """Return the first pattern matching ``tokens``, or ``None`` if none does.

    A pattern matches when it has the same number of tokens as the message
    and every pattern token is either the ``#`` wildcard or equal to the
    message token at the same position.

    Args:
        tokens: The normalised message split on whitespace.
        tokenizedPatterns: Iterable of ``(pattern, pattern.split())`` pairs,
            tried in order.
    """
    for pattern, patternTokens in tokenizedPatterns:
        if len(patternTokens) != len(tokens):
            continue
        if all(p == '#' or p == t for p, t in zip(patternTokens, tokens)):
            return pattern
    return None


# NOTE(review): `filepath` is opened here but never read -- the log lines come
# from sys.stdin. The open is kept so that a missing file still aborts the run
# exactly as before; confirm which input source is actually intended.
with open(filepath) as fp:
    eventClass["_unknown_"] = 0
    loadPatternList()

    # Tokenise every known pattern once instead of once per log line; only the
    # counters in eventClass change inside the loop, never its keys.
    tokenizedPatterns = [(pattern, pattern.split()) for pattern in eventClass]

    for line in sys.stdin:
        logIsProcessed()

        # Expected layout: "<at least 8 space-separated fields> <> <message>".
        try:
            logMeta, logMsg = line.strip().split(" <> ")
        except ValueError:
            # The separator is missing or occurs more than once.
            logMeta = None
        else:
            logMeta = logMeta.split(" ")

        # Lines with too few metadata fields are treated as broken as well,
        # instead of crashing with an IndexError.
        if logMeta is None or len(logMeta) < 8:
            print(line.strip(), "is broken!")
            logIsIgnored()
            continue

        # Build a real record (key, the eight metadata fields, message) rather
        # than assigning attributes onto the namedtuple class itself.
        entry = logEntry(0, *logMeta[:8], _normalizeMessage(logMsg))

        pattern = _findPattern(entry.message.split(), tokenizedPatterns)
        if pattern is not None:
            logIsAccepted(entry.message)
            eventClass[pattern] += 1
        else:
            logIsIgnored()
            print("Unknown pattern: {}".format(entry.message))
            eventClass["_unknown_"] += 1

    saveIt(eventClass)
