- """
- This file looks into the logs to determine if there is any intrusion or provides means to assess logs
- """
import pandas as pd
import re
from datetime import datetime
- _date = "(^[A-Z][a-z]{2}) ([0-9]{2}) ([0-9]{2})\:([0-9]){2}\:([0-9]{2})"
- _ip = "\d+\.\d+\.\d+\.\d+"
- _regex = {
- 'login':{'pattern':f'{_date} .*Accepted password for ([a-z]+) from ({_ip})', 'columns':['month','day','hour','minute','second','user','ip']},
- 'attacks':{'pattern':f'{_date} .*Invalid user ([a-z,0-9]+) from ({_ip})','columns':['month','day','hour','minute','second','user','ip']},
- 'risk':{'pattern':f'{_date} .*Failed password for ([a-z,0-9]+) from ({_ip})','columns':['month','day','hour','minute','second','user','ip']} #-- accounts at risk
-
- }
- _map = {'Jan':1,'Feb':2,'Mar':3,'Apr':4,'May':5,'Jun':6,'Jul':7,'Aug':8,'Sep':9,'Oct':10,'Nov':11,'Dec':12}
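#
# Illustrative sample lines each pattern targets (made up, not from a real log):
#   'login'  : "Oct  3 14:02:11 host sshd[123]: Accepted password for alice from 10.0.0.5 port 22 ssh2"
#   'attacks': "Oct  3 14:02:13 host sshd[124]: Invalid user admin from 10.0.0.9 port 51234"
#   'risk'   : "Oct  3 14:02:15 host sshd[125]: Failed password for root from 10.0.0.9 port 51234 ssh2"
#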
def risk (_content,_id='user'):
    """
    Summarize the records per account (or per the identifier given by _id):
    the number of occurrences and the time frame over which they occurred.
    For failed-password records the count indicates how much an account is at risk.
    """
    _df = pd.DataFrame(_content)
    # one summary row per identifier: first/last occurrence and the event count
    _g = _df.groupby([_id]).apply(lambda row: {'start_date':row.date.min(),'end_date':row.date.max(),'count':row[_id].size})
    _df = pd.DataFrame(_g.tolist())
    _df[_id] = _g.index
    _df.start_date = _df.start_date.astype(str)
    _df.end_date = _df.end_date.astype(str)
    return _df
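#
# A minimal sketch of the summary shape (the records below are illustrative only):
#   risk([{'user':'root','date':datetime(2024,10,3,14,2,15)},
#         {'user':'root','date':datetime(2024,10,3,14,9,1)}])
# yields one row: user='root', start_date/end_date bounding the attempts, count=2
#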
def attacks (_content):
    """
    Compute counts per source IP address. Behind a load balancer the IP can be
    ignored, but the counts still reflect break-in attempts.
    """
    return risk(_content,'ip')
def login(_content):
    """
    Compute counts of successful logins per user account.
    """
    return risk(_content,'user')
def read (**_args):
    """
    Parse an auth.log file into records grouped by event type (login, attacks, risk)
    :path   path of the auth.log file to load
    :year   year to stamp parsed dates with (syslog omits it); defaults to the current year
    """
    _year = _args['year'] if 'year' in _args else datetime.now().year
    _path = _args['path']
    with open(_path) as f :
        _content = f.read().split('\n')
    r = {}
    for line in _content :
        for _id in _regex :
            _pattern = _regex[_id]['pattern']
            _columns = _regex[_id]['columns']
            _out = re.search(_pattern,line)
            if _out :
                try:
                    _object = dict(zip(_columns,_out.groups()))
                    if _id not in r :
                        r[_id] = []
                    _month = _object['month']
                    if _month in _map :
                        _object['month'] = _map[_month]
                    for field in ['day','month','hour','minute','second'] :
                        _object[field] = int(_object[field])
                    _object['date'] = datetime(year=_year,month=_object['month'],day=_object['day'],hour=_object['hour'],minute=_object['minute'],second=_object['second'])
                    r[_id].append(_object)
                except Exception as e:
                    print(e)
    #
    # At this point the essential information has been extracted and formatted;
    # summarizing it (via risk, attacks or login) compresses it for reporting
    #
    return r
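#
# Minimal usage sketch (the log path below is hypothetical): parse the file,
# then summarize break-in attempts per IP and at-risk accounts per user
#
if __name__ == '__main__' :
    _logs = read(path='/var/log/auth.log')
    if 'attacks' in _logs :
        print (attacks(_logs['attacks']))
    if 'risk' in _logs :
        print (risk(_logs['risk']))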