  1. """
  2. This file looks into the logs to determine if there is any intrusion or provides means to assess logs
  3. """
  4. import pandas as pd
  5. import numpy as np
  6. import transport
  7. import datetime
  8. import io
  9. import json
  10. import re
  11. from datetime import datetime
  12. _date = "(^[A-Z][a-z]{2}) ([0-9]{2}) ([0-9]{2})\:([0-9]){2}\:([0-9]{2})"
  13. _ip = "\d+\.\d+\.\d+\.\d+"
  14. _regex = {
  15. 'login':{'pattern':f'{_date} .*Accepted password for ([a-z]+) from ({_ip})', 'columns':['month','day','hour','minute','second','user','ip']},
  16. 'attacks':{'pattern':f'{_date} .*Invalid user ([a-z,0-9]+) from ({_ip})','columns':['month','day','hour','minute','second','user','ip']},
  17. 'risk':{'pattern':f'{_date} .*Failed password for ([a-z,0-9]+) from ({_ip})','columns':['month','day','hour','minute','second','user','ip']} #-- accounts at risk
  18. }
  19. _map = {'Jan':1,'Feb':2,'Mar':3,'Apr':4,'May':5,'Jun':6,'Jul':7,'Aug':8,'Sep':9,'Oct':10,'Nov':11,'Dec':12}
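#
# For illustration only (the line below is a made-up sample, not taken from any real log),
# an auth.log entry such as
#     Oct 11 14:20:01 host sshd[123]: Failed password for admin from 10.0.0.5 port 22 ssh2
# matches the 'risk' pattern above, and its captured groups map to the columns as
#     month='Oct', day='11', hour='14', minute='20', second='01', user='admin', ip='10.0.0.5'
#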
def risk (_content,_id='user'):
    """
    Summarize records per account (or per the column given by _id): the number of occurrences and the time frame over which they happened.
    For failed-password records this indicates the risk associated with an account, i.e. how many failed attempts occurred and over what period.
    """
    _df = pd.DataFrame(_content)
    _g = _df.groupby([_id]).apply(lambda _group: {'start_date':_group.date.min(),'end_date':_group.date.max(),'count':_group[_id].size})
    _df = pd.DataFrame(_g.tolist())
    _df[_id] = _g.index
    _df.start_date = _df.start_date.astype(str)
    _df.end_date = _df.end_date.astype(str)
    return _df
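#
# A minimal sketch of the frame risk() returns (placeholder values, one row per account):
#
#     start_date         end_date           count   user
#     <first attempt>    <last attempt>     <n>     <account name>
#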
def attacks (_content):
    """
    Compute break-in attempt counts per source IP address. If the server sits behind a load balancer the IP may not be meaningful, but the counts still reflect break-in attempts.
    """
    return risk(_content,'ip')
def login(_content):
    """
    Compute successful login counts per user.
    """
    return risk(_content,'user')
def read (**_args):
    """
    Read and parse an auth.log file
    :path   path of the auth.log file to load
    :year   year the log entries belong to (defaults to the current year, since auth.log timestamps carry no year)
    """
    _year = _args['year'] if 'year' in _args else datetime.now().year
    _path = _args['path']
    with open(_path) as f:
        _content = f.read().split('\n')
    r = {}
    for line in _content :
        for _id in _regex :
            _pattern = _regex[_id]['pattern']
            _columns = _regex[_id]['columns']
            _out = re.search(_pattern,line)
            if _out :
                try:
                    _object = dict(zip(_columns,_out.groups()))
                    if _id not in r :
                        r[_id] = []
                    _month = _object['month']
                    if _month in _map :
                        _object['month'] = _map[_month]
                    for field in ['day','month','hour','minute','second'] :
                        _object[field] = int(_object[field])
                    _object['date'] = datetime(year=_year, month=_object['month'], day=_object['day'], hour=_object['hour'], minute=_object['minute'], second=_object['second'])
                    # _object['date'] = np.datetime64(_object['date'])
                    r[_id].append(_object)
                except Exception as e:
                    print(e)
    #
    # At this point we have the essential information formatted.
    # Summarizing it (see risk/attacks/login) serves as a means to compress it.
    #
    return r
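#
# A minimal usage sketch, assuming the standard Debian/Ubuntu auth.log location;
# the path below is an assumption and may need to be adjusted.
#
if __name__ == '__main__':
    _logs = read(path='/var/log/auth.log')
    if 'risk' in _logs :
        print (risk(_logs['risk']))        # failed password attempts summarized per user
    if 'attacks' in _logs :
        print (attacks(_logs['attacks']))  # invalid-user attempts summarized per ip
    if 'login' in _logs :
        print (login(_logs['login']))      # successful logins summarized per user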