__init__.py 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128
"""
This file contains a class and functions that extract data about running processes (as reported by tools like top) and store it in a data store of the calling code's choice
dependencies:
    - top (on the os)
@TODO:
    Test this thing on windows to see if it works
"""
  8. import pandas as pd
  9. import numpy as np
  10. import subprocess
  11. import os
  12. import datetime
  13. # from transport import factory
  14. import sys
  15. import hashlib
  16. from io import StringIO
  17. class Util:
  18. def app(self,stream):
  19. """
  20. Formatting application name, sometimes the name has parameters os separators ...
  21. """
  22. index = 1 if os.path.exists(" ".join(stream[:1])) else len(stream)-1
  23. cmd = " ".join(stream[:index]) if index > 0 else " ".join(stream)
  24. if ' ' in cmd.split(os.sep)[len(cmd.split(os.sep))-1] :
  25. p = cmd.split(os.sep)[len(cmd.split(os.sep))-1].split(' ')
  26. name = p[0]
  27. args = " ".join(p[1:])
  28. else:
  29. name = cmd.split('/')[len(cmd.split(os.sep))-1]
  30. args = " ".join(stream[index:]) if index > 0 else ""
  31. return [name,cmd,args]
  32. def parse(self,rows,xchar=';'):
  33. """
  34. This function parses the document returned by the execution of the command returns a document that will have to be parsed and formatted
  35. """
  36. m = []
  37. TIME_INDEX = 5
  38. ARGS_INDEX = 6
  39. for item in rows :
  40. if rows.index(item) != 0 :
  41. parts = item.split(xchar)
  42. row = parts[:TIME_INDEX]
  43. row.append(' '.join(parts[TIME_INDEX:ARGS_INDEX]))
  44. row += self.app(parts[ARGS_INDEX:])
  45. else:
  46. row = item.split(xchar)
  47. row = (xchar.join(row)).strip()
  48. if len(row.replace(";","")) > 0 :
  49. m.append(row)
  50. return m
  51. def read(**args) :
  52. """
  53. This function will perform the actual reads of process informations.
  54. @return {user,pid,start,status, name, args, mem,cpu}
  55. """
  56. cmd = "ps -eo pid,user,pmem,pcpu,stat,etime,args|awk 'OFS=\";\" {$1=$1; if($5 > 9) print }'"
  57. xchar = ";"
  58. try:
  59. handler = subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE)
  60. stream = handler.communicate()[0]
  61. if sys.version_info[0] > 2 :
  62. rows = str(stream).split('\\n')
  63. else:
  64. rows = stream.split('\n')
  65. formatter = Util()
  66. m = formatter.parse(rows)
  67. d = datetime.datetime.now().strftime('%m-%d-%Y')
  68. t = datetime.datetime.now().strftime('%H:%M:%S')
  69. n = os.uname()[1]
  70. m = [item for item in m if len(item) != len (m[0])]
  71. m = "\n".join(m[1:])
  72. df = pd.read_csv(StringIO(m),sep=xchar)
  73. df['date'] = np.repeat(d,df.shape[0])
  74. df['time'] = np.repeat(t,df.shape[0])
  75. df['node'] = np.repeat(os.uname()[1],df.shape[0])
  76. df.columns =['pid','user','mem','cpu','status','started','name','cmd','args','date','time','node']
  77. #
  78. # We should filter the name of the apps we are interested in here (returning the full logs )
  79. # @TODO: Add filter here to handle filter on different columns
  80. #
  81. if 'name' in args :
  82. names = args['name'].split(',')
  83. r = pd.DataFrame()
  84. for name in names :
  85. tmp = df[df.name == name.strip()]
  86. if not tmp.shape[0] :
  87. tmp = {"pid":None,"user":None,"mem":0,"cpu":0,"status":"-100","started":None,"name":name,"cmd":None,"args":None,"date":d,"time":t,"node":n}
  88. r = r.append(tmp)
  89. df = r
  90. #
  91. # For security reasons lets has the args columns with an MD5 or sha256
  92. #
  93. if 'args' in df :
  94. df.args = [hashlib.md5(str(value).encode('utf-8')).hexdigest() for value in df.args.tolist()]
  95. STATUS = {'R':'RUNNING','Z':'DEAD','D':'STASIS','S':'SLEEP','Sl':'SLEEP','Ss':'SLEEP','W':'PAGING','T':'DEAD'}
  96. df.status = df.status.apply(lambda value: STATUS.get(value,'UNKNOWN'))
  97. if 'cols' in args :
  98. _cols = list(set(df.columns.tolist()) & set(args['cols']))
  99. if _cols :
  100. df = df[_cols]
  101. #
  102. # we return a list of objects (no data-frames)
  103. if 'logger' in args and args['logger'] != None :
  104. logger = args['logger']
  105. logger(data=df)
  106. return df.to_dict(orient='records')
  107. except Exception as e:
  108. print (e)
  109. pass
  110. if __name__ == '__main__' :
  111. #
  112. # Being directly called (external use of the )
  113. print(read())