__init__.py 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139
  1. """
  2. This file contains a class and functions that extract data about running processes (like top does) and store it in a data store of the calling code's choice
  3. dependencies:
  4. - top (on the os)
  5. @TODO:
  6. Test this thing on windows to see if it works
  7. """
  8. import pandas as pd
  9. import numpy as np
  10. import subprocess
  11. import os
  12. import datetime
  13. # from transport import factory
  14. import sys
  15. import hashlib
  16. import re
  17. from io import StringIO
  18. class Util:
  19. def app(self,stream):
  20. """
  21. Formatting application name, sometimes the name has parameters os separators ...
  22. """
  23. index = 1 if os.path.exists(" ".join(stream[:1])) else len(stream)-1
  24. cmd = " ".join(stream[:index]) if index > 0 else " ".join(stream)
  25. if ' ' in cmd.split(os.sep)[len(cmd.split(os.sep))-1] :
  26. p = cmd.split(os.sep)[len(cmd.split(os.sep))-1].split(' ')
  27. name = p[0]
  28. args = " ".join(p[1:])
  29. else:
  30. name = cmd.split(os.sep)[len(cmd.split(os.sep))-1]
  31. args = " ".join(stream[index:]) if index > 0 else ""
  32. return [name,cmd.replace('"',"\\'"),args.replace('"',"\\'")]
  33. def parse(self,rows,xchar=';'):
  34. """
  35. This function parses the document returned by the execution of the command returns a document that will have to be parsed and formatted
  36. """
  37. m = []
  38. TIME_INDEX = 5
  39. ARGS_INDEX = 6
  40. for item in rows :
  41. if rows.index(item) != 0 :
  42. parts = item.split(xchar)
  43. row = parts[:TIME_INDEX]
  44. row.append(' '.join(parts[TIME_INDEX:ARGS_INDEX]))
  45. row += self.app(parts[ARGS_INDEX:])
  46. else:
  47. row = item.split(xchar)
  48. row = (xchar.join(row)).strip()
  49. if len(row.replace(";","")) > 0 :
  50. m.append(row)
  51. return m
  52. def read(**args) :
  53. """
  54. This function will perform the actual reads of process informations.
  55. @return {user,pid,start,status, name, args, mem,cpu}
  56. """
  57. cmd = "ps -eo pid,user,pmem,pcpu,stat,etime,args|awk 'OFS=\";\" {$1=$1; if($5 > 9) print }'"
  58. xchar = ";"
  59. try:
  60. handler = subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE)
  61. stream = handler.communicate()[0]
  62. if sys.version_info[0] > 2 :
  63. rows = str(stream).split('\\n')
  64. else:
  65. rows = stream.split('\n')
  66. formatter = Util()
  67. m = formatter.parse(rows)
  68. d = datetime.datetime.now().strftime('%m-%d-%Y')
  69. t = datetime.datetime.now().strftime('%H:%M:%S')
  70. n = os.uname()[1]
  71. m = [item for item in m if len(item) != len (m[0])]
  72. m = "\n".join(m[1:])
  73. df = pd.read_csv(StringIO(m),sep=xchar)
  74. df['date'] = np.repeat(d,df.shape[0])
  75. df['time'] = np.repeat(t,df.shape[0])
  76. df['node'] = np.repeat(os.uname()[1],df.shape[0])
  77. df.columns =['pid','user','mem','cpu','status','started','name','cmd','args','date','time','node']
  78. #
  79. # We should filter the name of the apps we are interested in here (returning the full logs )
  80. # @TODO: Add filter here to handle filter on different columns
  81. #
  82. if 'name' in args and args['name']:
  83. names = args['name'].split(',')
  84. r = pd.DataFrame()
  85. for name in names :
  86. # tmp = df[df.name == name.strip() ]
  87. # ii = df.apply(lambda row: row['name'] == name.strip() or (name.strip() in str(row['name'])),axis=1).tolist()
  88. ii = df.apply(lambda row: type(row['cmd']) ==str and name.strip() == row['name'] in row['cmd'],axis=1).tolist()
  89. tmp= df[ii]
  90. # tmp.index = np.arange(tmp.shape[0])
  91. if tmp.empty:
  92. tmp = {"pid":None,"user":None,"mem":0,"cpu":0,"status":"-100","started":None,"name":name,"cmd":None,"args":None,"date":d,"time":t,"node":n}
  93. else:
  94. #r = r.append(tmp,ignore_index=False)
  95. r = pd.concat([r, tmp]) #r.append(tmp,ignore_index=False)
  96. if not r.empty :
  97. # r.index = np.arange(r.shape[0])
  98. df = r.copy()
  99. #
  100. # For security reasons lets has the args columns with an MD5 or sha256
  101. #
  102. # if not df.empty and 'args' in df :
  103. # df.args = [hashlib.md5(str(value).encode('utf-8')).hexdigest() for value in df.args.tolist()]
  104. STATUS = {'R':'RUNNING','Z':'DEAD','D':'STASIS','S':'SLEEP','Sl':'SLEEP','Ss':'SLEEP','W':'PAGING','T':'DEAD'}
  105. df.status = df.status.apply(lambda value: STATUS.get(value,'UNKNOWN'))
  106. if 'cols' in args :
  107. _cols = list(set(df.columns.tolist()) & set(args['cols']))
  108. if _cols :
  109. df = df[_cols]
  110. #
  111. # we return a list of objects (no data-frames)
  112. if 'logger' in args and args['logger'] != None :
  113. logger = args['logger']
  114. logger(data=df)
  115. df.index = np.arange(df.shape[0])
  116. return df #.to_dict(orient='records')
  117. except Exception as e:
  118. print (e)
  119. pass
  120. # if __name__ == '__main__' :
  121. # #
  122. # # Being directly called (external use of the )
  123. # print(read())