|
@@ -0,0 +1,124 @@
|
|
|
+"""
|
|
|
+This file contains class and functions that extract data from running processes like top and stores them into a data store of the calling codes choice
|
|
|
+dependencies:
|
|
|
+ - top (on the os)
|
|
|
+@TODO:
|
|
|
+ Test this thing on windows to see if it works
|
|
|
+"""
|
|
|
+import pandas as pd
|
|
|
+import numpy as np
|
|
|
+import subprocess
|
|
|
+import os
|
|
|
+import datetime
|
|
|
+# from transport import factory
|
|
|
+import sys
|
|
|
+import hashlib
|
|
|
+class Util:
|
|
|
+
|
|
|
+ def app(self,stream):
|
|
|
+ """
|
|
|
+ Formatting application name, sometimes the name has parameters os separators ...
|
|
|
+ """
|
|
|
+ index = 1 if os.path.exists(" ".join(stream[:1])) else len(stream)-1
|
|
|
+ cmd = " ".join(stream[:index]) if index > 0 else " ".join(stream)
|
|
|
+
|
|
|
+ if ' ' in cmd.split(os.sep)[len(cmd.split(os.sep))-1] :
|
|
|
+ p = cmd.split(os.sep)[len(cmd.split(os.sep))-1].split(' ')
|
|
|
+ name = p[0]
|
|
|
+ args = " ".join(p[1:])
|
|
|
+ else:
|
|
|
+ name = cmd.split('/')[len(cmd.split(os.sep))-1]
|
|
|
+ args = " ".join(stream[index:]) if index > 0 else ""
|
|
|
+
|
|
|
+ return [name,cmd,args]
|
|
|
+ def parse(self,rows,xchar=';'):
|
|
|
+ """
|
|
|
+ This function parses the document returned by the execution of the command returns a document that will have to be parsed and formatted
|
|
|
+ """
|
|
|
+ m = []
|
|
|
+ TIME_INDEX = 5
|
|
|
+ ARGS_INDEX = 6
|
|
|
+
|
|
|
+ for item in rows :
|
|
|
+ if rows.index(item) != 0 :
|
|
|
+ parts = item.split(xchar)
|
|
|
+ row = parts[:TIME_INDEX]
|
|
|
+ row.append(' '.join(parts[TIME_INDEX:ARGS_INDEX]))
|
|
|
+ row += self.app(parts[ARGS_INDEX:])
|
|
|
+ else:
|
|
|
+ row = item.split(xchar)
|
|
|
+ row = (xchar.join(row)).strip()
|
|
|
+ if len(row.replace(";","")) > 0 :
|
|
|
+ m.append(row)
|
|
|
+ return m
|
|
|
+
|
|
|
+
|
|
|
+def read(args) :
|
|
|
+ """
|
|
|
+ This function will perform the actual reads of process informations.
|
|
|
+ @return {user,pid,start,status, name, args, mem,cpu}
|
|
|
+ """
|
|
|
+ cmd = "ps -eo pid,user,pmem,pcpu,stat,etime,args|awk 'OFS=\";\" {$1=$1; if($5 > 9) print }'"
|
|
|
+ xchar = ";"
|
|
|
+ try:
|
|
|
+ handler = subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE)
|
|
|
+ stream = handler.communicate()[0]
|
|
|
+ if sys.version_info[0] > 2 :
|
|
|
+ rows = str(stream).split('\\n')
|
|
|
+ else:
|
|
|
+ rows = stream.split('\n')
|
|
|
+
|
|
|
+ formatter = Util()
|
|
|
+ m = formatter.parse(rows)
|
|
|
+
|
|
|
+ d = datetime.datetime.now().strftime('%m-%d-%Y')
|
|
|
+ t = datetime.datetime.now().strftime('%H:%M:%S')
|
|
|
+ m = [item for item in m if len(item) != len (m[0])]
|
|
|
+ m = "\n".join(m[1:])
|
|
|
+ df = pd.read_csv(pd.compat.StringIO(m),sep=xchar)
|
|
|
+ df['date'] = np.repeat(d,df.shape[0])
|
|
|
+ df['time'] = np.repeat(t,df.shape[0])
|
|
|
+ df['node'] = np.repeat(os.uname()[1],df.shape[0])
|
|
|
+ df.columns =['pid','user','mem','cpu','status','started','name','cmd','args','date','time','node']
|
|
|
+
|
|
|
+
|
|
|
+ #
|
|
|
+ # We should filter the name of the apps we are interested in here (returning the full logs )
|
|
|
+ # @TODO: Add filter here to handle filter on different columns
|
|
|
+ #
|
|
|
+
|
|
|
+ if 'name' in args :
|
|
|
+ names = args['name'].split(',')
|
|
|
+ r = pd.DataFrame()
|
|
|
+ for name in names :
|
|
|
+ tmp = df[df.name == name.strip()]
|
|
|
+ if tmp.shape[0] :
|
|
|
+ r = r.append(tmp)
|
|
|
+ df = r
|
|
|
+ #
|
|
|
+ # For security reasons lets has the args columns with an MD5 or sha256
|
|
|
+ #
|
|
|
+
|
|
|
+
|
|
|
+ df.args = [hashlib.md5(str(value).encode('utf-8')).hexdigest() for value in df.args.tolist()]
|
|
|
+ STATUS = {'R':'RUNNING','Z':'DEAD','D':'STASIS','S':'SLEEP','Sl':'SLEEP','Ss':'SLEEP','W':'PAGING','T':'DEAD'}
|
|
|
+ df.status = df.status.apply(lambda value: STATUS.get(value,'UNKNOWN'))
|
|
|
+ if 'cols' in args :
|
|
|
+ _cols = list(set(df.columns.tolist()) & set(args['cols']))
|
|
|
+ if _cols :
|
|
|
+ df = df[_cols]
|
|
|
+ #
|
|
|
+ # we return a list of objects (no data-frames)
|
|
|
+ if 'logger' in args and args['logger'] != None :
|
|
|
+ logger = args['logger']
|
|
|
+ logger(data=df)
|
|
|
+ return df.to_dict(orient='records')
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ print (e)
|
|
|
+ pass
|
|
|
+
|
|
|
+if __name__ == '__main__' :
|
|
|
+ #
|
|
|
+ # Being directly called (external use of the )
|
|
|
+ print(read())
|