code
/
smart-top
réplica de https://dev.the-phi.com/git/library/smart-top


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139
							"""
This file contains a class and functions that extract data from running processes (like top) and store it in a data store of the calling code's choice
dependencies:
    - top (on the os)
@TODO:
    Test this thing on windows to see if it works
"""
import pandas as pd
import numpy as np
import subprocess
import os
import datetime
# from transport import factory
import sys
import hashlib
import re
from io import StringIO
class Util:
    """
    Helpers that turn the delimiter-separated lines produced by the process listing
    into normalized rows (one delimiter-joined string per process).
    """

    def app(self,stream):
        """
        Split the tokens of a command line into an application name, a command and arguments.

        :stream     list of tokens that make up the command line (may be empty)
        @return     [name, cmd, args] ; every double quote found in cmd and args is
                    replaced by a backslash followed by a single quote
        """
        # If the first token is an existing path it is the whole command; otherwise every
        # token but the last is taken as the command (presumably to cope with executable
        # paths that contain spaces -- TODO confirm).
        index = 1 if os.path.exists(" ".join(stream[:1])) else len(stream) - 1
        cmd = " ".join(stream[:index]) if index > 0 else " ".join(stream)

        # Last path component of the command; it may still carry parameters after a space.
        basename = cmd.split(os.sep)[-1]
        if ' ' in basename:
            # NOTE(review): in this branch the tokens in stream[index:] are not added to
            # args, so the final token of the command line is left out -- confirm intended.
            name, _, args = basename.partition(' ')
        else:
            name = basename
            args = " ".join(stream[index:]) if index > 0 else ""

        return [name, cmd.replace('"', "\\'"), args.replace('"', "\\'")]

    def parse(self,rows,xchar=';'):
        """
        Normalize the lines returned by the execution of the listing command.

        The first line is treated as the header and kept as is. Every other line is
        rebuilt as: the first 5 fields, the 6th field, then the name/cmd/args triple
        computed by app() from the remaining fields.

        :rows       list of lines, each one a string of fields separated by xchar
        :xchar      field delimiter, used both to split the input and to join the output
        @return     list of xchar-joined rows; rows holding nothing but delimiters are dropped
        """
        TIME_INDEX = 5
        ARGS_INDEX = 6

        m = []
        # enumerate() identifies the header by position: rows.index(item) was quadratic
        # and mistook any later line equal to the first one for a header.
        for position, item in enumerate(rows):
            parts = item.split(xchar)
            if position == 0:
                row = parts
            else:
                row = parts[:TIME_INDEX]
                row.append(' '.join(parts[TIME_INDEX:ARGS_INDEX]))
                row += self.app(parts[ARGS_INDEX:])
            row = xchar.join(row).strip()
            # A row is blank when nothing is left once the delimiter actually in use is removed.
            if row.replace(xchar, ""):
                m.append(row)
        return m
    
          
def read(**args) :
    """
    This function performs the actual read of process information: it runs `ps`,
    normalizes every line with Util.parse and loads the result into a data-frame.

    :name       optional comma separated list of application names; when at least one of
                them matches, only the matching rows are returned (a row matches when its
                name equals the given name and that name occurs in its cmd). When none
                matches, the full listing is returned.
    :cols       optional collection of column names to keep; names that are not columns
                of the result are ignored, and nothing is dropped if none of them is known
    :logger     optional callable, invoked as logger(data=<data-frame>) before returning
    @return     pandas.DataFrame with columns
                {pid,user,mem,cpu,status,started,name,cmd,args,date,time,node},
                or None when the listing could not be produced or parsed (the error is printed)
    """
    cmd     = "ps -eo pid,user,pmem,pcpu,stat,etime,args|awk 'OFS=\";\" {$1=$1; if($5 > 9) print }'"
    xchar   = ";"
    columns = ['pid','user','mem','cpu','status','started','name','cmd','args']
    try:
        handler = subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE)
        stream = handler.communicate()[0]
        # communicate() yields bytes on python 3: decode them instead of splitting their
        # repr(), which prefixed the header with b', appended a stray ' row and escaped
        # every non-ascii character.
        if not isinstance(stream, str):
            stream = stream.decode('utf-8', 'replace')
        rows = stream.split('\n')

        formatter = Util()
        m = formatter.parse(rows, xchar)

        now = datetime.datetime.now()
        d = now.strftime('%m-%d-%Y')
        t = now.strftime('%H:%M:%S')
        n = os.uname()[1]

        # m[0] is the header line printed by ps: it is skipped by position and the column
        # names are given explicitly, so no process row is dropped or consumed as a header.
        df = pd.read_csv(StringIO("\n".join(m[1:])),sep=xchar,header=None,names=columns)
        df['date'] = d
        df['time'] = t
        df['node'] = n

        #
        # We should filter the name of the apps we are interested in here (returning the full logs )
        # @TODO: Add filter here to handle filter on different columns
        #
        if args.get('name'):
            found = []
            for name in args['name'].split(','):
                name = name.strip()
                # cmd is NaN (not a str) for rows without a command; those never match
                mask = pd.Series(
                    [isinstance(_cmd, str) and _name == name and _name in _cmd
                        for _name, _cmd in zip(df['name'], df['cmd'])],
                    index=df.index, dtype=bool)
                tmp = df[mask]
                # NOTE: a name without any matching process contributes no row
                if not tmp.empty:
                    found.append(tmp)
            if found:
                df = pd.concat(found)
        #
        # For security reasons lets has the args columns with an MD5 or sha256
        #

        # if not df.empty and 'args' in df :
        #     df.args = [hashlib.md5(str(value).encode('utf-8')).hexdigest() for value in df.args.tolist()]
        STATUS = {'R':'RUNNING','Z':'DEAD','D':'STASIS','S':'SLEEP','Sl':'SLEEP','Ss':'SLEEP','W':'PAGING','T':'DEAD'}
        df['status'] = df['status'].apply(lambda value: STATUS.get(value,'UNKNOWN'))
        if args.get('cols'):
            wanted = set(args['cols'])
            # keep the data-frame's own column order (a set intersection has no stable order)
            _cols = [_name for _name in df.columns if _name in wanted]
            if _cols :
                df = df[_cols]
        #
        # we return a data-frame; the logger (if any) sees it before the index is renumbered
        if args.get('logger') is not None :
            logger = args['logger']
            logger(data=df)
        df.index = np.arange(df.shape[0])

        return df #.to_dict(orient='records')

    except Exception as e:
        # Best effort: report the problem and hand None back to the caller
        print (e)
        return None
    
# if __name__ == '__main__' :
#     #
#     # Being directly called (external use of the module)
#     print(read())