| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380 |
- """
- This program is designed to inspect an application environment.
- This program should only be run on Unix-like systems.
- Each engine below can be run against several configurations,
- similarly to what a visitor design pattern would do.
- """
- from __future__ import division
- import os
- import subprocess
- from sets import Set
- import re
- import datetime
- import urllib2 as http, base64
- from threading import Thread, RLock
- import time
- import numpy as np
- from utils.ml import ML
class Analysis:
    """
    Base class of the analysis engines.
    It keeps a list of posted log entries and exposes the current date/time
    as a dictionary of its parts (month, year, day, hour, minute).
    """
    def __init__(self):
        # entries accumulated through post()
        self.logs = []

    def post(self, object):
        """
        This function appends an entry to the logs of the engine
        @param object   entry to be stored (kept as is)
        """
        self.logs.append(object)

    def init(self):
        """
        This function stores the current date/time parts in self.now
        """
        # getNow() builds the exact same dictionary, there is no need to duplicate it
        self.now = self.getNow()

    def getNow(self):
        """
        This function returns the current local date/time as a dictionary
        with the keys month, year, day, hour and minute
        """
        d = datetime.datetime.now()
        return {"month": d.month, "year": d.year, "day": d.day, "hour": d.hour, "minute": d.minute}

    def getName(self):
        """
        This function returns the name of the class of the instance
        """
        return self.__class__.__name__
- """
- This class is designed to analyze environment variables. Environment variables can either be folders, files or simple values
- The class returns a quantifiable assessment of the environment variables (expected 100%)
- """
class Env(Analysis):
    """
    This class scores a list of environment variables.
    composite() returns the percentage of variables that evaluate to 1 and
    the list of the variables that evaluate to 0.
    """
    def __init__(self):
        Analysis.__init__(self)

    def init(self, values):
        """
        @param values   list of names of the environment variables to inspect
        """
        self.values = values

    def evaluate(self, id):
        """
        This function evaluates an environment variable by returning a 1 or 0 (computable)
        The function uses propositional logic (https://en.wikipedia.org/wiki/Propositional_calculus)
            - (p) the value looks like a folder or file name
                  (it contains the path separator or ends with an extension)
            - (q) the value exists on disk
        The result is the conjunction of p and q; a variable that is not set yields 0

        NOTE(review): a simple value (p is false) yields 0 as well, which looks at odds
        with the description of the class -- confirm the intent before changing it.

        @param id   name of the environment variable
        @return     1 or 0
        """
        if id not in os.environ:
            return 0
        value = os.environ[id]
        has_separator = os.sep in value
        has_extension = re.search(r'(\.\w+)$', value) is not None
        p = has_separator or has_extension
        q = os.path.exists(value)
        return int(p and q)

    def composite(self):
        """
        This function evaluates every variable provided to init
        @return current date/time parts, augmented with
                    value   percentage (rounded to a whole number) of variables that evaluate to 1
                    missing names of the variables that evaluate to 0
        """
        r = [self.evaluate(name) for name in self.values]
        N = len(r)
        if N == 0:
            # nothing is expected hence nothing is missing (this used to divide by zero)
            value = 100.0
        else:
            # The percentage is rounded as a whole: multiplying a rounded ratio by 100
            # yields artifacts like 28.999999999999996
            value = float(round(100.0 * sum(r) / N))
        missing = [name for name, score in zip(self.values, r) if score == 0]
        return dict(self.getNow(), **{"value": value, "missing": missing})
- """
- This class is designed to handle the analysis of a python virtual environment i.e. the deltas between a requirements file and a virtualenv
- @TODO: update the virtual environment
- """
class Sandbox(Analysis):
    """
    This class compares the modules listed in a requirements file against
    the modules reported by "pip freeze" of a sandbox (virtual environment).
    """
    def __init__(self):
        Analysis.__init__(self)

    def init(self, conf):
        """
        @param conf dictionary with the keys
                        sandbox         path of the virtual environment
                        requirements    path of the requirements file
                    A path that does not exist is stored as None
        """
        sandbox = conf['sandbox']
        requirements = conf['requirements']
        self.sandbox_path = sandbox if os.path.exists(sandbox) else None
        self.requirements_path = requirements if os.path.exists(requirements) else None

    def get_requirements(self):
        """
        This function returns the entries of the requirements file, with '-' and '_'
        replaced by a space so they compare equal regardless of the separator used.
        Blank lines and comment lines (starting with #) are not modules and are skipped
        """
        # The context manager insures the file handle is released
        with open(self.requirements_path) as f:
            lines = [line.strip() for line in f.read().split('\n')]
        return [line.replace('-', ' ').replace('_', ' ') for line in lines if line != '' and not line.startswith('#')]

    def get_sandbox_requirements(self):
        """
        This function will return the modules installed in the sandbox (virtual environment)
        The modules are obtained by running <sandbox>/bin/pip freeze and are
        normalized the same way the requirements are
        """
        pip_vm = os.path.join(self.sandbox_path, 'bin', 'pip')
        # universal_newlines provides text (not bytes) regardless of the python version
        r = subprocess.check_output([pip_vm, 'freeze'], universal_newlines=True).split('\n')
        return [row.replace('-', ' ').replace('_', ' ') for row in r if row.strip() != '']

    def evaluate(self):
        pass

    def composite(self):
        """
        This function returns the ratio of existing modules relative to the ones expected
        @return None if either the sandbox or the requirements file doesn't exist, otherwise
                the current date/time parts augmented with
                    value   percentage (rounded to a whole number) of required modules found
                    missing sorted list of required modules not found in the sandbox
        """
        Analysis.init(self)
        if not (self.sandbox_path and self.requirements_path):
            return None
        required_modules = self.get_requirements()
        N = len(required_modules)
        if N == 0:
            # nothing is required hence nothing can be missing (this used to divide by zero)
            return dict(self.getNow(), **{"value": 100.0, "missing": []})
        sandbox_modules = self.get_sandbox_requirements()
        # The difference is computed once; the builtin set replaces the deprecated sets.Set
        missing = sorted(set(required_modules) - set(sandbox_modules))
        n = len(missing)
        value = float(round(100.0 * (N - n) / N))
        return dict(self.getNow(), **{"value": value, "missing": missing})
- """
- This class analyzes a list of process names.
- For each name, it provides a quantifiable measure of how many running processes were found.
- """
class ProcessCounter(Analysis):
    """
    This class counts, for every name it is given, the lines of "ps -eo comm"
    that are matched by grep for that name.
    """
    def __init__(self):
        Analysis.__init__(self)

    def init(self, names):
        """
        @param names    list of process names to look for
        """
        self.names = names

    def evaluate(self, name):
        """
        This function counts the processes matching a name
        @param name name of the process, handed over to grep as is
        @return     number of lines of ps matched by grep
        """
        #
        # NOTE(review): the name is inserted unquoted in a shell command, it must come from
        # a trusted configuration (a name with spaces or shell characters will misbehave)
        #
        cmd = "".join(['ps -eo comm |grep ', name, ' |wc -l'])
        # universal_newlines provides text (not bytes) regardless of the python version
        handler = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, universal_newlines=True)
        count = handler.communicate()[0].strip()
        # An empty output (e.g the pipeline could not run) is counted as no process found
        return int(count) if count else 0

    def composite(self):
        """
        @return current date/time parts augmented with one entry per name (name -> count)
        """
        r = dict((name, self.evaluate(name)) for name in self.names)
        return dict(self.getNow(), **r)
- """
- This class returns both the memory and the CPU usage of an application
- """
class DetailProcess(Analysis):
    """
    This class reports, for every application name it is given, the columns
    pmem, pcpu and vsize of "ps" for the processes matched by grep -E.
    Several matching processes are aggregated into a single row.
    """
    def __init__(self):
        Analysis.__init__(self)

    def init(self, names):
        """
        @param names    list of application names (used as grep -E expressions)
        """
        self.names = names

    def getName(self):
        return "apps"

    def split(self, name, stream):
        """
        This function parses a row of ps i.e three leading numeric columns
        @param name     name of the application the row was found for
        @param stream   row returned by ps
        @return [pmem, pcpu, vsize, '1', name] (strings) or '' if the row can't be parsed
        """
        # The decimal point is escaped: a bare dot also captured the separator that follows a number
        pattern = r"(\d+\.?\d*)\x20*(\d+\.?\d*)\x20*(\d+\.?\d*)"
        g = re.match(pattern, stream.strip())
        if g:
            return list(g.groups()) + ['1'] + [name]
        else:
            return ''

    def evaluate(self, name):
        """
        This function collects the usage of the processes matching a name
        @param name name of the application (grep -E expression)
        @return a list with a single row [pmem, pcpu, vsize, proc_count, label]
                The row is made of zeros if no process could be found/parsed
        """
        #
        # NOTE(review): the name is inserted as is in a shell command, it must come from
        # a trusted configuration
        #
        cmd = "ps -eo pmem,pcpu,vsize,command|grep -E \":app\""
        # universal_newlines provides text (not bytes) regardless of the python version
        handler = subprocess.Popen(cmd.replace(":app", name), shell=True, stdout=subprocess.PIPE, universal_newlines=True)
        ostream = handler.communicate()[0].split('\n')
        # rows mentioning grep are skipped so the grep of the pipeline itself is not counted
        ostream = [self.split(name, row) for row in ostream if row != '' and 'grep' not in row]
        #
        # Rows that could not be parsed are dropped here,
        # otherwise they break the aggregation below (row without a counter)
        #
        ostream = [row for row in ostream if row]
        if not ostream:
            ostream = [['0', '0', '0', '0', name]]
        #
        # The label is the name without the regex anchors
        # (the caret has to be escaped otherwise it is never removed)
        #
        label = re.sub(r'[\^$]', '', name)
        r = [[float(value) for value in row[:4]] + [label] for row in ostream]
        #
        # At this point we should aggregate results
        # The aggregation is intended for applications with several processes (e.g: apache2)
        # The counters are added up, the usage columns are averaged over the processes
        #
        if len(r) > 1:
            count = sum(row[3] for row in r)
            m = [round(sum(row[i] for row in r) / count, 2) for i in range(3)]
            r = [m + [count, label]]
        return r

    def status(self, row):
        """
        This function derives the status of an application from a formatted row
        @param row  dictionary with the keys cpu_usage and memory_available
        @return     "crash" if memory_available is zero, otherwise
                    "running" if cpu_usage is not zero and "idle" if it is
        """
        if not row['memory_available']:
            return "crash"
        return "running" if row['cpu_usage'] else "idle"

    def format(self, row):
        """
        This function turns a row of evaluate into a dictionary
        @param row  [pmem, pcpu, vsize, proc_count, label]
        @return     dictionary with memory_usage, cpu_usage, memory_available (vsize/1000),
                    proc_count, label and status
        """
        r = {"memory_usage": row[0], "cpu_usage": row[1], "memory_available": row[2] / 1000.0, "proc_count": row[3], "label": row[4]}
        r['status'] = self.status(r)
        return r

    def composite(self):
        """
        @return list of formatted rows (one per application), each augmented with the current date/time parts
        """
        ma = []
        now = self.getNow()
        for name in self.names:
            ma += [dict(now, **self.format(row)) for row in self.evaluate(name)]
        return ma
- """
- This class evaluates a list of folders and provides detailed information about the age/size of each file
- Additionally the details are summarized in terms of global size and average age.
- """
class FileWatch(Analysis):
    """
    This class summarizes folders: for each existing folder it reports the
    cumulated size and the average age of the entries listed by "ls".
    """
    _MONTHS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    _BYTES_TO_MB = 1000000.0

    def __init__(self):
        # The parent has to be initialized, otherwise post() fails for lack of logs
        Analysis.__init__(self)

    def init(self, folders):
        """
        @param folders  list of paths of the folders to inspect
        """
        self.folders = folders

    def getName(self):
        return "folders"

    def split(self, row):
        """
        This function parses a row formatted as "<size> <month> <day> <HH:MM or year> ..."
        @param row  space separated row (see evaluate)
        @return     {"size": size in MB, "age": age in days} or None if the row can't be parsed
        """
        x = row.split(' ')
        try:
            size = int(x[0]) / self._BYTES_TO_MB
            month = self._MONTHS.index(x[1]) + 1
            day = int(x[2])
            now = datetime.datetime.now()
            if ':' in x[3]:
                #
                # The row has a time instead of a year i.e the year has to be inferred.
                # A date that would be later than today can only belong to the previous year
                # (comparing months for equality sent last month's files a year back)
                #
                hour, minute = [int(value) for value in x[3].split(':')]
                year = now.year
                if (month, day) > (now.month, now.day):
                    year = now.year - 1
            else:
                year = int(x[3])
                hour = minute = 0
            file_date = datetime.datetime(year, month, day, hour, minute)
        except (ValueError, IndexError):
            # malformed row (missing column, unknown month, invalid number or date)
            return None
        return {"size": size, "age": (now - file_date).days}

    def evaluate(self, path):
        """
        This function lists the content of a folder and parses every row kept
        @param path path of the folder
        @return     list of {"size","age"}, one per row that was kept and could be parsed
        """
        #
        # NOTE(review): the path is inserted unquoted in a shell command, it must come from
        # a trusted configuration.
        # awk keeps the columns 6 to 10 of ls -ls, split() reads them as size, month, day, time or year
        #
        cmd = "find :path -print0|xargs -0 ls -ls |awk '{print $6,$7,$8,$9,$10}'".replace(":path", path)
        # universal_newlines provides text (not bytes) regardless of the python version
        handler = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, universal_newlines=True)
        ostream = handler.communicate()[0].split('\n')
        #
        # Rows with an empty column, a "total" or a .DS_Store column are skipped, so are the rows
        # that contain the path (presumably to avoid counting an entry twice -- to be confirmed)
        #
        ignored = set(['', 'total', '.DS_Store'])
        rows = [self.split(stream) for stream in ostream if path not in stream and not ignored & set(stream.split(' '))]
        return [row for row in rows if row is not None]

    @staticmethod
    def _format_size(size):
        """Returns a size given in MB as text, expressed in MB, GB or TB"""
        # The largest unit has to be tested first, otherwise it can never be reached
        if size > 1000000:
            return str(round(size / 1000000.0, 2)) + ' TB'
        if size > 1000:
            return str(round(size / 1000.0, 2)) + ' GB'
        return str(size) + ' MB'

    @staticmethod
    def _format_age(age):
        """Returns an age given in days as text, expressed in Days, Months or Years"""
        if age > 365:
            return str(round(age / 365.0, 2)) + ' Years'
        if age > 30:
            return str(round(age / 30.0, 2)) + ' Months'
        return str(age) + ' Days'

    def composite(self):
        """
        This function summarizes every folder that exists and has entries
        @return list of dictionaries with the keys label (path), name, size (text),
                age (text), count, day, month, year and date (timestamp)
        """
        d = []  # -- vector of summaries (one per folder)
        now = datetime.datetime.today()
        for folder in self.folders:
            if not os.path.exists(folder):
                continue
            xo_raw = self.evaluate(folder)
            # assumes ML.Extract returns one [size, age] row per entry -- TODO confirm
            xo = np.array(ML.Extract(['size', 'age'], xo_raw))
            if len(xo) == 0:
                continue
            # The name is made of the last two components of the path (or the path itself)
            parts = folder.split(os.sep)
            if len(parts) == 1:
                name = folder
            else:
                name = parts[-2] + ' ' + parts[-1]
            size = self._format_size(round(np.sum(xo[:, 0]), 2))
            age = self._format_age(round(np.mean(xo[:, 1]), 2))
            d.append({
                "label": folder,
                "name": name,
                "size": size,
                "age": age,
                "count": len(xo[:, 1]),
                "day": now.day,
                "month": now.month,
                "year": now.year,
                "date": time.mktime(now.timetuple())
            })
        return d
- # class Monitor (Thread):
- # def __init__(self,pConfig,pWriter,id='processes') :
- # Thread.__init__(self)
-
- # self.config = pConfig[id]
- # self.writer = pWriter;
- # self.logs = []
- # self.handler = self.config['class']
- # self.mconfig = self.config['config']
-
-
-
- # def stop(self):
- # self.keep_running = False
- # def run(self):
- # r = {}
- # self.keep_running = True
- # lock = RLock()
- # while self.keep_running:
- # lock.acquire()
- # for label in self.mconfig:
-
- # self.handler.init(self.mconfig[label])
- # r = self.handler.composite()
- # self.writer.write(label=label,row = r)
-
- # time.sleep(2)
- # lock.release()
-
- # self.prune()
- # TIME_LAPSE = 60*2
- # time.sleep(TIME_LAPSE)
- # print "Stopped ..."
- # def prune(self) :
-
- # MAX_ENTRIES = 100
- # if len(self.logs) > MAX_ENTRIES :
- # BEG = len(self.logs) - MAX_SIZE -1
- # self.logs = self.logs[BEG:]
|