monitor.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383
  1. """
  2. This program is designed to inspect an application environment
  3. This program should only be run on unix friendly systems
  4. We enable the engines to run several configurations
  5. Similarly to what a visitor design-pattern would do
  6. """
  7. from __future__ import division
  8. import os
  9. import subprocess
  10. from sets import Set
  11. import re
  12. import datetime
  13. import urllib2 as http, base64
  14. from threading import Thread, RLock
  15. import time
  16. import numpy as np
  17. from utils.ml import ML
  18. class Analysis:
  19. def __init__(self):
  20. self.logs = []
  21. pass
  22. def post(self,object):
  23. self.logs.append(object)
  24. def init(self):
  25. d = datetime.datetime.now()
  26. self.now = {"month":d.month,"year":d.year, "day":d.day,"hour":d.hour,"minute":d.minute}
  27. def getNow(self):
  28. d = datetime.datetime.now()
  29. return {"month":d.month,"year":d.year, "day":d.day,"hour":d.hour,"minute":d.minute}
  30. def getName(self):
  31. return self.__class__.__name__
  32. def cleanup(self,text):
  33. return re.sub('[^a-zA-Z0-9\s:]',' ',str(text)).strip()
  34. """
  35. This class is designed to analyze environment variables. Environment variables can either be folders, files or simple values
  36. The class returns a quantifiable assessment of the environment variables (expected 100%)
  37. """
  38. class Env(Analysis):
  39. def __init__(self):
  40. Analysis.__init__(self)
  41. def init(self,values):
  42. #Analysis.init(self)
  43. self.values = values
  44. """
  45. This function evaluate the validity of an environment variable by returning a 1 or 0 (computable)
  46. The function will use propositional logic (https://en.wikipedia.org/wiki/Propositional_calculus)
  47. """
  48. def evaluate(self,id):
  49. if id in os.environ :
  50. #
  51. # We can inspect to make sure the environment variable is not a path or filename.
  52. # Using propositional logic we proceed as follows:
  53. # - (p) We determine if the value is an folder or file name (using regex)
  54. # - (q) In case of a file or folder we check for existance
  55. # The final result is a conjuction of p and q
  56. #
  57. value = os.environ[id]
  58. expressions = [os.sep,'(\\.\w+)$']
  59. p = sum([ re.search(xchar,value) is not None for xchar in expressions])
  60. q = os.path.exists(value)
  61. return int(p and q)
  62. else:
  63. return 0
  64. def composite (self):
  65. #Analysis.init(self)
  66. r = [ self.evaluate(id) for id in self.values] ;
  67. N = len(r)
  68. n = sum(r)
  69. value = 100 * round(n/N,2)
  70. missing = [self.values[i] for i in range(0,N) if r[i] == 0]
  71. return dict(self.getNow(),**{"value":value,"missing":missing})
  72. """
  73. This class is designed to handle the analysis of a Python virtual environment, i.e. the deltas between a requirements file and a virtualenv
  74. @TODO: update the virtual environment
  75. """
  76. class Sandbox(Analysis):
  77. def __init__(self):
  78. Analysis.__init__(self)
  79. def init(self,conf):
  80. #Analysis.init(self)
  81. if os.path.exists(conf['sandbox']) :
  82. self.sandbox_path = conf['sandbox']
  83. else:
  84. self.sandbox_path = None
  85. if os.path.exists(conf['requirements']) :
  86. self.requirements_path = conf['requirements']
  87. else:
  88. self.requirements_path = None
  89. def get_requirements (self):
  90. f = open(self.requirements_path)
  91. return [ name.replace('-',' ').replace('_',' ') for name in f.read().split('\n') if name != '']
  92. """
  93. This function will return the modules installed in the sandbox (virtual environment)
  94. """
  95. def get_sandbox_requirements(self):
  96. cmd = ['freeze']
  97. xchar = ''.join([os.sep]*2)
  98. pip_vm = ''.join([self.sandbox_path,os.sep,'bin',os.sep,'pip']).replace(xchar,os.sep)
  99. cmd = [pip_vm]+cmd
  100. r = subprocess.check_output(cmd).split('\n')
  101. return [row.replace('-',' ').replace('_',' ') for row in r if row.strip() != '']
  102. def evaluate(self):
  103. pass
  104. """
  105. This function returns the ratio of existing modules relative to the ones expected
  106. """
  107. def composite(self):
  108. Analysis.init(self)
  109. if self.sandbox_path and self.requirements_path :
  110. required_modules= self.get_requirements()
  111. sandbox_modules = self.get_sandbox_requirements()
  112. N = len(required_modules)
  113. n = len(Set(required_modules) - Set(sandbox_modules))
  114. value = round(1 - (n/N),2)*100
  115. missing = list(Set(required_modules) - Set(sandbox_modules))
  116. return dict(self.getNow(),**{"value":value,"missing":missing})
  117. else:
  118. return None
  119. """
  120. This class performs the analysis of a list of processes and determines how many instances of each one are running
  121. The class provides a quantifiable measure of how many processes it found overall
  122. """
  123. class ProcessCounter(Analysis):
  124. def __init__(self):
  125. Analysis.__init__(self)
  126. def init(self,names):
  127. #Analysis.init(self)
  128. self.names = names
  129. def evaluate(self,name):
  130. cmd = "".join(['ps -eo comm |grep ',name,' |wc -l'])
  131. handler = subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE)
  132. return int(handler.communicate()[0].replace("\n","") )
  133. def composite(self):
  134. #Analysis.init(self)
  135. r = {}
  136. for name in self.names :
  137. r[name] = self.evaluate(name)
  138. #N = len(r)
  139. #n = sum(r)
  140. #return n/N
  141. return dict(self.getNow(),**r)
  142. """
  143. This class returns both the memory and CPU usage of an application
  144. """
  145. class DetailProcess(Analysis):
  146. def __init__(self):
  147. Analysis.__init__(self)
  148. def init (self,names):
  149. #Analysis.init(self)
  150. self.names = names;
  151. def getName(self):
  152. return "apps"
  153. def split(self,name,stream):
  154. pattern = "(\d+.{0,1}\d*)\x20*(\d+.{0,1}\d*)\x20*(\d+.{0,1}\d*)".replace(":name",name).strip()
  155. g = re.match(pattern,stream.strip())
  156. if g :
  157. return list(g.groups())+['1']+[name]
  158. else:
  159. return ''
  160. def evaluate(self,name) :
  161. cmd = "ps -eo pmem,pcpu,vsize,command|grep -E \":app\""
  162. handler = subprocess.Popen(cmd.replace(":app",name),shell=True,stdout=subprocess.PIPE)
  163. ostream = handler.communicate()[0].split('\n')
  164. #xstr = ostream
  165. ostream = [ self.split(name,row) for row in ostream if row != '' and 'grep' not in row]
  166. if len(ostream) == 0 or len(ostream[0]) < 4 :
  167. ostream = [['0','0','0','0',name]]
  168. r = []
  169. for row in ostream :
  170. #
  171. # Though the comm should only return the name as specified,
  172. # On OSX it has been observed that the fully qualified path is sometimes returned (go figure)
  173. #
  174. row = [float(value) for value in row if value.strip() != '' and name not in value ] +[re.sub('\$|^','',name)]
  175. r.append(row)
  176. #
  177. # At this point we should aggregate results
  178. # The aggregation is intended for applications with several processes (e.g: apache2)
  179. #
  180. if len(r) > 1:
  181. m = None
  182. for row in r:
  183. if m is None:
  184. m = row
  185. else:
  186. m[3] += row[3]
  187. m[0] += row[0]
  188. m[1] += row[1]
  189. m[2] += row[2]
  190. m[0] = round((m[0] / m[3]),2)
  191. m[1] = round((m[1] / m[3]),2)
  192. m[2] = round((m[2] / m[3]),2)
  193. r = [m]
  194. return r
  195. def status(self,row):
  196. x = row['memory_usage']
  197. y = row['cpu_usage']
  198. z = row['memory_available']
  199. if z :
  200. if y :
  201. return "running"
  202. return "idle"
  203. else:
  204. return "crash"
  205. def format(self,row):
  206. r= {"memory_usage":row[0],"cpu_usage":row[1],"memory_available":row[2]/1000,"proc_count":row[3],"label":self.cleanup(row[4])}
  207. status = self.status(r)
  208. r['status'] = status
  209. return r
  210. def composite(self):
  211. ma = []
  212. now = self.getNow()
  213. for name in self.names:
  214. matrix = self.evaluate(name)
  215. ma += [ dict(now, **self.format(row)) for row in matrix]
  216. return ma
  217. """
  218. This class evaluates a list of folders and provides detailed information about the age/size of each file
  219. Additionally, the details are summarized in terms of global size and average file age.
  220. """
  221. class FileWatch(Analysis):
  222. def __init__(self):
  223. pass
  224. def init(self,folders):
  225. self.folders = folders;
  226. def getName(self):
  227. return "folders"
  228. def split(self,row):
  229. x = row.split(' ')
  230. r = {}
  231. months = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
  232. if x:
  233. BYTES_TO_MB = 1000000
  234. size = int(x[0])/BYTES_TO_MB
  235. month = months.index(x[1]) + 1
  236. day = int(x[2])
  237. age = -1
  238. hour=minute = 0
  239. if ':' in x[3] :
  240. hour,minute = x[3].split(':')
  241. now = datetime.datetime.today()
  242. if month == now.month :
  243. year = now.year
  244. else:
  245. year = now.year - 1
  246. else:
  247. year = int(x[3])
  248. hour = 0
  249. minute = 0
  250. file_date = datetime.datetime(year,month,day,int(hour),int(minute))
  251. # size = round(size,2)
  252. #file_date = datetime.datetime(year,month,day,hour,minute)
  253. now = datetime.datetime.now()
  254. age = (now - file_date ).days
  255. return {"size":size,"age":age}
  256. return None
  257. def evaluate(self,path):
  258. cmd = "find :path -print0|xargs -0 ls -ls |awk '{print $6,$7,$8,$9,$10}'".replace(":path",path)
  259. handler = subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE)
  260. ostream = handler.communicate()[0].split('\n')
  261. #return [self.split(stream) for stream in ostream if stream.strip() != '' and '.DS_Store' not in stream and 'total' not in stream]
  262. return [self.split(stream) for stream in ostream if path not in stream and not set(['','total','.DS_Store']) & set(stream.split(' '))]
  263. def composite(self):
  264. d = [] #-- vector of details (age,size)
  265. now = datetime.datetime.today()
  266. for folder in self.folders:
  267. if os.path.exists(folder):
  268. xo_raw = self.evaluate(folder)
  269. xo = np.array(ML.Extract(['size','age'],xo_raw))
  270. if len(xo) == 0:
  271. continue
  272. name = re.findall("([a-z,A-Z,0-9]+)",folder)
  273. name = folder.split(os.sep)
  274. if len(name) == 1:
  275. name = [folder]
  276. else:
  277. i = len(name) -1
  278. name = [name[i-1]+' '+name[i]]
  279. name = name[0]
  280. size = round(np.sum(xo[:,0]),2)
  281. if size > 1000 :
  282. size = round(size/1000,2)
  283. units = ' GB'
  284. elif size > 1000000:
  285. size = round(size/1000000,2)
  286. units = ' TB'
  287. else:
  288. size = size
  289. units = ' MB'
  290. size = str(size)+ units
  291. age = round(np.mean(xo[:,1]),2)
  292. if age > 30 and age <= 365 :
  293. age = round(age/30,2)
  294. units = ' Months'
  295. elif age > 365 :
  296. age = round(age/365,2)
  297. units = ' Years'
  298. else:
  299. age = age
  300. units = ' Days'
  301. age = str(age)+units
  302. N = len(xo[:,1])
  303. xo = {"label":folder} #,"details":xo_raw,"summary":{"size":size,"age":age,"count":len(xo[:,1])}}
  304. xo = dict(xo,**{"size":size,"age":age,"count":N})
  305. xo["name"] = name
  306. xo['day'] = now.day
  307. xo['month'] = now.month
  308. xo['year'] = now.year
  309. xo['date'] = time.mktime(now.timetuple())
  310. d.append(xo)
  311. return d
  312. # class Monitor (Thread):
  313. # def __init__(self,pConfig,pWriter,id='processes') :
  314. # Thread.__init__(self)
  315. # self.config = pConfig[id]
  316. # self.writer = pWriter;
  317. # self.logs = []
  318. # self.handler = self.config['class']
  319. # self.mconfig = self.config['config']
  320. # def stop(self):
  321. # self.keep_running = False
  322. # def run(self):
  323. # r = {}
  324. # self.keep_running = True
  325. # lock = RLock()
  326. # while self.keep_running:
  327. # lock.acquire()
  328. # for label in self.mconfig:
  329. # self.handler.init(self.mconfig[label])
  330. # r = self.handler.composite()
  331. # self.writer.write(label=label,row = r)
  332. # time.sleep(2)
  333. # lock.release()
  334. # self.prune()
  335. # TIME_LAPSE = 60*2
  336. # time.sleep(TIME_LAPSE)
  337. # print "Stopped ..."
  338. # def prune(self) :
  339. # MAX_ENTRIES = 100
  340. # if len(self.logs) > MAX_ENTRIES :
  341. # BEG = len(self.logs) - MAX_SIZE -1
  342. # self.logs = self.logs[BEG:]