Browse Source

refactoring workers for simpler use & minor ui bug fixes

Steve L. Nyemba 8 years ago
parent
commit
99f43359ed

+ 1 - 1
src/api/index.py

@@ -99,7 +99,7 @@ def procs(id):
 def sandbox():
 	global CONFIG
 	
-	if 'sandbox' in CONFIG['monitor']:
+	if 'sandbox' in CONFIG: #CONFIG['monitor']:
 		#handler = HANDLERS['sandbox']['class']
 		#conf = HANDLERS['sandbox']['config']
 		r = []

+ 50 - 49
src/api/templates/dashboard.html

@@ -85,67 +85,68 @@
 				</div>
 				<div id="sandbox_pager"></div>
 			</div>
-			<div style="margin-top:10px">
-				<div id="folder_summary">
-					<div class="">
-						
-						<div class="bold">Smart Folder Analysis/Monitoring</div>
-						<div class="small">Powered By Machine Learning</div>
-					</div>
-					<div class=" border" style="margin-top:4px; padding:2px; height:34px">
-						<i class="fa fa-search left" style="margin:4px; padding:4px; ; color:gray;"></i> 
-						<input id="folder_search" type="text" class="small left" placeholder="hostname" style="width:87%; padding-left:4px;" onkeyup="monitor.folders.search.init()"/>
-						<i class="fa fa-trash right action right" style="margin:4px; padding:4px; color:maroon" onclick="monitor.folders.search.reset()"></i>
-					</div>
-					<div style="margin-top:10px; height:150px">
-						<div id="gridfolders"></div>
-						<div id="folderspager"></div>
-					</div>
+		</div>
+		<div style="margin-top:10px">
+			<div id="folder_summary">
+				<div class="">
+					
+					<div class="bold">Smart Folder Analysis/Monitoring</div>
+					<div class="small">Powered By Machine Learning</div>
+				</div>
+				<div class=" border" style="margin-top:4px; padding:2px; height:34px">
+					<i class="fa fa-search left" style="margin:4px; padding:4px; ; color:gray;"></i> 
+					<input id="folder_search" type="text" class="small left" placeholder="hostname" style="width:87%; padding-left:4px;" onkeyup="monitor.folders.search.init()"/>
+					<i class="fa fa-trash right action right" style="margin:4px; padding:4px; color:maroon" onclick="monitor.folders.search.reset()"></i>
 				</div>
+				<div style="margin-top:10px; height:150px">
+					<div id="gridfolders"></div>
+					<div id="folderspager"></div>
+				</div>
+			</div>
 
-				<div id="folder_plan" style="display:none">
-					<div class="border-bottom" style="height:32px">
-						<div class="bold">Deletion/Archiving Plan 
-							<i class="fa fa-quote-left"></i> <span id="folder_name"></span> <i class="fa fa-quote-right"></i>
-							<i class="fa fa-angle-up action right bold" style="font-size:16px; margin:4px;" onclick="monitor.folders.show.grid()"></i>
-						</div>
-						<div class="small left">Powered By Machine Learning</div>
-						
+			<div id="folder_plan" style="display:none">
+				<div class="border-bottom" style="height:32px">
+					<div class="bold">Deletion/Archiving Plan 
+						<i class="fa fa-quote-left"></i> <span id="folder_name"></span> <i class="fa fa-quote-right"></i>
+						<i class="fa fa-angle-up action right bold" style="font-size:16px; margin:4px;" onclick="monitor.folders.show.grid()"></i>
 					</div>
+					<div class="small left">Powered By Machine Learning</div>
+					
+				</div>
 
-					<div id="delete_age" class="left width-half border-right" style="margin:2px; padding:2px;">
-						<div class="small" align="left">By Age</div>
-						<div class="number" style="height:42px">
-							
-							<div id="age_count"  align="right" class="left width-75" style="margin-right:4px">00</div>
-							<div class="small" class="left" style="height:100%; padding-top:15px">Files</div>
-								
-							
-						</div>
-						<div class="small border-top" align="center" style="padding-top:4px">
-							Approximately <span id="age_value">00</span> <span id="age_units"></span>
-						</div>
-						
-					</div>
-					<div id="delete_size" class="right width-half" class="number" style="margin:2px; padding:2px">
-						<div class="small" align="left">By Size</div>
+				<div id="delete_age" class="left width-half border-right" style="margin:2px; padding:2px;">
+					<div class="small" align="left">By Age</div>
+					<div class="number" style="height:42px">
 						
-						<div  class=" number" style="height:42px">
+						<div id="age_count"  align="right" class="left width-75" style="margin-right:4px">00</div>
+						<div class="small" class="left" style="height:100%; padding-top:15px">Files</div>
 							
-							<div id="size_count" align="right" class="left width-75" style="margin-right:4px">00</div>	
-							<div class="small" class="left" style="height:100%; padding-top:15px">Files</div>						
-						</div>
-						<div class="small border-top"align="center" style="padding-top:4px">
-							Approximately <span id="size_value">00</span> <span id="size_units"></span>
-						</div>
 						
+					</div>
+					<div class="small border-top" align="center" style="padding-top:4px">
+						Approximately <span id="age_value">00</span> <span id="age_units"></span>
+					</div>
+					
+				</div>
+				<div id="delete_size" class="right width-half" class="number" style="margin:2px; padding:2px">
+					<div class="small" align="left">By Size</div>
+					
+					<div  class=" number" style="height:42px">
 						
+						<div id="size_count" align="right" class="left width-75" style="margin-right:4px">00</div>	
+						<div class="small" class="left" style="height:100%; padding-top:15px">Files</div>						
+					</div>
+					<div class="small border-top"align="center" style="padding-top:4px">
+						Approximately <span id="size_value">00</span> <span id="size_units"></span>
 					</div>
+					
+					
 				</div>
-				<div id="chartfolder" ></div>
 			</div>
-			
+			<div id="chartfolder" ></div>
 		</div>
+			
+		
 
 		
 	</div>

+ 6 - 1
src/monitor.py

@@ -28,6 +28,8 @@ class Analysis:
 	def getNow(self):
 		d = datetime.datetime.now()
 		return {"month":d.month,"year":d.year, "day":d.day,"hour":d.hour,"minute":d.minute}
+	def getName(self):
+		return self.__class__.__name__
 
 """
 	This class is designed to analyze environment variables. Environment variables can either be folders, files or simple values
@@ -155,7 +157,8 @@ class DetailProcess(Analysis):
 	def init (self,names):
 		#Analysis.init(self)
 		self.names = names;
-	
+	def getName(self):
+		return "apps"
 	def split(self,name,stream):
 		
 		pattern = "(\d+.{0,1}\d*)\x20*(\d+.{0,1}\d*)\x20*(\d+.{0,1}\d*)".replace(":name",name).strip()
@@ -235,6 +238,8 @@ class FileWatch(Analysis):
 		pass
 	def init(self,folders):
 		self.folders = folders;
+	def getName(self):
+		return "folders"
 	def split(self,row):
 		
 		x = row.split(' ')

+ 1 - 0
src/utils/agents/__init__.py

@@ -0,0 +1 @@
+

+ 102 - 0
src/utils/agents/data-collector.py

@@ -0,0 +1,102 @@
+"""
+	This is the implementation of a data collection agent
+	The agent's role is intended to :
+		- collect data associated with folder and processes
+		- The agent will also perform various learning tasks
+
+	Usage:
+	python --path <config> --delay xxx --procs p1,p2,p3 --folders path1,path2
+"""
+from threading import Thread, RLock
+from utils.params import PARAMS
+import os
+import json
+import time
+from datetime import datetime
+from utils.transport import *
+import monitor
+class ICollector(Thread) :
+	
+	def __init__(self) :
+		Thread.__init__(self)
+		self.folders 	= None
+		self.procs	= None
+		self.config	= None
+		self.pool	= []
+		self.lock 	= RLock()
+		self.factory	= DataSourceFactory()
+		self.init()
+	def init(self):
+		
+		
+		#
+		# data store configuration (needs to be in a file)
+		#
+		path = PARAMS['path']
+		if os.path.exists(path) :
+			f = open(path)
+			self.config = json.loads(f.read())
+			#if 'store' in self.config :
+			#	self.config = self.config['store']
+			f.close()
+		self.id = self.config['id'] #PARAMS['id']
+		if 'folders' in self.config : #PARAMS :
+			folders = self.config['folders'] #PARAMS['folders'].split(',')
+			self.register('monitor.FileWatch',folders)
+		if 'procs' in self.config : #PARAMS :
+			procs = self.config['procs'] #PARAMS['procs'].split(',')
+			self.register('monitor.DetailProcess',procs)
+	
+		self.quit = False
+		#self.DELAY = PARAMS['delay']*60
+		self.DELAY = self.config['delay']
+		
+	"""
+		This function returns an instance of a data collector class :
+		ProcessDetails, FileWatch, ... provided the class name
+	"""
+	def register(self,className,params) :
+		try:			
+			
+			agent = eval(className+"()")			
+			agent.init(params)
+			self.pool.append( agent )
+		except Exception,e:
+			print e
+	def stop(self):
+		self.quit = True
+	def run(self):
+		write_class 	= self.config['store']['class']['write']
+		read_args	= self.config['store']['args']
+		
+		while self.quit == False:
+			
+			for thread in self.pool :
+				id	= "@".join([thread.getName(),self.id])
+				
+				data	= thread.composite()
+				label	= thread.getName()
+				row	= {}
+				if label == 'folders':
+					row[id]	= data
+				else:
+					label = id
+					row = data
+				self.lock.acquire()
+				store = self.factory.instance(type=write_class,args=read_args)
+				store.write(label=label,row=row)
+				self.lock.release()
+			if 'MONITOR_CONFIG_PATH' in os.environ :
+				break
+			time.sleep(self.DELAY)
+			
+		print ' *** Exiting ',self.name
+		# read_class=self.config['class']['read']
+		# store = self.factory.instance(type=write_class,args=read_args)
+		# store.flush()
+		
+
+if __name__ == '__main__':
+	thread = ICollector()
+	# thread.daemon = True
+	thread.start()

+ 83 - 0
src/utils/agents/learner.py

@@ -0,0 +1,83 @@
+"""
+	This file encapsulates a class that is intended to perform learning
+"""
+from __future__ import division
+import numpy as np
+from threading import Thread,RLock
+from utils.transport import *
+from utils.ml import AnomalyDetection,ML
+from utils.params import PARAMS
+import time
+"""
+	This class is intended to apply anomaly detection to various areas of learning
+	The areas of learning that will be skipped are :
+	['_id','_rev','learn'] ... 
+	
+	@TODO:
+		- Find a way to perform dimensionality reduction if need be
+"""
+class Anomalies(Thread) :
+	def __init__(self,lock):		
+		Thread.__init__(self)
+		path = PARAMS['path']
+		self.name = self.__class__.__name__.lower()
+		if os.path.exists(path) :
+			f = open(path)
+			self.config = json.loads(f.read())
+			f.close()
+		
+			#
+			# Initializing data store & factory class
+			#
+			self.id		= self.config['id']
+			self.apps	= self.config['procs'] if 'procs' in self.config else []
+			self.rclass	= self.config['store']['class']['read']
+			self.wclass	= self.config['store']['class']['write']		
+			self.rw_args	= self.config['store']['args']
+			self.factory 	= DataSourceFactory()
+			self.quit	= False
+			self.lock 	= lock
+	def format(self,stream):
+		pass
+	def stop(self):
+		self.quit = True
+	def run(self):
+		DELAY = self.config['delay'] * 60
+		reader 	= self.factory.instance(type=self.rclass,args=self.rw_args)
+		data	= reader.read()
+		key	= 'apps'
+		rdata	= data[key]
+		features = ['memory_usage','cpu_usage']
+		yo = {"1":["running"],"name":"status"}				
+		while self.quit == False :
+			print ' *** ',self.name, ' ' , str(datetime.today())
+			for app in self.apps:
+				print '\t',str(datetime.today()),' ** ',app
+				logs = ML.Filter('label',app,rdata)
+				if logs :
+					handler = AnomalyDetection()
+					value 	= handler.learn(logs,'label',app,features,yo)
+					print value
+					if value is not None:
+						value = dict(value,**{"features":features})
+						r[id][app] = value
+						self.lock.acquire()
+						writer = self.factory.instance(type=self.wclass,args=self.rw_args)
+						writer.write(label='learn',row=value)
+						self.lock.release()
+			#
+			if 'MONITOR_CONFIG_PATH' in os.environ :
+				break
+			time.sleep(DELAY)
+		print ' *** Exiting ',self.name.replace('a','A')
+
+				
+		
+		
+class Regression(Thread):
+	def __init__(self,params):
+		pass
+if __name__ == '__main__' :
+	lock = RLock()
+	thread = Anomalies(lock)
+	thread.start()

+ 19 - 1
src/utils/transport.py

@@ -10,6 +10,7 @@ import numpy as np
 from couchdbkit import Server
 import re
 from csv import reader
+from datetime import datetime
 """
 	@TODO: Write a process by which the class automatically handles reading and creating a preliminary sample and discovers the meta data
 """
@@ -555,7 +556,24 @@ class CouchdbWriter(Couchdb,Writer):
 			document[label] = []
 		document[label].append(row)
 		self.dbase.save_doc(document)
-
+	
+	def flush(self,params=None):
+		document = self.dbase.get(self.uid)
+		content = {}
+		_doc = {}
+		for id in document:
+			if id in ['_id','_rev','_attachments'] :
+				_doc[id] = document[id]
+			else:
+				content[id] = document[id]
+				
+		content = json.dumps(content)	
+		document= _doc
+		now = str(datetime.today())
+		
+		name = '-'.join([document['_id'] , now,'.json'])			
+		self.dbase.save_doc(document)
+		self.dbase.put_attachment(document,content,name,'application/json')
 """
 	This class acts as a factory to be able to generate an instance of a Reader/Writer
 	Against a Queue,Disk,Cloud,Couchdb