-"""
|
|
|
- This file is intended to perfom certain machine learning tasks based on numpy
|
|
|
- We are trying to keep it lean that's why no sklearn involved yet
|
|
|
-
|
|
|
- @TODO:
|
|
|
- Create factory method for the learners implemented here
|
|
|
- Improve preconditions (size of the dataset, labels)
|
|
|
-"""
|
|
|
-from __future__ import division
|
|
|
-import numpy as np
|
|
|
-
|
|
|
class ML:
    @staticmethod
    def Filter(attr, value, data):
        #
        # @TODO: Make sure this approach works across all transport classes.
        # We may have a potential issue with how the data is stored ... it may not scale.
        #
        value = ML.CleanupName(value)
        # return [item[0] for item in data if item and attr in item[0] and item[0][attr] == value]
        # return [[item for item in row if item[attr] == value][0] for row in data]
        #
        # We make the filtering more resilient: if an item doesn't have the
        # attribute we skip it instead of throwing an exception. This is why
        # the loops are expanded ... fully expressive but resilient.
        #
        r = []
        for row in data:
            if isinstance(row, list):
                for item in row:
                    if attr in item and item[attr] == value:
                        r.append(item)
            else:
                #
                # We are dealing with a vector of objects
                #
                if attr in row and row[attr] == value:
                    r.append(row)
        return r
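    # A minimal usage sketch (the data below is invented for illustration):
    # rows can be flat dicts or nested lists of dicts, and the filter value is
    # passed through CleanupName first, so '$' and '.+' are stripped from it.
    #
    #   data = [{"sym": "IBM", "price": 10}, [{"sym": "MSFT", "price": 20}]]
    #   ML.Filter("sym", "$IBM", data)  # -> [{"sym": "IBM", "price": 10}]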
    @staticmethod
    def Extract(lattr, data):
        if isinstance(lattr, str):
            lattr = [lattr]
        r = [[row[key] for key in lattr] for row in data]
        if len(lattr) == 1:
            return [x[0] for x in r]
        else:
            return r
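    # Quick sketch: Extract flattens the result when a single attribute is
    # requested (illustrative values):
    #
    #   rows = [{"a": 1, "b": 2}, {"a": 3, "b": 4}]
    #   ML.Extract(["a", "b"], rows)  # -> [[1, 2], [3, 4]]
    #   ML.Extract("a", rows)         # -> [1, 3]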
    @staticmethod
    def CleanupName(value):
        return value.replace('$', '').replace('.+', '')
    @staticmethod
    def distribution(xo, lock, scale=False):
        d = []
        m = {}
        if scale:
            xu = np.mean(xo)
            sd = np.sqrt(np.var(xo))
        for xi in xo:
            value = round(xi, 2)
            if scale:
                value = round((value - xu) / sd, 2)
            key = str(value)
            lock.acquire()
            if key in m:
                index = m[key]
                d[index][1] += 1
            else:
                m[key] = len(d)
                d.append([value, 1])
            lock.release()
        del m
        return d
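    # Usage sketch (assumes a threading.Lock, since the method only relies on
    # acquire()/release(); values are illustrative):
    #
    #   import threading
    #   lock = threading.Lock()
    #   ML.distribution([1.0, 1.0, 2.5], lock)  # -> [[1.0, 2], [2.5, 1]]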

"""
    Implements a multivariate anomaly detector
    @TODO: computationally determine epsilon
"""
class AnomalyDetection:
    def __init__(self):
        pass
    def split(self, data, index=-1, threshold=0.65):
        N = len(data)
        # if N < LIMIT:
        #     return None
        end = int(N * threshold)
        train = data[:end]
        test = data[end:]
        return {"train": train, "test": test}

- """
|
|
|
-
|
|
|
- @param key field name by which the data will be filtered
|
|
|
- @param value field value for the filter
|
|
|
- @param features features to be used in the analysis
|
|
|
- @param labels used to assess performance
|
|
|
- @TODO: Map/Reduce does a good job at filtering
|
|
|
- """
|
|
|
    def learn(self, data, key, value, features, label):
        if len(data) < 10:
            return None
        xo = ML.Filter(key, value, data)
        if len(xo) < 10:
            return None
        yo = ML.Extract([label['name']], xo)
        xo = ML.Extract(features, xo)
        yo = self.getLabel(yo, label)
        #
        # @TODO: Ensure this can be fine-tuned; training size matters for
        # learning, and it's not obvious to define it upfront.
        #
        xo = self.split(xo)
        yo = self.split(yo)
        p = self.gParameters(xo['train'])
        has_cov = np.linalg.det(p['cov']) if p else False  # -- making sure the matrix is invertible
        if xo['train'] and has_cov:
            E = 0.001
            ACCEPTABLE_FSCORE = 0.6
            #
            # We need to find an appropriate epsilon for the predictions,
            # i.e. one that yields an f-score in [0.5, 1).
            #
            __operf__ = None
            perf = None
            for i in range(0, 10):
                Epsilon = E + (2 * E * i)
                #
                # At this point we've got enough data for the parameters;
                # we try to fine-tune epsilon for better results.
                #
                px = self.gPx(p['mean'], p['cov'], xo['test'], Epsilon)
                __operf__ = self.gPerformance(px, yo['test'])
                print(value, __operf__)
                if __operf__['fscore'] == 1:
                    continue
                if perf is None:
                    perf = __operf__
                    perf['epsilon'] = Epsilon
                elif perf['fscore'] < __operf__['fscore'] and __operf__['fscore'] > ACCEPTABLE_FSCORE:
                    perf = __operf__
                    perf['epsilon'] = Epsilon
            #
            # At this point we assume the sweep produced an acceptable performance.
            # The understanding is that error drives performance, which is why we reject fscore == 1.
            #
            if perf and perf['fscore'] > ACCEPTABLE_FSCORE:
                return {"label": value, "parameters": p, "performance": perf}
            else:
                return None
        return None
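    # A hedged usage sketch; the field names and label configuration below are
    # invented for illustration. The label field is expected to hold an
    # iterable of tags (see getLabel), and "1" lists the tags that mark the
    # positive class:
    #
    #   ad = AnomalyDetection()
    #   model = ad.learn(rows, key="sym", value="IBM",
    #                    features=["price", "volume"],
    #                    label={"name": "events", "1": ["crash"]})
    #   # -> {"label": "IBM", "parameters": {...}, "performance": {...}} or None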
- """
|
|
|
- This function determines if the preconditions for learning are met
|
|
|
- For that parameters are passed to the function
|
|
|
- p
|
|
|
- """
|
|
|
- def canLearn(self,p) :
|
|
|
- pass
|
|
|
- def getLabel(self,yo,label_conf):
|
|
|
- return [ int(len(set(item) & set(label_conf["1"]))>0) for item in yo ]
|
|
|
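    # getLabel marks a row as 1 when its label field shares at least one tag
    # with label_conf["1"] (illustrative values):
    #
    #   yo = [["crash", "restart"], ["ok"]]
    #   AnomalyDetection().getLabel(yo, {"1": ["crash"]})  # -> [1, 0]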

    """
        This function computes the multivariate Gaussian probability density
        for each observation; the return value is a list of [px, is_anomaly] pairs.
        @pre len(xu) == sigma.shape[0] == sigma.shape[1]
    """
    def gPx(self, xu, sigma, data, EPSILON=0.01):
        n = len(data[0])
        r = []
        #
        # Density of a multivariate Gaussian:
        #   p(x) = exp(-0.5 * (x - xu)' inv(sigma) (x - xu)) / ((2*pi)**(n/2) * det(sigma)**0.5)
        #
        sigma = np.array(sigma)
        sigma_inv = np.linalg.inv(sigma)
        a = (2 * np.pi) ** (n / 2) * np.linalg.det(sigma) ** 0.5
        test = np.array(data)
        for row in test:
            d = (np.array(row) - xu).reshape(n, 1)
            exponent = np.dot(np.dot(np.transpose(d), sigma_inv), d).item()
            px = float(np.exp(-0.5 * exponent) / a)
            r.append([px, int(px < EPSILON)])
        return r
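    # Sanity sketch: with mean [0, 0], identity covariance and observation
    # [0, 0], the density is 1/(2*pi) ~ 0.159, above the default EPSILON,
    # so the point is flagged as normal:
    #
    #   AnomalyDetection().gPx([0, 0], [[1, 0], [0, 1]], [[0, 0]])
    #   # -> [[0.1591..., 0]]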
- """
|
|
|
- This function uses stored learnt information to predict on raw data
|
|
|
- In this case it will determin if we have an anomaly or not
|
|
|
- @param xo raw observations (matrix)
|
|
|
- @param info stored information about this
|
|
|
- """
|
|
|
    def predict(self, xo, info):
        xo = ML.Extract(info['features'], xo)
        if not xo:
            return None
        sigma = info['parameters']['cov']
        xu = info['parameters']['mean']
        epsilon = info['performance']['epsilon']
        return self.gPx(xu, sigma, xo, epsilon)
- """
|
|
|
- This function computes performance metrics i.e precision, recall and f-score
|
|
|
- for details visit https://en.wikipedia.org/wiki/Precision_and_recall
|
|
|
-
|
|
|
- """
|
|
|
    def gPerformance(self, test, labels):
        N = len(test)
        tp = 0  # true positives
        fp = 0  # false positives
        fn = 0  # false negatives
        tn = 0  # true negatives
        for i in range(0, N):
            tp += 1 if (test[i][1] == labels[i] and test[i][1] == 1) else 0
            fp += 1 if (test[i][1] != labels[i] and test[i][1] == 1) else 0
            fn += 1 if (test[i][1] != labels[i] and test[i][1] == 0) else 0
            tn += 1 if (test[i][1] == labels[i] and test[i][1] == 0) else 0
        precision = tp / ((tp + fp) if tp + fp > 0 else 1)
        recall = tp / ((tp + fn) if tp + fn > 0 else 1)
        fscore = (2 * precision * recall) / ((precision + recall) if (precision + recall) > 0 else 1)
        return {"precision": precision, "recall": recall, "fscore": fscore}
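    # Worked example (illustrative): predictions [1, 1, 0, 0] against labels
    # [1, 0, 0, 0] give tp=1, fp=1, fn=0, so precision=0.5, recall=1.0 and
    # fscore = 2*0.5*1.0 / (0.5+1.0) ~ 0.667:
    #
    #   AnomalyDetection().gPerformance(
    #       [[0.1, 1], [0.2, 1], [0.9, 0], [0.8, 0]], [1, 0, 0, 0])
    #   # -> {"precision": 0.5, "recall": 1.0, "fscore": 0.666...}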

    """
        This function returns the Gaussian parameters, i.e. means and covariance;
        the information will be used to compute probabilities.
    """
    def gParameters(self, train):
        n = len(train[0])
        m = np.transpose(np.array(train))
        u = np.array([np.mean(m[i, :]) for i in range(0, n)])
        if np.sum(u) == 0:
            return None
        r = np.array([np.sqrt(np.var(m[i, :])) for i in range(0, n)])
        #
        # Before we normalize the data we must ensure there is some level of
        # movement; a feature with zero variance suggests we may not have
        # enough information to do anything.
        #
        if 0 in r:
            return None
        #
        # -- Normalize the matrix, then compute the covariance matrix
        #
        m = np.array([(m[i, :] - u[i]) / r[i] for i in range(0, n)])
        sigma = np.cov(m)
        sigma = [list(row) for row in sigma]
        return {"cov": sigma, "mean": list(u)}
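    # Quick sketch (illustrative values): three observations of two features.
    #
    #   AnomalyDetection().gParameters([[1.0, 10.0], [2.0, 12.0], [3.0, 11.0]])
    #   # -> {"cov": [[...], [...]], "mean": [2.0, 11.0]}
    #   # Returns None when all means are 0 or any feature has zero variance.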

class AnalyzeAnomaly(AnomalyDetection):
    def __init__(self):
        AnomalyDetection.__init__(self)
    """
        This analysis function includes a predicted status, because an anomaly can either be:
        - downtime, i.e. end of day
        - a spike, and thus a potential imminent crash
        @param xo   matrix of variables
        @param info information about what was learnt
    """
    def predict(self, xo, info):
        x = xo[len(xo) - 1]
        r = AnomalyDetection.predict(self, [x], info)
        #
        # In order to determine what kind of anomaly we have, we compute the
        # slope (idle or crash); the slope is computed as the covariance over
        # the variance of the features.
        #
        if r is not None:
            N = len(info['features'])
            xy = ML.Extract(info['features'], xo)
            xy = np.array(xy)
            vxy = np.array([np.var(xy[:, i]) for i in range(0, N)])
            cxy = np.array(info['parameters']['cov'])
            # cxy = np.cov(np.transpose(xy))
            if np.sum(vxy) == 0:
                vxy = cxy
            alpha = cxy / vxy
            r = {"anomaly": r[0][1], "slope": list(alpha[:, 0])}
        return r
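# A hedged end-to-end sketch, reusing the hypothetical names from the learn()
# example above; 'model' is the dict returned by learn(), augmented with the
# feature list that predict() expects under info['features']:
#
#   analyzer = AnalyzeAnomaly()
#   model['features'] = ["price", "volume"]
#   analyzer.predict(recent_rows, model)
#   # -> {"anomaly": 0 or 1, "slope": [...]} for the most recent observation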
class Regression:
    parameters = {}

    @staticmethod
    def predict(xo):
        pass

    def __init__(self, config):
        pass