Explorar el Código

ui improvement and learning improvement

Steve L. Nyemba hace 8 años
padre
commit
81bd6bd658
Se han modificado 5 ficheros con 91 adiciones y 45 borrados
  1. 1 1
      src/api/static/js/dashboard.js
  2. 7 2
      src/monitor.py
  3. 73 39
      src/utils/agents/learner.py
  4. 4 3
      src/utils/ml.py
  5. 6 0
      src/utils/transport.py

+ 1 - 1
src/api/static/js/dashboard.js

@@ -143,7 +143,7 @@ monitor.processes.trend.render = function (logs, key,label) {
 			
 			type: 'time',
 			gridLines: {display:false},
-			unitStepSize:25,
+			
 			time: {
 				format:'DD-MMM HH:mm'
 			}

+ 7 - 2
src/monitor.py

@@ -269,7 +269,8 @@ class FileWatch(Analysis):
 			file_date = datetime.datetime(year,month,day,int(hour),int(minute))
 			# size = round(size,2)
 			#file_date = datetime.datetime(year,month,day,hour,minute)
-			age = (datetime.datetime.now() - file_date ).days
+			now = datetime.datetime.now()
+			age = (now - file_date ).days
 			
 			return {"size":size,"age":age}	
 		return None
@@ -319,11 +320,15 @@ class FileWatch(Analysis):
 					age = age
 					units = ' Days'
 				age = str(age)+units
-				xo = {"label":folder,"details":xo_raw,"summary":{"size":size,"age":age,"count":len(xo[:,1])}}
+				N = len(xo[:,1])
+				xo = {"label":folder} #,"details":xo_raw,"summary":{"size":size,"age":age,"count":len(xo[:,1])}}
+				xo = dict(xo,**{"size":size,"age":age,"count":N})
 				xo["name"] = name
 				xo['day'] = now.day
 				xo['month'] = now.month
 				xo['year'] = now.year
+				xo['date'] = time.mktime(now.timetuple())
+				
 				d.append(xo)
 		
 		return d

+ 73 - 39
src/utils/agents/learner.py

@@ -8,6 +8,28 @@ from utils.transport import *
 from utils.ml import AnomalyDetection,ML
 from utils.params import PARAMS
 import time
+class BaseLearner(Thread):
+	def __init__(self,lock) :
+		Thread.__init__(self)
+		path = PARAMS['path']
+		self.name = self.__class__.__name__.lower()
+		if os.path.exists(path) :
+			f = open(path)
+			self.config = json.loads(f.read())
+			f.close()
+		else:
+			self.config = None
+		self.lock = lock
+		self.factory 	= DataSourceFactory()
+		self.quit	= False
+	"""
+		This function is designed to stop processing gracefully
+		
+	"""
+	def stop(self):
+		self.quit = True
+		
+		
 """
 	This class is intended to apply anomaly detection to various areas of learning
 	The areas of learning that will be skipped are :
@@ -16,16 +38,10 @@ import time
 	@TODO:
 		- Find a way to perform dimensionality reduction if need be
 """
-class Anomalies(Thread) :
+class Anomalies(BaseLearner) :
 	def __init__(self,lock):		
-		Thread.__init__(self)
-		path = PARAMS['path']
-		self.name = self.__class__.__name__.lower()
-		if os.path.exists(path) :
-			f = open(path)
-			self.config = json.loads(f.read())
-			f.close()
-		
+		BaseLearner.__init__(self,lock)
+		if self.config :
 			#
 			# Initializing data store & factory class
 			#
@@ -34,9 +50,9 @@ class Anomalies(Thread) :
 			self.rclass	= self.config['store']['class']['read']
 			self.wclass	= self.config['store']['class']['write']		
 			self.rw_args	= self.config['store']['args']
-			self.factory 	= DataSourceFactory()
+			# self.factory 	= DataSourceFactory()
 			self.quit	= False
-			self.lock 	= lock
+			# self.lock 	= lock
 	def format(self,stream):
 		pass
 	def stop(self):
@@ -46,39 +62,57 @@ class Anomalies(Thread) :
 		DELAY = self.config['delay'] * 60
 		reader 	= self.factory.instance(type=self.rclass,args=self.rw_args)
 		data	= reader.read()
-		key	= 'apps'
-		rdata	= data[key]
-		features = ['memory_usage','cpu_usage']
-		yo = {"1":["running"],"name":"status"}				
-		while self.quit == False :
-			print ' *** ',self.name, ' ' , str(datetime.today())
-			for app in self.apps:
-				print '\t',app,str(datetime.today()),' ** ',app
-				logs = ML.Filter('label',app,rdata)
-				
-				if logs :
-					handler = AnomalyDetection()
-					value 	= handler.learn(logs,'label',app,features,yo)
-					if value is not None:
-						value = dict(value,**{"features":features})
-						value = dict({"id":self.id},**value)
-						#r[id][app] = value
-						self.lock.acquire()
-						writer = self.factory.instance(type=self.wclass,args=self.rw_args)
-						writer.write(label='learn',row=value)
-						self.lock.release()
-			#
-			if 'MONITOR_CONFIG_PATH' in os.environ :
-				break
-			time.sleep(DELAY)
+		key	= 'apps@'+self.id
+		if key in data:
+			rdata	= data[key]
+			features = ['memory_usage','cpu_usage']
+			yo = {"1":["running"],"name":"status"}				
+			while self.quit == False :
+				print ' *** ',self.name, ' ' , str(datetime.today())
+				for app in self.apps:
+					print '\t',app,str(datetime.today()),' ** ',app
+					logs = ML.Filter('label',app,rdata)
+					
+					if logs :
+						handler = AnomalyDetection()
+						value 	= handler.learn(logs,'label',app,features,yo)
+						if value is not None:
+							value = dict(value,**{"features":features})
+							value = dict({"id":self.id},**value)
+							#r[id][app] = value
+							self.lock.acquire()
+							writer = self.factory.instance(type=self.wclass,args=self.rw_args)
+							writer.write(label='learn',row=value)
+							self.lock.release()
+				#
+				if 'MONITOR_CONFIG_PATH' in os.environ :
+					break
+				time.sleep(DELAY)
 		print ' *** Exiting ',self.name.replace('a','A')
 
 				
 		
+"""
+	Let's estimate how many files we will have for a given date
+	y = ax + b, where y is the number of files, x is the date, and a, b are the fitted slope and intercept
+"""		
+class Regression(BaseLearner):
+	def __init__(self,lock):
+		BaseLearner.__init__(self)
+		self.folders 	= self.config['folders']
+		self.id 	= self.config['id']
+	def run(self):
+		DELAY = self.config['delay'] * 60
+		reader 	= self.factory.instance(type=self.rclass,args=self.rw_args)
+		data	= reader.read()
+		if 'folders' in data :
+			data = ML.Filter('id',self.id,data['folders'])
+			xo  	= ML.Extract(['date'],data)
+			yo	= ML.Extract(['count'],data)
+			numpy.linalg.lstsq(xo, yo, rcond=-1)
 		
-class Regression(Thread):
-	def __init__(self,params):
-		pass
+
+
 if __name__ == '__main__' :
 	lock = RLock()
 	thread = Anomalies(lock)

+ 4 - 3
src/utils/ml.py

@@ -117,6 +117,7 @@ class AnomalyDetection:
 		yo = self.split(yo)
 		p = self.gParameters(xo['train'])
 		has_cov =   np.linalg.det(p['cov']) if p else False #-- making sure the matrix is invertible
+		
 		if xo['train'] and has_cov :
 			E = 0.001
 			ACCEPTABLE_FSCORE = 0.6
@@ -142,7 +143,7 @@ class AnomalyDetection:
 				
 				
 				__operf__ = self.gPerformance(px,yo['test'])
-
+				print value,__operf__
 				if __operf__['fscore'] == 1 :
 					continue
 				if perf is None :
@@ -227,8 +228,8 @@ class AnomalyDetection:
 			fp += 1 if (test[i][1] != labels[i] and test[i][1] == 1) else 0
 			fn += 1 if (test[i][1] != labels[i] and test[i][1] == 0) else 0
 			tn += 1 if (test[i][1] == labels[i] and test[i][1] == 0) else 0
-		precision = tp / (tp + fp) if tp + fp > 0 else 1
-		recall	= tp / (tp + fn) if tp  + fp > 0 else 1
+		precision = tp /( (tp + fp) if tp + fp > 0 else 1)
+		recall	= tp / ((tp + fn) if tp  + fn > 0 else 1)
 		
 		fscore 	= (2 * precision * recall)/ ((precision + recall) if (precision + recall) > 0  else 1)
 		return {"precision":precision,"recall":recall,"fscore":fscore}

+ 6 - 0
src/utils/transport.py

@@ -448,6 +448,8 @@ class Couchdb:
 		dbname		= args['dbname']
 		self.server 	= Server(uri=uri) 
 		self.dbase	= self.server.get_db(dbname)
+		if self.dbase.doc_exist(self.uid) == False:
+			self.dbase.save_doc({"_id":self.uid})
 	"""
 		Ensuring the preconditions are met for processing
 	"""
@@ -542,6 +544,10 @@ class CouchdbWriter(Couchdb,Writer):
 		dbname		= args['dbname']
 		self.server 	= Server(uri=uri) 
 		self.dbase	= self.server.get_db(dbname)
+		#
+		# TODO: if the document doesn't exist then we should create it (not implemented here yet)
+		#
+
 	"""
 		write a given attribute to a document database
 		@param	label	scope of the row repair|broken|fixed|stats