8 年之前 · 4184d0bdc7
--- a/src/utils/ml.py
+++ b/src/utils/ml.py
@@ -18,7 +18,7 @@ class ML:
 
				 		#
			
 
				 		
			
 
				 		#return [item[0] for item in data if item and attr in item[0] and item[0][attr] == value]
			
 
				-		return [[item for item in row if item[attr] == value] for row in data]
			
 
				+		return [[item for item in row if item[attr] == value][0] for row in data]
			
 
				 	@staticmethod
			
 
				 	def Extract(lattr,data):
			
 
				 		if isinstance(lattr,basestring):
			
@@ -32,7 +32,7 @@ class ML:
 
				 """
			
 
				 class AnomalyDetection:
			
 
				 		
			
 
				-	def split(self,data,index=-1,threshold=0.8) :
			
 
				+	def split(self,data,index=-1,threshold=0.9) :
			
 
				 		N	= len(data)
			
 
				 		# if N < LIMIT:
			
 
				 		# 	return None
			
@@ -53,7 +53,6 @@ class AnomalyDetection:
 
				 	"""
			
 
				 	def learn(self,data,key,value,features,label):
			
 
				 		xo = ML.Filter(key,value,data)
			
 
				-		print key,value, len(xo)
			
 
				 		
			
 
				 		if not xo or len(xo) < 100:
			
 
				 			return None
			
@@ -69,25 +68,47 @@ class AnomalyDetection:
 
				 		
			
 
				 		xo = self.split(xo)
			
 
				 		yo = self.split(yo)
			
 
				-
			
 
				-		if xo['train'] :
			
 
				-			E = 0.01
			
 
				+		p = self.gParameters(xo['train'])
			
 
				+		has_cov =  np.linalg.det(p['cov']) #-- making sure the matrix is invertible
			
 
				+		if xo['train'] and has_cov :
			
 
				+			E = 0.001
			
 
				 			fscore = 0
			
 
				+			#
			
 
				+			# We need to find an appropriate epsilon for the predictions
			
 
				+			# The appropriate epsilon is one that yields an f-score [0.5,1[
			
 
				+			#
			
 
				+			
			
 
				+			__operf__ = None
			
 
				+			perf = None
			
 
				 			for i in range(0,10):
			
 
				 				Epsilon = E + (2*E*i)
			
 
				-				p = self.gParameters(xo['train'])
			
 
				+				
			
 
				 				if p is None :
			
 
				 					return None
			
 
				+				#
			
 
				+				# At this point we've got enough data for the parameters
			
 
				+				# We should try to fine tune epsilon for better results
			
 
				+				#
			
 
				+				
			
 
				 				px =  self.gPx(p['mean'],p['cov'],xo['test'],Epsilon)
			
 
				 				
			
 
				-				perf = self.gPerformance(px,yo['test'])
			
 
				-				if fscore == 0 :
			
 
				-					fscore = perf['fscore']
			
 
				-				elif perf['fscore'] > fscore and perf['fscore'] > 0.5 :
			
 
				-					
			
 
				-					perf['epsilon'] = Epsilon
			
 
				+				
			
 
				+				__operf__ = self.gPerformance(px,yo['test'])
			
 
				+				print __operf__
			
 
				+				if __operf__['fscore'] == 1 :
			
 
				+					break
			
 
				+				if perf is None :
			
 
				+					perf = __operf__['fscore']
			
 
				+				elif perf['fscore'] < __perf__['fscore'] and __operf__['fscore']> 0.5 :
			
 
				+					perf = __operf__
			
 
				+				
			
 
				+				perf['epsilon'] = Epsilon
			
 
				 			
			
 
				-			return {"label":value,"parameters":p,"performance":perf}
			
 
				+			
			
 
				+			if perf and perf['fscore'] > 0.5 :
			
 
				+				return {"label":value,"parameters":p,"performance":perf}
			
 
				+			else:
			
 
				+				return None
			
 
				 		return None
			
 
				 	def getLabel(self,yo,label_conf):
			
 
				 		return [ int(len(set(item) & set(label_conf["1"]))>0) for item in yo ]
			
@@ -109,6 +130,7 @@ class AnomalyDetection:
 
				 			row = np.array(row)
			
 
				 			d = np.matrix(row - xu)
			
 
				 			d.shape = (n,1)
			
 
				+			
			
 
				 			b = np.exp((-0.5*np.transpose(d)) * (np.linalg.inv(sigma)*d))
			
 
				 			
			
 
				 			px = float(b/a)
			
--- a/test/TestML.py
+++ b/test/TestML.py
@@ -25,14 +25,20 @@ class TestML(unittest.TestCase):
 
				 	def test_Filter(self):
			
 
				 		r = self.greader.read()
			
 
				 		r = r['apps']
			
 
				-		x = ML.Filter('label','Google Chrome',r)
			
 
				+		#
			
 
				+		# To make this test case extensible we need to pull apps from the configuration
			
 
				+		#
			
 
				+		app = CONFIG['monitor']['processes']['config']['apps'][0]
			
 
				+		x = ML.Filter('label',app,r)
			
 
				 		for row in x:
			
 
				-			self.assertTrue(row['label'] == 'Google Chrome')
			
 
				+			self.assertTrue(row['label'] == app)
			
 
				 	def test_Extract(self):
			
 
				 		r = self.greader.read()
			
 
				 		r = r['apps']
			
 
				-		x = ML.Filter('label','Google Chrome',r)
			
 
				+		app = CONFIG['monitor']['processes']['config']['apps'][0]
			
 
				+		x = ML.Filter('label',app,r)
			
 
				 		x_ = ML.Extract(['cpu_usage','memory_usage'], x)
			
 
				+		
			
 
				 		self.assertTrue (len (x) == len(x_))
			
 
				 		pass
			
 
				 	def test_Learn(self):
			
@@ -43,11 +49,12 @@ class TestML(unittest.TestCase):
 
				 		data = greader.read()
			
 
				 		
			
 
				 		data = data['apps']
			
 
				+		app = CONFIG['monitor']['processes']['config']['apps'][1]
			
 
				 		lhandler = AnomalyDetection()
			
 
				 		features = CONFIG['learner']['anomalies']['features']
			
 
				 		label	= CONFIG['learner']['anomalies']['label']
			
 
				-		lhandler.learn(data,'label','Google Chrome',features,label)
			
 
				-		
			
 
				+		x = lhandler.learn(data,'label',app,features,label)
			
 
				+		print x
			
 
				 		
			
 
				 
			
 
				 if __name__ == '__main__' :