Explorar o código

learner testing

Steve L. Nyemba hai 8 anos
pai
achega
f3d9e03b95
Modificáronse 3 ficheiros con 49 adicións e 26 borrados
  1. 32 21
      src/utils/ml.py
  2. 2 1
      src/utils/transport.py
  3. 15 4
      test/TestML.py

+ 32 - 21
src/utils/ml.py

@@ -23,30 +23,41 @@ class ML:
 class AnomalyDetection:
 	def split(self,data,index=-1,threshold=0.7) :
 		N	= len(data)
-		if N < LIMIT:
-			return None
+		# if N < LIMIT:
+		# 	return None
 		
 		end 	= int(N*threshold)
 		train	= data[:end]
 		test	= data[end:]
-		if index > 0:
-		return {"train":train,"test":test,"labels":[]}
-	def learn(self,data,conf):
-		if 'filter' in conf:
-			filter = conf['filter']
-			data = ML.Filter(filter['key'],filter['value'],data)
-		attr = conf['features']
-		label= conf['label']
-		labels= ML.Extract([label],data)
-		data = ML.Extract(attr,data)
 		
-		r = self.split(data)
-		labels = self.split(labels)
+		return {"train":train,"test":test}
+	"""
 
-		p = self.gParameters(r['train'])
-		test =  self.gPx(p['mean'],p['cov'],r['test'])
-		return self.gPerformance(test,labels['test'])
+		@param key 	field name by which the data will be filtered
+		@param value 	field value for the filter
+		@param features	features to be used in the analysis
+		@param labels	used to assess performance
+	@TODO: Map/Reduce does a good job at filtering
+	"""
+	def learn(self,data,key,value,features,label):
+		xo = ML.Filter(key,value,data)
+		
+		# attr = conf['features']
+		# label= conf['label']
+		yo= ML.Extract([label['name']],xo)
+		xo = ML.Extract(features,xo)
+		yo = self.getLabel(yo,label)
+		
+		xo = self.split(xo)
+		yo = self.split(yo)
 
+		p = self.gParameters(xo['train'])
+		
+		px =  self.gPx(p['mean'],p['cov'],xo['test'])
+		
+		print self.gPerformance(px,yo['test'])
+	def getLabel(self,yo,label_conf):
+		return [ int(len(set(item) & set(label_conf["1"]))>0) for item in yo ]
 
 
 	"""
@@ -80,10 +91,10 @@ class AnomalyDetection:
 		fn = 0 # false negative
 		tn = 0 # true negative
 		for i in range(0,N):
-			tp += 1 if test[i][1]==labels[i] and test[i][1] == 1
-			fp += 1 if test[i][1] != labels[i] and test[i][1] == 1
-			fn += 1 if test[i][1] != labels[i] and test[i][1] == 0
-			tn += 1 if test[i][1] == labels[i] and test[i][1] == 0
+			tp += 1 if (test[i][1]==labels[i] and test[i][1] == 1) else 0
+			fp += 1 if (test[i][1] != labels[i] and test[i][1] == 1) else 0
+			fn += 1 if (test[i][1] != labels[i] and test[i][1] == 0) else 0
+			tn += 1 if (test[i][1] == labels[i] and test[i][1] == 0) else 0
 		precision = tp / (tp + fp)
 		recall	= tp / (tp + fn)
 		fscore 	= (2 * precision * recall)/ (precision + recall)

+ 2 - 1
src/utils/transport.py

@@ -250,7 +250,8 @@ class MessageQueue:
 		resp =  self.connection is not None and self.connection.is_open
 		self.close()
 		return resp
-	def close(self):	
+	def close(self):
+			
 		self.channel.close()
 		self.connection.close()
 """

+ 15 - 4
test/TestML.py

@@ -1,5 +1,5 @@
 from utils import transport
-from utils.ml import ML
+from utils.ml import ML, AnomalyDetection
 import unittest
 import json
 import os
@@ -10,7 +10,6 @@ f = open(path)
 CONFIG = json.loads( f.read())
 f.close()
 factory = transport.DataSourceFactory()
-#greader = factory.instance(type=ref,args=p)
 
 class TestML(unittest.TestCase):
 	def setUp(self):
@@ -34,9 +33,21 @@ class TestML(unittest.TestCase):
 		r = r['apps']
 		x = ML.Filter('label','Google Chrome',r)
 		x_ = ML.Extract(['cpu_usage','memory_usage'], x)
-		print x[0]
-		print x_
+		self.assertTrue (len (x) == len(x_))
 		pass
+	def test_Learn(self):
+		ref = CONFIG['store']['class']['read']
+		p	= CONFIG['store']['args']
+		greader = factory.instance(type=ref,args=p)
+		
+		data = greader.read()
+		
+		data = data['apps']
+		lhandler = AnomalyDetection()
+		features = CONFIG['learner']['anomalies']['features']
+		label	= CONFIG['learner']['anomalies']['label']
+		lhandler.learn(data,'label','Google Chrome',features,label)
+		
 		
 
 if __name__ == '__main__' :