浏览代码

removing unused packages

Steve Nyemba 4 年之前
父节点
当前提交
1a9c4b6630

+ 0 - 134
src/data-collector.py

@@ -1,134 +0,0 @@
-h="""
-	This is a data-collector client, that is intended to perform data-collection operations and submit them to an endpoint
-	@required:
-		- key	application/service key
-		- id	node identifier
-	usage :
-		python data-collector.py --path config.json
-	The configuration file is structured as JSON object as follows :
-	{
-		id : identifier
-		key: customer's identification key,
-		api: http://localhost/monitor/1/client
-		folders:[]
-	}
-	NOTE: You can download a sample configuration file from https://the-phi.com/smart-top
-"""
-from utils.params import PARAMS as SYS_ARGS, Logger
-import os
-import requests
-import json
-# from threading import Thread, RLock
-from monitor import Apps, Folders
-import time
-from datetime import datetime
-class Collector :
-	def __init__(self) :
-		"""
-			The configuration file is passed to the class for basic initialization of variables
-		"""
-		self.httpclient = requests.Session()
-		
-	def init(self):
-		# if 'folders' not in SYS_ARGS :
-		# 	#
-		# 	# If nothing is set it will monitor the temporary directory
-		# 	self.locations = [os.environ[name] for name in ['TEMP','TMP','TMPDIR'] if name in os.environ]
-		# else:
-		# 	self.locations = SYS_ARGS['folders']
-		
-		#
-		# -- let's get the list of features we are interested .
-		url = SYS_ARGS['api']+'/1/client/login'
-		key = SYS_ARGS['key']
-		self.id = SYS_ARGS['id'] if 'id' in SYS_ARGS else os.environ['HOSTNAME']
-		headers = {"key":key,"id":self.id}
-		
-		#
-		#-- what features are allowed
-		r = self.httpclient.post(url,headers=headers)	
-		
-		if r.status_code == 200 :	
-			r = r.json()
-			
-			self.features = r['features']
-			self.config = r['config']	#-- contains apps and folders
-			Logger.log(action="login",value=r)
-		else:
-			self.features = None
-			self.config = None
-			Logger.log(action='login',value='error')
-	def callback(self,channel,method,header,stream):
-		pass
-	def listen(self):
-		
-		factory	= DataSourceFactory()
-		
-		# self.qlistener = factory.instance(type="QueueListener",args=_args)
-		# self.qlistener.callback = self.callback
-		# self.qlistener.init(SYS_ARGS['id'])
-	def post(self,**args) :
-		"""
-			This function will post data to the endpoint
-		"""
-		url = SYS_ARGS['api']+'/1/client/log'
-		key = SYS_ARGS['key']
-		id = SYS_ARGS['id'] if 'id' in SYS_ARGS else os.environ['HOSTNAME']
-		headers = {"key":key,"id":id,"context":args['context'],"content-type":"application/json"}
-		
-		body = args['data'].fillna('').to_json(orient='records')
-		
-		if args['data'].shape[0] > 0 :
-			r = self.httpclient.post(url,headers=headers,data=body)				
-			Logger.log(action="post."+args['context'],value=r.status_code)
-		else:
-			Logger.log(action="data.error",value="no data :: "+args['context'])
-		
-	def run(self):
-		"""
-			This function will execute the basic functions to monitor folders and apps running on the system
-			given the configuration specified on the server .
-		"""
-		while True :
-			try:
-				self.init()
-				if self.config and self.features :
-					ELAPSED_TIME = 60* int(self.features['schedule'].replace("min","").strip())
-					if 'apps' in self.config :					
-						self.post( data=(Apps(node=self.id)).get(filter=self.config['apps']),context="apps")
-					if 'folders' in self.config and self.config['folders'] :
-						folder = Folders(node=self.id)
-						f = folder.get(path=self.config['folders'])
-						self.post(data = f ,context="folders")
-				
-				Logger.log(action='sleeping',value=ELAPSED_TIME)
-				#
-				# In case no configuration is provided, the system will simply fall asleep and wait
-				# @TODO: Evaluate whether to wake up the system or not (security concerns)!
-				#
-				time.sleep(ELAPSED_TIME)
-				
-
-			except Exception,e:
-				Logger.log(action='error',value=e.message)
-				print e
-			#break
-
-
-		
-			
-			pass
-if __name__ == '__main__' :
-	#
-	#
-	if 'path' in SYS_ARGS :	
-		path = SYS_ARGS['path']
-		f = open(path)
-		SYS_ARGS = json.loads(f.read())
-		f.close()
-		
-	Logger.init('data-collector')
-	collector = Collector()
-	collector.run()
-else:
-	print (h)

+ 0 - 174
src/monitor.py

@@ -1,174 +0,0 @@
-"""
-	Steve L. Nyemba <steve@the-phi.com>
-	The Phi Technology - Smart Top
-
-	This program is the core for evaluating folders and applications. Each class is specialized to generate a report in a pandas data-frame
-	The classes will focus on Apps, Folders and Protocols
-		- SmartTop.get(**args)
-	@TODO:
-		Protocols (will be used in anomaly detection)
-"""
-from __future__  import division
-import os
-import subprocess
-import numpy as np
-import sys
-import pandas as pd
-import datetime
-class SmartTop:
-	def __init__(self,**args):
-		self.node = args['node']
-	def get(self,**args):
-		return None
-
-class Apps(SmartTop) :
-	def __init__(self,**args):
-		"""
-			This class will process a system command and parse the outpout accordingly given a parser
-			@param parse is a parser pointer
-		"""
-		SmartTop.__init__(self,**args)
-		self.cmd = "ps -eo pid,user,pmem,pcpu,stat,etime,args|awk 'OFS=\";\" {$1=$1; if($5 > 9) print }'"
-		self.xchar = ';'
-		
-	def get_app(self,stream):
-		index =  1	if os.path.exists(" ".join(stream[:1])) else len(stream)-1
-
-		cmd = " ".join(stream[:index]) if index > 0 else " ".join(stream)
-		
-		if ' ' in cmd.split('/')[len(cmd.split('/'))-1] :
-			p = cmd.split('/')[len(cmd.split('/'))-1].split(' ')
-			name = p[0]
-			args = " ".join(p[1:])
-		else:
-			name = cmd.split('/')[len(cmd.split('/'))-1]
-			args = " ".join(stream[index:]) if index > 0 else ""
-		return [name,cmd,args]
-	def to_pandas(self,m):
-		"""
-			This function will convert the output of ps to a data-frame
-			@param m raw matrix i.e list of values like a csv
-			
-		"""
-		d = datetime.datetime.now().strftime('%m-%d-%Y')
-		t = datetime.datetime.now().strftime('%H:%M:%S')
-		m = [item for item in m if len(item) != len (m[0])]
-		m = "\n".join(m[1:])    
-		df = pd.read_csv(pd.compat.StringIO(m),sep=self.xchar)
-		df['date'] = np.repeat(d,df.shape[0])
-		df['time'] = np.repeat(t,df.shape[0])
-		df['node'] = np.repeat(self.node,df.shape[0])
-		df.columns =['pid','user','mem','cpu','status','started','name','cmd','args','date','time','node']
-		return df
-	def empty(self,name):
-		return pd.DataFrame([{"pid":None,"user":None,"mem":0,"cpu":0,"status":"X","started":None,"name":name,"cmd":None,"args":None,"date":None,"time":None,"node":self.node}])
-	def parse(self,rows):
-		m = []
-		TIME_INDEX = 5
-		ARGS_INDEX = 6
-		
-		for item in rows :
-			if rows.index(item) != 0 :
-				parts = item.split(self.xchar)
-				row = parts[:TIME_INDEX]
-				row.append(' '.join(parts[TIME_INDEX:ARGS_INDEX]))
-				row += self.get_app(parts[ARGS_INDEX:])
-			else:
-				row = item.split(self.xchar)
-			row = (self.xchar.join(row)).strip()
-			if len(row.replace(";","")) > 0 :
-				m.append(row)		
-		
-		return m
-	def get(self,**args):
-		"""
-			This function returns a the output of a command to the calling code that is piped into the class			
-			The output will be stored in a data frame with columns
-			@
-		"""
-		try:
-
-			handler = subprocess.Popen(self.cmd,shell=True,stdout=subprocess.PIPE)			
-			stream = handler.communicate()[0]
-			rows = stream.split('\n')
-			df =  self.to_pandas(self.parse(rows))
-			r = pd.DataFrame()
-			if 'filter' in args :
-				pattern = "|".join(args['filter']) 				
-				i = df.cmd.str.contains(pattern)
-				r =  df[i].copy()
-				r.index = np.arange(0,r.shape[0])
-				ii=  (1 + np.array(i)*-1) == 1
-				other = pd.DataFrame(df[ii].sum()).T.copy()
-				other.index = np.arange(0,other.shape[0])
-				other.user = other.name = other.status = other.cmd = other.args = 'other'
-				other.started = other.pid = -1
-				other = other[other.columns[1:]]
-				for name in args['filter'] :
-					i = r.cmd.str.contains(str(name.strip()),case=False,na=False)
-					if i.sum() == 0:
-						r = r.append(self.empty(name),sort=False)
-					else :
-						pass
-						# r[i].update (pd.DataFrame({"name":np.repeat(name,r.shape[0])}))
-						r.loc[i, 'name'] = np.repeat(name,i.sum())
-						# r.loc[i].name = name
-						
-			
-			r = r.append(other,sort=False)			
-			r.index = np.arange(r.shape[0])
-			
-			return r
-		except Exception,e:
-			print (e)
-		return None
-
-
-class Folders(SmartTop):
-	"""
-		This class will assess a folder and produce a report in a data-frame that can be later on used for summary statistics
-	"""
-	def __init__(self,**args):
-		SmartTop.__init__(self,**args)
-		
-	def _get(self,dir_path,r=[]):
-		for child in os.listdir(dir_path):
-			path = os.path.join(dir_path, child)
-			if os.path.isdir(path):				
-				self._get(path,r)
-
-			else:
-				size 		= os.path.getsize(path)
-				file_date 	= os.path.getatime(path)
-				file_date	= datetime.datetime.fromtimestamp(file_date)
-				now 		= datetime.datetime.now()
-				age 		= (now - file_date ).days
-				name		= os.path.basename(path)
-				r.append({"name":name,"path":path,"size":size,"age":age,"date":now.strftime('%m-%d-%Y'),"time":now.strftime('%H:%M:%S'),"node":self.node })
-		return r
-
-	def get(self,**args):
-		# path = args['path']
-		
-		if isinstance(args['path'],list) == False:
-			paths = [args['path']]
-		else:
-			paths = args['path']
-		paths = paths
-		_out = pd.DataFrame()
-		for path in paths :
-			name = os.path.basename(path)
-			if os.path.exists(path) :
-				#
-				# If the folder does NOT exists it should not be treated.
-				#
-				rows =  self._get(path)
-				if len(rows) > 0 :
-					r =  pd.DataFrame(rows)		
-					r =  pd.DataFrame([{"name":name,"path":path,"files":r.shape[0],"age_in_days":r.age.mean(),"size_in_kb":r['size'].sum(),"date":r.date.max(),"time":r.time.max(),"node":r.node.max()}])
-					_out = _out.append(r,sort=False)
-		#
-		# @TODO: The state of the hard-drive would be a good plus
-		# os.system('df -h /')
-		_out.index = np.arange(0,_out.shape[0])
-		return _out

+ 0 - 1
src/utils/__init__.py

@@ -1 +0,0 @@
-

+ 0 - 1
src/utils/agents/__init__.py

@@ -1 +0,0 @@
-

+ 0 - 368
src/utils/agents/actor.py

@@ -1,368 +0,0 @@
-"""
-	This class is designed to be an actor class i.e it will undertake certain actions given an event detected
-	The platform has 2 main sections (detection & analysis).
-	Action Types (Actors):
-		- Alert : Sends an email or Webhook
-		- Apps 	: Kill, Start
-		- Folder: Archive, Delete (all, age, size)
-        By design we are to understand that a message is structured as follows:
-            {to,from,content} with content either being an arbitrary stream (or JSON)
-	@TODO: 
-		- upgrade to python 3.x
-"""
-import json
-from threading import Thread
-import os
-import shutil
-import subprocess
-import re
-from monitor import ProcessCounter
-from utils.transport import QueueListener, QueueWriter, QueueReader
-from utils.params import PARAMS
-import smtplib
-from email.mime.multipart import MIMEMultipart
-from email.mime.text import MIMEText
-from datetime import datetime
-from StringIO import StringIO
-from utils.services import Dropbox, Google
-class Actor():
-    @staticmethod
-    def instance(name,args,logger=None):
-        """
-            This function is a singleton that acts as a factory object for all the instances of this subclass
-            @param name name of the class to instantiate
-            @param args arguments to be passed in {configuration}
-        """
-        r = []
-        if not isinstance(name,list):
-            name  = [name]
-        for id in name :
-            try:
-                o = eval("".join([id,"()"]))
-                o.Initialize(args,logger)
-                r.append(o)
-            except Exception,e:
-                if logger is not None :
-                    logger.log(subject='Actor',object='Factory',action='error',value=e.message)
-                
-                print str(e)
-        return r[0] if len(r) == 1 else r
-    def __init__(self):        
-        """
-            Initializing the class with configuration. The configuration will be specific to each subclass
-            @param args arguments the class needs to be configured
-        """
-        
-        pass
-    def getName(self):
-        return self.__class__.__name__.lower()
-
-    # def getIdentifier(self):
-    #     return self.__class__.__name__.lower()
-
-    def Initialize(self,args,logger=None):
-        self.config = args
-        self.logger = logger
-        
-    def isValid(self,**item):
-        return False
-
-    def execute(self,cmd):
-        stream = None
-        try:
-            # subprocess.call (cmd,shell=False)
-            out = subprocess.Popen(cmd,stdout=subprocess.PIPE)            
-            #stream = handler.communicate()[0]
-        except Exception,e:
-            pass      
-    def post(self,**args):    
-        pass
-    def log(self,**args):
-        if self.logger :
-            args['subject'] = self.getName()
-            self.logger.log(args)
-class Apps(Actor) :
-    """
-	    This class is designed to handle application, restart, if need be.
-	    conf{app-name:{args}}
-    """
-    def __init__(self):
-        Actor.__init__(self)
-        # self.ng = None
-    
-    def isValid(self,**args):
-        """ 
-            We insure that the provided application exists and that the payload is correct
-            The class will only respond to reboot,kill,start actions
-            p   validate the payload
-            q   validate the app can be restarted
-
-            @NOTE: killing the application has no preconditions/requirements
-        """
-        params = args['params']
-        action = args['action']
-        p = len(set(params.keys()) & set(['cmd','label'])) == 2
-        q = False
-        r = action in ['reboot','kill','start']
-        if p :
-            q = os.path.exists(params['cmd'])
-        return p and q and r
-    def init(self,action,params) :
-        """
-            This function will initialize the the actor with applications and associated arguments
-            @param args {"apps_o":"","app_x":params}
-        """
-        self.action = action
-        self.params = params  
-        self.log(action='init',object=action,value=params)   
-
-    def startup(self,cmd) :
-        """
-            This function is intended to start a program given the configuration
-            @TODO We need to find the command in case the app has crashed
-        """        
-        try:
-            print""
-            print cmd
-            os.system(cmd +" &")
-            self.log(action='startup',value=cmd)
-        except Exception, e:
-            print e
-        
-    def kill(self,name) :
-        """ 
-            kill processes given the name, The function will not be case sensitive and partial names are accepted
-            @NOTE: Make sure the reference to the app is not ambiguous
-        """
-        try:
-            args = "".join(['ps -eo pid,command|grep -Ei "',name.lower(),'"|grep -E "^ {0,}[0-9]+" -o|xargs kill -9'])        
-
-            #self.execute([args])
-            subprocess.call([args],shell=True)
-        except Exception,e:
-            print e
-    
-    def run(self):
-        __action    = str(self.action).strip()
-        __params    = dict(self.params)
-        pointer     = None
-        if __action == 'reboot' :
-            def pointer():
-                self.kill(__params['label'])
-                self.startup(__params['cmd'])
-        elif __action == 'kill':
-            def pointer():
-                self.kill(__params['label'])
-        elif __action =='start':
-            def pointer() :
-                self.startup(__params['cmd'])
-        if pointer :
-            thread = Thread(target=pointer)        
-            thread.start()
-        # pointer()
-    def analyze(self,logs) :
-        """
-            This function is designed to analyze a few logs and take appropriate action
-            @param logs logs of application/process data; folder analysis or sandbox analysis
-        """
-        pass
-        # for item in logs :
-        #     name = item['label']
-        #     if self.can_start(name) :
-        #         self.startup(name)
-        #     #
-            
-
-class Mailer (Actor):
-    """
-        This class is a mailer agent
-    """
-    def __init__(self):
-        Actor.__init__(self)
-    """
-        conf = {uid:<account>,host:<host>,port:<port>,password:<password>}
-    """
-    def init(self,conf) :
-        self.uid = conf['uid']
-
-
-        try:
-
-            self.handler = smtplib.SMTP_SSL(conf['host'],conf['port'])
-            r = self.handler.login(self.uid,conf['password'])
-            #
-            # @TODO: Check the status of the authentication
-            # If not authenticated the preconditions have failed
-            #
-        except Exception,e:
-            print str(e)
-            self.handler = None
-            pass
-
-
-    def send(self,**args) :
-        subject = args['subject']
-        message = args['message']
-        to	= args['to']
-        if '<' in message and '>' in message :
-            message = MIMEText(message,'html')
-        else:
-            message = MIMEText(message,'plain')
-        message['From'] = self.uid
-        message['To']	= to
-        message['Subject'] = subject
-        return self.handler.sendmail(self.uid,to,message.as_string())
-    def close(self):
-        self.handler.quit()
-
-            
-class Folders(Actor):
-    def __init__(self):
-        Actor.__init__(self)
-    """
-        This is designed to handle folders i.e cleaning/archiving the folders
-        if the user does NOT have any keys to cloud-view than she will not be able to archive
-        {threshold:value}
-        @params threshold   in terms of size, or age. It will be applied to all folders
-    """
-    def init(self,action,params):
-        self.action = action
-        # print args
-    # def init(self,args):
-        """
-            This is initialized with parameters from the plan.
-            The parameters should be specific to the actor (folder)
-                folder_size
-        """
-        #self.lfolders   = args['folders'] #config['folders']
-        #self.action     = args['action'] #{clear,archive} config['actions']['folders']
-        
-        plan = params['plan']
-        self.threshold  = self.get_size( plan['folder_size']) #self.config['threshold'])
-        # self.action = action
-        self.params = params 
-        
-        
-    def isValid(self,**args):
-        action = args['action']
-        
-        
-        params = args['params']
-        p = len(set(action) & set(['clean','archive','backup'])) > 0
-        q = False
-        r = False
-        if p :
-            q = len(set(params.keys()) & set( ['label','folder'])) > 0
-        if q :
-            folder = params['label'] if 'label' in params else params['folder']
-            r = os.path.exists(folder)
-        
-        return p and q and r
-    def archive(self,item):
-        """
-            This function will archive all files in a given folder
-            @pre : isValid
-        """
-        
-        folder = item['label']
-        name = folder.split(os.sep)
-        name = name[len(name)-1]
-        date =  str(datetime.now()).replace(' ','@')#''.join([str(i) for i in item['date'].values()])
-        
-        # signature='-'.join([name,date,str(item['stats']['file_count']),'files'])
-        signature='-'.join([name,date])
-        tarball=os.sep.join([folder,'..',signature])
-        shutil.make_archive(tarball,'tar',folder)
-        
-        #self.clean(item)
-        #
-        # @TODO: The archive can be uploaded to the cloud or else where
-        #   @param id   cloud service idenfier {dropbox,box,google-drive,one-drive}
-        #   @param key  authorization key for the given service
-        #
-        pass
-        return tarball+".tar"
-    def backup(self,tarball):
-        """
-            This function will initiate backup to the cloud given
-        """
-        if os.path.exists(tarball) :
-            
-            key = self.params['key']
-            
-            sid = self.params['user']['sid']
-            if sid == 'dropbox' :
-                cloud = Dropbox()
-            elif sid == 'google-drive' :
-                cloud = Google()
-            cloud.init(key)
-            file = open(tarball) 
-            out = cloud.upload('backup','application/octet-stream',file)
-            file.close()
-            print out
-            pass
-        else:
-            pass
-        print tarball
-        print self.params['user']['sid']
-        print self.params['key']
-        #
-        # let's upload to the cloud
-        pass
-    def clean(self,item):
-        """
-            This function consists in deleting files from a given folder
-        """
-        rpath = item['label']
-        files = os.listdir(item['label'])
-        for name in list(files) :
-            path = os.sep.join([item['label'],name])
-            if os.path.isdir(path) :
-                shutil.rmtree(path)
-            else:
-                os.remove(path)
-        #
-        # 
-    
-    def get_size(self,value):
-        """
-            converts size values into MB and returns the value without units
-        """
-        units = {'MB':1000,'GB':1000000,'TB':1000000000} # converting to kb
-        key = set(units.keys()) & set(re.split('(\d+)',value.replace(' ','').upper()))
-        
-        
-        if len(key) == 0:
-            return -1
-        key = key.pop()
-        return float(value.upper().replace('MB','').strip()) * units[key]
-    
-    def can_clean(self,item):        
-        """
-            This function returns whether the following :
-            p : folder exists
-            q : has_reached threashold
-            
-            @TODO: Add a user defined configuration element to make this happen
-        """
-        
-        p = os.path.exists(item['label']) and item['label'] in self.lfolders    
-        q = item['stats']['size']['mean'] >= self.threshold and self.threshold > 0
-        return p and q
-    def run(self):
-        tarball = None
-        if 'archive' in self.action :
-            tarball = self.archive(self.params)
-        
-        if 'backup' in self.action and tarball:
-            self.backup(tarball)
-        if 'delete' in self.action and self.can_clean():
-            self.clean()
-    def analyze(self,logs):
-        r = {'clean':self.clean,'archive':self.archive}
-        self.lfolders = [ folder['label'] for folder in logs]
-        #for item in logs :
-            
-        #    if self.can_clean(item) :
-        #        self.archive(item)
-        #        self.clean(item)

+ 0 - 132
src/utils/agents/learner.py

@@ -1,132 +0,0 @@
-"""
-	This file encapsulates a class that is intended to perform learning
-"""
-from __future__ import division
-import numpy as np
-from sklearn import linear_model
-from threading import Thread,RLock
-from utils.transport import *
-from utils.ml import AnomalyDetection,ML
-from utils.params import PARAMS
-import time
-class BaseLearner(Thread):
-	def __init__(self,lock) :
-		Thread.__init__(self)
-		path = PARAMS['path']
-		self.name = self.__class__.__name__.lower()
-		self.rclass= None
-		self.wclass= None
-		self.rw_args=None
-		if os.path.exists(path) :
-			f = open(path)
-			self.config = json.loads(f.read())
-			f.close()
-			self.rclass	= self.config['store']['class']['read']
-			self.wclass	= self.config['store']['class']['write']		
-			self.rw_args	= self.config['store']['args']
-
-		else:
-			self.config = None
-		self.lock = lock
-		self.factory 	= DataSourceFactory()
-		self.quit	= False
-	"""
-		This function is designed to stop processing gracefully
-		
-	"""
-	def stop(self):
-		self.quit = True
-		
-		
-"""
-	This class is intended to apply anomaly detection to various areas of learning
-	The areas of learning that will be skipped are :
-	['_id','_rev','learn'] ... 
-	
-	@TODO:
-		- Find a way to perform dimensionality reduction if need be
-"""
-class Anomalies(BaseLearner) :
-	def __init__(self,lock):		
-		BaseLearner.__init__(self,lock)
-		if self.config :
-			#
-			# Initializing data store & factory class
-			#
-			self.id		= self.config['id']
-			self.apps	= self.config['procs'] if 'procs' in self.config else []
-			self.rclass	= self.config['store']['class']['read']
-			self.wclass	= self.config['store']['class']['write']		
-			self.rw_args	= self.config['store']['args']
-			# self.factory 	= DataSourceFactory()
-			self.quit	= False
-			# self.lock 	= lock
-	def format(self,stream):
-		pass
-	def stop(self):
-		self.quit = True
-            
-	def run(self):
-		DELAY = self.config['delay'] * 60
-		reader 	= self.factory.instance(type=self.rclass,args=self.rw_args)
-		data	= reader.read()
-		key	= 'apps@'+self.id
-		if key in data:
-			rdata	= data[key]
-			features = ['memory_usage','cpu_usage']
-			yo = {"1":["running"],"name":"status"}				
-			while self.quit == False :
-				print ' *** ',self.name, ' ' , str(datetime.today())
-				for app in self.apps:
-					print '\t',app,str(datetime.today()),' ** ',app
-					logs = ML.Filter('label',app,rdata)
-					
-					if logs :
-						handler = AnomalyDetection()
-						value 	= handler.learn(logs,'label',app,features,yo)
-						if value is not None:
-							value = dict(value,**{"features":features})
-							value = dict({"id":self.id},**value)
-							#r[id][app] = value
-							self.lock.acquire()
-							writer = self.factory.instance(type=self.wclass,args=self.rw_args)
-							writer.write(label='learn',row=value)
-							self.lock.release()
-				#
-				if 'MONITOR_CONFIG_PATH' in os.environ :
-					break
-				time.sleep(DELAY)
-		print ' *** Exiting ',self.name.replace('a','A')
-
-				
-		
-"""
-	Let's estimate how many files we will have for a given date
-	y = ax + b with y: number files, x: date, y: Number of files
-"""		
-class Regression(BaseLearner):
-	def __init__(self,lock):
-		BaseLearner.__init__(self,lock)
-		self.folders 	= self.config['folders']
-		self.id 	= self.config['id']
-	def run(self):
-		DELAY = self.config['delay'] * 60
-		reader 	= self.factory.instance(type=self.rclass,args=self.rw_args)
-		data	= reader.read()
-		if 'folders' in data :
-			data = ML.Filter('id',self.id,data['folders'])
-			xo  	= ML.Extract(['date'],data)
-			yo	= ML.Extract(['count'],data)
-			
-			
-			pass
-			# print np.var(xo,yo)
-
-			
-		
-
-
-if __name__ == '__main__' :
-	lock = RLock()
-	thread = Anomalies(lock)
-	thread.start()

+ 0 - 220
src/utils/agents/manager.py

@@ -1,220 +0,0 @@
-"""
-	Features :
-		- data collection
-		- detection, reboot	(service)
-		- respond to commands	(service)
-"""
-#from threading import Thread, RLock
-from __future__ import division
-import os
-import json
-import time
-from datetime import datetime
-from utils.transport import *
-import monitor
-import requests
-from threading import Thread
-class Manager() :
-	def version(self):
-		return 1.1
-	"""
-		
-		delay : <value>
-		limit : <value>
-		scope : apps,folders,learner,sandbox
-	"""
-	def __init__(self):
-		self.factory	= DataSourceFactory()
-	def set(self,name,value):
-		setattr(name,value)
-	def init(self,**args) :
-		self.id		= args['node']
-		self.agents 	= args['agents']
-		self.config	= dict(args['config'])
-		self.key	= args['key']
-		self.actors	= args['actors']
-		self.plan	= self.config['plan']
-		
-		self.DELAY  = int(self.plan['metadata']['delay'])
-		self.host = args['host']
-		self.update()	#-- Initializing status information
-		_args={"host":"dev.the-phi.com","qid":self.id,"uid":self.key}
-		#
-		# Connecting to the messaging service
-		
-		self.qlistener = self.factory.instance(type="QueueListener",args=_args)
-		self.qlistener.callback = self.callback
-		self.qlistener.init(self.id)
-		
-		# self.qlistener.read()
-		thread = (Thread(target=self.qlistener.read))
-		thread.start()
-	
-	def update(self) :
-		"""
-			This method inspect the plans for the current account and makes sure it can/should proceed
-			The user must be subscribed and to the service otherwise this is not going to work
-		"""
-		
-		url="http://:host/monitor/init/collector".replace(':host',self.host)
-			
-		r = requests.post(url,headers={"key":self.key,"id":self.id})
-		r =  json.loads(r.text)
-		# meta =  [item['metadata'] for item in plans if item['status']=='active' ]
-		self.plan = r['plan']
-		meta = self.plan['metadata']
-		
-		if meta :
-			self.DELAY = 60* int(meta['delay'])
-			self.LIMIT = int(meta['limit'])
-			#dbname = [item['name'] for item in plans if int(item['metadata']['limit']) == self.LIMIT][0]
-			#self.config['store']['args']['dbname'] = dbname
-
-		else:
-			self.DELAY = -1
-			self.LIMIT = -1
-	
-		#self.filter(meta)
-		#
-		# We are removing all that is not necessary i.e making sure the features matches the plan user has paid for
-		#
-		self.agents  = self.filter('agents',meta,self.agents)
-		self.actors = self.filter('actors',meta,self.actors)
-		self.setup(meta)
-
-	# def filter_collectors(self,meta) :
-	# 	"""
-	# 		remove collectors that are not specified by the plan
-	# 		Note that the agents (collectors) have already been initialized ?
-	# 	"""
-	# 	values = meta['agents'].replace(' ','').split(',')
-	# 	self.agents = [agent for agent in self.agents if agent.getName() in values]
-
-		
-	# def filter_actors(self,meta):
-	# 	"""
-	# 		removes actors that are NOT specified by the subscription plan
-	# 		Note that the actor have already been instatiated and need initialization
-	# 	"""
-	# 	values = meta['actors'].replace(' ','').split('.')
-	# 	self.actors = [actor for actor in self.actors if actor.getName() in values]
-	
-	def filter(self,id,meta,objects):
-		"""
-			This function filters the agents/actors given what is available in the user's plan
-			
-		"""
-		values = meta[id].replace(' ','').split(',')
-		return [item for item in objects if item.getName() in values]
-	
-	def setup(self,meta) :
-		# conf = {"folders":None,"apps":None}
-		# read_class 	= self.config['store']['class']['read']
-		# read_args	= self.config['store']['args']
-		
-
-		# args = None
-		# couchdb	= self.factory.instance(type=read_class,args=read_args)
-		# args 	= couchdb.view('config/apps',key=self.key)
-		# if len(args.keys()) > 0 :
-		# 	self.apply_setup('apps',args)
-		# args = couchdb.view('config/folders',key=self.key)
-		# if 'folder_size' not in meta :
-		# # 	args['threshold'] = meta['folder_size']
-		# 	self.apply_setup('folders',meta)			
-		#self.apply_setup('folders',meta)
-		#@TODO: For now app actors don't need any particular initialization
-		pass
-			
-	def apply_setup(self,name,args) :
-		for actor in self.actors :
-			if args is not None and actor.getName() == name and len(args.keys()) > 0:												
-				actor.init(args)
-		
-	def isvalid(self):
-		self.update()
-		return self.DELAY > -1 and self.LIMIT > -1
-	def post(self,row) :
-		"""
-			This function is designed to take appropriate action if a particular incident has been detected
-			@param label	
-			@param row	data pulled extracted
-		"""
-		message = {}
-		message['action'] = 'reboot'
-		message['node']	= label
-
-	def callback(self,channel,method,header,stream):
-		"""
-			This function enables the manager to be able to receive messages and delegate them to the appropriate actor
-			@channel
-		"""
-		message = json.loads(stream)
-		#
-		# we should inspect the message and insure it has reached the appropriate recepient
-		#
-		if 'node' in message and message['node'] == self.id :
-			action = message['action']
-			params = message['params']
-			# params['plan'] = self.plan['metadata']
-			self.delegate(action,params)
-
-	def delegate(self,action,params):
-		for actor in self.actors :
-			
-			if actor.isValid(action=action,params=params) :
-				
-				actor.init(action,params)
-				actor.run()
-
-				break
-		pass
-	def run(self):
-		#DELAY=35*60 #- 35 Minutes
-		#LIMIT=1000
-		COUNT = 0
-		COUNT_STOP 	= int(24*60/ self.DELAY)
-		write_class	= self.config['store']['class']['write']
-		read_args	= self.config['store']['args']
-		
-		while True :
-			COUNT += 1
-			if COUNT > COUNT_STOP :
-				if self.isvalid() :
-					COUNT = 0
-				else:
-					break
-			for agent in self.agents :
-				data	= agent.composite()
-				label	= agent.getName()
-				node	= '@'.join([label,self.id])
-				row		= {}
-				
-				if label == 'folders':
-					row = [ dict({"id":self.id}, **_row) for _row in data]										
-				else:
-					#label = id
-					row = data
-					#
-					# @TODO:
-					# This data should be marked if it has been flagged for reboot
-					# 
-				if type(row)==list and len(row) == 0 :
-
-					continue
-				#
-				#
-				index = self.agents.index(agent)
-
-				if len(self.actors) > index and self.actors[index].getName() == agent.getName() :
-					actor = self.actors[index]
-					actor.analyze(row)
-
-				# self.lock.acquire()
-				store = self.factory.instance(type=write_class,args=read_args)
-				store.flush(size=self.LIMIT)				
-				store.write(label=node,row=[row])
-				# self.lock.release()
-			print (["Falling asleep ",self.DELAY/60])
-			time.sleep(self.DELAY)
-				

+ 0 - 32
src/utils/params.py

@@ -1,32 +0,0 @@
-import sys
-PARAMS  = {'context':''}
-if len(sys.argv) > 1:
-	
-	N = len(sys.argv)
-	for i in range(1,N):
-		value = None
-		if sys.argv[i].startswith('--'):
-			key = sys.argv[i].replace('-','')
-			PARAMS[key] = 1
-			if i + 1 < N:
-				value = sys.argv[i + 1] = sys.argv[i+1].strip()
-			if key and value:
-				PARAMS[key] = value
-				if key == 'context':
-					PARAMS[key] = ('/'+value).replace('//','/')
-		
-		i += 2
-
-import logging		
-import json
-from datetime import datetime
-class Logger :
-	@staticmethod
-	def init(filename):
-		name = "-".join([filename,datetime.now().strftime('%d-%m-%Y')])+".log"
-		logging.basicConfig(filename=name,level=logging.INFO,format="%(message)s")
-	@staticmethod
-	def log(**args) :
-		args['date'] = datetime.now().strftime('%d-%m-%Y %H:%M:%S')
-		logging.info(json.dumps(args))
-		

+ 0 - 598
src/utils/services.py

@@ -1,598 +0,0 @@
-"""
-	CloudView Engine 2.0
-	The Phi Technology LLC - Steve L. Nyemba <steve@the-phi.com>
-
-	This is a basic cloud view engine that is designed to be integrated into any service and intended to work for anyone provided they have signed up with the cloud service provider
-	The intent is to make the engine a general purpose engine that can be either deployed as a service (3-launchpad) or integrated as data feed for a third party utility
-	
-"""
-from __future__ import division
-from threading import Thread
-import json
-import requests
-from xmljson import yahoo as bf
-from xml.etree.ElementTree import Element, tostring, fromstring, ElementTree as ET
-import xmltodict
-from email.mime.base import MIMEBase
-from email.mime.multipart import MIMEMultipart
-from StringIO import StringIO
-class Cloud:
-	BYTES_TO_GB = 1000000000	
-	Config = None
-	STREAMING_URI = None
-	@staticmethod
-	def instance(id,**args):
-		id = id.strip()
-		if id == 'skydrive' :
-			id = 'one-drive'
-		
-		handler = None
-		path = args['path'] if 'path' in args else None
-		if not Cloud.Config and path:
-			f = open(path)
-			Cloud.Config = json.loads(f.read())
-			Cloud.STREAMING_URI = str(Cloud.Config['api'])
-			Cloud.Config = Cloud.Config['cloud']
-			f.close()
-		if path and id in Cloud.Config :
-			context 	= Cloud.Config[id]
-			className 	= context['class']
-			config		= json.dumps(context['config'])
-			handler		= eval( "".join([className,"(",config,")"]))
-			
-		#
-		# In case a stream was passed in ...
-		#
-		if 'stream' in args:
-			stream 		= args['stream']
-			context 	= Cloud.Config[id]
-			className 	= context['class']
-			
-			handler = eval("".join([className,"(None)"]))
-			handler.from_json(stream)
-		#
-		# Once the handler is rovided we must retrieve the service given the key
-		# The key provides information about what files to extract as well as the preconditions
-		# @TODO: 
-		#	- Keys are maintained within the stripe account/couchdb
-		#	- 	
-		return handler
-	
-	def __init__(self):
-		self.access_token = None
-		self.refresh_token= None
-		self.files	  = []
-		self.client_id	= None
-		self.secret	= None
-		self.mfiles	= {}
-		self.folders={}
-
-	def to_json(self):
-		object = {}
-		keys = vars(self)
-		for key in keys:
-			value = getattr(self,key)
-			object[key] = value
-		return json.dumps(object)
-	def from_json(self,stream):
-		ref = json.loads(stream) ;
-		for key in ref.keys() :
-			value = ref[key]			
-			setattr(self,key,value)
-		# self.access_token 	= ref['access_token']
-		# self.refesh_token 	= ref['refresh_token']
-		# self.files		= ref['files']
-	"""
-		This function matches a name with a list of possible features/extensions
-
-	"""
-	def match(self,filename,filters):
-		if isinstance(filters,str):
-			filters = [filters]
-		return len(set(filename.lower().split('.')) & set(filters)) > 0
-		
-	def getName(self):
-		return self.__class__.__name__.lower()
-	def get_authURL(self):
-		
-		config = Cloud.Config[self.getName()]['config']
-		url = config['authURL']
-		
-		if '?' in url == False:
-			url += '?'
-		keys=['client_id','redirect_uri']
-		p = []
-		for id in keys:
-			value = config[id]
-			p.append(id+'='+value)
-		url = url +"&"+ "&".join(p)
-		
-		return url
-Cloud.Config = {}	
-		
-class Google(Cloud):
-	def __init__(self,conf=None):
-		Cloud.__init__(self)
-	def getName(self):
-		return 'google-drive'
-	def init(self,token):
-		self.refresh_token  	= token
-		self._refresh()
-	
-	def _refresh(self,code=None):
-		url 	= "https://accounts.google.com/o/oauth2/token"
-		headers = {"Content-Type":"application/x-www-form-urlencoded"}
-		data 	= {"client_id":self.client_id,"client_secret":self.secret}
-		if code :
-			grant_type = 'authorization_code'
-			data['code']	   = code
-		else:
-			data['refresh_token'] = self.refresh_token
-			grant_type = 'refresh_token'
-		
-		data['grant_type'] = grant_type
-		data['redirect_uri'] = self.redirect_uri
-
-		resp 	= requests.post(url,headers=headers,data=data)
-		r	= json.loads(resp.text)
-		if 'access_token' in r:			
-			self.access_token = r['access_token']
-		 	self.refresh_token = r['refresh_token'] if 'refresh_token' in r else r['access_token']
-			self.id_token = r['id_token']
-			
-
-	def create_file(self,**args):
-		url = "https://www.googleapis.com/upload/drive/v2/files" ;
-		headers = {"Authorization":"Bearer "+self.access_token}
-		headers['Content-Type']  = args['mimetype']
-		params = args['params']
-		if 'data' not in args :
-			r = requests.post(url,params = params,headers=headers)
-		else:
-			data = args['data']
-			r = requests.post(url,data=data,params = params,headers=headers)
-		return r.json()
-	def update_metadata(self,id,metadata) :
-		url = "https://www.googleapis.com/drive/v2/files"
-		headers = {"Authorization":"Bearer "+self.access_token}
-		headers['Content-Type'] = 'application/json; charset=UTF-8'
-		
-		if id is not None :
-			url += ("/"+id)		
-			r = requests.put(url,json=metadata,headers=headers)
-		else:
-			# url += ("/?key="+self.secret)
-			r = requests.post(url,data=json.dumps(metadata),headers=headers)
-			
-		return r.json()
-
-	def upload(self,folder,mimetype,file):
-		"""
-			This function will upload a file to a given folder and will provide
-			If the folder doesn't exist it will be created otherwise the references will be fetched
-			This allows us to avoid having to create several folders with the same name
-		"""
-		r = self.get_files(folder)
-		
-		if len(r) == 0 :
-			info = {"name":folder, "mimeType":"application/vnd.google-apps.folder"}		
-			r = self.update_metadata(None,{"name":folder,"title":folder, "mimeType":"application/vnd.google-apps.folder"})
-		else:
-			r = r[0]
-		parent = r
-		parent = {"kind":"drive#file","name":folder,"id":parent['id'],"mimeType":"application/vnd.google-apps.folder"}
-
-
-		r = self.create_file(data=file.read(),mimetype=mimetype,params={"uploadType":"media"})
-		info = {"title":file.filename,"description":"Create by Cloud View"}
-		info['parents'] = [parent]
-
-		r = self.update_metadata(r['id'],metadata=info)
-		return r
-		
-			
-"""
-	This class is designed to allow users to interact with one-drive
-"""
-class OneDrive(Cloud):
-	def __init__(self,conf):
-		Cloud.__init__(self)
-	def getName(self):
-		return 'one-drive'
-	def init(self,token):
-		self.refresh_token  	= token
-		self._refresh()
-	
-	def _refresh(self,code=None):		
-		url = "https://login.live.com/oauth20_token.srf"
-		#url="https://login.microsoftonline.com/common/oauth2/v2.0/token"
-
-		headers = {"Content-Type":"application/x-www-form-urlencoded"}
-		form = {"client_id":self.client_id,"client_secret":self.secret}
-		if code:
-			grant_type = 'authorization_code'
-			form['code'] = str(code)
-		else:
-			grant_type = 'refresh_token'
-			form['refresh_token'] = self.refresh_token
-		form['grant_type'] = grant_type	
-		if self.redirect_uri:
-			form['redirect_uri'] = self.redirect_uri
-		r = requests.post(url,headers=headers,data=form)
-		r = json.loads(r.text)
-		if 'access_token' in r:
-			self.access_token = r['access_token']
-			self.refresh_token = r['refresh_token']
-
-	def upload(self,folder,mimetype,file):
-		"""
-			@param folder	parent.id
-			@param name		name of the file with extension
-			@param stream	file content
-					
-		"""
-		path	= folder+"%2f"+file.filename
-		url 	= "https://apis.live.net/v5.0/me/skydrive/files/:name?access_token=:token".replace(":name",path).replace(":token",self.access_token) ;
-		
-		header = {"Authorization": "Bearer "+self.access_token,"Content-Type":mimetype}
-		header['Content-Type']= mimetype
-		r = requests.put(url,header=header,files=file)
-		r = r.json()
-		return r
-		
-"""
-	This class uses dropbox version 2 API
-"""
-class Dropbox(Cloud):
-	def __init__(self):
-		Cloud.__init__(self)
-	def init(self,access_token):
-		self.access_token = access_token
-	def upload(self,folder,mimetype,file):
-		"""
-			@param folder	parent.id
-			@param name		name of the file with extension
-			@param stream	file content
-			
-			@TODO: This upload will only limit itself to 150 MB, it is possible to increase this size
-		"""
-		url 	= "https://content.dropboxapi.com/2/files/upload"
-		folder	= folder if folder is not None else ""
-		path	= "/"+folder+"/"+file.name.split('/')[-1]
-		path	= path.replace("//","/")
-		header = {"Authorization":"Bearer "+self.access_token,"Content-Type":mimetype}
-		#header['autorename']= "false"
-		header['mode']		= "add"
-		#header['mute']		= "false"
-		header['Dropbox-API-Arg']	= json.dumps({"path":path})
-		r = requests.post(url,headers=header,data=file.read())
-		print r.text
-		r = r.json()
-		return r
-
-	
-"""
-	This class implements basic interactions with box (cloud service providers)
-	Available functionalities are: authentication, file access,share and stream/download
-"""	
-class Box(Cloud) :
-	def __init__(self,conf):
-		Cloud.__init__(self);
-		if conf is not None:
-			self.client_id	  = conf['client_id']
-			self.secret	  = conf['secret']
-			self.redirect_uri = conf['redirect_uri'] if 'redirect_uri' in conf else None
-	def init(self,token):
-		self.refresh_token = token
-	def set(self,code) :
-		self._access(code)
-		return 1 if self.access_token else 0
-
-	def _access(self,code):
-		body 	= {"client_id":self.client_id,"client_secret":self.secret,"grant_type":"authorization_code","code":code,"redirect_uri":self.redirect_uri}
-		headers = {"Content-Type":"application/x-www-form-urlencoded"}
-		url 	= "https://app.box.com/api/oauth2/token"
-		r	= requests.post(url,headers=headers,data=body)
-		r	= json.loads(r.text)
-		if 'error' not in r:
-			self.access_token = r['access_token']
-			self.refresh_token= r['refresh_token']
-	def _refresh(self,authToken) :
-		body 	= {"client_id":self.client_id,"client_secret":self.secret,"grant_type":"refresh_token"}
-		url 	= "https://app.box.com/api/oauth2/token";
-		headers = {"Content-Type":"application/x-www-form-urlencoded"}
-		r	= requests.post(url,headers=headers,data=body)
-		r	= json.loads(r.text)
-		if 'error' not in r :
-			self.access_token = r['access_token']
-	def get_user(self):
-		url = "https://api.box.com/2.0/users/me"
-		headers = {"Authorization":"Bearer "+self.access_token}
-		r = requests.get(url,headers=headers)
-		r = json.loads(r.text)
-		if 'login' in r :
-			#BYTES_TO_GB = 1000000000
-			user = {"uii":r['name'],"uid":r['login']}
-			usage = {"size":r['space_amount']/Cloud.BYTES_TO_GB,"used":r['space_used']/Cloud.BYTES_TO_GB,"units":"GB"}
-			user['usage'] = usage
-			return user
-		else:
-			return None
-		
-	def format(self,item) :
-		file = {"name":item['name'],"origin":"box","id":item['id'],"url":""}
-		meta = {"last_modified":item['content_modified_at']}		
-		return file
-	def get_files(self,ext,url=None):
-		ext = " ".join(ext)
-		url = "https://api.box.com/2.0/search?query=:filter&type=file"
-		url = url.replace(":filter",ext)
-		headers = {"Authorization":"Bearer "+self.access_token}
-
-		r = requests.get(url,headers=headers) ;
-		r = json.loads(r.text)
-		if 'entries' in r:
-			#self.files = [ self.format(file) for file in r['entries'] if file['type'] == 'file' and 'id' in file]
-			for item in r :
-				if item['type'] == 'file' and 'id' in item :
-					self.files.append( self.format(item))
-				else:
-					#
-					# We are dealing with a folder, this is necessary uploads
-					#
-					self.folder[item['name']] = item["id"]
-		
-		return self.files
-	def stream(self,url):		
-		headers = {"Authorization":"Bearer "+self.access_token}
-		r = requests.get(url,headers=headers,stream=True)
-		yield r.content
-	def share(self,id):		
-		url = "https://api.box.com/2.0/files/:id".replace(":id",id);
-		headers = {"Authorization":"Bearer "+self.access_token,"Content-Type":"application/json"}
-		body = {"shared_link":{"access":"open","permissions":{"can_download":True}}}
-		r = requests.put(url,headers=headers,data=json.dumps(body))
-		r = json.loads(r.text)
-		if 'shared_link' in r:
-			return r['shared_link']['download_url']
-			
-		return None
-	def upload(self,folder,mimetype,file):
-		"""
-			@param folder	parent.id
-			@param name		name of the file with extension
-			@param stream	file content
-		"""
-		if folder not in self.folders :
-			#
-			# Let us create the folder now
-			#
-			url = "https://api.box.com/2.0/folders"
-			header = {"Authorization":"Bearer "+self.access_token}
-			pid =  self.folders["/"] if "/" in self.folders else self.folders[""]
-			data = {"parent":{"id":str(pid)}}
-			
-			r = requests.post(url,header=header,data=data)
-			r = r.json()
-			pid = r["id"]
-		else:
-			pid = self.folders[folder]
-		url = "https://upload.box.com/api/2.0/files/content"
-		header = {"Authorization Bearer ":self.access_token,"Content-Type":mimetype}
-		r = requests.post(url,header=header,file=file)
-		r = r.json()
-		return r
-		
-
-class SugarSync(Cloud):
-	def __init__(self):
-		Cloud.__init__(self)
-	
-	def __init__(self,conf):
-		Cloud.__init__(self);
-		if conf is not None:
-			self.client_id		= conf['app_id']
-			self.private_key	= conf['private_key']
-			self.access_key		= conf['access_key']
-		#self.access_token = None
-		#self.refresh_token= None
-		# self.redirect_uri = conf['redirect_uri'] if 'redirect_uri' in conf else None
-		#self.files	  = []
-	def init(self,token):
-		self.refresh_token = token
-		self._refresh()	
-	def login(self,email,password):
-		xml = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?><appAuthorization><username>:username</username><password>:password</password><application>:app_id</application><accessKeyId>:accesskey</accessKeyId><privateAccessKey>:privatekey</privateAccessKey></appAuthorization>'
-		xml = xml.replace(":app_id",self.app_id).replace(":privatekey",self.private_key).replace(":accesskey",self.access_key).replace(":username",email).replace(":password",password)
-		headers = {"Content-Type":"application/xml","User-Agent":"The Phi Technology"}
-		r = requests.post(url,headers=headers,data=xml)
-		self.refresh_token = r.headers['Location']
-		
-		
-	def _refresh(self):
-		xml = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?><tokenAuthRequest><accessKeyId>:accesskey</accessKeyId><privateAccessKey>:privatekey</privateAccessKey><refreshToken>:authtoken</refreshToken></tokenAuthRequest>'
-		xml = xml.replace(":accesskey",self.access_key).replace(":privatekey",self.private_key).replace(":authtoken",self.refresh_token)
-		
-		headers = {"Content-Type":"application/xml","User-Agent":"The Phi Technology LLC"}
-		url = "https://api.sugarsync.com/authorization"
-		r 	=	 requests.post(url,data=xml,headers=headers)
-		
-		self.access_token = r.headers['Location']
-	def format(self,item):
-		file = {}
-		file['name']	= item['displayName']
-		file['url']	= item['fileData']
-		file['id']	= item['ref']
-		meta		= {}
-		meta['last_modified'] = item['lastModified']
-		file['meta']	= meta
-		return file
-	
-	def get_files(self,ext,url=None) :
-		if url is None:
-			url = "https://api.sugarsync.com/folder/:sc:3989243:2/contents";
-		headers = {"Authorization":self.access_token,"User-Agent":"The Phi Technology LLC","Content-Type":"application/xml;charset=utf-8"}
-		r = requests.get(url,headers=headers)
-		stream = r.text #.encode('utf-8')
-		r = xmltodict.parse(r.text)
-
-		if 'collectionContents' in r:
-			
-			r = r['collectionContents']
-			#
-			# Extracting files in the current folder then we will see if there are any subfolders
-			# The parser has weird behaviors that leave inconsistent objects (field names) 
-			# This means we have to filter it out by testing the item being processed			
-			if 'file' in r:	
-				if isinstance(r['file'],dict):
-					self.files += [ self.format(r['file']) ]			
-				else:
-					
-					#self.files += [self.format(item) for item in r['file'] if isinstance(item,(str, unicode)) == False and item['displayName'].endswith(ext)]
-					self.files += [self.format(item) for item in r['file'] if isinstance(item,(str, unicode)) == False and self.match(item['displayName'],ext)]
-			
-			if 'collection' in r:
-				if isinstance(r['collection'],dict) :
-					#
-					# For some unusual reason the parser handles single instances as objects instead of collection
-					# @NOTE: This is a behavior that happens when a single item is in the collection
-					#
-					self.get_files(ext,r['collection']['contents'])
-				for item in r['collection'] :						
-					if 'contents' in item:
-						if isinstance(item,(str, unicode)) == False:							
-							self.files += self.get_files(ext,item['contents'])
-				#[ self.get_files(ext,item['contents']) for item in r['collection'] if item['type'] == 'folder']
-		return self.files
-	
-	def get_user(self):
-		url = "https://api.sugarsync.com/user"
-		headers = {"Authorization":self.access_token,"User-Agent":"The Phi Technology LLC","Content-Type":"application/xml;charset=utf-8"}
-		r = requests.get(url,headers=headers)
-		
-		r = xmltodict.parse(r.text)
-		r = r['user']
-		
-		
-		if 'username' in r and 'quota' in r:
-			user = {"uid":r['username'],"uii":r['nickname']}
-			size = long(r['quota']['limit'])
-			used = long(r['quota']['usage'])
-			usage = {"size":size/Cloud.BYTES_TO_GB,"used":used/Cloud.BYTES_TO_GB,"units":"GB"}
-			user['usage'] = usage
-			return user
-		else:
-			return None
-	def stream(self,url):		
-		headers = {"Authorization":self.access_token}
-		r = requests.get(url,headers=headers,stream=True)
-		yield r.content
-	"""
-		This function will create a public link and share it to designated parties
-	"""
-	def share(self,id):
-		url = "https://api.sugarsync.com/file/:id".replace(":id",id);
-		xml = '<?xml version="1.0" encoding="UTF-8" ?><file><publicLink enabled="true"/></file>';
-		headers = {"Content-Type":"application/xml","Authorization":self.access_token,"User-Agent":"The Phi Technology LLC"}
-		r = requests.put(url,header=header,data=xml)
-		r = xmltodict.parse(r.text)
-		if 'file' in r:
-			return r['file']['publicLink']['content']+"?directDownload=true"
-		else:
-			return None
-
-	def upload(self,folder,mimetype,file):
-		
-		name  = foler+"/"+file.filename
-		xml = '<?xml version="1.0" encoding="UTF-8" ?><file><displayName>:name</displayName><mediaType>:type</mediaType></file>'
-		xml = xml.replace(':name',name).replace(':type',mimetype)
-		header = {"content-type":"application/xml","User-Agent":"The Phi Technology LLC"}
-		header['Authorization'] = self.access_token 
-		
-		r = requests.post(url,headers=header,files=file,data=xml)
-		pass
-
-class iTunes(Cloud):
-	def __init__(self):
-		Cloud.__init__(self)
-		self.url_topsongs 	= "http://ax.itunes.apple.com/WebObjects/MZStoreServices.woa/ws/RSS/topsongs/limit=:limit/explicit=false/json"
-		self.url_search		= "http://itunes.apple.com/search?term=:keyword&limit=:limit&media=music"
-	def parse_search(self,obj):
-		
-		files = []
-		try:
-			logs = obj['results']
-			
-			for item in logs :
-				
-				file = {}
-				file['id']   = item['trackId']
-				file['name'] = item['trackName']
-				file['id3']  = {}
-				file['id3']['track'] = item['trackName']
-				file['id3']['title'] = item['trackName']
-				file['id3']['artist']= item['artistName']
-				file['id3']['album'] = item['collectionName']
-				file['id3']['genre'] = item['primaryGenreName']
-				file['id3']['poster']= item['artworkUrl100']
-				file['url']          = item['previewUrl']
-
-				files.append(file)
-		except Exception,e:
-			print e
-		return files
-	def parse_chart(self,obj):
-		"""
-			This function will parse the tonsongs returned by the itunes API
-		"""
-		files = []	
-		
-		try:
-			logs = obj['feed']['entry']
-			if isinstance(logs,dict) :
-				logs = [logs]
-			
-			for item in logs :
-				
-				file = {'name':item['im:name']['label'],'id3':{}}
-				file['id'] = item['id']['attributes']['im:id']
-				file['id3'] = {}
-				file['id3']['artist']	= item['im:artist']['label']
-				file['id3']['track']	= item['title']['label']
-				file['id3']['title']	= item['title']['label']
-				file['id3']['album']	= item['im:collection']['im:name']['label']
-				file['id3']['genre']	= item['category']['attributes']['term']
-				index = len(item['im:image'])-1
-				file['id3']['poster']	= item['im:image'][index]['label']
-				url = [link['attributes']['href'] for link in item['link'] if 'im:assetType' in link['attributes'] and link['attributes']['im:assetType']=='preview']
-				
-				if len(url) > 0:
-					url = url[0]
-					file['url'] = url #item['link'][1]['attributes']['href'] //'im:assetType' == 'preview' and 'im:duration' is in the sub-item				
-					files.append(file)
-
-				else:
-					continue
-		except Exception,e:
-			print e
-			#
-			# @TODO: Log the error somewhere to make it useful
-		
-		return files 
-		
-	def parse(self,obj) :
-		if 'feed' in obj and 'entry' in obj['feed']:			
-			return self.parse_chart(obj)
-		elif 'results' in obj :
-			return self.parse_search(obj)
-		else:
-			return []
-	def get_files(self,keyword=None,limit="1") :
-		url = self.url_search if keyword is not None else self.url_topsongs
-		keyword = "" if keyword is None else keyword
-		# limit = "50" if keyword == "" else "1"
-		
-		url = url.replace(":keyword",keyword.replace(' ','+')).replace(':limit',limit)
-		r = requests.get(url)
-		r= r.json()
-		return self.parse(r)

+ 0 - 709
src/utils/transport.py

@@ -1,709 +0,0 @@
-"""
-	This file implements data transport stuctures in order to allow data to be moved to and from anywhere
-	We can thus read data from disk and write to the cloud,queue, or couchdb or SQL
-"""
-from flask import request, session
-import os
-import pika
-import json
-import numpy as np
-from couchdbkit import Server
-import re
-from csv import reader
-from datetime import datetime
-"""
-	@TODO: Write a process by which the class automatically handles reading and creating a preliminary sample and discovers the meta data
-"""
-class Reader:
-	def __init__(self):
-		self.nrows = 0
-		self.xchar = None
-		
-	def row_count(self):		
-		content = self.read()
-		return np.sum([1 for row in content])
-	"""
-		This function determines the most common delimiter from a subset of possible delimiters. It uses a statistical approach to guage the distribution of columns for a given delimiter
-	"""
-	def delimiter(self,sample):
-		
-		m = {',':[],'\t':[],'|':[],'\x3A':[]} 
-		delim = m.keys()
-		for row in sample:
-			for xchar in delim:
-				if row.split(xchar) > 1:	
-					m[xchar].append(len(row.split(xchar)))
-				else:
-					m[xchar].append(0)
-				
-				
-					
-		#
-		# The delimiter with the smallest variance, provided the mean is greater than 1
-		# This would be troublesome if there many broken records sampled
-		#
-		m = {id: np.var(m[id]) for id in m.keys() if m[id] != [] and int(np.mean(m[id]))>1}
-		index = m.values().index( min(m.values()))
-		xchar = m.keys()[index]
-		
-		return xchar
-	"""
-		This function determines the number of columns of a given sample
-		@pre self.xchar is not None
-	"""
-	def col_count(self,sample):
-		
-		m = {}
-		i = 0
-		
-		for row in sample:
-			row = self.format(row)
-			id = str(len(row))
-			#id = str(len(row.split(self.xchar))) 
-			
-			if id not in m:
-				m[id] = 0
-			m[id] = m[id] + 1
-		
-		index = m.values().index( max(m.values()) )
-		ncols = int(m.keys()[index])
-		
-		
-		return ncols;
-	"""
-		This function will clean records of a given row by removing non-ascii characters
-		@pre self.xchar is not None
-	"""
-	def format (self,row):
-		
-		if isinstance(row,list) == False:
-			#
-			# We've observed sometimes fields contain delimiter as a legitimate character, we need to be able to account for this and not tamper with the field values (unless necessary)
-			cols = self.split(row)
-			#cols = row.split(self.xchar)
-		else:
-			cols = row ;
-		return [ re.sub('[^\x00-\x7F,\n,\r,\v,\b,]',' ',col.strip()).strip().replace('"','') for col in cols]
-		
-		#if isinstance(row,list) == False:
-		#	return (self.xchar.join(r)).format('utf-8')
-		#else:
-		#	return r
-	"""
-		This function performs a split of a record and tries to attempt to preserve the integrity of the data within i.e accounting for the double quotes.
-		@pre : self.xchar is not None
-	""" 
-	def split (self,row):
-
-		pattern = "".join(["(?:^|",self.xchar,")(\"(?:[^\"]+|\"\")*\"|[^",self.xchar,"]*)"])
-		return re.findall(pattern,row.replace('\n',''))
-		
-class Writer:
-	
-	def format(self,row,xchar):
-		if xchar is not None and isinstance(row,list):
-			return xchar.join(row)+'\n'
-		elif xchar is None and isinstance(row,dict):
-			row = json.dumps(row)
-		return row
-	"""
-		It is important to be able to archive data so as to insure that growth is controlled
-		Nothing in nature grows indefinitely neither should data being handled.
-	"""
-	def archive(self):
-		pass
-	def flush(self):
-		pass
-	
-"""
-  This class is designed to read data from an Http request file handler provided to us by flask
-  The file will be heald in memory and processed accordingly
-  NOTE: This is inefficient and can crash a micro-instance (becareful)
-"""
-class HttpRequestReader(Reader):
-	def __init__(self,**params):
-		self.file_length = 0
-		try:
-			
-			#self.file = params['file']	
-			#self.file.seek(0, os.SEEK_END)
-			#self.file_length = self.file.tell()
-			
-			#print 'size of file ',self.file_length
-			self.content = params['file'].readlines()
-			self.file_length = len(self.content)
-		except Exception, e:
-			print "Error ... ",e
-			pass
-		
-	def isready(self):
-		return self.file_length > 0
-	def read(self,size =-1):
-		i = 1
-		for row in self.content:
-			i += 1
-			if size == i:
-				break
-			yield row
-		
-"""
-	This class is designed to write data to a session/cookie
-"""
-class HttpSessionWriter(Writer):
-	"""
-		@param key	required session key
-	"""
-	def __init__(self,**params):
-		self.session = params['queue']
-		self.session['sql'] = []
-		self.session['csv'] = []
-		self.tablename = re.sub('..+$','',params['filename'])
-		self.session['uid'] = params['uid']
-		#self.xchar = params['xchar']
-			
-		
-	def format_sql(self,row):
-		values = "','".join([col.replace('"','').replace("'",'') for col in row])
-		return "".join(["INSERT INTO :table VALUES('",values,"');\n"]).replace(':table',self.tablename)		
-	def isready(self):
-		return True
-	def write(self,**params):
-		label = params['label']
-		row = params ['row']
-		
-		if label == 'usable':
-			self.session['csv'].append(self.format(row,','))
-			self.session['sql'].append(self.format_sql(row))
-		
-"""
-  This class is designed to read data from disk (location on hard drive)
-  @pre : isready() == True
-"""
-class DiskReader(Reader) :
-	"""
-		@param	path	absolute path of the file to be read
-	"""
-	def __init__(self,**params):
-		Reader.__init__(self)
-		self.path = params['path'] ;
-
-	def isready(self):
-		return os.path.exists(self.path) 
-	"""
-		This function reads the rows from a designated location on disk
-		@param	size	number of rows to be read, -1 suggests all rows
-	"""
-	def read(self,size=-1):
-		f = open(self.path,'rU') 
-		i = 1
-		for row in f:
-			
-			i += 1
-			if size == i:
-				break
-			yield row
-		f.close()
-"""
-	This function writes output to disk in a designated location
-"""
-class DiskWriter(Writer):
-	def __init__(self,**params):
-		if 'path' in params:
-			self.path = params['path']
-		else:
-			self.path = None
-		if 'name' in params:
-			self.name = params['name'];
-		else:
-			self.name = None
-		if os.path.exists(self.path) == False:
-			os.mkdir(self.path)
-	"""
-		This function determines if the class is ready for execution or not
-		i.e it determines if the preconditions of met prior execution
-	"""
-	def isready(self):
-		
-		p =  self.path is not None and os.path.exists(self.path)
-		q = self.name is not None 
-		return p and q
-	"""
-		This function writes a record to a designated file
-		@param	label	<passed|broken|fixed|stats>
-		@param	row	row to be written
-	"""
-	def write(self,**params):
-		label 	= params['label']
-		row 	= params['row']
-		xchar = None
-		if 'xchar' is not None:
-			xchar 	= params['xchar']
-		path = ''.join([self.path,os.sep,label])
-		if os.path.exists(path) == False:
-			os.mkdir(path) ;
-		path = ''.join([path,os.sep,self.name]) 
-		f = open(path,'a')
-		row = self.format(row,xchar);
-		f.write(row)
-		f.close()
-"""
-	This class hierarchy is designed to handle interactions with a queue server using pika framework (our tests are based on rabbitmq)
-"""
-class MessageQueue:
-	def __init__(self,**params):
-		self.host= params['host']
-		self.uid = params['uid']
-		self.qid = params['qid']
-	
-	def isready(self):
-		#self.init()
-		resp =  self.connection is not None and self.connection.is_open
-		self.close()
-		return resp
-	def close(self):
-            if self.connection.is_closed == False :
-		self.channel.close()
-		self.connection.close()
-"""
-	This class is designed to publish content to an AMQP (Rabbitmq)
-	The class will rely on pika to implement this functionality
-
-	We will publish information to a given queue for a given exchange
-"""
-
-class QueueWriter(MessageQueue,Writer):
-	def __init__(self,**params):
-		#self.host= params['host']
-		#self.uid = params['uid']
-		#self.qid = params['queue']
-		MessageQueue.__init__(self,**params);
-		
-		
-	def init(self,label=None):
-		properties = pika.ConnectionParameters(host=self.host)
-		self.connection = pika.BlockingConnection(properties)
-		self.channel	= self.connection.channel()
-		self.info = self.channel.exchange_declare(exchange=self.uid,type='direct',durable=True)
-		if label is None:
-			self.qhandler = self.channel.queue_declare(queue=self.qid,durable=True)	
-		else:
-			self.qhandler = self.channel.queue_declare(queue=label,durable=True)
-		
-		self.channel.queue_bind(exchange=self.uid,queue=self.qhandler.method.queue) 
-		
-
-
-	"""
-		This function writes a stream of data to the a given queue
-		@param object	object to be written (will be converted to JSON)
-		@TODO: make this less chatty
-	"""
-	def write(self,**params):
-		xchar = None
-		if  'xchar' in params:
-			xchar = params['xchar']
-		object = self.format(params['row'],xchar)
-		
-		label	= params['label']
-		self.init(label)
-		_mode = 2
-		if isinstance(object,str):
-			stream = object
-			_type = 'text/plain'
-		else:
-			stream = json.dumps(object)
-			if 'type' in params :
-				_type = params['type']
-			else:
-				_type = 'application/json'
-
-		self.channel.basic_publish(
-			exchange=self.uid,
-			routing_key=label,
-			body=stream,
-			properties=pika.BasicProperties(content_type=_type,delivery_mode=_mode)
-		);
-		self.close()
-
-	def flush(self,label):
-		self.init(label)
-		_mode = 1  #-- Non persistent
-		self.channel.queue_delete( queue=label);
-		self.close()
-		
-"""
-	This class will read from a queue provided an exchange, queue and host
-	@TODO: Account for security and virtualhosts
-"""
-class QueueReader(MessageQueue,Reader):
-	"""
-		@param	host	host
-		@param	uid	exchange identifier
-		@param	qid	queue identifier
-	"""
-	def __init__(self,**params):
-		#self.host= params['host']
-		#self.uid = params['uid']
-		#self.qid = params['qid']
-		MessageQueue.__init__(self,**params);
-		if 'durable' in params :
-			self.durable = True
-		else:
-			self.durable = False
-		self.size = -1
-		self.data = {}
-	def init(self,qid):
-		
-		properties = pika.ConnectionParameters(host=self.host)
-		self.connection = pika.BlockingConnection(properties)
-		self.channel	= self.connection.channel()
-		self.channel.exchange_declare(exchange=self.uid,type='direct',durable=True)
-
-		self.info = self.channel.queue_declare(queue=qid,durable=True)
-	
-
-
-	"""
-		This is the callback function designed to process the data stream from the queue
-
-	"""
-	def callback(self,channel,method,header,stream):
-                
-		r = []
-		if re.match("^\{|\[",stream) is not None:
-			r = json.loads(stream)
-		else:
-			
-			r = stream
-		
-		qid = self.info.method.queue
-		if qid not in self.data :
-			self.data[qid] = []
-		
-		self.data[qid].append(r)
-		#
-		# We stop reading when the all the messages of the queue are staked
-		#
-		if self.size == len(self.data[qid]) or len(self.data[qid]) == self.info.method.message_count:		
-			self.close()
-
-	"""
-		This function will read, the first message from a queue
-		@TODO: 
-		Implement channel.basic_get in order to retrieve a single message at a time
-		Have the number of messages retrieved be specified by size (parameter)
-	"""
-	def read(self,size=-1):
-		r = {}
-		self.size = size
-		#
-		# We enabled the reader to be able to read from several queues (sequentially for now)
-		# The qid parameter will be an array of queues the reader will be reading from
-		#
-		if isinstance(self.qid,basestring) :
-                    self.qid = [self.qid]
-		for qid in self.qid:
-			self.init(qid)
-			# r[qid] = []
-			
-			if self.info.method.message_count > 0:
-				
-				self.channel.basic_consume(self.callback,queue=qid,no_ack=False);
-				self.channel.start_consuming()
-			else:
-				
-				pass
-				#self.close()
-			# r[qid].append( self.data)
-		
-		return self.data
-class QueueListener(QueueReader):
-	def init(self,qid):
-		properties = pika.ConnectionParameters(host=self.host)
-		self.connection = pika.BlockingConnection(properties)
-		self.channel	= self.connection.channel()
-		self.channel.exchange_declare(exchange=self.uid,type='direct',durable=True )
-
-		self.info = self.channel.queue_declare(passive=True,exclusive=True,queue=qid)
-		
-		self.channel.queue_bind(exchange=self.uid,queue=self.info.method.queue,routing_key=qid)
-		#self.callback = callback
-	def read(self):
-    	
-		self.init(self.qid)
-		self.channel.basic_consume(self.callback,queue=self.qid,no_ack=True);
-		self.channel.start_consuming()
-    		
-"""
-	This class is designed to write output as sql insert statements
-	The class will inherit from DiskWriter with minor adjustments
-	@TODO: Include script to create the table if need be using the upper bound of a learner
-"""
-class SQLDiskWriter(DiskWriter):
-	def __init__(self,**args):
-		DiskWriter.__init__(self,**args)
-		self.tablename = re.sub('\..+$','',self.name).replace(' ','_')
-	"""
-		@param label
-		@param row
-		@param xchar
-	"""
-	def write(self,**args):
-		label	= args['label']
-		row = args['row']
-		
-		if label == 'usable':
-			values = "','".join([col.replace('"','').replace("'",'') for col in row])
-			row = "".join(["INSERT INTO :table VALUES('",values,"');\n"]).replace(':table',self.tablename)
-
-			args['row']  = row
-		DiskWriter.write(self,**args)
-class Couchdb:
-	"""
-		@param	uri		host & port reference
-		@param	uid		user id involved
-
-		@param	dbname		database name (target)
-	"""
-	def __init__(self,**args):
-		uri 		= args['uri']
-		self.uid 	= args['uid']
-		dbname		= args['dbname']
-		self.server 	= Server(uri=uri) 
-		self.dbase	= self.server.get_db(dbname)
-		if self.dbase.doc_exist(self.uid) == False:
-			self.dbase.save_doc({"_id":self.uid})
-	"""
-		Insuring the preconditions are met for processing
-	"""
-	def isready(self):
-		p = self.server.info() != {}
-		if p == False or self.dbase.dbname not in self.server.all_dbs():
-			return False
-		#
-		# At this point we are sure that the server is connected
-		# We are also sure that the database actually exists
-		#
-		q = self.dbase.doc_exist(self.uid)
-		if q == False:
-			return False
-		return True
-	def view(self,id,**args):
-		r =self.dbase.view(id,**args)
-		r = r.all()		
-		return r[0]['value'] if len(r) > 0 else []
-		
-"""
-	This function will read an attachment from couchdb and return it to calling code. The attachment must have been placed before hand (otherwise oops)
-	@T: Account for security & access control
-"""
-class CouchdbReader(Couchdb,Reader):
-	"""
-		@param	filename	filename (attachment)
-	"""
-	def __init__(self,**args):
-		#
-		# setting the basic parameters for 
-		Couchdb.__init__(self,**args)
-		if 'filename' in args :
-			self.filename 	= args['filename']
-		else:
-			self.filename = None
-
-	def isready(self):
-		#
-		# Is the basic information about the database valid
-		#
-		p = Couchdb.isready(self)
-		
-		if p == False:
-			return False
-		#
-		# The database name is set and correct at this point
-		# We insure the document of the given user has the requested attachment.
-		# 
-		
-		doc = self.dbase.get(self.uid)
-		
-		if '_attachments' in doc:
-			r = self.filename in doc['_attachments'].keys()
-			
-		else:
-			r = False
-		
-		return r	
-	def stream(self):
-		content = self.dbase.fetch_attachment(self.uid,self.filename).split('\n') ;
-		i = 1
-		for row in content:
-			yield row
-			if size > 0 and i == size:
-				break
-			i = i + 1
-		
-	def read(self,size=-1):
-		if self.filename is not None:
-			self.stream()
-		else:
-			return self.basic_read()
-	def basic_read(self):
-		document = self.dbase.get(self.uid) 
-		del document['_id'], document['_rev']
-		return document
-"""
-	This class will write on a couchdb document provided a scope
-	The scope is the attribute that will be on the couchdb document
-"""
-class CouchdbWriter(Couchdb,Writer):		
-	"""
-		@param	uri		host & port reference
-		@param	uid		user id involved
-		@param	filename	filename (attachment)
-		@param	dbname		database name (target)
-	"""
-	def __init__(self,**args):
-
-		Couchdb.__init__(self,**args)
-		uri 		= args['uri']
-		self.uid 	= args['uid']
-		if 'filename' in args:
-			self.filename 	= args['filename']
-		else:
-			self.filename = None
-		dbname		= args['dbname']
-		self.server 	= Server(uri=uri) 
-		self.dbase	= self.server.get_db(dbname)
-		#
-		# If the document doesn't exist then we should create it
-		#
-
-	"""
-		write a given attribute to a document database
-		@param	label	scope of the row repair|broken|fixed|stats
-		@param	row	row to be written
-	"""
-	def write(self,**params):
-		
-		document = self.dbase.get(self.uid)
-		label = params['label']
-
-		
-		if 'row' in params :
-			row	= params['row']
-			row_is_list	= isinstance(row,list)
-			if label not in document :
-				document[label] = row if row_is_list else [row]
-			elif isinstance(document[label][0],list) :
-				document[label] += row
-				
-			else:
-				document[label].append(row)
-		else :
-			if label not in document :
-				document[label] = {}
-			if isinstance(params['data'],object) :
-				
-				document[label] = dict(document[label],**params['data'])
-			else:
-				document[label] = params['data']
-			
-		# if label not in document :
-		# 	document[label] = [] if isinstance(row,list) else {}
-		# if isinstance(document[label],list):
-		# 	document[label].append(row)
-		# else :
-		# 	document[label] = dict(document[label],**row)
-		self.dbase.save_doc(document)
-	def flush(self,**params) :
-		
-		size = params['size'] if 'size' in params else 0
-		has_changed = False	
-		document = self.dbase.get(self.uid)
-		for key in document:
-			if key not in ['_id','_rev','_attachments'] :
-				content = document[key]
-			else:
-				continue
-			if isinstance(content,list) and size > 0:
-				index = len(content) - size
-				content = content[index:]
-				document[key] = content
-				
-			else:
-				document[key] = {}
-				has_changed = True
-		
-		self.dbase.save_doc(document)
-			
-	def archive(self,params=None):
-		document = self.dbase.get(self.uid)
-		content = {}
-		_doc = {}
-		for id in document:
-			if id in ['_id','_rev','_attachments'] :
-				_doc[id] = document[id]
-			else:
-				content[id] = document[id]
-				
-		content = json.dumps(content)	
-		document= _doc
-		now = str(datetime.today())
-		
-		name = '-'.join([document['_id'] , now,'.json'])			
-		self.dbase.save_doc(document)
-		self.dbase.put_attachment(document,content,name,'application/json')
-"""
-	This class acts as a factory to be able to generate an instance of a Reader/Writer
-	Against a Queue,Disk,Cloud,Couchdb 
-	The class doesn't enforce parameter validation, thus any error with the parameters sent will result in a null Object
-"""
-class DataSourceFactory:
-	def instance(self,**args):
-		source = args['type']		
-		params = args['args']
-		anObject = None
-		
-		if source in ['HttpRequestReader','HttpSessionWriter']:
-			#
-			# @TODO: Make sure objects are serializable, be smart about them !!
-			#
-			aClassName = ''.join([source,'(**params)'])
-
-
-		else:
-			
-			stream = json.dumps(params)
-			aClassName = ''.join([source,'(**',stream,')'])
-		try:
-			
-			
-			anObject = eval( aClassName)
-			#setattr(anObject,'name',source)
-		except Exception,e:
-			print ['Error ',e]
-		return anObject
-"""
-	This class implements a data-source handler that is intended to be used within the context of data processing, it allows to read/write anywhere transparently.
-	The class is a facade to a heterogeneous class hierarchy and thus simplifies how the calling code interacts with the class hierarchy
-"""
-class DataSource:
-	def __init__(self,sourceType='Disk',outputType='Disk',params={}):
-		self.Input = DataSourceFactory.instance(type=sourceType,args=params)
-		self.Output= DataSourceFactory.instance(type=outputType,args=params)
-	def read(self,size=-1):
-		return self.Input.read(size)
-	def write(self,**args):
-		self.Output.write(**args)
-#p = {}
-#p['host'] = 'dev.the-phi.com'
-#p['uid'] = 'nyemba@gmail.com'
-#p['qid'] = 'repair'
-#factory = DataSourceFactory()
-#o =  factory.instance(type='QueueReader',args=p)		
-#print o is None
-#q = QueueWriter(host='dev.the-phi.com',uid='nyemba@gmail.com')
-#q.write(object='steve')
-#q.write(object='nyemba')
-#q.write(object='elon')
-
-