
housekeeping work

Steve Nyemba committed 1 year ago
parent · commit b160d0a295
5 changed files with 0 additions and 690 deletions
  1. transport/couch.py (+0 -234)
  2. transport/mongo.py (+0 -241)
  3. transport/nextcloud.py (+0 -80)
  4. transport/qlistener.py (+0 -47)
  5. transport/session.py (+0 -88)

+ 0 - 234
transport/couch.py

@@ -1,234 +0,0 @@
-"""
-Data-Transport
-Steve L. Nyemba, The Phi Technology
-
-This file is a wrapper around couchdb, using the IBM Cloudant SDK as its interface to couchdb
-
-"""
-import cloudant
-import json
-import sys
-from datetime import datetime
-if sys.version_info[0] > 2 :
-	from transport.common import Reader, Writer
-else:
-	from common import Reader, Writer
-class Couch:
-	"""
-	This class is a wrapper for read/write against couchdb. The class captures common operations for read/write.
-		@param	url		host & port reference default http://localhost:5984
-		@param	doc		user id involved
-		@param	dbname		database name (target)
-	"""
-	def __init__(self,**args):
-		url 		= args['url'] if 'url' in args else 'http://localhost:5984'
-		self._id 	= args['doc']
-		dbname		= args['dbname']
-		if 'username' not in args and 'password' not in args :
-			self.server 	= cloudant.CouchDB(None,None,url=url)
-		else:
-			self.server = cloudant.CouchDB(args['username'],args['password'],url=url)
-		self.server.connect()
-
-		if dbname in self.server.all_dbs() :
-			self.dbase	= self.server.get(dbname,dbname,True)
-			#
-			# @TODO Check if the database exists ...
-			#
-			doc = cloudant.document.Document(self.dbase,self._id) #self.dbase.get(self._id)
-			if not doc.exists():
-				doc = self.dbase.create_document({"_id":self._id})
-				doc.save()
-		else:
-			self.dbase = None
-	"""
-		Ensuring the preconditions are met for processing
-	"""
-	def isready(self):
-		p = self.server.metadata() != {}
-		if p == False or not self.dbase:
-			return False
-		#
-		# At this point we are sure that the server is connected
-		# We are also sure that the database actually exists
-		#
-		doc = cloudant.document.Document(self.dbase,self._id)
-		# q = self.dbase.all_docs(key=self._id)['rows'] 
-		# if not q :
-		if not doc.exists():
-			return False
-		return True
-	
-	def view(self,**args):
-		"""
-		The function will execute a view (provided a user is authenticated)
-		:id	design 	document _design/xxxx (provide full name with _design prefix)
-		:view_name	name of the view to be executed
-		:key(s)		key(s) to be used to filter the content
-		"""
-		document = cloudant.design_document.DesignDocument(self.dbase,args['id'])
-		document.fetch()
-		params = {'group_level':1,'group':True}
-		if 'key' in  args :
-			params ['key'] = args['key']
-		elif 'keys' in args :
-			params['keys'] = args['keys']
-		return document.get_view(args['view_name'])(**params)['rows']
-		
-		
-
-		
-class CouchReader(Couch,Reader):
-	"""
-		This class will read an attachment from couchdb and return it to the calling code. The attachment must have been placed beforehand (otherwise an error is raised).
-		@T: Account for security & access control
-	"""
-	def __init__(self,**args):
-		"""
-			@param	filename	filename (attachment)
-		"""
-		#
-		# setting the basic parameters for 
-		Couch.__init__(self,**args)
-		if 'filename' in args :
-			self.filename 	= args['filename']
-		else:
-			self.filename = None
-
-	# def isready(self):
-	# 	#
-	# 	# Is the basic information about the database valid
-	# 	#
-	# 	p = Couchdb.isready(self)
-		
-	# 	if p == False:
-	# 		return False
-	# 	#
-	# 	# The database name is set and correct at this point
-	# 	# We insure the document of the given user has the requested attachment.
-	# 	# 
-		
-	# 	doc = self.dbase.get(self._id)
-		
-	# 	if '_attachments' in doc:
-	# 		r = self.filename in doc['_attachments'].keys()
-			
-	# 	else:
-	# 		r = False
-		
-	# 	return r	
-	def stream(self):
-		#
-		# @TODO Need to get this working ...
-		#
-		document = cloudant.document.Document(self.dbase,self._id)
-		# content = self.dbase.fetch_attachment(self._id,self.filename).split('\n') ;
-		content = document.get_attachment(self.filename)
-		for row in content:
-			yield row
-		
-	def read(self,**args):
-		if self.filename is not None:
-			return self.stream()
-		else:
-			return self.basic_read()
-	def basic_read(self):
-		document = cloudant.document.Document(self.dbase,self._id)
-		
-		# document = self.dbase.get(self._id)
-		if document.exists() :			
-			document.fetch()
-			document = dict(document)
-			del document['_rev']
-		else:
-			document = {}
-		return document
-
-class CouchWriter(Couch,Writer):		
-	"""
-		This class will write to a couchdb document, provided a scope
-		The scope is the attribute that will be set on the couchdb document
-	"""
-	def __init__(self,**args):
-		"""
-			@param	url		host & port reference
-			@param	uid		user id involved
-			@param	filename	filename (attachment)
-			@param	dbname		database name (target)
-		"""
-
-		Couch.__init__(self,**args)
-	def set (self,info):
-		document  = cloudant.document.Document(self.dbase,self._id)
-		if document.exists() :
-			keys = list(set(document.keys()) - set(['_id','_rev','_attachments']))
-			for id in keys :
-				document.field_set(document,id,None)
-			for id in info :
-				value = info[id]
-				document.field_set(document,id,value)
-			
-			document.save()
-		else:
-			_document = dict({"_id":self._id},**info)
-			self.dbase.create_document(_document)
-	def write(self,info):
-		"""
-			write the given attributes to the document database
-			@info	dict of attributes to be written to the document
-		"""
-		
-		# document = self.dbase.get(self._id)
-		document = cloudant.document.Document(self.dbase,self._id) #.get(self._id)
-		if document.exists() is False :
-			document = self.dbase.create_document({"_id":self._id})
-		# label = params['label']
-		# row	= params['row']
-		# if label not in document :
-		# 	document[label] = []
-		# document[label].append(row)
-		for key in info :
-			if key in document and type(document[key]) == list :
-				document[key] += info[key]
-			else:
-				document[key] = info[key]
-				
-		document.save()
-		# self.dbase.bulk_docs([document])
-		# self.dbase.save_doc(document)
-	
-	def upload(self,**args):
-		"""
-		:param	filename	name of the file to be uploaded
-		:param	data	content of the file (binary or text)
-		:param	content_type	mime type (default text/plain)
-		"""
-		mimetype = args['content_type'] if 'content_type' in args else 'text/plain'
-		document = cloudant.document.Document(self.dbase,self._id)
-		document.put_attachment(args['filename'],mimetype,args['data'])
-		document.save()
-
-	def archive(self,params=None):
-		"""
-		This function will archive the document onto itself as a json attachment
-		"""
-		document = cloudant.document.Document(self.dbase,self._id)
-		document.fetch()
-		content = {}
-		for id in list(document.keys()):
-			if  id not in ['_id','_rev','_attachments'] :
-				content[id] = document[id]
-				del document[id]
-				
-		content = json.dumps(content)	
-		now = str(datetime.today())
-		
-		name = '-'.join([document['_id'] , now]) + '.json'
-		self.upload(filename=name,data=content,content_type='application/json')		
-		# self.dbase.bulk_docs([document])
-		# self.dbase.put_attachment(document,content,name,'application/json')
-		# document.put_attachment(self.dbase,name,'application/json',content)
-		# document.save()
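
For reference, a minimal sketch of how the removed transport/couch.py module was typically driven. The url, dbname and doc values below are placeholders, and the 'demo' database is assumed to already exist (otherwise the wrapper sets self.dbase to None):

    from transport.couch import CouchReader, CouchWriter

    _args = dict(url='http://localhost:5984', dbname='demo', doc='user-001')   # hypothetical values
    writer = CouchWriter(**_args)
    writer.write({'logs': [{'event': 'login'}]})   # merges fields; list-valued fields are appended to

    reader = CouchReader(**_args)
    record = reader.read()                         # the document as a dict, without '_rev'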

+ 0 - 241
transport/mongo.py

@@ -1,241 +0,0 @@
-"""
-Data Transport - 1.0
-Steve L. Nyemba, The Phi Technology LLC
-
-This file is a wrapper around mongodb for reading/writing content against a mongodb server and executing views (mapreduce)
-"""
-from pymongo        import MongoClient
-from bson.objectid  import ObjectId
-from bson.binary    import Binary
-# import nujson as json
-from datetime import datetime
-import pandas as pd
-import numpy as np
-import gridfs
-# from transport import Reader,Writer
-import sys
-if sys.version_info[0] > 2 :
-	from transport.common import Reader, Writer, IEncoder
-else:
-	from common import Reader, Writer
-import json
-import re
-from multiprocessing import Lock, RLock
-class Mongo :
-    """
-    Basic mongodb functions are captured here
-    """
-    lock = RLock()
-    def __init__(self,**args):
-        """
-            :dbname     database name/identifier
-            :host       host and port of the database by default localhost:27017
-            :username   username for authentication
-            :password   password for current user
-        """
-
-        self.mechanism= 'SCRAM-SHA-256' if 'mechanism' not in args else args['mechanism']
-        # authSource=(args['authSource'] if 'authSource' in args else self.dbname)
-        self._lock = False if 'lock' not in args else args['lock']
-        self.dbname = None
-        username = password = None
-        if 'auth_file' in args :
-            _info = json.loads((open(args['auth_file'])).read())
-
-            
-        else:
-            _info = {}
-        _args = dict(args,**_info)
-        _map = {'dbname':'db','database':'db','table':'uid','collection':'uid','col':'uid','doc':'uid'}
-        for key in _args :
-            if key in ['username','password'] :
-                username = _args['username'] if key=='username' else username
-                password = _args['password'] if key == 'password' else password
-                continue
-            value = _args[key]
-            if key in _map :
-                key = _map[key]
-            
-            self.setattr(key,value)
-        #
-        # Let us perform aliasing in order to remain backwards compatible
-        
-        self.dbname = self.db if hasattr(self,'db') else self.dbname
-        self.uid    = _args['table'] if 'table' in _args else (_args['doc'] if 'doc' in _args else (_args['collection'] if 'collection' in _args else None))
-        if username and password :
-            self.client = MongoClient(self.host,
-                      username=username,
-                      password=password ,
-                      authSource=getattr(self,'authSource',self.dbname),
-                      authMechanism=self.mechanism)
-            
-        else:
-            self.client = MongoClient(self.host,maxPoolSize=10000)                    
-        
-        self.db = self.client[self.dbname]
-        
-    def isready(self):
-        p = self.dbname in self.client.list_database_names() 
-        q = self.uid in self.client[self.dbname].list_collection_names()
-        return p and q
-    def setattr(self,key,value):
-        _allowed = ['host','port','db','doc','collection','authSource','mechanism']
-        if key in _allowed :
-            setattr(self,key,value)
-        pass
-    def close(self):
-        self.client.close()
-    def meta(self,**_args):
-        return []
-class MongoReader(Mongo,Reader):
-    """
-    This class will read from a mongodb data store and return the content of a document (not a collection)
-    """
-    def __init__(self,**args):
-        Mongo.__init__(self,**args)
-    def read(self,**args):
-        
-        if 'mongo' in args or 'cmd' in args or 'pipeline' in args:
-            #
-            # @TODO:
-            cmd = {}
-            if 'aggregate' not in cmd and 'aggregate' not in args:
-                cmd['aggregate'] = self.uid
-            elif 'aggregate' in args :
-                cmd['aggregate'] = args['aggregate']
-            if 'pipeline' in args :
-                cmd['pipeline']= args['pipeline']
-
-            if 'pipeline' not in args or 'aggregate' not in cmd :
-                cmd = args['mongo'] if 'mongo' in args else args['cmd']
-            if "aggregate" in cmd :
-                if "allowDiskUse" not in cmd :
-                    cmd["allowDiskUse"] = True
-                if "cursor" not in cmd :
-                    cmd["cursor"] = {}
-            r =  []
-            out = self.db.command(cmd)
-            #@TODO: consider using a yield (generator) works wonders
-            while True :
-                if 'values' in out :
-                    r += out['values']
-                if 'cursor' in out :
-                    key = 'firstBatch' if 'firstBatch' in out['cursor'] else 'nextBatch'
-                else:
-                    key = 'n'
-                if 'cursor' in out and out['cursor'][key] :
-                    r += list(out['cursor'][key])
-                elif key in out and out[key]:
-                    r.append (out[key]) 
-                    # yield out['cursor'][key]
-                if key not in ['firstBatch','nextBatch'] or ('cursor' in out and out['cursor']['id']  == 0) :
-                    break
-                else:
-                    out = self.db.command({"getMore":out['cursor']['id'],"collection":out['cursor']['ns'].split(".")[-1]}) 
-                
-                
-            return pd.DataFrame(r)
-        else:
-            
-            
-            if 'table' in args :
-                _uid = args['table']
-            elif 'collection' in args :
-                _uid = args['collection']
-            else:
-                _uid = self.uid
-            collection = self.db[_uid]                
-            _filter = args['filter'] if 'filter' in args else {}
-            _df =  pd.DataFrame(collection.find(_filter))
-            columns = _df.columns.tolist()[1:]
-            return _df[columns]
-    def view(self,**args):
-        """
-        This function is designed to execute a view (map/reduce) operation
-        """
-        pass
-class MongoWriter(Mongo,Writer):
-    """
-    This class is designed to write to a mongodb collection within a database
-    """
-    def __init__(self,**args):
-        Mongo.__init__(self,**args)
-    def upload(self,**args) :
-        """
-        This function will upload a file to the current database (using GridFS)
-        :param  data        binary stream/text to be stored
-        :param  filename    filename to be used
-        :param  encoding    content_encoding (default utf-8)
-        
-        """
-        if 'encoding' not in args :
-            args['encoding'] = 'utf-8'
-        gfs = gridfs.GridFS(self.db)
-        gfs.put(**args)
-
-    def archive(self):
-        """
-        This function will archive the documents of the current collection into GridFS and empty the collection
-        """
-        collection = self.db[self.uid]
-        rows  = list(collection.find())
-        for row in rows :
-            if type(row['_id']) == ObjectId :
-                row['_id'] = str(row['_id'])
-        stream = Binary(json.dumps(rows,cls=IEncoder).encode())
-        collection.delete_many({})
-        now = "-".join([str(datetime.now().year),str(datetime.now().month), str(datetime.now().day)])
-        name = ".".join([self.uid,'archive',now])+".json"
-        description = " ".join([self.uid,'archive',str(len(rows))])
-        self.upload(filename=name,data=stream,description=description,content_type='application/json')
-        # gfs = GridFS(self.db)
-        # gfs.put(filename=name,description=description,data=stream,encoding='utf-8')
-        # self.write({{"filename":name,"file":stream,"description":descriptions}})
-        
-            
-        pass
-    def write(self,info,**_args):
-        """
-        This function will write to a given collection i.e add a record to a collection (no updates)
-        @param info new record in the collection to be added
-        """
-        # document  = self.db[self.uid].find()
-        #collection = self.db[self.uid]
-        # if type(info) == list :
-        #     self.db[self.uid].insert_many(info)
-        # else:
-        try:
-            if 'table' in _args or 'collection' in _args :
-                _uid = _args['table'] if 'table' in _args else _args['collection']
-            else:
-                _uid = self.uid if 'doc' not in _args else _args['doc']
-            if self._lock :
-                Mongo.lock.acquire()
-            if type(info) == list or type(info) == pd.DataFrame :
-                self.db[_uid].insert_many(info if type(info) == list else info.to_dict(orient='records'))
-            else:
-                self.db[_uid].insert_one(info)
-        finally:
-            if self._lock :
-                Mongo.lock.release()
-    def set(self,document):
-        """
-        If no identifier is provided, the function will delete the entire collection and set the new document.
-        Please use this function with great care (archive the content first before using it... for safety)
-        """
-
-        collection = self.db[self.uid]
-        if collection.count_documents({}) > 0  and '_id' in document:
-            id = document['_id']
-            del document['_id']
-            collection.find_one_and_replace({'_id':id},document)
-        else:
-            collection.delete_many({})
-            self.write(document)
-    def close(self):
-        Mongo.close(self)
-        # collecton.update_one({"_id":self.uid},document,True)
-
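
A comparable sketch for the removed transport/mongo.py module, assuming an unauthenticated mongodb instance on localhost; host, dbname and collection are placeholders. write() accepts a dict, a list of dicts or a pandas DataFrame:

    import pandas as pd
    from transport.mongo import MongoReader, MongoWriter

    _args = dict(host='localhost:27017', dbname='demo', collection='users')   # hypothetical values
    writer = MongoWriter(**_args)
    writer.write(pd.DataFrame([{'name': 'alice'}, {'name': 'bob'}]))
    writer.close()

    reader = MongoReader(**_args)
    df = reader.read(filter={'name': 'alice'})   # returns a DataFrame with the '_id' column dropped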

+ 0 - 80
transport/nextcloud.py

@@ -1,80 +0,0 @@
-"""
-We are implementing transport to and from nextcloud (just like s3)
-"""
-import os
-import sys
-from transport.common import Reader,Writer, IEncoder
-import pandas as pd
-from io import StringIO
-import json
-import nextcloud_client as nextcloud
-
-class Nextcloud :
-    def __init__(self,**_args):
-        self._delimiter = None
-        self._handler = nextcloud.Client(_args['url'])
-        _uid = _args['uid']
-        _token = _args['token']
-        self._uri = _args['folder'] if 'folder' in _args else './'
-        if self._uri.endswith('/') :
-            self._uri = self._uri[:-1]
-        self._file = None if 'file' not in _args else _args['file']
-        self._handler.login(_uid,_token)
-    def close(self):
-        try:
-            self._handler.logout()
-        except Exception as e:
-            pass
-        
-    
-class NextcloudReader(Nextcloud,Reader):
-    def __init__(self,**_args):
-        # self._file = [] if 'file' not in _args else _args['file']
-        super().__init__(**_args)
-    def read(self,**_args):
-        _filename = self._file if 'file' not in _args else _args['file']
-        #
-        # @TODO: if _filename is none, an exception should be raised
-        #
-        _uri = '/'.join([self._uri,_filename])
-        if self._handler.get_file(_uri) :
-            #
-            #
-            _info = self._handler.file_info(_uri)
-            _content = self._handler.get_file_contents(_uri).decode('utf8')
-            if _info.get_content_type() == 'text/csv' :
-                #
-                # @TODO: enable handling of csv, xls, parquet, pickles
-                _file = StringIO(_content)
-                return pd.read_csv(_file)
-            else:
-                #
-                # if it is not a structured document like csv, we will return the content as is
-                return _content
-        return None     
-class NextcloudWriter (Nextcloud,Writer):
-    """
-    This class will write data to an instance of nextcloud
-    """
-    def __init__(self,**_args):
-        super().__init__(**_args)
-    def write(self,_data,**_args):
-        """
-        This function will upload a file to a given destination 
-        :file   has the uri of the location of the file
-        """
-        _filename = self._file if 'file' not in _args else _args['file']
-        _uri = '/'.join([self._uri,_filename])
-        if type(_data) == pd.DataFrame :
-            f = StringIO()
-            _data.to_csv(f,index=False)
-            _content = f.getvalue()
-        elif type(_data) == dict :
-            _content = json.dumps(_data,cls=IEncoder)
-        else:
-            _content = str(_data)
-        self._handler.put_file_contents(_uri,_content)
-
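
Likewise for the removed transport/nextcloud.py module; the url, uid, token and folder values below are placeholders for a real nextcloud instance and app token:

    import pandas as pd
    from transport.nextcloud import NextcloudReader, NextcloudWriter

    _args = dict(url='https://cloud.example.com', uid='demo', token='app-token', folder='reports')
    writer = NextcloudWriter(**_args)
    writer.write(pd.DataFrame([{'a': 1}]), file='report.csv')   # uploaded as csv text
    writer.close()

    reader = NextcloudReader(**_args)
    df = reader.read(file='report.csv')   # text/csv content comes back as a DataFrame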

+ 0 - 47
transport/qlistener.py

@@ -1,47 +0,0 @@
-import queue
-from threading import Thread, Lock
-from transport.common import Reader,Writer
-import numpy as np
-import pandas as pd
-
-class qListener :
-    lock = Lock()
-    _queue = {'default':queue.Queue()}
-    def __init__(self,**_args):
-        self._cache     = {}
-        self._callback  = _args['callback'] if 'callback' in _args else None
-        self._id = _args['id'] if 'id' in _args else 'default'
-        if self._id not in qListener._queue :
-            qListener._queue[self._id] = queue.Queue()
-        thread = Thread(target=self._forward,daemon=True)
-        thread.start()
-    def _forward(self):
-        #
-        # Consume messages continuously; a single-shot get() would deadlock
-        # the second write() on _q.join(). task_done() is signaled only after
-        # the callback has run, so write() returns once the message is handled.
-        _q = qListener._queue[self._id]
-        while True:
-            _data = _q.get()
-            if self._callback :
-                self._callback(_data)
-            _q.task_done()
-
-    def has(self,**_args) :
-        return self._callback is not None
-        
-   
-    def close(self):
-        """
-        This will empty the queue and have it ready for another operation
-        """
-        _q = qListener._queue[self._id]
-        with _q.mutex:
-            _q.queue.clear()  
-            _q.all_tasks_done.notify_all()  
-
-    def write(self,_data,**_args):
-        _id = _args['id'] if 'id' in _args else self._id
-        
-        _q = qListener._queue[_id]
-        _q.put(_data)
-        _q.join()
-class Console (qListener):
-    def __init__(self,**_args):
-        super().__init__(callback=print)
-        
-        # self.callback = print
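
The removed transport/qlistener.py module was an in-process queue/callback shim; a sketch of its intended use (the 'demo' id and the callback are arbitrary):

    from transport.qlistener import qListener, Console

    rows = []
    q = qListener(id='demo', callback=rows.append)   # each write is forwarded to the callback
    q.write({'event': 'start'})
    q.close()

    Console().write('hello')   # same mechanism, wired to print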

+ 0 - 88
transport/session.py

@@ -1,88 +0,0 @@
-from flask import request, session
-from datetime import datetime
-import re
-from transport.common import Reader, Writer
-import json
-import requests
-from io import StringIO
-import pandas as pd
-
-
-class HttpReader(Reader):
-	"""
-	This class is designed to read data from an Http request file handler provided to us by flask
-	The file will be held in memory and processed accordingly
-	NOTE: This is inefficient and can crash a micro-instance (be careful)
-	"""
-
-	def __init__(self,**_args):
-		self._url = _args['url']
-		self._headers = None if 'headers' not in _args else _args['headers']
-					
-	# def isready(self):
-	# 	return self.file_length > 0
-	def format(self,_response):
-		_mimetype= _response.headers['Content-Type']
-		if _mimetype.startswith('text/csv'):
-			_content = _response.text
-			return pd.read_csv(StringIO(_content))
-		#
-		# @TODO: Add support for excel, JSON and other file formats that fit into a data-frame
-		#
-		
-		return _response.text
-	def read(self,**_args):
-		r = requests.get(self._url,headers = self._headers)
-		return self.format(r)
-		
-class HttpWriter(Writer):
-	"""
-	This class is designed to submit data to an endpoint (url)
-	"""
-	def __init__(self,**_args):
-		"""
-			@param	url		endpoint url
-			@param	name	name under which the payload is submitted
-			@param	method	http method (default post)
-		"""
-		self._url = _args['url']
-		self._name = _args['name']
-		self._method = 'post' if 'method' not in _args else _args['method']
-		
-		# self.session = params['queue']
-		# self.session['sql'] = []
-		# self.session['csv'] = []
-		# self.tablename = re.sub('..+$','',params['filename'])
-		# self.session['uid'] = params['uid']
-		#self.xchar = params['xchar']
-			
-		
-	def format_sql(self,row):
-		# NOTE: relies on self.tablename, which only the commented-out setup above would have set
-		values = "','".join([col.replace('"','').replace("'",'') for col in row])
-		return "".join(["INSERT INTO :table VALUES('",values,"');\n"]).replace(':table',self.tablename)		
-	def isready(self):
-		return True
-	def write(self,_data,**_args):
-		#
-		#
-		_method = self._method if 'method' not in _args else _args['method']
-		_method = _method.lower()
-		_mimetype = 'text/csv'
-		if type(_data) == dict :
-			_mimetype = 'application/json'
-			_content = _data
-		else:
-			_content = _data.to_dict(orient='records')
-		_headers = {'Content-Type':_mimetype}
-		_pointer = getattr(requests,_method)
-		
-		_pointer (self._url,json={self._name:_content},headers=_headers)
-
-
-		# label = params['label']
-		# row = params ['row']
-		
-		# if label == 'usable':
-		# 	self.session['csv'].append(self.format(row,','))
-		# 	self.session['sql'].append(self.format_sql(row))
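
Finally, a sketch for the removed transport/session.py module; the endpoint urls are placeholders. HttpReader returns a DataFrame when the endpoint serves text/csv, and HttpWriter submits its payload under the given name:

    import pandas as pd
    from transport.session import HttpReader, HttpWriter

    reader = HttpReader(url='https://example.com/data.csv')
    df = reader.read()

    writer = HttpWriter(url='https://example.com/ingest', name='payload')
    writer.write(pd.DataFrame([{'a': 1}]))   # POSTs {'payload': [...]} as json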