
support for filters in read

Steve Nyemba, 5 years ago
commit d5ba648abf
6 changed files with 68 additions and 20 deletions
  1. transport/__init__.py   +3 -2
  2. transport/common.py     +1 -1
  3. transport/disk.py       +7 -3
  4. transport/mongo.py      +4 -3
  5. transport/queue.py      +2 -2
  6. transport/s3.py         +51 -9
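
Since every reader's read method is reworked in this commit to take keyword arguments, here is a minimal caller-side sketch of the new convention; the file path and connection values are placeholders, and the host/path constructor keywords are inferred from the surrounding code rather than shown in the diff:

	import transport

	# disk: 'size' caps the number of rows; omitting it (or passing -1) reads everything
	d_reader = transport.disk.DiskReader(path='/tmp/sample.csv')
	rows = d_reader.read(size=10)

	# mongo: 'filter' is forwarded to collection.find() as a pymongo query document
	m_reader = transport.mongo.MongoReader(host='localhost:27017', db='sample', doc='users')
	cursor = m_reader.read(filter={'state': 'TN'})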

+ 3 - 2
transport/__init__.py

@@ -50,10 +50,11 @@ import sys
 if sys.version_info[0] > 2 : 
     from transport.common import Reader, Writer #, factory
     from transport import disk
-    from transport import queue as queue
+
+    from transport import s3 as s3
+    from transport import rabbitmq as queue
     from transport import couch as couch
     from transport import mongo as mongo
-    from transport import s3 as s3
 else:
     from common import Reader, Writer #, factory
     import disk
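
On Python 3 the package now exposes the RabbitMQ implementation under the old queue alias and imports s3 up front. A small sketch of how that resolves for a caller, assuming a transport/rabbitmq module exists as the new import implies:

	import sys
	import transport

	if sys.version_info[0] > 2:
		# transport.queue is now an alias for transport.rabbitmq
		QueueReader = transport.queue.QueueReader
		s3Reader    = transport.s3.s3Reader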

+ 1 - 1
transport/common.py

@@ -47,7 +47,7 @@ class Reader (IO):
 		@return object of meta data information associated with the content of the store
 		"""
 		raise Exception ("meta function needs to be implemented")
-	def read(**args):
+	def read(self,**args):
 		"""
 		"""
 		This function is intended to read the content of a store provided parameters to be used at the discretion of the subclass
 		This function is intended to read the content of a store provided parameters to be used at the discretion of the subclass
 		"""
 		"""

+ 7 - 3
transport/disk.py

@@ -1,5 +1,9 @@
 import os
-from .__init__ import Reader,Writer
+import sys
+if sys.version_info[0] > 2 : 
+    from transport.common import Reader, Writer #, factory
+else:
+	from common import Reader,Writer
 import json
 
 class DiskReader(Reader) :
@@ -18,12 +22,12 @@ class DiskReader(Reader) :
 		self.delimiter	= params['delimiter'] if 'delimiter' in params else None
 	def isready(self):
 		return os.path.exists(self.path) 
-	def read(self,size=-1):
+	def read(self,**args):
 		"""
 		"""
 		This function reads the rows from a designated location on disk
 		This function reads the rows from a designated location on disk
 		@param	size	number of rows to be read, -1 suggests all rows
 		@param	size	number of rows to be read, -1 suggests all rows
 		"""
 		"""
-
+		size = -1 if 'size' not in args else int(args['size'])
 		f = open(self.path,'rU') 
 		i = 1
 		for row in f:
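
A usage sketch of the reworked DiskReader.read; the path and delimiter keywords are inferred from the fields used above, the file path is a placeholder, and the loop assumes read yields rows as the body of the method suggests:

	from transport import disk

	reader = disk.DiskReader(path='/tmp/sample.csv', delimiter=',')
	if reader.isready():
		for row in reader.read(size=5):	# 'size' is optional; leaving it out reads every row
			print(row)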

+ 4 - 3
transport/mongo.py

@@ -39,7 +39,7 @@ class Mongo :
             self.client = MongoClient(host)
         
         self.uid    = args['doc']  #-- document identifier
-        self.dbname = args['dbname']
+        self.dbname = args['dbname'] if 'dbname' in args else args['db']
         self.db = self.client[self.dbname]
         
     def isready(self):
@@ -53,9 +53,10 @@ class MongoReader(Mongo,Reader):
     """
     """
     def __init__(self,**args):
     def __init__(self,**args):
         Mongo.__init__(self,**args)
         Mongo.__init__(self,**args)
-    def read(self,size=-1):
+    def read(self,**args):
         collection = self.db[self.uid]
-        return collection.find({})
+        _filter = args['filter'] if 'filter' in args else {}
+        return collection.find(_filter)
     def view(self,**args):
         """
         This function is designed to execute a view (map/reduce) operation
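
A usage sketch of the new filter support in MongoReader.read; the connection values are placeholders, the host keyword is assumed from the MongoClient(host) line above, and the filter uses pymongo's query-document syntax:

	from transport import mongo

	reader = mongo.MongoReader(host='localhost:27017', db='analytics', doc='events')
	everything = reader.read()	# empty filter, equivalent to find({})
	recent = reader.read(filter={'year': 2020})	# passed straight to collection.find(...)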

+ 2 - 2
transport/queue.py

@@ -183,7 +183,7 @@ class QueueReader(MessageQueue,Reader):
 		if self.size == len(self.data[qid]) or len(self.data[qid]) == self.info.method.message_count:		
 			self.close()
 
-	def read(self,size=-1):
+	def read(self,**args):
 		"""
 		"""
 		This function will read, the first message from a queue
 		This function will read, the first message from a queue
 		@TODO: 
 		@TODO: 
@@ -191,7 +191,7 @@ class QueueReader(MessageQueue,Reader):
 			Have the number of messages retrieved be specified by size (parameter)
 		"""
 		r = {}
-		self.size = size
+		self.size = -1 if 'size' not in args else int(args['size'])
 		#
 		# We enabled the reader to be able to read from several queues (sequentially for now)
 		# The qid parameter will be an array of queues the reader will be reading from
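
A usage sketch of QueueReader.read with the keyword-style size; the connection and queue names are placeholders and the constructor keywords are assumptions about the surrounding class rather than part of this diff:

	from transport import queue

	reader = queue.QueueReader(host='localhost', uid='demo-exchange', qid=['logs'])
	messages = reader.read(size=10)	# read up to 10 messages; omit size to drain the queue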

+ 51 - 9
transport/s3.py

@@ -6,6 +6,8 @@ This file is a wrapper around s3 bucket provided by AWS for reading and writing
 """
 """
 from datetime import datetime
 from datetime import datetime
 import boto
 import boto
+from boto.s3.connection import S3Connection, OrdinaryCallingFormat
+import numpy as np
 import botocore
 from smart_open import smart_open
 import sys
@@ -14,13 +16,14 @@ if sys.version_info[0] > 2 :
 else:
 	from common import Reader, Writer
 import json
-
+from io import StringIO
+import json
 
 class s3 :
 	"""
 		@TODO: Implement a search function for a file given a bucket??
 	"""
-	def __init__(self,args) :
+	def __init__(self,**args) :
 		"""
 		"""
 			This function will extract a file or set of files from s3 bucket provided
 			This function will extract a file or set of files from s3 bucket provided
 			@param access_key
 			@param access_key
@@ -29,18 +32,39 @@ class s3 :
 			@param filter		filename or filtering elements
 		"""
 		try:
-			self.s3 = boto.connect_s3(args['access_key'],args['secret_key'])
+			self.s3 = S3Connection(args['access_key'],args['secret_key'],calling_format=OrdinaryCallingFormat())			
 			self.bucket = self.s3.get_bucket(args['bucket'].strip(),validate=False) if 'bucket' in args else None
 			# self.path = args['path']
 			self.filter = args['filter'] if 'filter' in args else None
 			self.filename = args['file'] if 'file' in args else None
+			self.bucket_name = args['bucket'] if 'bucket' in args else None
 
 		except Exception as e :
 			self.s3 = None
 			self.bucket = None
 			print (e)
+	def meta(self,**args):
+		"""
+		:name name of the bucket
+		"""
+		info = self.list(**args)
+		[item.open() for item in info]
+		return [{"name":item.name,"size":item.size} for item in info]
+	def list(self,**args):
+		"""
+		This function will list the content of a bucket, the bucket must be provided by the name
+		:name	name of the bucket
+		"""
+		return list(self.s3.get_bucket(args['name']).list())
+
 
 
 	def buckets(self):
+		#
+		# This function will return all buckets, not sure why but it should be used cautiously 
+		# based on why the s3 infrastructure is used
+		#
+		return [item.name for item in self.s3.get_all_buckets()]
+
 		# def buckets(self):
 		pass
 		# """
@@ -56,8 +80,8 @@ class s3Reader(s3,Reader) :
 		- stream content	if file is Not None
 		@TODO: support read from all buckets, think about it
 	"""
-	def __init__(self,args) :
-			s3.__init__(self,args)
+	def __init__(self,**args) :
+			s3.__init__(self,**args)
 	def files(self):
 		r = []
 		try:
@@ -80,14 +104,32 @@ class s3Reader(s3,Reader) :
 						break
 				yield line
 				count += 1
-	def read(self,limit=-1) :
+	def read(self,**args) :
 		if self.filename is None :
 			# 
 		# returning the list of files because no one file was specified.
 			return self.files()
 		else:
-			return self.stream(10)
+			limit = args['size'] if 'size' in args else -1
+			return self.stream(limit)
 
 class s3Writer(s3,Writer) :
-        def __init__(self,args) :
-        	s3.__init__(self,args)
+
+	def __init__(self,**args) :
+		s3.__init__(self,**args)
+	def mkdir(self,name):
+		"""
+		This function will create a folder in a bucket
+		:name name of the folder
+		"""
+		# a zero-byte key ending in '/' stands in for a folder (boto2-style API)
+		self.bucket.new_key(name+'/').set_contents_from_string('')
+	def write(self,content):
+		file = StringIO(content.decode("utf8"))
+		# boto2-style upload of the buffered content to the configured file key
+		self.bucket.new_key(self.filename).set_contents_from_file(file)
+		
+if __name__ == '__main__' :
+	p = {'access_key':'<ACCESS_KEY>','secret_key':'<SECRET_KEY>'}	# placeholders, not real credentials
+	reader = s3Reader(**p)
+	buckets = reader.buckets()
+	print(reader.list(name = buckets[0]))
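
Finally, a sketch of the streaming path that s3Reader.read(**args) takes when a file is named; the bucket, file and credential values are placeholders:

	from transport import s3

	creds = {'access_key':'<ACCESS_KEY>','secret_key':'<SECRET_KEY>',
		 'bucket':'my-bucket','file':'logs/2020-01-01.csv'}
	reader = s3.s3Reader(**creds)
	for line in reader.read(size=10):	# streams at most 10 lines from the named file
		print(line)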