Browse Source

optimizations mongodb

Steve Nyemba 3 years ago
parent
commit
e5fadc64a0
2 changed files with 21 additions and 11 deletions
  1. 7 7
      bin/transport
  2. 14 4
      transport/mongo.py

+ 7 - 7
bin/transport

@@ -63,8 +63,8 @@ class Post(Process):
 		else:
 			self.PROVIDER = args['target']['provider']
 			args['target']['context'] = 'write'
-			
-			self.writer = transport.instance(**args['target'])
+			self.store = args['target']
+			# self.writer = transport.instance(**args['target'])
 		#
 		# If the table doesn't exists maybe create it ?
 		#
@@ -86,9 +86,9 @@ class Post(Process):
 			else:
 				value = ''
 			_info[name] = _info[name].fillna(value)
-		
-		self.writer.write(_info)
-		self.writer.close()
+		writer = transport.factory.instance(**self.store)
+		writer.write(_info)
+		writer.close()
 
 		
 class ETL (Process):
@@ -139,11 +139,11 @@ class ETL (Process):
 			#
 			# @TODO: locks
 			for i in np.arange(self.JOB_COUNT) :
-				print ()
-				print (i)
 				_id = 'segment # '.join([str(i),' ',self.name])
 				indexes = rows[i]
 				segment = idf.loc[indexes,:].copy() #.to_dict(orient='records')
+				if segment.shape[0] == 0 :
+					continue
 				proc = Post(target = self._oargs,rows = segment,name=_id)
 				self.jobs.append(proc)
 				proc.start()

+ 14 - 4
transport/mongo.py

@@ -20,7 +20,9 @@ else:
 	from common import Reader, Writer
 import json
 import re
+from multiprocessing import Lock, RLock
 class Mongo :
+    lock = RLock()
     """
     Basic mongodb functions are captured here
     """
@@ -44,6 +46,7 @@ class Mongo :
         self.uid    = args['doc']  #-- document identifier
         self.dbname = args['dbname'] if 'dbname' in args else args['db']
         self.db = self.client[self.dbname]
+        self._lock = False if 'lock' not in args else args['lock']
         
     def isready(self):
         p = self.dbname in self.client.list_database_names() 
@@ -144,10 +147,17 @@ class MongoWriter(Mongo,Writer):
         # if type(info) == list :
         #     self.db[self.uid].insert_many(info)
         # else:
-        if type(info) == list or type(info) == pd.DataFrame :
-            self.db[self.uid].insert_many(info if type(info) == list else info.to_dict(orient='records'))
-        else:
-            self.db[self.uid].insert_one(info)
+        try:
+
+            if self._lock :
+                Mongo.lock.acquire()
+            if type(info) == list or type(info) == pd.DataFrame :
+                self.db[self.uid].insert_many(info if type(info) == list else info.to_dict(orient='records'))
+            else:
+                self.db[self.uid].insert_one(info)
+        finally:
+            if self._lock :
+                Mongo.lock.release()
     def set(self,document):
         """
         if no identifier is provided the function will delete the entire collection and set the new document.