Browse Source

bug fixes and optimizations

Steve Nyemba 3 years ago
parent
commit
38e1bce6c2
3 changed files with 46 additions and 8 deletions
  1. 36 1
      README.md
  2. 1 1
      setup.py
  3. 9 6
      transport/sql.py

+ 36 - 1
README.md

@@ -35,9 +35,44 @@ Within the virtual environment perform the following :
 
     pip install git+https://dev.the-phi.com/git/steve/data-transport.git
 
+Once installed **data-transport** can be used as a library in code or a command line interface (CLI)
 
+## Data Transport as a Library (in code)
+---
+
+The data-transport can be used within code as a library 
+* Read/Write against [mongodb](https://github.com/lnyemba/data-transport/wiki/mongodb)
+* Read/Write against traditional [RDBMS](https://github.com/lnyemba/data-transport/wiki/rdbms)
+* Read/Write against [bigquery](https://github.com/lnyemba/data-transport/wiki/bigquery)
+
+The read/write functions make data-transport a great candidate for **data-science**; **data-engineering** or all things pertaining to data. It enables operations across multiple data-stores(relational or not)
+
+## Command Line Interface (CLI)
+---
+The CLI program is called **transport** and it requires a configuration file
+
+```
+[
+    {
+    "id":"logs",
+    "source":{
+        "provider":"postgresql","context":"read","database":"mydb",
+        "cmd":{"sql":"SELECT * FROM logs limit 10"}
+        },
+    "target":{
+        "provider":"bigquery","private_key":"/bgqdrive/account/bq-service-account-key.json",
+        "dataset":"mydataset"
+        }    
+    }
+]
+```
 
-## In code (Embedded)
+Assuming the above content is stored in a file called **etl-config.json**, we would perform the following in a terminal window:
+
+```
+[steve@data-transport]$ transport --config ./etl-config.json [--index <value>]
+```
 
 **Reading/Writing Mongodb**
 

+ 1 - 1
setup.py

@@ -13,7 +13,7 @@ args    = {
     "license":"MIT",
     "packages":["transport"]}
 args["keywords"]=['mongodb','couchdb','rabbitmq','file','read','write','s3','sqlite']
-args["install_requires"] = ['pymongo','pandas','numpy','cloudant','pika','nzpy','boto3','boto','pyarrow','google-cloud-bigquery','google-cloud-bigquery-storage','flask-session','smart_open','botocore','psycopg2-binary','mysql-connector-python']
+args["install_requires"] = ['pymongo','sqlalchemy','pandas','numpy','cloudant','pika','nzpy','boto3','boto','pyarrow','google-cloud-bigquery','google-cloud-bigquery-storage','flask-session','smart_open','botocore','psycopg2-binary','mysql-connector-python']
 args["url"] =   "https://healthcareio.the-phi.com/git/code/transport.git"
 args['scripts'] = ['bin/transport']
 if sys.version_info[0] == 2 :

+ 9 - 6
transport/sql.py

@@ -125,9 +125,9 @@ class SQLRW :
         _out = None
         try:
             if "select" in _sql.lower() :
-                cursor.close()
-                _conn = self._engine.connect() if self._engine else self.conn
-                return pd.read_sql(_sql,_conn)
+                
+                # _conn = self._engine if self._engine else self.conn
+                return pd.read_sql(_sql,self.conn)
             else:
                 # Executing a command i.e no expected return values ...
                 cursor.execute(_sql)
@@ -151,7 +151,8 @@ class SQLReader(SQLRW,Reader) :
         if 'sql' in _args :            
             _sql = (_args['sql'])
         else:
-            _sql = "SELECT :fields FROM "+self.table
+            table = self.table if self.table is not None else _args['table']
+            _sql = "SELECT :fields FROM "+self._tablename(table)
             if 'filter' in _args :
                 _sql = _sql +" WHERE "+_args['filter']
             _fields = '*' if not self.fields else ",".join(self.fields) 
@@ -220,7 +221,7 @@ class SQLWriter(SQLRW,Writer):
             # cursor.close()
             self.conn.commit()
             pass
-    def write(self,info):
+    def write(self,info,**_args):
         """
         :param info writes a list of data to a given set of fields
         """
@@ -324,7 +325,8 @@ class BQReader(BigQuery,Reader) :
     def __init__(self,**_args):
         
         super().__init__(**_args)    
-
+    def apply(self,sql):
+        self.read(sql=sql)
         pass
     def read(self,**_args):
         SQL = None
@@ -359,6 +361,7 @@ class BQWriter(BigQuery,Writer):
         try:
             if self.parallel or 'lock' in _args :
                 BQWriter.lock.acquire()
+            _args['table'] = self.table if 'table' not in _args else _args['table']
             self._write(_info,**_args)
         finally:
             if self.parallel: