|
@@ -32,10 +32,18 @@ Usage :
|
|
|
from healthcareio.params import SYS_ARGS
|
|
|
from transport import factory
|
|
|
import requests
|
|
|
+
|
|
|
+from healthcareio import analytics
|
|
|
+from healthcareio import server
|
|
|
from healthcareio.parser import get_content
|
|
|
import os
|
|
|
import json
|
|
|
import sys
|
|
|
+import numpy as np
|
|
|
+from multiprocessing import Process
|
|
|
+import time
|
|
|
+
|
|
|
+
|
|
|
PATH = os.sep.join([os.environ['HOME'],'.healthcareio'])
|
|
|
OUTPUT_FOLDER = os.sep.join([os.environ['HOME'],'healthcare-io'])
|
|
|
INFO = None
|
|
@@ -60,7 +68,8 @@ def register (**args) :
|
|
|
|
|
|
#
|
|
|
#
|
|
|
- headers = {"email":email,"client":platform.node()}
|
|
|
+ store = args['store'] if 'store' in args else 'sqlite'
|
|
|
+ headers = {"email":email,"client":platform.node(),"store":store,"db":args['db']}
|
|
|
http = requests.session()
|
|
|
r = http.post(url,headers=headers)
|
|
|
|
|
@@ -82,22 +91,6 @@ def register (**args) :
|
|
|
# Create the sqlite3 database to
|
|
|
|
|
|
|
|
|
-def analytics(**args):
|
|
|
- """
|
|
|
- This fucntion will only compute basic distributions of a given feature for a given claim
|
|
|
- @args
|
|
|
- @param x: vector of features to process
|
|
|
- @param apply: operation to be applied {dist}
|
|
|
- """
|
|
|
- if args['apply'] in ['dist','distribution'] :
|
|
|
- """
|
|
|
- This section of the code will return the distribution of a given space.
|
|
|
- It is intended to be applied on several claims/remits
|
|
|
- """
|
|
|
- x = pd.DataFrame(args['x'],columns=['x'])
|
|
|
- return x.groupby(['x']).size().to_frame().T.to_dict(orient='record')
|
|
|
-
|
|
|
-
|
|
|
def log(**args):
|
|
|
"""
|
|
|
This function will perform a log of anything provided to it
|
|
@@ -152,7 +145,39 @@ def parse(**args):
|
|
|
|
|
|
return get_content(args['filename'],CONFIG,SECTION)
|
|
|
|
|
|
+def apply(files,store_info,logger_info=None):
|
|
|
+ """
|
|
|
+    :files list of files to be processed by this thread/process
|
|
|
+ :store_info information about data-store, for now disk isn't thread safe
|
|
|
+ :logger_info information about where to store the logs
|
|
|
+ """
|
|
|
|
|
|
+ if not logger_info :
|
|
|
+ logger = factory.instance(type='disk.DiskWriter',args={'path':os.sep.join([info['out-folder'],SYS_ARGS['parse']+'.log'])})
|
|
|
+ else:
|
|
|
+ logger = factory.instance(**logger_info)
|
|
|
+
|
|
|
+ writer = factory.instance(**store_info)
|
|
|
+ for filename in files :
|
|
|
+
|
|
|
+ if filename.strip() == '':
|
|
|
+ continue
|
|
|
+ # content,logs = get_content(filename,CONFIG,CONFIG['SECTION'])
|
|
|
+ #
|
|
|
+ try:
|
|
|
+ content,logs = parse(filename = filename,type=SYS_ARGS['parse'])
|
|
|
+ if content :
|
|
|
+ writer.write(content)
|
|
|
+ if logs :
|
|
|
+ [logger.write(dict(_row,**{"parse":SYS_ARGS['parse']})) for _row in logs]
|
|
|
+ else:
|
|
|
+ logger.write({"parse":SYS_ARGS['parse'],"name":filename,"completed":True,"rows":len(content)})
|
|
|
+ except Exception as e:
|
|
|
+ logger.write({"parse":SYS_ARGS['parse'],"filename":filename,"completed":False,"rows":-1,"msg":e.args[0]})
|
|
|
+ # print ([filename,len(content)])
|
|
|
+ #
|
|
|
+ # @TODO: forward this data to the writer and log engine
|
|
|
+ #
|
|
|
def upgrade(**args):
|
|
|
"""
|
|
|
:email provide us with who you are
|
|
@@ -175,8 +200,9 @@ if __name__ == '__main__' :
|
|
|
|
|
|
email = SYS_ARGS['signup'].strip() if 'signup' in SYS_ARGS else SYS_ARGS['init']
|
|
|
url = SYS_ARGS['url'] if 'url' in SYS_ARGS else 'https://healthcareio.the-phi.com'
|
|
|
-
|
|
|
- register(email=email,url=url)
|
|
|
+ store = SYS_ARGS['store'] if 'store' in SYS_ARGS else 'sqlite'
|
|
|
+ db='healthcareio' if 'db' not in SYS_ARGS else SYS_ARGS['db']
|
|
|
+ register(email=email,url=url,store=store,db=db)
|
|
|
# else:
|
|
|
# m = """
|
|
|
# usage:
|
|
@@ -218,46 +244,95 @@ if __name__ == '__main__' :
|
|
|
# CONFIG = CONFIG[ int(SYS_ARGS['version'])]
|
|
|
# else:
|
|
|
# CONFIG = CONFIG[-1]
|
|
|
+ logger = factory.instance(type='disk.DiskWriter',args={'path':os.sep.join([info['out-folder'],SYS_ARGS['parse']+'.log'])})
|
|
|
if info['store']['type'] == 'disk.DiskWriter' :
|
|
|
info['store']['args']['path'] += (os.sep + 'healthcare-io.json')
|
|
|
elif info['store']['type'] == 'disk.SQLiteWriter' :
|
|
|
# info['store']['args']['path'] += (os.sep + 'healthcare-io.db3')
|
|
|
pass
|
|
|
+
|
|
|
+
|
|
|
if info['store']['type'] == 'disk.SQLiteWriter' :
|
|
|
info['store']['args']['table'] = SYS_ARGS['parse'].strip().lower()
|
|
|
else:
|
|
|
+ #
|
|
|
+            # If we are working with a NoSQL store, we also keep the logs in it (for performance?)
|
|
|
|
|
|
info['store']['args']['doc'] = SYS_ARGS['parse'].strip().lower()
|
|
|
+ _info = json.loads(json.dumps(info['store']))
|
|
|
+ _info['args']['doc'] = 'logs'
|
|
|
+ logger = factory.instance(**_info)
|
|
|
+
|
|
|
writer = factory.instance(**info['store'])
|
|
|
- logger = factory.instance(type='disk.DiskWriter',args={'path':os.sep.join([info['out-folder'],SYS_ARGS['parse']+'.log'])})
|
|
|
+
|
|
|
+ #
|
|
|
+ # we need to have batches ready for this in order to run some of these queries in parallel
|
|
|
+    # @TODO: Make sure this uses persistent storage (not disk — disk is not thread/process safe yet)
|
|
|
+ # - Make sure we can leverage this on n-cores later on, for now the assumption is a single core
|
|
|
+ #
|
|
|
+ BATCH_COUNT = 1 if 'batch' not in SYS_ARGS else int (SYS_ARGS['batch'])
|
|
|
+
|
|
|
#logger = factory.instance(type='mongo.MongoWriter',args={'db':'healthcareio','doc':SYS_ARGS['parse']+'_logs'})
|
|
|
# schema = info['schema']
|
|
|
|
|
|
# for key in schema :
|
|
|
# sql = schema[key]['create']
|
|
|
# writer.write(sql)
|
|
|
- for filename in files :
|
|
|
+ files = np.array_split(files,BATCH_COUNT)
|
|
|
+ procs = []
|
|
|
+ index = 0
|
|
|
+ for row in files :
|
|
|
+
|
|
|
+ row = row.tolist()
|
|
|
+ logger.write({"process":index,"parse":SYS_ARGS['parse'],"file_count":len(row)})
|
|
|
+ proc = Process(target=apply,args=(row,info['store'],_info,))
|
|
|
+ proc.start()
|
|
|
+ procs.append(proc)
|
|
|
+ index = index + 1
|
|
|
+ while len(procs) > 0 :
|
|
|
+ procs = [proc for proc in procs if proc.is_alive()]
|
|
|
+ time.sleep(2)
|
|
|
+ # for filename in files :
|
|
|
+
|
|
|
+ # if filename.strip() == '':
|
|
|
+ # continue
|
|
|
+ # # content,logs = get_content(filename,CONFIG,CONFIG['SECTION'])
|
|
|
+ # #
|
|
|
+ # try:
|
|
|
+ # content,logs = parse(filename = filename,type=SYS_ARGS['parse'])
|
|
|
+ # if content :
|
|
|
+ # writer.write(content)
|
|
|
+ # if logs :
|
|
|
+ # [logger.write(dict(_row,**{"parse":SYS_ARGS['parse']})) for _row in logs]
|
|
|
+ # else:
|
|
|
+ # logger.write({"parse":SYS_ARGS['parse'],"name":filename,"completed":True,"rows":len(content)})
|
|
|
+ # except Exception as e:
|
|
|
+ # logger.write({"parse":SYS_ARGS['parse'],"filename":filename,"completed":False,"rows":-1,"msg":e.args[0]})
|
|
|
+ # # print ([filename,len(content)])
|
|
|
+ # #
|
|
|
+ # # @TODO: forward this data to the writer and log engine
|
|
|
+ # #
|
|
|
+
|
|
|
|
|
|
- if filename.strip() == '':
|
|
|
- continue
|
|
|
- # content,logs = get_content(filename,CONFIG,CONFIG['SECTION'])
|
|
|
- #
|
|
|
- try:
|
|
|
- content,logs = parse(filename = filename,type=SYS_ARGS['parse'])
|
|
|
- if content :
|
|
|
- writer.write(content)
|
|
|
- if logs :
|
|
|
- [logger.write(_row) for _row in logs]
|
|
|
- else:
|
|
|
- logger.write({"name":filename,"completed":True,"rows":len(content)})
|
|
|
- except Exception as e:
|
|
|
- logger.write({"filename":filename,"completed":False,"rows":-1,"msg":e.args[0]})
|
|
|
- # print ([filename,len(content)])
|
|
|
- #
|
|
|
- # @TODO: forward this data to the writer and log engine
|
|
|
- #
|
|
|
|
|
|
pass
|
|
|
+ elif 'analytics' in SYS_ARGS :
|
|
|
+ PORT = int(SYS_ARGS['port']) if 'port' in SYS_ARGS else 5500
|
|
|
+ DEBUG= int(SYS_ARGS['debug']) if 'debug' in SYS_ARGS else 0
|
|
|
+ SYS_ARGS['context'] = SYS_ARGS['context'] if 'context' in SYS_ARGS else ''
|
|
|
+ #
|
|
|
+ #
|
|
|
+
|
|
|
+ # PATH= SYS_ARGS['config'] if 'config' in SYS_ARGS else os.sep.join([os.environ['HOME'],'.healthcareio','config.json'])
|
|
|
+
|
|
|
+ e = analytics.engine(os.sep.join([PATH,'config.json'])) #--@TODO: make the configuration file globally accessible
|
|
|
+ e.apply(type='claims',serialize=True)
|
|
|
+ SYS_ARGS['engine'] = e
|
|
|
+
|
|
|
+ pointer = lambda : server.app.run(host='0.0.0.0',port=PORT,debug=DEBUG,threaded=False)
|
|
|
+ pthread = Process(target=pointer,args=())
|
|
|
+ pthread.start()
|
|
|
+
|
|
|
elif 'export' in SYS_ARGS:
|
|
|
#
|
|
|
# this function is designed to export the data to csv
|
|
@@ -267,7 +342,17 @@ if __name__ == '__main__' :
|
|
|
if set([format]) not in ['xls','csv'] :
|
|
|
format = 'csv'
|
|
|
|
|
|
-
|
|
|
+ else:
|
|
|
+ msg = """
|
|
|
+ CLI Usage
|
|
|
+ healthcare-io.py --register <email> --store <sqlite|mongo>
|
|
|
+ healthcare-io.py --parse claims --folder <path> [--batch <value>]
|
|
|
+ healthcare-io.py --parse remits --folder <path> [--batch <value>]
|
|
|
+ parameters :
|
|
|
+ --<[signup|init]> signup or get a configuration file from a parsing server
|
|
|
+ --store data store mongo or sqlite
|
|
|
+ """
|
|
|
+ print(msg)
|
|
|
pass
|
|
|
# """
|
|
|
# The program was called from the command line thus we are expecting
|