import copy
from . import util
import transport
import numpy as np
import time
import pandas as pd
from multiprocessing import Process
import json
from healthcareio.logger import X12Logger

def build (**_args):
    """
    This function builds the default table templates for a given x12 file type (perhaps not needed)
    :plugins loaded plugins
    :x12 837|835 file types
    """
    _plugins = _args['plugins']
    _x12 = _args['x12']
    _template = util.template(plugins=_plugins)[_x12]
    _primaryKey = util.getPrimaryKey(plugins=_plugins,x12=_x12)
    _tables = []
    _main = {}
    for _name in _template :
        _item = _template[_name] #copy.deepcopy(_template[_name])
        if _primaryKey not in _item and type(_item) == dict:
            #
            # Child table: make sure it carries the primary-key column
            _item[_primaryKey] = ''
            _tables.append({_name:_item})
        else:
            #
            # Anything else is treated as an attribute of the main claims/remits table
            _main[_name] = ''
    _name = getContext(_x12)
    _tables += [{_name:_main}]
    _template[_name] = _main
    return _template #_tables
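#
# A minimal sketch of what build() returns, assuming a hypothetical 837
# template with a 'procedures' child table and a primary key 'claim_id':
#
#   build(plugins=_plugins,x12='837')
#   => {'procedures': {'code': '', 'claim_id': ''}, ..., 'claims': {'status': '', ...}}
#
# i.e. child tables gain the primary-key column and the scalar attributes
# are grouped under the main 'claims'/'remits' key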

def getContext(_x12) :
    return 'claims' if _x12 == '837' else 'remits'

def format(**_args) :
    """
    Turn parsed claims/remits into a relational model i.e a dict of table-name -> rows
    :rows rows to be formatted i.e parsed claims or remits
    :primary_key primary_key field name
    :x12 file format
    """
    # _name = _args['table']
    _rows = _args['rows']
    _primary_key = _args['primary_key']
    _x12 = _args['x12']
    _mainTableName = getContext(_x12)
    _tables = {_mainTableName:[]}
    for _claim in _rows :
        #
        # Turn the claim into a relational model ...
        #
        _main = {}
        _pkvalue = None
        if _primary_key in _claim :
            _pkvalue = _claim[_primary_key]
        for _attrName in _claim :
            _item = _claim[_attrName]
            _item = update(_item,_primary_key,_pkvalue)
            #
            # We have a casting problem between relational data-stores and JSON objects,
            # so stringified objects/arrays are parsed back into their native types
            #
            if type(_item) == str and (_item.startswith("{") or _item.startswith("[")) :
                try:
                    _item = json.loads(_item)
                except Exception as ee :
                    # print (ee)
                    pass
            if _attrName not in _tables and type(_item) in [dict,list]:
                _tables[_attrName] = []
            if type(_item) in [dict,list] :
                _tables[_attrName] += _item if type(_item) == list else [_item]
            else:
                #
                # This section suggests we found a main table attribute
                _main[_attrName] = _item
        _tables[_mainTableName].append(_main)
    return _tables
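#
# A minimal sketch of format() in action, assuming a hypothetical primary
# key 'claim_id' and a single parsed 837 claim:
#
#   format(rows=[{'claim_id':'X1','status':'P','procedures':[{'code':'99213'}]}],
#          primary_key='claim_id',x12='837')
#   => {'claims': [{'claim_id': 'X1', 'status': 'P'}],
#       'procedures': [{'code': '99213', '_index': 0, 'claim_id': 'X1'}]}
#
# Scalar attributes land in the main table, dict/list attributes become
# child tables keyed back to their claim via the primary key (see update)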

def update (_item,key,value):
    if type(_item) not in [dict,list] :
        return _item
    if type(_item) == dict :
        _item[key] = value
    else:
        #
        # List: go through every item and update accordingly
        _index = 0
        for _row in _item :
            if type(_row) == dict :
                _row['_index'] = _index
                _row[key] = value
            _index += 1
    return _item
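#
# e.g. update({'code':'99213'},'claim_id','X1') => {'code':'99213','claim_id':'X1'}
#      update([{'code':'A'},{'code':'B'}],'claim_id','X1') tags every row
#      with the key and its position i.e '_index' 0,1,...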

def init(**_args):
    """
    This function will kick off the export process given the claims/remits and the loaded plugins (not sure why)
    It requires the data it is pulling to be consistently formatted (otherwise nothing can be done)
    :plugins loaded plugins
    :store data store information i.e {source,target} specifications for data-transport
    :x12 file type i.e 837|835
    """
    _file_type = _args['x12']
    _plugins = _args['plugins']
    _store = _args['store']
    _default = build(plugins=_plugins,x12=_file_type)
    _logger = X12Logger(store = _store['source'])
    _df = read(store = _store['source'],x12=_file_type)
    #
    # @LOG :
    if _logger :
        _logger.log(module='init',action='export-init',data={'rows':_df.shape[0],'attributes':list(_df.columns)})
    _pkey = util.getPrimaryKey(plugins = _plugins, x12=_file_type)
    SEGMENTS = 4 # arbitrary choice
    #
    # e.g. 10 rows with SEGMENTS=4 yields index blocks of sizes 3,3,2,2 ;
    # each block is formatted and written by its own Process
    _indexes = np.array_split(np.arange(_df.shape[0]),SEGMENTS)
    jobs = []
    _tables = []
    for _ii in _indexes :
        try:
            _data = format(rows= _df.iloc[_ii].to_dict(orient='records'),x12=_file_type,primary_key=_pkey)
            _thread = Process(target=post,args=({'store':_store['target'],'data':_data,'default':_default,'x12':_file_type},))
            _thread.start()
            jobs.append(_thread)
            _tables = list(_data.keys())
        except Exception as e:
            #
            # @TODO: log the failed segment here
            print (e)
            pass
    #
    # @LOG :
    if _logger :
        _logger.log(module='init',action='export-wait',data={'jobs':len(jobs),'tables':_tables})
    if jobs :
        # jobs[0].start()
        # jobs[0].join()
        while jobs :
            jobs = [thread for thread in jobs if thread.is_alive()]
            time.sleep(1)
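#
# A minimal usage sketch (hypothetical store specification; the exact keys
# expected by transport.factory.instance depend on the providers used):
#
#   _store = {'source': {'provider':'mongodb', ...}, 'target': {'provider':'postgresql', ...}}
#   init(plugins=_plugins, store=_store, x12='837')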

def read (**_args):
    _store = copy.copy(_args['store'])
    _x12 = _args['x12']
    _store['table'] = getContext(_x12) #'claims' if _x12 == '837' else 'remits'
    _store['context'] = 'read'
    reader = transport.factory.instance(**_store)
    #
    # @TODO: reading should support streaming (for scalability)
    _df = reader.read()
    return _df

def post(_args):
    _data = _args['data']
    _store = _args['store']
    _store['context'] = 'write'
    _default = _args['default']
    _prefix = 'clm_' if _args['x12'] == '837' else 'rem_'
    # if 'claims' in _data or 'remits' in _data :
    #     _key = 'claims' if 'claims' in _data else 'remits'
    #     _data = _data[_key]
    for _name in _data :
        _tablename = _prefix+_name
        _store['table'] = _tablename if _name not in ['remits','claims'] else _name
        _store['context']='write'
        _store['lock'] = True
        writer = transport.factory.instance(**_store)
        if len(_data[_name]) == 0 and _name in _default and not writer.has(table=_tablename):
            #
            # No rows for this child table: seed it with the default template row
            _rows = [_default[_name]]
        else:
            _rows = _data[_name]
        writer.write(pd.DataFrame(_rows).fillna(''))
        if hasattr(writer,'close') :
            writer.close()
    #
    # @TODO: Have a logger here to log what's going on ...
    # _xwriter = transport.factory.instance(**_store)
    # _xwriter.write(_df)
    # _info = format()
    pass
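#
# e.g. for x12='837' a 'procedures' child table is written to 'clm_procedures'
# while the main 'claims' table keeps its name ; empty child tables are
# seeded with the default template row so they still get created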