code
/
parser
spogulis no https://hiplab.mc.vanderbilt.edu/git/hiplab/parser


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170
							"""
This class refactors the default parsing class (a better, streamlined implementation).
The class relies on the new plug-and-play architectural style to perform parsing.
"""
from multiprocessing import Process, RLock
import os
import json
# from  healthcareio.x12.util
from healthcareio import x12
from healthcareio.x12.util import file, document
import numpy as np
import transport
import copy
# from healthcareio.x12.util import file as File, document as Document
from datetime import datetime
from healthcareio.logger import X12Logger
import time
import pandas as pd

from transport import providers

class BasicParser (Process) :
    """
    Worker process that parses a list of x12 files into documents.
    Every file is read, split into a header and raw claims, and every claim is turned into
    a document using the plugins; the documents of a file are then handed over to post(),
    which does nothing here and is meant to be overridden by sub-classes.
    """
    def __init__(self,**_args):
        """
        :plugins    mapping of plugins, each value is itself a mapping (their sizes are logged)
        :parents    parent information handed over to the document builder
        :files      list of files to parse (entries that are not strings are reported as 'In-Memory')
        :store      storage configuration, used by the logger
        """
        super().__init__()
        self._plugins   = _args['plugins']
        self._parents   = _args['parents']
        self._files     = _args['files']
        #
        # The logger works on a copy of the store flagged with lock=True, the caller's dictionary is left untouched
        self._store     = dict(_args['store'],**{'lock':True})
        self._template = x12.util.template(plugins=self._plugins)
        self._logger = X12Logger(store = self._store)
        if self._logger :
            _info = { key:len(self._plugins[key].keys())for key in self._plugins}
            _data = {'plugins':_info,'files': len(self._files),'model': self._template}
            self._logger.log(module='BasicParser',action='init',data=_data)

    def log (self,**_args):
        """
        This function forwards the arguments to the logger (if there is one), the arguments used in this class are
            module,action,data
        """
        if self._logger :
            self._logger.log(**_args)

    def _report_error(self,_error,**_data):
        """
        Best-effort error reporting: the error is printed (as it always was) and also sent to the logger.
        A failure of the logger itself is printed and never raised, so that parsing can carry on
        :_error     exception that was caught
        :_data      additional context stored along with the error
        """
        print (_error)
        try:
            _data = dict(_data,**{'type':type(_error).__name__,'error':str(_error)})
            self.log(module='parse',action='error',data=_data)
        except Exception as _failure:
            print (_failure)

    def apply(self,**_args):
        """
        This function applies the plugins to every row of a block of content and returns the resulting document.
        If a row fails, the error is reported and the document built thus far is returned
        :content    raw claim i.e CLP/CLM Loops and related content
        :x12        file type 837|835
        :document   document template with attributes pre-populated
        """
        _content    = _args['content']
        _filetype   = _args['x12']
        _doc        = _args['document'] #{}

        _documentHandler = x12.util.document.Builder(plugins = self._plugins,parents=self._parents, logger=self._logger)
        try:
            for _row in _content :
                _data,_meta = _documentHandler.bind(row=_row,x12=_filetype)
                #
                # Rows for which nothing could be bound are skipped
                if _data and _meta :
                    _doc = _documentHandler.build(data=_data,document=_doc,meta=_meta,row=_row)
        except Exception as e:
            #
            # The row is deliberately left out of the log record, only the error and file type are kept
            self._report_error(e,step='apply',x12=_filetype)
        return _doc

    def run(self):
        """
        This function parses every file of the batch: the first block of a file is the header (common to all claims),
        every other block is a raw claim. The documents of a file are submitted to post() once the file is processed.
        An error on a file is reported and the next file is processed
        """
        _handleContent = file.Content() #x12.util.file.Content()
        _template = self._template #x12.util.template(plugins=self._plugins)

        #
        # @TODO: starting initializing parsing jobs :
        #   - number of files, plugins meta data

        for _absolute_path in self._files :
            #
            # Printable name of the file, needed by the logs (success or failure)
            _location = _absolute_path if isinstance(_absolute_path,str) else 'In-Memory'
            try:
                _content = _handleContent.read(filename=_absolute_path)
                _content,_filetype = _handleContent.split(_content)

                #
                # The first row is the header (it will be common to all claims)
                _header = copy.deepcopy(_template[_filetype])
                _header = self.apply(content=_content[0],x12=_filetype, document=_header)
                _docs = []
                _ids = []
                for _rawclaim in _content[1:] :
                    _document = copy.deepcopy(_header)
                    if 'claim_id' in _document :
                        #
                        # @TODO: Have a way to get the attribute for CLP or CLM
                        # NOTE(review): the identifier is read before the claim is parsed i.e it is the value found in the header, confirm this is intended
                        _ids.append(_document['claim_id'])
                    if isinstance(_absolute_path,str):
                        _document['filename'] = _absolute_path
                    _doc = self.apply(content=_rawclaim,x12=_filetype, document=_document)
                    if _doc :
                        _docs.append(_doc)
                #
                # LOG: information about the file that has just been processed.
                _data = {'filename':_location, 'available':len(_content[1:]),'x12':_filetype,'parsed':len(_docs)}
                self.log(module='parse',action='parse',data=_data)
                self.log(module='parse',action='file-count', data={'file_name':_location,'file_type':_filetype,'claims':_ids, 'claim_count':len(_ids)})
                #
                # Let us submit the batch we have thus far
                #
                self.post(documents=_docs,x12=_filetype,filename=_location)

            except Exception as e:
                #
                # LOG: We have the filename, the file is skipped and the next one gets processed
                #
                self._report_error(e,step='run',filename=_location)

    def post(self,**_args):
        """
        This function is called with the documents of a file (documents, x12, filename), it does nothing here.
        Sub-classes override it to do something with the documents
        """
        pass
class X12Parser(BasicParser):
    """
    Parser that writes the documents of every parsed file to the configured store
    """
    def __init__(self,**_args):
        """
        :store      storage configuration, the other arguments are those of BasicParser
        """
        super().__init__(**_args)
        #
        # The store is kept as provided by the caller (the parent class sets a copy flagged with 'lock')
        self._store = _args['store']
    def post(self,**_args):
        """
        Writing the files to a persistent storage in JSON format (hopefully)
        :documents  list of documents to write, nothing is written if it is empty
        :x12        file type, '837' or 'claims' are written to the table claims, anything else to remits
        :filename   name of the file the documents were parsed from (for logging)
        """

        _documents = _args['documents']
        _count = len(_documents) if _documents else 0
        if _documents :
            #
            # We work on a copy so the table/context set here never leak into the configuration of the parser
            _store = copy.deepcopy(self._store)
            TABLE = 'claims' if _args['x12'] in ['837','claims'] else 'remits'
            _store['table'] = TABLE
            _store['context'] = 'write'
            _writer = transport.factory.instance(**_store)
            try:
                _writer.write(_documents,table=TABLE)
            finally:
                #
                # Not every writer has a close function, those that do are closed even if the write failed
                _close = getattr(_writer,'close',None)
                if callable(_close) :
                    _close()
        #
        # LOG: report what was written
        _data = {'x12':_args['x12'], 'documents':_count,'filename':_args['filename']}
        self.log(module='parse',action='write',data=_data)