Parcourir la source

Upload files to 'edi'

steve il y a 5 ans
Parent
commit
9d53b15e4d
4 fichiers modifiés avec 333 ajouts et 0 suppressions
  1. 16 0
      edi/__init__.py
  2. 100 0
      edi/__main__.py
  3. 18 0
      edi/params.py
  4. 199 0
      edi/parser.py

+ 16 - 0
edi/__init__.py

@@ -0,0 +1,16 @@
+"""
+(c) 2019 EDI Parser Toolkit, 
+Health Information Privacy Lab, Vanderbilt University Medical Center
+
+Steve L. Nyemba <steve.l.nyemba@vanderbilt.edu>
+Khanhly Nguyen <khanhly.t.nguyen@gmail.com>
+
+
+This code is intended to process and parse healthcare x12 837 (claims) and x12 835 (remittances) into human readable JSON format.
+The claims/output can be forwarded to a NoSQL data store like couchdb and mongodb
+Usage :
+    Commandline :
+    python xreader.py --parse claims|remits --config <path>
+    Embedded    :
+
+"""

+ 100 - 0
edi/__main__.py

@@ -0,0 +1,100 @@
+"""
+(c) 2019 Claims Toolkit, 
+Health Information Privacy Lab, Vanderbilt University Medical Center
+
+Steve L. Nyemba <steve.l.nyemba@vanderbilt.edu>
+Khanhly Nguyen <khanhly.t.nguyen@gmail.com>
+
+
+This code is intended to process and parse healthcare x12 837 (claims) and x12 835 (remittances) into human readable JSON format.
+The claims/output can be forwarded to a NoSQL data store like couchdb and mongodb
+Usage :
+    Commandline :
+        python edi --scope <claims|remits> --config <path> --folder <path> --store <mongo|disk|couch> --<db|path> <id|path>
+
+        with :
+            --scope     <claims|remits>
+            --config    path of the JSON configuration of the x12 format to be parsed; the file is named after the format, i.e. 835.json or 837.json
+            --folder    location of the files (they must be decompressed)
+            --store     data store could be disk, mongodb, couchdb
+            --db|path    name of the folder to store the output or the database name
+    
+    Embedded in Code   :
+
+        import edi.parser
+        import json
+
+        filename = '/data/claim_1.x12'
+        conf = json.loads(open('config/837.json').read())
+        edi.parser.get_content(filename,conf)
+"""
+from params import SYS_ARGS
+from transport import factory
+from parser import *
+import os
+import json
+import sys
if __name__ == '__main__':
    #
    # The program was called from the command line, thus we are expecting :
    #   --config    path of the JSON configuration; its base name (837 or 835) selects the scope
    #   --folder    folder holding the x12 files to parse
    #   --store     one of mongo, couch, disk
    #   --db|path   database name or output path
    #   --limit     (optional) number of files of the folder to process
    #
    has_required = set(['store', 'config', 'folder']).issubset(SYS_ARGS) and ('db' in SYS_ARGS or 'path' in SYS_ARGS)
    TYPE = {
        'mongo': 'mongo.MongoWriter',
        'couch': 'couch.CouchWriter',
        'disk': 'disk.DiskWriter'
    }
    INFO = {
        '837': {'scope': 'claims', 'section': 'HL'},
        '835': {'scope': 'remits', 'section': 'CLP'}
    }
    #
    # The configuration file is named after the format it describes (837.json, 835.json),
    # so its base name without extension tells us which scope/section to use.
    scope = os.path.splitext(os.path.basename(str(SYS_ARGS['config'])))[0] if has_required else None
    if has_required and scope in INFO and SYS_ARGS['store'] in TYPE:
        store = SYS_ARGS['store']
        CONTEXT = INFO[scope]['scope']
        SECTION = INFO[scope]['section']
        #
        # @NOTE:
        # improve how database and data stores are handled.
        # The disk store receives no arguments here (not even --path); presumably the writer
        # falls back on its own defaults -- TODO confirm against transport.factory
        args = {}
        if store == 'couch':
            args['url'] = SYS_ARGS['url'] if 'url' in SYS_ARGS else 'http://localhost:5984'
        elif store == 'mongo':
            # NOTE(review): 27217 is not MongoDB's usual default port (27017) -- confirm it is intended
            args['host'] = SYS_ARGS['host'] if 'host' in SYS_ARGS else 'localhost:27217'
        if store in ('mongo', 'couch'):
            args['dbname'] = SYS_ARGS['db'] if 'db' in SYS_ARGS else 'claims_outcomes'
            args['doc'] = CONTEXT

        store_type = TYPE[store]
        writer = factory.instance(type=store_type, args=args)
        # logs go to the same store but under the "logs" document/collection
        logger = factory.instance(type=store_type, args=dict(args, **{"doc": "logs"}))

        with open(SYS_ARGS['config']) as handle:
            CONFIG = json.loads(handle.read())

        limit = int(SYS_ARGS['limit']) if 'limit' in SYS_ARGS else None
        for position, file in enumerate(os.listdir(SYS_ARGS['folder'])):
            if limit is not None and position == limit:
                break
            filename = os.path.join(SYS_ARGS['folder'], file)
            try:
                content, logs = get_content(filename, CONFIG, SECTION)
            except Exception as e:
                #
                # A file that can not be parsed must not stop the batch, the failure is logged instead
                # (exceptions raised without arguments have an empty args tuple)
                logs = [{"filename": filename, "msg": e.args[0] if e.args else str(e)}]
                content = None
            if content:
                writer.write(row=content)
            if logs:
                logger.write(row=logs)
    else:
        print (__doc__)

+ 18 - 0
edi/params.py

@@ -0,0 +1,18 @@
+import sys
+
def parse_args(argv):
    """
    Turn a command line into a dictionary of options.

    :argv   list of command line tokens, the first one (program name) is ignored

    Every token starting with '--' becomes a key (without the dashes). The key is given the
    next token (stripped) as value, unless that token is missing, empty or is itself a '--' flag,
    in which case the key is given the value 1. Tokens that are neither a flag nor the value
    of a flag are ignored. The returned dictionary always holds the key 'context'.
    """
    args = {'context': ''}
    count = len(argv)
    for i in range(1, count):
        token = argv[i]
        if not token.startswith('--'):
            continue
        key = token[2:]
        args[key] = 1
        if i + 1 < count:
            value = argv[i + 1].strip()
            # a following flag is an option of its own, not the value of this one
            if key and value and not value.startswith('--'):
                args[key] = value
    return args


SYS_ARGS = parse_args(sys.argv)

+ 199 - 0
edi/parser.py

@@ -0,0 +1,199 @@
+"""
+    (c) 2019 EDI-Parser 1.0
+    Vanderbilt University Medical Center, Health Information Privacy Laboratory
+    https://hiplab.mc.vanderbilt.edu/tools
+
+
+    Authors:
+        Khanhly Nguyen, 
+        Steve L. Nyemba<steve.l.nyemba@vanderbilt.edu>
+
+    License:
+        MIT, terms are available at https://opensource.org/licenses/MIT
+
+    This parser was originally written by Khanhly Nguyen for her internship and is intended to parse x12 835,837 and others provided the appropriate configuration
+    USAGE :
+        - COMMAND LINE
+        
+        - EMBEDDED
+"""
+import os
+import sys
def split(row, sep='*', prefix='HI'):
    """
    This function is designed to split an x12 row (segment) into its elements.

    :row        raw text of the row; '~' and carriage returns are removed
    :sep        separator of the elements of the row
    :prefix     rows starting with this prefix are returned as a list of lists

    For a row that does not start with the prefix, a flat list of strings is returned.
    Elements holding '>' are expanded in place: those starting with 'HC' or 'AD' contribute
    their first two components only, any other contributes all of its components.

    For a row starting with the prefix, one list is returned per element following the first
    one, each made of the prefix followed by the '>' separated components of the element.
    """
    elements = row.replace('~', '').split(sep)
    if row.startswith(prefix):
        return [[prefix] + split(item, '>') for item in elements[1:]]

    values = []
    for element in elements:
        if '>' not in element:
            values.append(element)
            continue
        parts = element.split('>')
        if element.startswith(('HC', 'AD')):
            parts = parts[:2]
        values += parts
    return [item.replace('\r', '') for item in values]
def get_config(config, row):
    """
    This function will return the meaningful parts of the configuration for a given item

    :config     dictionary of configurations, keyed by the first element of a row
    :row        a split row, either a list of strings or a list of lists of strings
                (in the latter case the first inner list is used for the lookup)
    :return     the configuration found, or an empty dictionary

    When the configuration found has an '@ref' entry, the configuration returned is the one
    '@ref' associates with the first element of the row it knows about ({} if there is none).
    When nothing is found under the first element of the row, the optional 'SIMILAR' entry of
    the configuration is used as a table of aliases towards another key of the configuration.
    """
    _row = list(row) if isinstance(row[0], str) else list(row[0])
    _info = config[_row[0]] if _row[0] in config else {}
    if '@ref' in _info:
        refs = _info['@ref']
        #
        # The row is scanned in order so the outcome does not depend on set/hash ordering
        for item in _row:
            if item in refs:
                return refs[item]
        return {}

    if not _info and 'SIMILAR' in config:
        #
        # Let's look for the key this one is declared similar to
        if _row[0] in config['SIMILAR']:
            key = config['SIMILAR'][_row[0]]
            _info = config[key] if key in config else {}
    return _info
def format_date(value):
    """
    Insert a dash after the 4th and the 6th character of the value and keep the first ten
    characters of the outcome, e.g. '20190131' becomes '2019-01-31'.
    """
    dashed = "%s-%s-%s" % (value[:4], value[4:6], value[6:])
    return dashed[:10]
def format_time(value):
    """
    Insert a colon after the 2nd character of the value and keep the first five characters
    of the outcome, e.g. '1230' becomes '12:30'.
    """
    head, tail = value[:2], value[2:]
    return (head + ":" + tail)[:5]
def format_proc(value):
    """
    Break a 'type:code' value into a dictionary with the keys procedure_type and
    procedure_code (both stripped); a value without ':' is returned untouched.
    """
    if ':' not in value:
        return value
    parts = value.split(':')
    proc_type = parts[0].strip()
    proc_code = parts[1].strip()
    return {"procedure_type": proc_type, "procedure_code": proc_code}
+
def map(row, config, version):
    """
    This function turns a split row into named values, given the configuration of the row.

    :row        a split row, either a list of strings or a list of lists of strings
    :config     configuration of the row, the following entries are used :
                    map         {name: position of the value in the row}
                    <version>   same as map; used instead of map when the version is a key
                    anchors     (optional) values of the row the positions are relative to
                    cast        (optional) {name: expression evaluating to a function applied to the value}
                    syn         (optional) {value: replacement value}
    :version    version of the file, used to pick a version specific map
    :return     a dictionary for a list of strings, a list of dictionaries for a list of lists

    Positions beyond the end of the row are skipped. A value that is cast to a dictionary is
    merged into the outcome instead of being stored under its name.
    """
    if not isinstance(row[0], str):
        #
        # we are dealing with a complex object, every item is mapped on its own
        return [map(row_item, config, version) for row_item in row]

    omap = config[version] if version in config else config['map']
    anchors = config['anchors'] if 'anchors' in config else []
    casts = config['cast'] if 'cast' in config else {}
    synonyms = config['syn'] if 'syn' in config else {}
    #
    # When the row holds an anchor, positions are counted from the (first known) anchor.
    # The anchors are scanned in order so the outcome does not depend on set/hash ordering
    offset = next((row.index(anchor) for anchor in anchors if anchor in row), 0)

    object_value = {}
    for key in omap:
        index = omap[key] + offset
        if index >= len(row):
            continue
        value = row[index]
        if key in casts and value.strip() != '':
            #
            # NOTE(security): the cast is an expression read from the configuration and handed
            # to eval, the configuration must therefore come from a trusted source
            value = eval(casts[key])(value)
        # dictionaries/lists are not hashable and can not be looked up as synonyms
        if not isinstance(value, (dict, list)) and value in synonyms:
            value = synonyms[value]
        if isinstance(value, dict):
            object_value = dict(object_value, **value)
        else:
            object_value[key] = value
    return object_value
+
def get_locations(x12_file, section='HL'):
    """
    This function returns the positions of the rows that open a section.

    :x12_file   list of rows (strings) of the file
    :section    prefix of the rows opening a section (leading/trailing blanks of a row are ignored)
    :return     list of positions within x12_file, in increasing order
    """
    #
    # The position comes from the enumeration, looking it up with list.index would return the
    # first of several identical rows and scan the list once per match
    return [position for position, line in enumerate(x12_file) if line.strip().startswith(section)]
+
+#def get_claims(filename,config,section) :
def get_content(filename, config, section=None):
    """
    This function returns the content of the EDI file parsed given the configuration specified
    :section    loop prefix (HL, CLP), when not provided config['SECTION'] is used
    :config     configuration with formatting rules, labels ...
    :filename   location of the file
    :return     claims,logs where claims is the list of parsed sections that have a claim_id and
                logs is the list of sections that could not be mapped (with the error message)
    """
    section = section if section else config['SECTION']
    with open(filename) as handle:
        x12_file = handle.read().split('\n')
    if len(x12_file) == 1:
        #
        # The file has no line breaks, the rows are then delimited by '~'
        x12_file = x12_file[0].split('~')

    locations = get_locations(x12_file, section)
    claims = []
    logs = []

    # The version is read from the last element of the second row
    VERSION = x12_file[1].split('*')[-1].replace('~', '')

    #
    # The values of the fourth row are added to every claim of the file
    row = split(x12_file[3])
    _info = get_config(config, row)
    _default_value = map(row, _info, VERSION) if _info else {}
    if not isinstance(_default_value, dict):
        # only named values can be merged into a claim
        _default_value = {}

    #
    # A section runs from its location to the next one, the last section runs to the end of the file
    bounds = locations + [len(x12_file)]
    for index in range(len(locations)):
        beg = bounds[index]
        end = bounds[index + 1]
        claim = {}
        for row in x12_file[beg:end]:
            row = split(row)
            _info = get_config(config, row)
            if not _info:
                continue
            try:
                tmp = map(row, _info, VERSION)
            except Exception as e:
                #
                # A section that can not be mapped is logged and dropped as a whole
                # (exceptions raised without arguments have an empty args tuple)
                msg = e.args[0] if e.args else str(e)
                logs.append({"version": VERSION, "filename": filename, "msg": msg, "X12": x12_file[beg:end]})
                claim = {}
                break

            if 'label' not in _info:
                # values without label belong to the claim itself
                tmp['version'] = VERSION
                claim = dict(claim, **tmp)
            else:
                label = _info['label']
                if isinstance(tmp, list):
                    claim[label] = tmp if label not in claim else claim[label] + tmp
                elif label not in claim:
                    claim[label] = [tmp]
                elif len(tmp) == 1:
                    # a single value completes the latest entry found under the label
                    claim[label][-1] = dict(claim[label][-1], **tmp)
                else:
                    claim[label].append(tmp)

        if claim and 'claim_id' in claim:
            claim = dict(claim, **_default_value)
            # the last five characters of the path are dropped (presumably the extension -- TODO confirm)
            claim['name'] = filename[:-5].split(os.sep)[-1]
            claim['index'] = index
            claims.append(claim)

    return claims, logs