123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466 |
- """
- (c) 2019 Healthcare/IO 1.0
- Vanderbilt University Medical Center, Health Information Privacy Laboratory
- https://hiplab.mc.vanderbilt.edu/healthcareio
- Authors:
- Khanhly Nguyen,
- Steve L. Nyemba<steve.l.nyemba@vanderbilt.edu>
- License:
- MIT, terms are available at https://opensource.org/licenses/MIT
- This parser was originally written by Khanhly Nguyen during her internship and is intended to parse x12 835, 837 and others, provided the appropriate configuration is supplied.
- USAGE :
- - COMMAND LINE
-
- - EMBEDDED
- """
- import hashlib
- import json
- import os
- import sys
- from itertools import islice
- from multiprocessing import Process
- import transport
class void(object):
    """Empty attribute container; callers attach arbitrary attributes to its instances."""
class Formatters :
    """
    Helpers that split raw x12 rows into elements and format individual
    elements (dates, procedures, diagnoses, place of service, ...).

    The methods are also exposed through aliases (self.get.*, self.parse.*,
    self.format_*, self.sv2_parse, self.sv3_parse) so they can be looked up by
    name, e.g. via getattr from a configuration entry.
    """
    def __init__(self):
        # self.config = config
        self.get = void()
        self.get.config = self.get_config

        self.parse = void()
        self.parse.sv3 = self.sv3
        self.parse.sv2 = self.sv2
        self.sv2_parse = self.sv2
        self.sv3_parse = self.sv3
        self.format_proc = self.procedure
        self.format_diag = self.diagnosis
        self.parse.procedure = self.procedure
        self.parse.diagnosis = self.diagnosis
        self.parse.date = self.date
        self.format_date = self.date
        self.format_pos = self.pos
        self.format_time = self.time

    def split(self,row,sep='*',prefix='HI') :
        """
        Split an x12 row into its elements.

        :row    raw row; '~', newline and carriage-return characters are dropped
        :sep    element separator
        :prefix rows starting with this prefix are returned as a list of lists,
                one [prefix, ...] entry per element following the first one,
                each element being split on '>'
        :return a flat list of strings, or a list of lists for prefixed rows
        """
        cleaned = row.replace('~','')
        if row.startswith(prefix) :
            #
            # every element after the segment name becomes its own sub-row
            return [ [prefix] + self.split(item,'>') for item in cleaned.split(sep)[1:] ]

        keep_composite = row.startswith('CLM')
        value = []
        for row_value in cleaned.split(sep) :
            if '>' not in row_value :
                value.append(row_value.replace('\n',''))
            elif row_value.startswith('HC') or row_value.startswith('AD'):
                #
                # only the first two parts of an HC/AD composite are kept
                value += row_value.split('>')[:2]
            elif keep_composite :
                #
                # composites found on CLM rows are left untouched
                value.append(row_value)
            else:
                value += row_value.split('>')
        return [xchar.replace('\r','') for xchar in value]

    def get_config(self,config,row):
        """
        Return the meaningful part of the configuration for a given row.

        :config dictionary keyed by segment name; an entry may hold an '@ref'
                dictionary keyed by qualifier, and config may hold a 'SIMILAR'
                dictionary that maps a segment name onto another entry
        :row    split row (list of strings) or list of split rows, in which
                case the first one is used
        :return the matching configuration, or {} if there is none
        """
        _row = list(row) if isinstance(row[0],str) else list(row[0])

        _info = config[_row[0]] if _row[0] in config else {}
        if '@ref' in _info:
            #
            # the entry is selected by whichever element of the row is a key of '@ref'
            key = list(set(_row) & set(_info['@ref'].keys()))
            return _info['@ref'][key[0]] if key else {}

        if not _info and 'SIMILAR' in config and _row[0] in config['SIMILAR']:
            _info = config[config['SIMILAR'][_row[0]]]

        return _info

    def hash(self,value):
        """
        Return the md5 hex digest of str(value) followed by the content of the
        HEALTHCAREIO_SALT environment variable (empty when it is not set).
        """
        salt = os.environ['HEALTHCAREIO_SALT'] if 'HEALTHCAREIO_SALT' in os.environ else ''
        _value = str(value)+ salt
        if sys.version_info[0] > 2 :
            return hashlib.md5(_value.encode('utf-8')).hexdigest()
        else:
            return hashlib.md5(_value).hexdigest()

    def suppress (self,value):
        """Return the constant 'N/A' regardless of the value provided."""
        return 'N/A'

    def date(self,value):
        """
        Format a date as YYYY-MM-DD.

        :value  8 characters (YYYYMMDD) or 6 characters (YYMMDD, prefixed with
                '20'); for longer values or values holding a '-', only the
                part before the first '-' is considered
        :return the formatted date
        :raises ValueError if the value is neither 6 nor 8 characters long
        """
        if len(value) > 8 or '-' in value:
            value = value.split('-')[0]

        if len(value) == 8 :
            year,month,day = value[:4],value[4:6],value[6:]
        elif len(value) == 6 :
            year,month,day = '20' + value[:2],value[2:4],value[4:]
        else:
            #
            # We have a date formatting issue, this used to surface as an unbound variable error
            raise ValueError("Unable to format date '%s'" % value)
        return "-".join([year,month,day])

    def time(self,value):
        """Placeholder, no time formatting is implemented (returns None)."""
        pass

    def sv3(self,value):
        """
        Format a split SV3 row into a dictionary with type, code and amount.

        :value  split row; the type/code are either held as 'type>code' in
                element 1 (amount in element 2) or in elements 1 and 2
                (amount in element 3)
        """
        if '>' in value [1]:
            terms = value[1].split('>')
            return {'type':terms[0],'code':terms[1],"amount":float(value[2])}
        else:
            return {"code":value[2],"type":value[1],"amount":float(value[3])}

    def sv2(self,value):
        """
        Format a composite 'type>code[>modifier[>modifier type]]' (or ':'
        separated) into a dictionary; a value without separator is returned as is.
        """
        #
        # @TODO: Sometimes there's a suffix (need to inventory all the variations)
        #
        if '>' not in value and ':' not in value:
            return value

        xchar = '>' if '>' in value else ':'
        _values = value.split(xchar)
        _value = {"code":_values[1],"type":_values[0]}
        if len(_values) > 2 :
            modifier = {"code":_values[2]}
            if len(_values) > 3 :
                modifier['type'] = _values[3]
            _value['modifier'] = modifier
        return _value

    def procedure(self,value):
        """
        Format 'type:code' (or 'type<code') into {"type":...,"code":...};
        a value holding neither separator is returned as a string.
        """
        for xchar in [':','<'] :
            if xchar in value :
                terms = value.split(xchar)
                return {"type":terms[0].strip(),"code":terms[1].strip()}
        return str(value)

    def diagnosis(self,value):
        """
        Format the rows returned by split for a prefixed (HI) row.

        :value  list of [prefix, type, code, ...] lists
        :return list of {"code":...,"type":...}; items too short to hold a code are skipped
        """
        return [ {"code":item[2], "type":item[1]} for item in value if len(item) > 2]

    def pos(self,value):
        """
        formatting place of service information within a segment (REF)

        :value  'code>indicator>frequency' (or ':' separated); when the value
                does not hold exactly 3 parts only the code is set
        """
        xchar = '>' if '>' in value else ':'
        x = value.split(xchar)
        if len(x) == 3 :
            return {"code":x[0],"indicator":x[1],"frequency":x[2]}
        return {"code":x[0],"indicator":None,"frequency":None}
class Parser (Process):
    """
    Process that parses x12 files (837 claims, 835 remits) according to a JSON
    configuration and hands the result over to the configured store.

    The configuration file must hold a 'parser' entry (formatting rules keyed
    by '837'/'835') and a 'store' entry (arguments of the transport writer).
    """
    def __init__(self,path):
        """
        :path   location of the JSON configuration file
        """
        Process.__init__(self)
        self.utils = Formatters()
        self.get = void()
        self.get.value = self.get_map
        self.get.default_value = self.get_default_value
        #
        # the handle is closed as soon as the configuration is loaded
        with open(path) as handle :
            _config = json.load(handle)

        self.config = _config['parser']
        self.store = _config['store']

        self.files = []
        self.set = void()
        self.set.files = self.set_files

    def set_files(self,files):
        """Set the list of files that will be processed by run."""
        self.files = files

    def get_map(self,row,config,version=None):
        """
        Build an object out of a split row given a configuration entry.

        :row        split row (list of strings) or list of split rows
        :config     configuration entry; either names a Formatters method
                    ('apply') or maps field names to element indexes ('map'),
                    with optional 'anchors', 'cast' and 'syn' entries
        :version    when provided and found in config, config[version] is used
                    instead of config['map']
        :return     a dictionary for a split row, a list of objects for a list of rows
        """
        # label = config['label'] if 'label' in config else None
        handler = Formatters()
        if 'map' not in config and hasattr(handler,config['apply']):
            pointer = getattr(handler,config['apply'])
            return pointer(row)

        omap = config['map'] if not version or version not in config else config[version]
        anchors = config['anchors'] if 'anchors' in config else []
        casts = {'float':float,'int':int}

        if not isinstance(row[0],str) :
            #
            # we are dealing with a complex object, every item is processed on its own
            return [self.get.value(row_item,config,version) for row_item in row]

        object_value = {}
        for key in omap :
            index = omap[key]
            if anchors and set(anchors) & set(row):
                #
                # the index is relative to the position of the anchor found in the row
                _key = list(set(anchors) & set(row))[0]
                index = row.index(_key) + index
            if index >= len(row) :
                continue
            value = row[index]

            if 'cast' in config and key in config['cast'] and value.strip() != '' :
                cast = config['cast'][key]
                if cast in casts :
                    value = casts[cast](value)
                elif hasattr(handler,cast):
                    value = getattr(handler,cast)(value)
                else:
                    print ("Missing Pointer ",cast)

            if isinstance(value,dict) :
                #
                # synonyms are applied to the (non dictionary) attributes of the object
                for objkey in value :
                    if isinstance(value[objkey],dict) :
                        continue
                    if 'syn' in config and value[objkey] in config['syn'] :
                        value[objkey] = config['syn'][ value[objkey]]
                value = {key:value} if key not in value else value
                object_value = dict(object_value, **value)
            else:
                if 'syn' in config and value in config['syn'] :
                    value = config['syn'][value]
                if isinstance(value,dict) :
                    object_value = dict(object_value, **value)
                else:
                    object_value[key] = value
        return object_value

    def apply(self,content,_code) :
        """
        :content    rows of a segment i.e a claim/remit with the envelope
        :_code      837 or 835 (helps get the appropriate configuration)
        :return     dictionary built from the rows that have a configuration;
                    rows that raise an error are reported on stdout and skipped
        """
        util = Formatters()
        value = {}
        for row in content[:] :
            row = util.split(row.replace('\n','').replace('~',''))
            _info = util.get.config(self.config[_code][0],row)
            if not _info :
                continue
            try:
                tmp = self.get.value(row,_info)
                if not tmp :
                    continue
                if 'label' in _info :
                    label = _info['label']
                    if isinstance(tmp,list) :
                        value[label] = tmp if label not in value else value[label] + tmp
                    else:
                        if label not in value:
                            value[label] = [tmp]
                        elif len(list(tmp.keys())) == 1 :
                            #
                            # a single attribute is merged with the last object found
                            index = len(value[label]) -1
                            value[label][index] = dict(value[label][index],**tmp)
                        else:
                            value[label].append(tmp)
                        tmp['_index'] = len(value[label]) -1
                elif 'field' in _info :
                    value[_info['field']] = tmp
                else:
                    value = dict(value,**tmp)
            except Exception as e :
                #
                # best effort, a row that can not be formatted must not prevent the others from being processed
                print ('__',e.args)

        return value if value else {}

    def get_default_value(self,content,_code):
        """
        Build the attributes shared by every claim of a file from its first rows.

        :content    first rows of the file (at least 4 rows are read)
        :_code      837 or 835
        :return     dictionary with category, submitted date and payer_id (835)
                    or provider_id (837), in addition to the content of row 3
        """
        util = Formatters()
        TOP_ROW = content[1].split('*')
        CATEGORY= content[2].split('*')[1].strip()
        VERSION = content[1].split('*')[-1].replace('~','').replace('\n','')
        SUBMITTED_DATE = util.parse.date(TOP_ROW[4])
        SENDER_ID = TOP_ROW[2]
        row = util.split(content[3])

        _info = util.get_config(self.config[_code][0],row)

        value = self.get.value(row,_info,VERSION) if _info else {}
        value['category'] = {"setid": CATEGORY,"version":'X'+VERSION.split('X')[1],"id":VERSION.split('X')[0].strip()}
        value["submitted"] = SUBMITTED_DATE
        # value['version'] = VERSION
        if _code== '835' :
            value['payer_id'] = SENDER_ID
        else:
            value['provider_id'] = SENDER_ID
        return value

    def read(self,filename) :
        """
        :formerly get_content
        This function returns the content of the EDI file parsed given the configuration specified. It is capable of identifying a file given the content
        :filename   location of the file
        :return     (claims, logs, code); on failure the claims are [] (None if
                    the file holds less than 3 rows) and the code is None
        """
        logs = []
        claims = []
        #
        # set upfront so the error handler can always report it (even if the file can't be opened)
        _code = "unknown"
        index = 0
        handle = None
        try:
            handle = open(filename.strip(),errors='ignore')
            file = handle
            INITIAL_ROWS = list(islice(file,4))
            if len(INITIAL_ROWS) == 1 :
                #
                # the whole file is on a single line, rows are delimited by '~'
                file = INITIAL_ROWS[0].split('~')
                INITIAL_ROWS = file[:4]
            if len(INITIAL_ROWS) < 3 :
                return None,[{"name":filename,"completed":False}],None
            section = 'HL' if INITIAL_ROWS[1].split('*')[1] == 'HC' else 'CLP'
            _code = '837' if section == 'HL' else '835'
            DEFAULT_VALUE = self.get.default_value(INITIAL_ROWS,_code)
            DEFAULT_VALUE['name'] = filename.strip()
            #
            # In the initial rows, there's redundant information (so much for x12 standard)
            # index 1 identifies file type i.e CLM for claim and CLP for remittance
            segment = []
            for row in file :
                if row.startswith(section) and not segment:
                    segment = [row]
                    continue
                elif segment and not row.startswith(section):
                    segment.append(row)

                if len(segment) > 1 and row.startswith(section):
                    #
                    # a new section starts, the rows accumulated so far make up a claim
                    _claim = self.apply(segment,_code)
                    if _claim :
                        _claim['index'] = index
                        claims.append(dict(DEFAULT_VALUE,**_claim))
                    segment = [row]
                    index += 1
            if not segment :
                #
                # reported as a failed parse (used to surface as an index error)
                raise ValueError("No %s section found" % section)
            #
            # Handling the last claim found
            if segment[0].startswith(section) :
                claim = self.apply(segment,_code)
                if claim :
                    claim['index'] = len(claims)
                    claims.append(dict(DEFAULT_VALUE,**claim))
        except Exception as e:
            msg = e.args[0] if e.args else str(e)
            logs.append ({"parse":_code,"completed":False,"name":filename,"msg":msg})
            return [],logs,None
        finally:
            #
            # the file is released regardless of the outcome (early return, error, single line file)
            if handle is not None :
                handle.close()

        rate = 0 if len(claims) == 0 else (1 + index)/len(claims)
        logs.append ({"parse":"claims" if _code == '837' else 'remits',"completed":True,"name":filename,"rate":rate})
        return claims,logs,_code

    def run(self):
        """Parse every file set via set_files and store the outcome."""
        for filename in self.files :
            content,logs,_code = self.read(filename)
            self.finish(content,logs,_code)

    def finish(self,content,logs,_code) :
        """
        Write the parsed content and the logs using the configured store.

        :content    list of claims/remits (nothing is written if empty)
        :logs       list of log entries (nothing is written if empty)
        :_code      837 or 835, for a mongo.MongoWriter store the content goes
                    to 'claims' (837) or 'remits' and the logs to 'logs'
        """
        args = self.store
        #
        # the logger gets its own copy of the arguments so it can target another document
        _args = json.loads(json.dumps(self.store))
        if args['type'] == 'mongo.MongoWriter' :
            args['args']['doc'] = 'claims' if _code == '837' else 'remits'
            _args['args']['doc'] = 'logs'
        if content :
            writer = transport.factory.instance(**args)
            writer.write(content)
            writer.close()
        if logs :
            logger = transport.factory.instance(**_args)
            logger.write(logs)
            logger.close()
- # p = Parser('/home/steve/.healthcareio/config.json')
- # p.set.files(['../../data/small/claims/ssiUB1122042711220427127438.clm_191122T043504'])
- # path = '../../data/small/claims/ssiUB1122042711220427127438.clm_191122T043504'
- # path = '../../data/small/claims/problems-with-procs'
- # path = '../../data/small/remits/1SG03927258.dat_181018T074559'
- # _path = "../../data/small/remits/1TR21426701.dat_180703T074559"
- # p.start()
- # p.join()
- # claims,logs = p.read(path)
- # print (json.dumps(claims[3]))
- # print (logs)
|