__init__.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446
  1. """
  2. (c) 2019 Healthcare/IO 1.0
  3. Vanderbilt University Medical Center, Health Information Privacy Laboratory
  4. https://hiplab.mc.vanderbilt.edu/healthcareio
  5. Authors:
  6. Khanhly Nguyen,
  7. Steve L. Nyemba<steve.l.nyemba@vanderbilt.edu>
  8. License:
  9. MIT, terms are available at https://opensource.org/licenses/MIT
  10. This parser was originally written by Khanhly Nguyen for her internship and is intended to parse x12 835,837 and others provided the appropriate configuration
  11. USAGE :
  12. - COMMAND LINE
  13. - EMBEDDED
  14. """
  15. import hashlib
  16. import json
  17. import os
  18. import sys
  19. from itertools import islice
  20. from multiprocessing import Process
  21. import transport
  22. class void :
  23. pass
  24. class Formatters :
  25. def __init__(self):
  26. # self.config = config
  27. self.get = void()
  28. self.get.config = self.get_config
  29. self.parse = void()
  30. self.parse.sv3 = self.sv3
  31. self.parse.sv2 = self.sv2
  32. self.sv2_parse = self.sv2
  33. self.sv3_parse = self.sv3
  34. self.parse.procedure = self.procedure
  35. self.parse.diagnosis = self.diagnosis
  36. self.parse.date = self.date
  37. self.format_date = self.date
  38. self.format_pos = self.pos
  39. self.format_time = self.time
  40. def split(self,row,sep='*',prefix='HI') :
  41. """
  42. This function is designed to split an x12 row and
  43. """
  44. if row.startswith(prefix) is False:
  45. value = []
  46. for row_value in row.replace('~','').split(sep) :
  47. if '>' in row_value :
  48. if row_value.startswith('HC') or row_value.startswith('AD'):
  49. value += row_value.split('>')[:2]
  50. else:
  51. value += row_value.split('>') if row.startswith('CLM') is False else [row_value]
  52. else :
  53. value.append(row_value.replace('\n',''))
  54. return [xchar.replace('\r','') for xchar in value] #row.replace('~','').split(sep)
  55. else:
  56. return [ [prefix]+ self.split(item,'>') for item in row.replace('~','').split(sep)[1:] ]
  57. def get_config(self,config,row):
  58. """
  59. This function will return the meaningfull parts of the configuration for a given item
  60. """
  61. _row = list(row) if type(row[0]) == str else list(row[0])
  62. _info = config[_row[0]] if _row[0] in config else {}
  63. key = None
  64. if '@ref' in _info:
  65. key = list(set(_row) & set(_info['@ref'].keys()))
  66. if key :
  67. key = key[0]
  68. return _info['@ref'][key]
  69. else:
  70. return {}
  71. if not _info and 'SIMILAR' in config:
  72. #
  73. # Let's look for the nearest key using the edit distance
  74. if _row[0] in config['SIMILAR'] :
  75. key = config['SIMILAR'][_row[0]]
  76. _info = config[key]
  77. return _info
  78. def hash(self,value):
  79. salt = os.environ['HEALTHCAREIO_SALT'] if 'HEALTHCAREIO_SALT' in os.environ else ''
  80. _value = str(value)+ salt
  81. if sys.version_info[0] > 2 :
  82. return hashlib.md5(_value.encode('utf-8')).hexdigest()
  83. else:
  84. return hashlib.md5(_value).hexdigest()
  85. def suppress (self,value):
  86. return 'N/A'
  87. def date(self,value):
  88. if len(value) > 8 or '-' in value:
  89. value = value.split('-')[0]
  90. if len(value) == 8 :
  91. year = value[:4]
  92. month = value[4:6]
  93. day = value[6:]
  94. return "-".join([year,month,day])[:10] #{"year":year,"month":month,"day":day}
  95. elif len(value) == 6 :
  96. year = '20' + value[:2]
  97. month = value[2:4]
  98. day = value[4:]
  99. #
  100. # We have a date formatting issue
  101. return "-".join([year,month,day])
  102. def time(self,value):
  103. pass
  104. def sv3(self,value):
  105. if '>' in value [1]:
  106. terms = value[1].split('>')
  107. return {'type':terms[0],'code':terms[1],"amount":float(value[2])}
  108. else:
  109. return {"code":value[2],"type":value[1],"amount":float(value[3])}
  110. def sv2(self,value):
  111. #
  112. # @TODO: Sometimes there's a suffix (need to inventory all the variations)
  113. #
  114. if '>' in value or ':' in value:
  115. xchar = '>' if '>' in value else ':'
  116. _values = value.split(xchar)
  117. modifier = {}
  118. if len(_values) > 2 :
  119. modifier= {"code":_values[2]}
  120. if len(_values) > 3 :
  121. modifier['type'] = _values[3]
  122. _value = {"code":_values[1],"type":_values[0]}
  123. if modifier :
  124. _value['modifier'] = modifier
  125. return _value
  126. else:
  127. return value
  128. def procedure(self,value):
  129. for xchar in [':','<'] :
  130. if xchar in value and len(value.split(xchar)) > 1 :
  131. #_value = {"type":value.split(':')[0].strip(),"code":value.split(':')[1].strip()}
  132. _value = {"type":value.split(xchar)[0].strip(),"code":value.split(xchar)[1].strip()}
  133. break
  134. else:
  135. _value = str(value)
  136. return _value
  137. def diagnosis(self,alue):
  138. return [ {"code":item[2], "type":item[1]} for item in value if len(item) > 1]
  139. def pos(self,value):
  140. """
  141. formatting place of service information within a segment (REF)
  142. """
  143. xchar = '>' if '>' in value else ':'
  144. x = value.split(xchar)
  145. x = {"code":x[0],"indicator":x[1],"frequency":x[2]} if len(x) == 3 else {"code":x[0],"indicator":None,"frequency":None}
  146. return x
  147. class Parser (Process):
  148. def __init__(self,path):
  149. Process.__init__(self)
  150. self.utils = Formatters()
  151. self.get = void()
  152. self.get.value = self.get_map
  153. self.get.default_value = self.get_default_value
  154. _config = json.loads(open(path).read())
  155. self.config = _config['parser']
  156. self.store = _config['store']
  157. self.files = []
  158. self.set = void()
  159. self.set.files = self.set_files
  160. def set_files(self,files):
  161. self.files = files
  162. def get_map(self,row,config,version=None):
  163. # label = config['label'] if 'label' in config else None
  164. handler = Formatters()
  165. if 'map' not in config and hasattr(handler,config['apply']):
  166. pointer = getattr(handler,config['apply'])
  167. object_value = pointer(row)
  168. return object_value
  169. omap = config['map'] if not version or version not in config else config[version]
  170. anchors = config['anchors'] if 'anchors' in config else []
  171. if type(row[0]) == str:
  172. object_value = {}
  173. for key in omap :
  174. index = omap[key]
  175. if anchors and set(anchors) & set(row):
  176. _key = list(set(anchors) & set(row))[0]
  177. aindex = row.index(_key)
  178. index = aindex + index
  179. if index < len(row) :
  180. value = row[index]
  181. if 'cast' in config and key in config['cast'] and value.strip() != '' :
  182. if config['cast'][key] in ['float','int'] :
  183. value = eval(config['cast'][key])(value)
  184. elif hasattr(handler,config['cast'][key]):
  185. pointer = getattr(handler,config['cast'][key])
  186. value = pointer(value)
  187. else:
  188. print ("Missing Pointer ",config['cast'][key])
  189. # print (key,value)
  190. if type(value) == dict :
  191. for objkey in value :
  192. if type(value[objkey]) == dict :
  193. continue
  194. if 'syn' in config and value[objkey] in config['syn'] :
  195. value[objkey] = config['syn'][ value[objkey]]
  196. value = {key:value} if key not in value else value
  197. else:
  198. if 'syn' in config and value in config['syn'] :
  199. value = config['syn'][value]
  200. if type(value) == dict :
  201. object_value = dict(object_value, **value)
  202. else:
  203. object_value[key] = value
  204. else:
  205. #
  206. # we are dealing with a complex object
  207. object_value = []
  208. for row_item in row :
  209. value = self.get.value(row_item,config,version)
  210. object_value.append(value)
  211. #
  212. # We need to add the index of the object it matters in determining the claim types
  213. #
  214. # object_value.append( list(get_map(row_item,config,version)))
  215. # object_value = {label:object_value}
  216. return object_value
  217. def apply(self,content,_code,default_value) :
  218. """
  219. :file content i.e a segment with the envelope
  220. :_code 837 or 835 (helps get the appropriate configuration)
  221. """
  222. util = Formatters()
  223. claim = default_value.copy()
  224. value = {}
  225. for row in content[:] :
  226. row = util.split(row)
  227. _info = util.get.config(self.config[_code][0],row)
  228. if _info :
  229. try:
  230. tmp = self.get.value(row,_info)
  231. if not tmp :
  232. continue
  233. if 'label' in _info :
  234. label = _info['label']
  235. if type(tmp) == list :
  236. value[label] = tmp if label not in value else value[label] + tmp
  237. else:
  238. if label not in value:
  239. value[label] = [tmp]
  240. elif len(list(tmp.keys())) == 1 :
  241. # print "\t",len(claim[label]),tmp
  242. index = len(value[label]) -1
  243. value[label][index] = dict(value[label][index],**tmp)
  244. else:
  245. value[label].append(tmp)
  246. if len(value[label]) > 0 :
  247. labels = []
  248. for item in value[label] :
  249. item['_index'] = len(labels)
  250. if item not in labels :
  251. labels.append(item)
  252. value[label] = labels
  253. elif 'field' in _info :
  254. name = _info['field']
  255. value[name] = tmp
  256. else:
  257. value = dict(value,**tmp)
  258. pass
  259. except Exception as e :
  260. print ('__',e)
  261. pass
  262. return dict(claim,**value) if value else {}
  263. def get_default_value(self,content,_code):
  264. util = Formatters()
  265. TOP_ROW = content[1].split('*')
  266. CATEGORY= content[2].split('*')[1].strip()
  267. VERSION = content[1].split('*')[-1].replace('~','').replace('\n','')
  268. SUBMITTED_DATE = util.parse.date(TOP_ROW[4])
  269. SENDER_ID = TOP_ROW[2]
  270. row = util.split(content[3])
  271. _info = util.get_config(self.config[_code][0],row)
  272. value = self.get.value(row,_info,VERSION) if _info else {}
  273. value['category'] = {"setid": CATEGORY,"version":'X'+VERSION.split('X')[1],"id":VERSION.split('X')[0].strip()}
  274. value["submitted"] = SUBMITTED_DATE
  275. # value['version'] = VERSION
  276. if _code== '835' :
  277. value['payer_id'] = SENDER_ID
  278. else:
  279. value['provider_id'] = SENDER_ID
  280. return value
  281. def read(self,filename) :
  282. """
  283. :formerly get_content
  284. This function returns the of the EDI file parsed given the configuration specified. it is capable of identifying a file given the content
  285. :section loop prefix (HL, CLP)
  286. :config configuration with formatting rules, labels ...
  287. :filename location of the file
  288. """
  289. # section = section if section else config['SECTION']
  290. logs = []
  291. claims = []
  292. try:
  293. file = open(filename.strip(),errors='ignore')
  294. INITIAL_ROWS = list(islice(file,4)) #.readlines(4)
  295. if len(INITIAL_ROWS) == 1 :
  296. file = INITIAL_ROWS[0].split('~')
  297. INITIAL_ROWS = file[:4]
  298. section = 'CLM' if INITIAL_ROWS[1].split('*')[1] == 'HC' else 'CLP'
  299. _code = '837' if section == 'CLM' else '835'
  300. DEFAULT_VALUE = self.get.default_value(INITIAL_ROWS,_code)
  301. DEFAULT_VALUE['name'] = filename.strip()
  302. #
  303. # In the initial rows, there's redundant information (so much for x12 standard)
  304. # index 1 identifies file type i.e CLM for claim and CLP for remittance
  305. segment = []
  306. index = 0;
  307. for row in file :
  308. if row.startswith(section) and not segment:
  309. segment = [row]
  310. continue
  311. elif segment:
  312. segment.append(row)
  313. if len(segment) > 1 and row.startswith(section):
  314. #
  315. # process the segment somewhere (create a thread maybe?)
  316. #
  317. default_claim = dict({"index":index},**DEFAULT_VALUE)
  318. claim = self.apply(segment,_code,default_claim)
  319. claims.append(claim)
  320. segment = [row]
  321. index += 1
  322. pass
  323. #
  324. # Handling the last claim found
  325. if segment[0].startswith(section) :
  326. default_claim = dict({"name":index},**DEFAULT_VALUE)
  327. claim = self.apply(segment,_code,DEFAULT_VALUE)
  328. claims.append(claim)
  329. if type(file) != list :
  330. file.close()
  331. # x12_file = open(filename.strip(),errors='ignore').read().split('\n')
  332. except Exception as e:
  333. logs.append ({"parse":"claims" if _code == '837' else 'remits',"completed":False,"name":filename,"msg":e.args[0]})
  334. return [],logs
  335. rate = 0 if len(claims) == 0 else (1 + index)/len(claims)
  336. logs.append ({"parse":"claims" if _code == '837' else 'remits',"completed":True,"name":filename,"rate":rate})
  337. # self.finish(claims,logs,_code)
  338. return claims,logs,_code
  339. def run(self):
  340. for filename in self.files :
  341. content,logs,_code = self.read(filename)
  342. self.finish(content,logs,_code)
  343. def finish(self,content,logs,_code) :
  344. args = self.store
  345. _args = json.loads(json.dumps(self.store))
  346. if args['type'] == 'mongo.MongoWriter' :
  347. args['args']['doc'] = 'claims' if _code == '837' else 'remits'
  348. _args['args']['doc'] = 'logs'
  349. if content :
  350. writer = transport.factory.instance(**args)
  351. writer.write(content)
  352. writer.close()
  353. if logs :
  354. logger = transport.factory.instance(**_args)
  355. logger.write(logs)
  356. logger.close()
  357. # p = Parser('/home/steve/.healthcareio/config.json')
  358. # p.set.files(['../../data/small/claims/ssiUB1122042711220427127438.clm_191122T043504'])
  359. # path = '../../data/small/claims/ssiUB1122042711220427127438.clm_191122T043504'
  360. # path = '../../data/small/claims/problems-with-procs'
  361. # path = '../../data/small/remits/1SG03927258.dat_181018T074559'
  362. # _path = "../../data/small/remits/1TR21426701.dat_180703T074559"
  363. # p.start()
  364. # p.join()
  365. # claims,logs = p.read(path)
  366. # print (json.dumps(claims[3]))
  367. # print (logs)