__init__.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545
  1. """
  2. (c) 2019 Healthcare/IO 1.0
  3. Vanderbilt University Medical Center, Health Information Privacy Laboratory
  4. https://hiplab.mc.vanderbilt.edu/healthcareio
  5. Authors:
  6. Khanhly Nguyen,
  7. Steve L. Nyemba<steve.l.nyemba@vanderbilt.edu>
  8. License:
  9. MIT, terms are available at https://opensource.org/licenses/MIT
  10. This parser was originally written by Khanhly Nguyen for her internship and is intended to parse x12 835,837 and others provided the appropriate configuration
  11. USAGE :
  12. - COMMAND LINE
  13. - EMBEDDED
  14. """
  15. import hashlib
  16. import json
  17. import os
  18. import sys
  19. from itertools import islice
  20. from multiprocessing import Process
  21. import transport
  22. import jsonmerge
  class void:
      """
      Empty attribute holder.

      Instances are used as plain namespaces onto which callables are attached,
      e.g. ``self.get = void(); self.get.config = self.get_config``.
      """
  class Formatters:
      """
      Helpers that split raw x12 rows and format individual elements.

      The constructor publishes the same formatters under several names
      (``parse.date``, ``format_date``, ``sv2_parse`` ...). Parser.get_map looks
      formatters up by name with ``getattr``, so every alias is kept.
      """

      def __init__(self):
          self.get = void()
          self.get.config = self.get_config

          self.parse = void()
          self.parse.sv3 = self.sv3
          self.parse.sv2 = self.sv2
          self.sv2_parse = self.sv2
          self.sv3_parse = self.sv3
          self.format_proc = self.procedure
          self.format_diag = self.diagnosis
          self.parse.procedure = self.procedure
          self.parse.diagnosis = self.diagnosis
          self.parse.date = self.date
          self.format_date = self.date
          self.format_pos = self.pos
          self.format_time = self.time

      def split(self, row, sep='*', prefix='HI'):
          """
          Split an x12 row into its elements.

          :row    raw row; the '~' terminator and CR/LF characters are removed
          :sep    element separator
          :prefix rows starting with this prefix are expanded item by item on '>'
          :return a flat list of elements. For a row starting with ``prefix`` only
                  the first expanded item, ``[prefix, ...]``, is returned
                  (``[prefix]`` when the row carries no item at all).
          """
          if not row.startswith(prefix):
              keep_composite = row.startswith('CLM')
              elements = []
              for element in row.replace('~', '').split(sep):
                  if '>' not in element:
                      elements.append(element.replace('\n', ''))
                  elif element.startswith(('HC', 'AD')):
                      # only the first two components are kept for HC/AD composites
                      elements += element.split('>')[:2]
                  elif keep_composite:
                      # composite elements of a CLM row are kept as a single string
                      elements.append(element)
                  else:
                      elements += element.split('>')
              return [element.replace('\r', '') for element in elements]

          items = [[prefix] + self.split(item, '>')
                   for item in row.replace('~', '').split(sep)[1:]]
          # NOTE(review): only the first item is handed back, the remaining items
          # of the row are dropped -- confirm this is what the callers expect
          return items[0] if items else [prefix]

      def get_config(self, config, row):
          """
          Return the meaningful part of the configuration for a given row.

          :config configuration keyed by segment identifier
          :row    split row (or a list of split rows, in which case the first is used)
          :return the configuration entry of the row's first element. When that
                  entry holds '@ref', the sub-entry of the first row element found
                  in '@ref' is returned ({} if none matches). When there is no
                  entry, the alias declared in config['SIMILAR'] is tried.
          """
          _row = list(row) if isinstance(row[0], str) else list(row[0])
          _info = config.get(_row[0], {})
          if '@ref' in _info:
              refs = _info['@ref']
              # row order makes the choice deterministic when several keys match
              for item in _row:
                  if item in refs:
                      return refs[item]
              return {}
          if not _info and 'SIMILAR' in config:
              similar = config['SIMILAR']
              if _row[0] in similar:
                  _info = config[similar[_row[0]]]
          return _info

      def hash(self, value):
          """
          Return the md5 hex digest of str(value) followed by the salt read from
          the HEALTHCAREIO_SALT environment variable (empty when it is not set).
          """
          salt = os.environ.get('HEALTHCAREIO_SALT', '')
          _value = str(value) + salt
          if sys.version_info[0] > 2:
              # hashlib requires bytes on python 3
              _value = _value.encode('utf-8')
          return hashlib.md5(_value).hexdigest()

      def suppress(self, value):
          """Replace any value with the constant 'N/A'."""
          return 'N/A'

      def date(self, value):
          """
          Format a date as YYYY-MM-DD.

          :value  CCYYMMDD or YYMMDD string; when the value holds a '-' (or is
                  longer than 8 characters) only the part before the first '-'
                  is considered
          :return the formatted date, None when the value is neither 8 nor 6
                  characters long
          """
          if len(value) > 8 or '-' in value:
              value = value.split('-')[0]
          if len(value) == 8:
              return "-".join([value[:4], value[4:6], value[6:]])
          if len(value) == 6:
              # two digit years are prefixed with '20'
              return "-".join(['20' + value[:2], value[2:4], value[4:]])
          #
          # We have a date formatting issue
          return None

      def time(self, value):
          """Placeholder, time formatting is not implemented (returns None)."""
          pass

      def sv3(self, value):
          """
          Format a split service row as {type, code, amount}.

          :value  split row; when element 1 holds a '>' it carries 'type>code' and
                  the amount is element 2, otherwise type, code and amount are
                  elements 1, 2 and 3
          """
          if '>' in value[1]:
              terms = value[1].split('>')
              return {'type': terms[0], 'code': terms[1], "amount": float(value[2])}
          return {"code": value[2], "type": value[1], "amount": float(value[3])}

      def sv2(self, value):
          """
          Format a composite 'type>code[>modifier[>modifier type]]' string
          (':' is accepted as separator as well).

          :return {code, type[, modifier]} or the value itself when it holds no separator
          """
          #
          # @TODO: Sometimes there's a suffix (need to inventory all the variations)
          #
          if '>' not in value and ':' not in value:
              return value
          xchar = '>' if '>' in value else ':'
          _values = value.split(xchar)
          _value = {"code": _values[1], "type": _values[0]}
          if len(_values) > 2:
              modifier = {"code": _values[2]}
              if len(_values) > 3:
                  modifier['type'] = _values[3]
              _value['modifier'] = modifier
          return _value

      def procedure(self, value):
          """
          Format a procedure given as 'type<sep>code[<sep>modifier]' where the
          separator is the first of ':', '<', '|' found in the value.

          :return {type, code[, modifier]} or str(value) when no separator is found
          """
          for xchar in (':', '<', '|'):
              if xchar in value:
                  parts = value.split(xchar)
                  _value = {"type": parts[0].strip(), "code": parts[1].strip()}
                  # the number of parts is compared, not the list itself
                  if len(parts) >= 3:
                      _value['modifier'] = parts[2]
                  return _value
          return str(value)

      def diagnosis(self, value):
          """
          Format a list of split diagnosis items.

          :value  list of items, each of them [prefix, type, code, ...]
          :return list of {code, type}; items with fewer than 3 elements are skipped
          """
          return [{"code": item[2], "type": item[1]} for item in value if len(item) > 2]

      def pos(self, value):
          """
          formatting place of service information within a segment (REF)
          @TODO: In order to accomodate the other elements they need to be specified in the configuration
                  Otherwise it causes problems on export

          :return the first component (the code) of a '>' or ':' separated value
          """
          xchar = '>' if '>' in value else ':'
          return value.split(xchar)[0]
  class Parser(Process):
      """
      Process that parses x12 files (837 claims, 835 remits) according to a
      configuration file and writes the parsed content and the logs through
      ``transport``.

      The files to process are set with ``parser.set.files([...])``; optional
      callbacks can be attached to ``parser.emit.pre`` and ``parser.emit.post``.
      """

      def __init__(self, path):
          """
          :path       path of the configuration file (it can be absolute)
          """
          Process.__init__(self)
          self.utils = Formatters()
          self.get = void()
          self.get.value = self.get_map
          self.get.default_value = self.get_default_value
          with open(path) as stream:
              _config = json.loads(stream.read())
          self._custom_config = self.get_custom(path)
          self.config = _config['parser']
          self.store = _config['store']
          self.files = []
          self.set = void()
          self.set.files = self.set_files
          self.emit = void()
          self.emit.pre = None
          self.emit.post = None

      def get_custom(self, path):
          """
          Load the custom configuration stored in the 'custom' folder located
          next to the configuration file.

          :path       path of the configuration file (it can be absolute)
          :return     content of the first file (in name order) of the folder,
                      {} when the folder does not exist or is empty
          """
          _path = path.replace('config.json', '')
          if _path.endswith(os.sep):
              _path = _path[:-1]
          _path = os.sep.join([_path, 'custom'])
          _config = {}
          if os.path.exists(_path):
              # sorted, so the file picked does not depend on the listing order
              files = sorted(os.listdir(_path))
              if files:
                  fullname = os.sep.join([_path, files[0]])
                  with open(fullname) as stream:
                      _config = json.loads(stream.read())
          return _config

      def set_files(self, files):
          """Set the list of files processed by run()."""
          self.files = files

      def get_map(self, row, config, version=None):
          """
          Turn a split row into an object given its configuration.

          :row        split row (list of strings) or list of split rows
          :config     configuration of the row:
                      'apply'     name of a Formatters method applied to the whole
                                  row (used when there is no 'map')
                      'map'       {attribute: index of the element in the row}
                      'anchors'   elements of the row the indexes are relative to
                      'cast'      {attribute: 'float', 'int' or a Formatters method name}
                      'syn'       {value: replacement value}
          :version    key of config holding an alternative map, used when present
          :return     a dict for a split row, a list of objects for a list of rows
          """
          handler = Formatters()
          if 'map' not in config and hasattr(handler, config['apply']):
              return getattr(handler, config['apply'])(row)

          omap = config['map'] if not version or version not in config else config[version]
          if not isinstance(row[0], str):
              #
              # we are dealing with a complex object
              return [self.get.value(row_item, config, version) for row_item in row]

          casts = config['cast'] if 'cast' in config else {}
          synonyms = config['syn'] if 'syn' in config else {}
          builtin_casts = {'float': float, 'int': int}
          #
          # the indexes of the map are relative to the anchor when one is found
          # in the row (first configured anchor present in the row)
          offset = 0
          anchors = config['anchors'] if 'anchors' in config else []
          present = [anchor for anchor in anchors if anchor in row]
          if present:
              offset = row.index(present[0])

          object_value = {}
          for key in omap:
              index = omap[key] + offset
              if index >= len(row):
                  continue
              value = row[index]
              if key in casts and value.strip() != '':
                  cast = casts[key]
                  if cast in builtin_casts:
                      value = builtin_casts[cast](value)
                  elif hasattr(handler, cast):
                      value = getattr(handler, cast)(value)
                  else:
                      print("Missing Pointer ", cast)

              if isinstance(value, dict):
                  for objkey in value:
                      if isinstance(value[objkey], dict):
                          continue
                      if synonyms and value[objkey] in synonyms:
                          value[objkey] = synonyms[value[objkey]]
                  value = {key: value} if key not in value else value
                  object_value = dict(object_value, **value)
              else:
                  if synonyms and value in synonyms:
                      value = synonyms[value]
                  object_value[key] = value
          return object_value

      def apply(self, content, _code):
          """
          Parse the rows of a segment into a single object.

          :content    content of a file i.e a segment with the envelope
          :_code      837 or 835 (helps get the appropriate configuration)
          :return     the parsed object ({} if nothing was parsed). A row that
                      fails to parse is reported on stdout and skipped.
          """
          util = Formatters()
          value = {}
          for row in content:
              row = util.split(row.replace('\n', '').replace('~', ''))
              _info = util.get.config(self.config[_code][0], row)
              if self._custom_config and _code in self._custom_config:
                  _cinfo = util.get.config(self._custom_config[_code], row)
              else:
                  _cinfo = {}
              if not _info:
                  continue
              try:
                  _info = jsonmerge.merge(_info, _cinfo)
                  tmp = self.get.value(row, _info)
                  if not tmp:
                      continue
                  if 'label' in _info:
                      #
                      # rows with a label are accumulated in a list
                      label = _info['label']
                      if isinstance(tmp, list):
                          value[label] = tmp if label not in value else value[label] + tmp
                      else:
                          value.setdefault(label, []).append(tmp)
                          # position of the entry within the list of its label
                          tmp['_index'] = len(value[label]) - 1
                  elif 'field' in _info:
                      value = jsonmerge.merge(value, {_info['field']: tmp})
                  else:
                      value = dict(value, **tmp)
              except Exception as e:
                  # best effort: a faulty row must not prevent parsing the others
                  print('__', e.args)
          return value if value else {}

      def get_default_value(self, content, _code):
          """
          Build the attributes shared by every claim of a file out of its
          initial rows.

          :content    initial rows of the file (rows 1, 2 and 3 are used)
          :_code      837 or 835 (helps get the appropriate configuration)
          :return     object with 'category', 'submitted' and 'payer_id' (835) or
                      'provider_id' (any other code), on top of whatever row 3 yields
          """
          util = Formatters()
          TOP_ROW = content[1].split('*')
          CATEGORY = content[2].split('*')[1].strip()
          VERSION = TOP_ROW[-1].replace('~', '').replace('\n', '')
          SUBMITTED_DATE = util.parse.date(TOP_ROW[4])
          SENDER_ID = TOP_ROW[2]
          row = util.split(content[3])
          _info = util.get_config(self.config[_code][0], row)
          value = self.get.value(row, _info, VERSION) if _info else {}
          version_parts = VERSION.split('X')
          value['category'] = {"setid": CATEGORY, "version": 'X' + version_parts[1], "id": version_parts[0].strip()}
          value["submitted"] = SUBMITTED_DATE
          if _code == '835':
              value['payer_id'] = SENDER_ID
          else:
              value['provider_id'] = SENDER_ID
          return value

      def read(self, filename):
          """
          :formerly get_content
          Parse an EDI file given the configuration; the type of the file is
          read from its content (row 2 of the initial rows).

          :filename   location of the file
          :return     (claims, logs, code)
                      (None, logs, None) when the file has fewer than 3 initial rows
                      ([], logs, None) when the file can not be read or parsed
          """
          logs = []
          claims = []
          # set before anything can fail, the error log below refers to it
          _code = "unknown"
          stream = None
          try:
              stream = open(filename.strip(), errors='ignore')
              rows = stream
              INITIAL_ROWS = list(islice(stream, 4))
              if len(INITIAL_ROWS) == 1:
                  #
                  # the whole file is on a single line, rows are delimited by '~'
                  rows = INITIAL_ROWS[0].split('~')
                  INITIAL_ROWS = rows[:4]
              if len(INITIAL_ROWS) < 3:
                  return None, [{"name": filename, "completed": False}], None

              _code = INITIAL_ROWS[2].split('*')[1].strip()
              section = self.config[_code][0]['SECTION'].strip()
              DEFAULT_VALUE = self.get.default_value(INITIAL_ROWS, _code)
              DEFAULT_VALUE['name'] = filename.strip()

              segment = []
              index = 0
              _toprows = []
              for row in rows:
                  row = row.replace('\r', '')
                  starts_section = row.startswith(section)
                  if not segment:
                      #
                      # rows found before the first section are kept aside
                      if starts_section:
                          segment = [row]
                      else:
                          _toprows.append(row)
                      continue
                  if not starts_section:
                      segment.append(row)
                  elif len(segment) > 1:
                      #
                      # a new section starts, the segment collected so far is parsed
                      _claim = self.apply(segment, _code)
                      if _claim:
                          _claim['index'] = index
                          claims.append(dict(DEFAULT_VALUE, **_claim))
                      segment = [row]
                      index += 1
              if not segment:
                  # reported as a failed parse by the handler below
                  raise ValueError("no row starting with %s found" % section)
              #
              # Handling the last claim found, the rows found before the first
              # section are merged into it
              claim = self.apply(segment, _code)
              if claim:
                  claim['index'] = len(claims)
                  claim = jsonmerge.merge(claim, self.apply(_toprows, _code))
                  claims.append(dict(DEFAULT_VALUE, **claim))
          except Exception as e:
              msg = e.args[0] if e.args else str(e)
              logs.append({"parse": _code, "completed": False, "name": filename, "msg": msg})
              return [], logs, None
          finally:
              # the file is released on every path, early return and errors included
              if stream is not None:
                  stream.close()

          rate = 0 if len(claims) == 0 else (1 + index) / len(claims)
          logs.append({"parse": "claims" if _code == '837' else 'remits', "completed": True, "name": filename, "rate": rate})
          return claims, logs, _code

      def run(self):
          """
          Call emit.pre if set, then read every file and hand the outcome to finish().
          """
          if self.emit.pre:
              self.emit.pre()
          for filename in self.files:
              content, logs, _code = self.read(filename)
              self.finish(content, logs, _code)

      def finish(self, content, logs, _code):
          """
          Write the parsed content and the logs to the store, then call emit.post if set.

          :content    parsed claims/remits (nothing is written when empty)
          :logs       logs of the parsing (nothing is written when empty)
          :_code      '837' writes to 'claims', anything else writes to 'remits'
          """
          #
          # both writers work on their own copy, the store configuration is left untouched
          args = json.loads(json.dumps(self.store))
          _args = json.loads(json.dumps(self.store))
          target = 'doc' if args['type'] == 'mongo.MongoWriter' else 'table'
          args['args'][target] = 'claims' if _code == '837' else 'remits'
          _args['args'][target] = 'logs'
          if content:
              writer = transport.factory.instance(**args)
              writer.write(content)
              writer.close()
          if logs:
              logger = transport.factory.instance(**_args)
              logger.write(logs)
              logger.close()
          if self.emit.post:
              self.emit.post(content, logs)
  432. # p = Parser('/home/steve/.healthcareio/config.json')
  433. # p.set.files(['../../data/small/claims/ssiUB1122042711220427127438.clm_191122T043504'])
  434. # path = '../../data/small/claims/ssiUB1122042711220427127438.clm_191122T043504'
  435. # path = '../../data/small/claims/problems-with-procs'
  436. # path = '../../data/small/remits/1SG03927258.dat_181018T074559'
  437. # _path = "../../data/small/remits/1TR21426701.dat_180703T074559"
  438. # p.start()
  439. # p.join()
  440. # claims,logs = p.read(path)
  441. # print (json.dumps(claims[3]))
  442. # print (logs)