parser.py 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199
  1. """
  2. (c) 2019 EDI-Parser 1.0
  3. Vanderbilt University Medical Center, Health Information Privacy Laboratory
  4. https://hiplab.mc.vanderbilt.edu/tools
  5. Authors:
  6. Khanhly Nguyen,
  7. Steve L. Nyemba<steve.l.nyemba@vanderbilt.edu>
  8. License:
  9. MIT, terms are available at https://opensource.org/licenses/MIT
  10. This parser was originally written by Khanhly Nguyen for her internship and is intended to parse x12 835, 837 and other formats, provided the appropriate configuration is supplied
  11. USAGE :
  12. - COMMAND LINE
  13. - EMBEDDED
  14. """
  15. import os
  16. import sys
  17. def split(row,sep='*',prefix='HI'):
  18. """
  19. This function is designed to split an x12 row and
  20. """
  21. if row.startswith(prefix) is False:
  22. value = []
  23. for row_value in row.replace('~','').split(sep) :
  24. if '>' in row_value :
  25. if row_value.startswith('HC') or row_value.startswith('AD'):
  26. value += row_value.split('>')[:2]
  27. else:
  28. value += row_value.split('>')
  29. else :
  30. value.append(row_value)
  31. return [xchar.replace('\r','') for xchar in value] #row.replace('~','').split(sep)
  32. else:
  33. return [ [prefix]+ split(item,'>') for item in row.replace('~','').split(sep)[1:] ]
  34. def get_config(config,row):
  35. """
  36. This function will return the meaningfull parts of the configuration for a given item
  37. """
  38. _row = list(row) if type(row[0]) == str else list(row[0])
  39. _info = config[_row[0]] if _row[0] in config else {}
  40. key = None
  41. if '@ref' in _info:
  42. key = list(set(_row) & set(_info['@ref'].keys()))
  43. if key :
  44. key = key[0]
  45. return _info['@ref'][key]
  46. else:
  47. return {}
  48. if not _info and 'SIMILAR' in config:
  49. #
  50. # Let's look for the nearest key using the edit distance
  51. if _row[0] in config['SIMILAR'] :
  52. key = config['SIMILAR'][_row[0]]
  53. _info = config[key]
  54. return _info
  55. def format_date(value) :
  56. year = value[:4]
  57. month = value[4:6]
  58. day = value[6:]
  59. return "-".join([year,month,day])[:10] #{"year":year,"month":month,"day":day}
  60. def format_time(value):
  61. return ":".join([value[:2],value[2:] ])[:5]
  62. def format_proc(value):
  63. if ':' in value :
  64. return {"procedure_type":value.split(':')[0].strip(),"procedure_code":value.split(':')[1].strip()}
  65. else:
  66. return value
  67. def get_map(row,config,version):
  68. label = config['label'] if 'label' in config else None
  69. omap = config['map'] if version not in config else config[version]
  70. anchors = config['anchors'] if 'anchors' in config else []
  71. if type(row[0]) == str:
  72. object_value = {}
  73. for key in omap :
  74. index = omap[key]
  75. if anchors and set(anchors) & set(row):
  76. _key = list(set(anchors) & set(row))[0]
  77. aindex = row.index(_key)
  78. index = aindex + index
  79. if index < len(row) :
  80. value = row[index]
  81. if 'cast' in config and key in config['cast'] and value.strip() != '' :
  82. value = eval(config['cast'][key])(value)
  83. pass
  84. if 'syn' in config and value in config['syn'] :
  85. value = config['syn'][value]
  86. if type(value) == dict :
  87. object_value = dict(object_value, **value)
  88. else:
  89. object_value[key] = value
  90. else:
  91. #
  92. # we are dealing with a complex object
  93. object_value = []
  94. for row_item in row :
  95. object_value.append( list(get_map(row_item,config,version)))
  96. # object_value = {label:object_value}
  97. return object_value
  98. def get_locations(x12_file,section='HL') :
  99. locations = []
  100. for line in x12_file :
  101. if line.strip().startswith(section) :
  102. i = x12_file.index(line)
  103. locations.append(i)
  104. return locations
  105. #def get_claims(filename,config,section) :
  106. def get_content(filename,config,section=None) :
  107. """
  108. This function returns the of the EDI file parsed given the configuration specified
  109. :section loop prefix (HL, CLP)
  110. :config configuration with formatting rules, labels ...
  111. :filename location of the file
  112. """
  113. section = section if section else config['SECTION']
  114. x12_file = open(filename).read().split('\n')
  115. if len(x12_file) == 1 :
  116. x12_file = x12_file[0].split('~')
  117. locations = get_locations(x12_file,section)
  118. claims = []
  119. logs = []
  120. # VERSION = x12_file[2].split('*')[3].replace('~','')
  121. VERSION = x12_file[1].split('*')[-1].replace('~','')
  122. row = split(x12_file[3])
  123. _info = get_config(config,row)
  124. _default_value = get_map(row,_info,VERSION) if _info else None
  125. N = len(locations)
  126. for index in range(0,N-1):
  127. beg = locations[index]
  128. end = locations[index+1]
  129. claim = {}
  130. for row in x12_file[beg:end] :
  131. row = split(row)
  132. _info = get_config(config,row)
  133. if _info :
  134. try:
  135. # tmp = get_map(row,_info,VERSION)
  136. tmp = get_map(row,_info,VERSION)
  137. except Exception as e:
  138. if sys.verion_info[0] > 2 :
  139. logs.append ({"version":VERSION,"filename":filename,"msg":e.args[0],"X12":x12_file[beg:end]})
  140. else:
  141. logs.append ({"version":VERSION,"filename":filename,"msg":e.message,"X12":x12_file[beg:end]})
  142. claim = {}
  143. break
  144. if 'label' not in _info :
  145. tmp['version'] = VERSION
  146. claim = dict(claim, **tmp)
  147. else:
  148. label = _info['label']
  149. if type(tmp) == list :
  150. claim[label] = tmp if label not in claim else claim[label] + tmp
  151. else:
  152. if label not in claim:
  153. claim[label] = [tmp]
  154. elif len(list(tmp.keys())) == 1 :
  155. # print "\t",len(claim[label]),tmp
  156. index = len(claim[label]) -1
  157. claim[label][index] = dict(claim[label][index],**tmp)
  158. else:
  159. claim[label].append(tmp)
  160. if claim and 'claim_id' in claim:
  161. claim = dict(claim,**_default_value)
  162. claim['name'] = filename[:-5].split(os.sep)[-1] #.replace(ROOT,'')
  163. claim['index'] = index
  164. claims.append(claim)
  165. return claims,logs