import os
import numpy as np
from io import StringIO
# from .common import Common


class Content:
    """
    This class implements functions that manipulate the content of a file
    :split  splits the content
    :read   reads the content of a file given a filename
    :parse  parses the content of a file given a map {index: field_name}
    """
    def __init__(self, **_args):
        self._parents = {}
        self._lastelement = {}
    def split(self, _content):
        if isinstance(_content, str):
            #
            # Detect the segment terminator and the document type (837 claims carry CLM, 835 remittances carry CLP)
            #
            _xchar = '~\n' if '~\n' in _content else ('~' if '~' in _content else ('\n' if '\n' in _content else None))
            _x12 = '837' if 'CLM*' in _content else ('835' if 'CLP*' in _content else None)
            if _xchar is None or _x12 is None:
                return [], None
            _map = {'835': 'CLP', '837': 'CLM'}
            _claim_mark = _map[_x12]
            _content = _content.split(_claim_mark)
            _chunks = []
            for _block in _content:
                if len(_chunks) > 0:
                    _block = _claim_mark + _block
                _splitblocks = [row.strip().split('*') for row in _block.split(_xchar) if row.strip()]
                _chunks.append(_splitblocks)
            return _chunks, _x12
            # if _xchar :
            #     _xchar = ''.join(_xchar)
            #     _rows = _content.split(_xchar)
            #     return [row.strip().split('*') for row in _rows if row.strip()]
            # else:
            #     return _content.split('*')
        return [], None
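    #
    # Illustration only (not executed): for a made-up 835 fragment like
    #   "ST*835*0001~CLP*A1*100~CLP*A2*50~"
    # split() detects '~' as the segment terminator and 'CLP' as the claim marker,
    # then returns one chunk for the header segments plus one chunk per claim
    # (each segment already split on '*'), together with the document type '835'.
    # The fragment above is a hypothetical sample, not a real remittance.
    #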
    def read(self, **_args):
        """
        This function will read and clean up the content of a file
        """
        _filename = _args['filename']
        if isinstance(_filename, StringIO):
            return _filename.read()
        with open(_filename) as f:
            return f.read()
    def _ix_parse(self, columns, index, **_args):
        """
        This function encapsulates how an x12 document element will be processed
        :columns list of attributes that make up the object
        :index   indexes of said items in the element
        :_args
            - row       raw x12 element (string)
            - pointer   decorated function
            - document
        """
        _ELEMENT = _args['row'][0]
        _pointer = _args['pointer']
        _document = _args['document']
        if 'map' in _pointer.meta:
            _map = _pointer.meta['map']
            _index = list(_map.keys())
            _columns = [_map[_id] for _id in _index]
            _info = {}
            _row = _args['row'] if isinstance(_args['row'], list) else _args['row'].split('*')
            _index = np.array(_index)
            #
            # Sometimes _row doesn't have all the expected indexes; we compensate here.
            # This minimizes parsing errors that stem from disconnects between the configuration
            # and x12 element variations (the format is inconsistent).
            #
            if np.max(_index) > len(_row) - 1:
                _delta = 1 + np.max(_index) - len(_row)
                _row = _row + np.repeat('', _delta).tolist()
            _row = np.array(_row)
            _info = dict(zip(_columns, _row[_index].tolist()))
        else:
            #
            # Call the function that is intended to perform the parsing
            #
            _info = _pointer(row=_args['row'], document=_document, meta=_pointer.meta)
        #
        # @TODO: We should look into the object created and enforce that the specifications are met
        #
        return _info
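    #
    # Illustration only (not executed): with a hypothetical decorated parser whose
    # meta carries map = {1: 'claim_id', 2: 'charge_amount'}, a row such as
    #   ['CLP', 'A1', '100']
    # yields {'claim_id': 'A1', 'charge_amount': '100'}; positions missing from the
    # row are padded with empty strings before indexing. The map and row above are
    # made-up examples, not part of any shipped configuration.
    #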
    # def consolidate(self, **_args):
    #     """
    #     This function takes an object and adds it to the document, given metadata
    #     :document   document associated with a claim (processing the loops)
    #     :object
    #     :caller     attributes within the decorator
    #     """
    #     _document = _args['document'] if 'document' in _args else {}
    #     _info = _args['object']
    #     _meta = _args['meta']
    #     #
    #     # @TODO:
    #     # Apply parsing/casting function to the object retrieved
    #     # _apply(_info) #-- the object will be processed accordingly
    #     #
    #     #
    #     # @TODO:
    #     # The objects parsed must be augmented against the appropriate ones e.g: NM1 <- N1,N2,N3,N4
    #     # - Find a way to drive this from a configuration ...
    #     #
    #     if 'field' in _meta :  # hasattr(_meta,'field')
    #         _field = _meta['field']
    #         if not _field in _document :
    #             _item = {_field:_info}
    #         else:
    #             _item = self.merge(_document[_field],_info)
    #     elif 'container' in _meta :  # hasattr(_meta,'container')
    #         _label = _meta.container
    #         if not _label in _document :
    #             _item = {_label:[_info]}
    #         else:
    #             _item = _document[_label] + [_info]
    #     else:
    #         _item = _info
    #     if 'parent' in _meta :  # hasattr(_meta,'parent')
    #         _hasField = 'field' in _meta
    #         _hasParent = _meta['element'] in self._parents
    #         if _hasField and _hasParent :  # _meta.element in self._parents and hasattr(_meta,'field')
    #             self_last = _item
    #             pass
    #         else:
    #             for key in self._parents :
    #                 if _meta.element in self._parents[key] :
    #                     _ikey = list(self_last.keys())[0]
    #                     _oldinfo = self_last[_ikey]
    #                     if type(_oldinfo) != dict :
    #                         #
    #                         # Only applicable against a dictionary, not a list (sorry)
    #                         pass
    #                     else:
    #                         _item = {_ikey: self.merge(_oldinfo,_item)}
    #                     break
    #             pass
    #     return _item

class Location:
    @staticmethod
    def get(**_args):
        _path = _args['path']
        files = []
        if os.path.isdir(_path):
            for root, _dir, f in os.walk(_path):
                if f:
                    files += [os.sep.join([root, name]) for name in f]
            files = [path for path in files if os.path.isfile(path)]
        else:
            files = [_path]
        _chunks = 0 if 'chunks' not in _args else int(_args['chunks'])
        return files if not _chunks else np.array_split(files, _chunks)
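
#
# A minimal usage sketch, assuming the module is executed directly. The path
# './data/claims' and the presence of x12 files under it are hypothetical
# placeholders, not part of this module.
#
if __name__ == '__main__':
    _files = Location.get(path='./data/claims')   # list of files (or the single file path)
    _reader = Content()
    for _file in _files:
        _raw = _reader.read(filename=_file)       # raw x12 payload as a string
        _chunks, _x12 = _reader.split(_raw)       # claim-level chunks + detected document type
        print(_file, _x12, len(_chunks))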