document.py 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260
  1. """
  2. This file encapsulates the functions needed to build a document
  3. """
  4. import numpy as np
  5. import copy
  6. class Builder:
  7. __doc__ = """
  8. This class is intended to create and manipulate objects
  9. :merge The class merges two objects and accounts for attributes that are lists
  10. :parent returns the parent for a given object
  11. """
  12. def __init__(self,**_args):
  13. self._last = {}
  14. self._plugins = copy.deepcopy(_args['plugins'])
  15. self._parents = copy.deepcopy(_args['parents'])
  16. self._loop = {}
  17. self._logger = None if 'logger' not in _args else _args['logger']
  18. def reset (self):
  19. self._last = {}
  20. self._loop = {}
  21. def parent(self,**_args):
  22. """
  23. This function returns the parent item of an object
  24. :meta meta data of a decorated/annotated function
  25. """
  26. _meta = _args['meta']
  27. # _item = None
  28. if _meta['parent'] :
  29. _id = _meta['parent']
  30. if _id :
  31. return self._last[_id] if _id in self._last else None
  32. return None
  33. def count(self,_element):
  34. if _element not in self._loop :
  35. self._loop[_element] = 0
  36. self._loop[_element] += 1
  37. def pointer(self,**_args):
  38. """
  39. This function returns a pointer associated with a row element
  40. @TODO: Make sure we know what kind of file we are processing (it would help suppress the loop)
  41. """
  42. _id = _args['row'][0] if 'row' in _args else _args['element']
  43. _filetype = _args['x12']
  44. _pointer = None
  45. if _id in self._plugins[_filetype] :
  46. _pointer = self._plugins[_filetype][_id]
  47. else:
  48. for _x12 in self._plugins :
  49. if _id in self._plugins[_x12] :
  50. _pointer = self._plugins[_x12][_id]
  51. break
  52. return _pointer
  53. def field(self,**_args) :
  54. _row = _args['row']
  55. _meta= _args['meta']
  56. _field = None
  57. if _meta['parent'] :
  58. _field = self.parent(meta=_meta)['field']
  59. if 'field' in _meta or 'container' in _meta :
  60. _field = _meta['field'] if 'field' in _meta else _meta['container']
  61. if 'anchor' in _meta :
  62. _anchor = _meta['anchor']
  63. for key in _anchor :
  64. if key == _row[1].strip() :
  65. _field = _anchor[key]
  66. break
  67. return _field
  68. def merge (self,_x,_y):
  69. """
  70. This function will merge two objects _x, _y
  71. """
  72. _zcols = list(set(_x.keys()) & set(_y.keys())) #--common columns
  73. if _zcols :
  74. _out = dict(_x,**{})
  75. for _key in list(_y.keys()) :
  76. if _key not in _zcols and _key:
  77. _out[_key] = _y[_key]
  78. else:
  79. if type(_out[_key]) == list :
  80. for value in _y[_key] :
  81. if value not in _out[_key] :
  82. _out[_key].append(value)
  83. # _out[_key] += _y[_key]
  84. elif type(_out[_key]) == dict:
  85. _out[_key] = dict(_out[_key],**_y[_key])
  86. else:
  87. _out[_key] = _y[_key]
  88. return _out
  89. else:
  90. return dict(_x,**_y)
  91. def parse (self,**_args):
  92. """
  93. This function will perform parsing on behalf of the plugin by relying on map function
  94. :row raw x12 row
  95. :meta meta data of the plugin function
  96. """
  97. #-- Loop Markers
  98. _row = _args['row']
  99. _map = _args['meta']['map']
  100. # _map = self.pointer(row=_row).meta['map']
  101. _index = list(_map.keys())
  102. _columns = [] #[_map[_id] for _id in _index ]
  103. for _id in _index :
  104. _name = _map[_id]
  105. if type(_name) == list :
  106. _columns += _name
  107. _i = _index.index(_id)
  108. _index = (_index[:_i] + np.repeat(_index[_i], len(_name)).tolist()+_index[_i+1:])
  109. else:
  110. _columns.append(_name)
  111. _info = {}
  112. _index = np.array(_index).astype(int)
  113. # _document = _args['document']
  114. if np.max(_index) > len(_row) -1 :
  115. _delta = 1 + np.max(_index) - len(_row)
  116. _row = _row + np.repeat('',_delta).tolist()
  117. _row = np.array(_row)
  118. try:
  119. _info = dict(zip(_columns,_row[_index].tolist()))
  120. except Exception as e:
  121. # print (_row)
  122. # print ( e)
  123. pass
  124. return _info
  125. def meta (self,**_args):
  126. _row = _args['row']
  127. _id = _row[0]
  128. _meta = None
  129. for key in self._plugins :
  130. _items = self._plugins[key]
  131. if _id in _items :
  132. _meta = (_items[_id].meta)
  133. break
  134. return _meta
  135. def update(self,**_args):
  136. _element = _args['row'][0]
  137. if _element in self._parents :
  138. _meta = self.meta(row=_args['row'])
  139. if 'field' not in _meta :
  140. _field = self.field(row=_args['row'],meta=_meta)
  141. else:
  142. _field = _meta['field']
  143. self._last[_element] = {'data':_args['data'],'field':_field}
  144. def bind(self,**_args):
  145. """
  146. This function is intended to make an object out of an element
  147. :row raw row of x12
  148. :document object that is the document
  149. """
  150. _row = _args['row']
  151. _filetype = _args['x12']
  152. _id = _row[0]
  153. self.count(_id)
  154. _pointer = self.pointer(row=_row,x12=_filetype)
  155. _parent = None
  156. _data = {}
  157. if not _pointer :
  158. return None,None
  159. #
  160. # Should we use the built-in parser or not
  161. if _pointer and 'map' in _pointer.meta :
  162. _data = self.parse(row=_row,meta=_pointer.meta)
  163. #
  164. # This function will be used as formatter (at least)
  165. # We will also insure that the current element is not the last one
  166. _out = _pointer(row=_row,data=_data, meta=_pointer.meta)
  167. _data = _data if _out is None else _out
  168. self.update(row = _row, data=_data) #-- If this element is considered a parent, we store it
  169. return _data, _pointer.meta
  170. def build (self,**_args):
  171. """
  172. This function attemps to place a piece of data within a document
  173. """
  174. _meta = _args['meta']
  175. _data = _args['data']
  176. _row = _args['row']
  177. _document = _args['document']
  178. # if _meta['parent'] :
  179. # _field = self.parent(meta=_meta)['field']
  180. # elif 'field' in _meta :
  181. # _field = _meta['field']
  182. # elif 'container' in _meta :
  183. # _field = _meta['container']
  184. # if type(_document[_field]) != list :
  185. # _data = self.merge(_document[_field],_data)
  186. # _document[_field] = []
  187. # elif 'anchor' in _meta:
  188. # _field = self.field(row=_row,meta=_meta)
  189. # else:
  190. # _field = None
  191. _field = self.field(meta=_meta,row=_row)
  192. if _field :
  193. if 'container' in _meta and type(_document[_field]) != list :
  194. _document[_field] = []
  195. if _field and _document :
  196. if _field not in _document :
  197. _document[_field] =_data
  198. pass
  199. else:
  200. if 'container' in _meta :
  201. _document[_field].append(_data)
  202. else:
  203. _document[_field] = self.merge(_document[_field],_data)
  204. else:
  205. if not _field and 'anchor' in _meta :
  206. #
  207. # We should determine if the element is either a parent or has a parent
  208. # This would allow us to avoid having runaway attributes and undermine structural integrity
  209. #
  210. #
  211. # The element has NOT been specified by the plugin (alas)
  212. # For this case we would advise writing a user-defined plugin to handle this case
  213. #
  214. print (self._logger)
  215. if self._logger :
  216. print (['....................'])
  217. self._logger.log(action='missing-plugin',module='build',data={'element':_row[0],'anchor':_row[1]})
  218. return _document
  219. pass
  220. # print ([_row[0],set(_data) - set(_document.keys())])
  221. _document = self.merge(_document,_data)
  222. return _document