__init__.py 2.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
  1. """
  2. This file is designed to retrieve information on a folder
  3. {files,size,hash}
  4. """
  5. import subprocess
  6. import sys
  7. import re
  8. import os
  9. import pandas as pd
  10. import io
  11. import datetime
  12. import glob
  13. class Util :
  14. def size(self,stream):
  15. PATTERN = '(^.+)([A-Z]+$)'
  16. value,units = re.match('^(.+)([A-Z]+$)',stream).groups()
  17. value = float(value)
  18. if 'G' == units :
  19. units = 'GB'
  20. # value *= 1000
  21. elif 'K' == units:
  22. units = 'KB'
  23. # value /= 1000
  24. else :
  25. units = 'MB'
  26. # units = 'MB'
  27. return {"size":value,"units":units}
  28. def content(self,stream):
  29. return {"content":stream.split(' ')[0].strip()}
  30. def read(**args):
  31. """
  32. The path can also take in regular expressions
  33. """
  34. cmd = {"size":"du -sh :path","content":"find :path -type f -exec md5sum {} + | sort -z|md5sum"}
  35. r = {}
  36. util = Util()
  37. for key in cmd :
  38. _cmd = cmd[key]
  39. handler = subprocess.Popen(_cmd.replace(':path',args['path']),shell=True,stdout=subprocess.PIPE,encoding='utf-8')
  40. stream = handler.communicate()[0]
  41. if sys.version_info[0] > 2 :
  42. rows = str(stream).split('\n')
  43. else:
  44. rows = stream.split('\n')
  45. if key == 'size' :
  46. rows = rows[0]
  47. rows = util.size(rows.split('\t')[0])
  48. elif key == 'content' :
  49. #
  50. # There is a hash key that is generated and should be extracted
  51. rows = rows[0]
  52. rows = util.content(rows)
  53. r = dict(r, **rows)
  54. N = 0 if not os.path.exists(args['path']) else len( os.listdir(args['path']))
  55. path = args['path'] if args['path'].endswith('/')else args['path']+os.sep
  56. r['path'] = args['path']
  57. r['files']= len([filename for filename in glob.iglob(path+'**/**', recursive=True)])
  58. r['name'] = args['path'].split(os.sep)[-1:][0]
  59. r['node'] = os.uname()[1]
  60. r['date'] = datetime.datetime.now().strftime('%m-%d-%Y')
  61. r['time'] = datetime.datetime.now().strftime('%H:%M:%S')
  62. return pd.DataFrame([r])
  63. pass