12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273 |
- """
- This file is designed to retrieve information on a folder
- {files,size,hash}
- """
- import subprocess
- import sys
- import re
- import os
- import pandas as pd
- import io
- import datetime
- import glob
class Util:
    """Parse the raw text printed by the shell commands that ``read`` runs."""

    # A number (decimal point or decimal comma) followed by an optional
    # alphabetic magnitude suffix, e.g. ``4.0K``, ``1.5G``, ``0B`` or ``0``.
    _SIZE_PATTERN = re.compile(r'^\s*([0-9]+(?:[.,][0-9]+)?)\s*([A-Za-z]*)\s*$')

    # First letter of the suffix -> unit label stored in the result.
    _UNITS = {
        'B': 'B',
        'K': 'KB',
        'M': 'MB',
        'G': 'GB',
        'T': 'TB',
        'P': 'PB',
        'E': 'EB',
    }

    def size(self, stream):
        """
        Split a human-readable size such as ``4.0K`` into value and units.

        :param stream: size token, i.e. the first column of ``du -sh`` output
        :return: ``{"size": <float>, "units": <label>}``; the value is NOT
                 rescaled, only the suffix is expanded (``K`` -> ``KB``,
                 ``G`` -> ``GB``, ``T`` -> ``TB`` ...)
        :raises ValueError: when ``stream`` is empty or is not a size token
        """
        match = self._SIZE_PATTERN.match(stream or '')
        if match is None:
            raise ValueError('unable to parse size from %r' % (stream,))
        number, suffix = match.groups()
        # A decimal comma is normalised so float() accepts it.
        value = float(number.replace(',', '.'))
        if not suffix:
            # A bare number carries no magnitude suffix; it is taken as bytes.
            units = 'B'
        else:
            # Unrecognised suffixes keep the historical 'MB' fallback.
            units = self._UNITS.get(suffix[0].upper(), 'MB')
        return {"size": value, "units": units}

    def content(self, stream):
        """
        Extract the first space-delimited token of ``stream``.

        ``read`` passes the first line of ``md5sum`` output here, so the
        token is the hash that precedes the file marker.

        :param stream: text whose first token is wanted
        :return: ``{"content": <token>}`` with surrounding whitespace removed
        """
        token = stream.partition(' ')[0]
        return {"content": token.strip()}
def _count_files(path):
    """Count the non-directory entries found under ``path`` (a folder, a file or a glob pattern)."""
    # A path that exists is used literally; otherwise it is expanded as a pattern.
    roots = [path] if os.path.exists(path) else glob.glob(path)
    total = 0
    for root in roots:
        if os.path.isdir(root):
            total += sum(len(filenames) for _, _, filenames in os.walk(root))
        else:
            total += 1
    return total


def read(**args):
    """
    Collect size, content hash and file count for a folder.

    The path can also be a shell pattern: it is substituted into the shell
    commands as-is, so the shell expands it. When ``du`` prints several
    lines only the first one is used.

    :param path: (keyword, required) folder to inspect
    :return: single-row ``pandas.DataFrame`` with the columns
             size, units, content, path, files, name, node, date, time.
             ``size`` and ``units`` are ``None`` when ``du`` prints nothing
             (e.g. the path does not exist).
    :raises KeyError: when ``path`` is not provided
    """
    path = args['path']
    commands = {
        "size": "du -sh :path",
        "content": "find :path -type f -exec md5sum {} + | sort -z|md5sum",
    }
    util = Util()
    r = {}
    for key, template in commands.items():
        # NOTE(security): the path is interpolated unquoted into a shell
        # command so that shell patterns expand; it must never come from
        # untrusted input.
        completed = subprocess.run(
            template.replace(':path', path),
            shell=True,
            stdout=subprocess.PIPE,
            encoding='utf-8',
        )
        first_line = completed.stdout.split('\n')[0]
        if key == 'size':
            # du separates the size from the path with a tab.
            token = first_line.split('\t')[0]
            if token.strip():
                parsed = util.size(token)
            else:
                parsed = {"size": None, "units": None}
        else:
            # The first token of the md5sum output is the hash itself.
            parsed = util.content(first_line)
        r.update(parsed)

    r['path'] = path
    r['files'] = _count_files(path)
    # Trailing separators are dropped so "a/b/" is named "b" instead of "".
    r['name'] = os.path.basename(path.rstrip(os.sep))
    r['node'] = os.uname()[1]
    # One timestamp for both fields so date and time cannot straddle midnight.
    now = datetime.datetime.now()
    r['date'] = now.strftime('%m-%d-%Y')
    r['time'] = now.strftime('%H:%M:%S')

    return pd.DataFrame([r])
|