code
/
smart-top
镜像来自 https://dev.the-phi.com/git/library/smart-top


			
							12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
							"""
This module retrieves summary information about a folder:
the number of files, the size on disk and a hash of the content {files,size,hash}
"""
import subprocess
import sys
import re
import os
import pandas as pd
import io
import datetime
import glob

class Util :
    """
    Helpers that turn the raw text printed by shell commands into dictionaries.
    """
    # Unit label keyed by the first (upper-cased) letter of the size suffix
    _UNITS = {'B': 'B', 'K': 'KB', 'M': 'MB', 'G': 'GB', 'T': 'TB', 'P': 'PB', 'E': 'EB'}
    # A number (dot or comma as decimal separator) followed by an optional alphabetic suffix
    _SIZE = re.compile(r'^\s*(\d*[.,]?\d+)\s*([A-Za-z]*)\s*$')

    def size(self,stream):
        """
        Parse a human-readable size such as ``4.0K``, ``12M`` or ``1.5G``.

        :param stream:  size text i.e a number optionally followed by a unit suffix
        :return:        {"size":<float>,"units":<label>}
                        Empty input and a number without suffix are reported in bytes ("B").
        :raises ValueError: if the text is not empty and is not a number with an optional suffix
        """
        text = (stream or '').strip()
        if not text:
            #
            # Nothing to parse (the command printed nothing), we report an empty size instead of failing
            return {"size":0.0,"units":"B"}

        found = self._SIZE.match(text)
        if found is None:
            raise ValueError("Unable to parse size from %r" % (stream,))

        number, suffix = found.groups()
        value = float(number.replace(',', '.'))
        if not suffix:
            units = 'B'
        else:
            #
            # Unrecognized suffixes fall back to 'MB', this was the default of the previous implementation
            units = self._UNITS.get(suffix[0].upper(), 'MB')
        return {"size":value,"units":units}

    def content(self,stream):
        """
        Extract the first space-delimited token of a line (e.g the digest printed by md5sum).

        :param stream:  line of text
        :return:        {"content":<first token>}
        """
        return {"content":stream.split(' ')[0].strip()}


def read(**args):
    """
    Collect summary information about a folder and return it as a one-row data-frame.

    The path is substituted as-is into shell commands, so it can also contain
    patterns that the shell expands. Only the first line printed by each command is used.

    :param path:    folder (or shell pattern) to inspect, required keyword argument
    :return:        pandas.DataFrame with a single row and the columns
                    size, units, content, path, files, name, node, date, time
    :raises KeyError: if ``path`` is not provided
    """
    path = args['path']
    cmd = {"size":"du -sh :path","content":"find :path -type f -exec md5sum {} + | sort -z|md5sum"}
    r = {}
    util = Util()
    for key, template in cmd.items():
        #
        # NOTE(security): the path is deliberately left unquoted so the shell can expand patterns,
        # as a result it must never come from an untrusted source.
        handler = subprocess.run(template.replace(':path', path), shell=True, stdout=subprocess.PIPE, encoding='utf-8')
        first_row = handler.stdout.split('\n')[0]
        if key == 'size':
            # The text before the first tab is the size field of the du output
            r.update(util.size(first_row.split('\t')[0]))
        elif key == 'content':
            #
            # There is a hash key that is generated and should be extracted
            r.update(util.content(first_row))

    root = path if path.endswith('/') else path + os.sep
    #
    # The same timestamp is used for date and time, so that they can not disagree around midnight
    now = datetime.datetime.now()
    r['path'] = path
    # NOTE(review): a recursive '**' pattern matches directories as well as files, confirm this is the intended count
    r['files'] = sum(1 for _ in glob.iglob(root + '**/**', recursive=True))
    # Trailing separators are dropped first, otherwise the name of "/a/b/" would be empty
    r['name'] = os.path.basename(path.rstrip(os.sep))
    r['node'] = os.uname()[1]
    r['date'] = now.strftime('%m-%d-%Y')
    r['time'] = now.strftime('%H:%M:%S')

    return pd.DataFrame([r])