Browse Source

bug fix ...

Steve Nyemba 2 years ago
parent
commit
3c643eb4df
3 changed files with 15 additions and 28 deletions
  1. 3 3
      README.md
  2. 10 23
      privacykit/risk.py
  3. 2 2
      setup.py

+ 3 - 3
README.md

@@ -27,19 +27,19 @@ Install this package using pip as follows :
 
 
 Stable :
 Stable :
     
     
-    pip install git+https://hiplab.mc.vanderbilt.edu/git/steve/deid-risk.git
+    pip install git+https://dev.the-phi.com/git/healthcareio/privacykit.git@release
     
     
     
     
 Latest Development (not fully tested):
 Latest Development (not fully tested):
     
     
-    pip install git+https://hiplab.mc.vanderbilt.edu/git/steve/deid-risk.git@risk
+    pip install git+https://dev.the-phi.com/git/healthcareio/privacykit.git@dev
     
     
 The framework will depend on pandas and numpy (for now). Below is a basic sample to get started quickly.
 The framework will depend on pandas and numpy (for now). Below is a basic sample to get started quickly.
 
 
 
 
     import numpy as np
     import numpy as np
     import pandas as pd
     import pandas as pd
-    import risk
+    import privacykit
 
 
     mydf = pd.DataFrame({"x":np.random.choice( np.random.randint(1,10),50),"y":np.random.choice( np.random.randint(1,10),50),"z":np.random.choice( np.random.randint(1,10),50),"r":np.random.choice( np.random.randint(1,10),50)  })
     mydf = pd.DataFrame({"x":np.random.choice( np.random.randint(1,10),50),"y":np.random.choice( np.random.randint(1,10),50),"z":np.random.choice( np.random.randint(1,10),50),"r":np.random.choice( np.random.randint(1,10),50)  })
     print (mydf.risk.evaluate())
     print (mydf.risk.evaluate())

+ 10 - 23
privacykit/risk.py

@@ -107,38 +107,25 @@ class deid :
         for size in np.arange(2,len(columns)) :
         for size in np.arange(2,len(columns)) :
             p = list(combinations(columns,size))            
             p = list(combinations(columns,size))            
             p = (np.array(p)[ np.random.choice( len(p), _policy_count)].tolist())
             p = (np.array(p)[ np.random.choice( len(p), _policy_count)].tolist())
-            flag = 'Policy_'+str(_index)
-            _index += 1
+            
+            
             for cols in p :
             for cols in p :
+                flag = 'Policy_'+str(_index)
                 r = self.evaluate(sample=sample,cols=cols,flag = flag)
                 r = self.evaluate(sample=sample,cols=cols,flag = flag)
                 p =  pd.DataFrame(1*sample.columns.isin(cols)).T
                 p =  pd.DataFrame(1*sample.columns.isin(cols)).T
                 p.columns = sample.columns
                 p.columns = sample.columns
                 o = pd.concat([o,r.join(p)])
                 o = pd.concat([o,r.join(p)])
-        
+                o['attr'] = ','.join(cols)
+                _index += 1
+        #
+        # We rename the 'flag' column to 'policies' and number the policies sequentially; the 'attr' column holds the comma-joined attribute names
+        #
            
            
-        # for i in np.arange(RUNS):
-        #     if 'strict' not in args or ('strict' in args and args['strict'] is False):
-        #         n = np.random.randint(2,k)
-        #     else:
-        #         n = args['field_count']
-        #     cols = np.random.choice(columns,n,replace=False).tolist()            
-        #     params = {'sample':sample,'cols':cols}
-        #     if pop is not None :
-        #         params['pop'] = pop
-        #     if pop_size > 0  :
-        #         params['pop_size'] = pop_size
-
-        #     r = self.evaluate(**params)
-        #     #
-        #     # let's put the policy in place
-        #     p =  pd.DataFrame(1*sample.columns.isin(cols)).T
-        #     p.columns = sample.columns
-        #     # o = o.append(r.join(p))
-        #     o = pd.concat([o,r.join(p)])
+      
 
 
             
             
         o.index = np.arange(o.shape[0]).astype(np.int64)
         o.index = np.arange(o.shape[0]).astype(np.int64)
-
+        o = o.rename(columns={'flag':'policies'})
         return o
         return o
     def evaluate(self, **args):
     def evaluate(self, **args):
         """
         """

+ 2 - 2
setup.py

@@ -4,11 +4,11 @@ This is a build file for the
 from setuptools import setup, find_packages
 from setuptools import setup, find_packages
  
  
 setup(
 setup(
-    name = "risk",
+    name = "privacykit",
     version = "0.8.1",
     version = "0.8.1",
     author = "Healthcare/IO - The Phi Technology LLC & Health Information Privacy Lab",
     author = "Healthcare/IO - The Phi Technology LLC & Health Information Privacy Lab",
     author_email = "info@the-phi.com",
     author_email = "info@the-phi.com",
     license = "MIT",
     license = "MIT",
-    packages=['risk'],
+    packages=['privacykit'],
     install_requires = ['numpy','pandas']
     install_requires = ['numpy','pandas']
     )
     )