py-why · amit-sharma · Aug 19, 2022 · Jul 20, 2022 · Jul 28, 2022 · Jul 28, 2022
diff --git a/docs/source/example_notebooks/dowhy_simple_example.ipynb b/docs/source/example_notebooks/dowhy_simple_example.ipynb
@@ -366,7 +366,7 @@
    "outputs": [],
    "source": [
     "res_subset=model.refute_estimate(identified_estimand, estimate,\n",
-    "        method_name=\"data_subset_refuter\", subset_fraction=0.9)\n",
+    "        method_name=\"data_subset_refuter\", show_progress_bar=True, subset_fraction=0.9)\n",
     "print(res_subset)"
    ]
   },
@@ -388,7 +388,7 @@
    "outputs": [],
    "source": [
     "res_subset=model.refute_estimate(identified_estimand, estimate,\n",
-    "        method_name=\"data_subset_refuter\", subset_fraction=0.9, random_seed = 1, n_jobs=-1, verbose=10)\n",
+    "        method_name=\"data_subset_refuter\", show_progress_bar=True, subset_fraction=0.9, random_seed = 1, n_jobs=-1, verbose=10)\n",
     "print(res_subset)"
    ]
   },
@@ -488,7 +488,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3.7.10 ('venvrl')",
+   "display_name": "Python 3.8.13 ('base')",
    "language": "python",
    "name": "python3"
   },
@@ -502,7 +502,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.10"
+   "version": "3.8.13"
   },
   "toc": {
    "base_numbering": 1,
@@ -516,6 +516,11 @@
    "toc_position": {},
    "toc_section_display": true,
    "toc_window_display": false
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "f1b38bb04d81fd42ee2e2f288098ef35128fc088ebd616248212bf67c262eb73"
+   }
   }
  },
  "nbformat": 4,

diff --git a/dowhy/causal_estimators/propensity_score_estimator.py b/dowhy/causal_estimators/propensity_score_estimator.py
@@ -71,10 +71,15 @@ def __init__(self, *args, propensity_score_model=None,
             raise Exception(error_msg)
 
     def _refresh_propensity_score(self):
+        '''
+            Fit the propensity score model (logistic regression by default) and store the predicted scores in the data when recalculation is enabled.
+            Invoked from the '_estimate_effect' method of various propensity score subclasses when the propensity score is not pre-computed.
+        '''
         if self.recalculate_propensity_score is True:
             if self.propensity_score_model is None:
                 self.propensity_score_model = linear_model.LogisticRegression()
-            self.propensity_score_model.fit(self._observed_common_causes, self._treatment)
+            treatment_reshaped = np.ravel(self._treatment)
+            self.propensity_score_model.fit(self._observed_common_causes, treatment_reshaped)
             self._data[self.propensity_score_column] = self.propensity_score_model.predict_proba(self._observed_common_causes)[:, 1]
         else:
             # check if user provides the propensity score column

diff --git a/dowhy/causal_model.py b/dowhy/causal_model.py
@@ -387,7 +387,7 @@ def do(self, x, identified_estimand, method_name=None,
                 raise NotImplementedError
         return estimate
 
-    def refute_estimate(self, estimand, estimate, method_name=None, **kwargs):
+    def refute_estimate(self, estimand, estimate, method_name=None, show_progress_bar=False, **kwargs):
         """Refute an estimated causal effect.
 
         If method_name is provided, uses the provided method. In the future, we may support automatic selection of suitable refutation tests. Following refutation methods are supported.
@@ -399,6 +399,7 @@ def refute_estimate(self, estimand, estimate, method_name=None, **kwargs):
         :param estimand: target estimand, an instance of the IdentifiedEstimand class (typically, the output of identify_effect)
         :param estimate: estimate to be refuted, an instance of the CausalEstimate class (typically, the output of estimate_effect)
         :param method_name: name of the refutation method
+        :param show_progress_bar: (optional) Boolean flag on whether to show a progress bar while the refutation runs (default: False)
         :param kwargs:  (optional) additional arguments that are passed directly to the refutation method. Can specify a random seed here to ensure reproducible results ('random_seed' parameter). For method-specific parameters, consult the documentation for the specific method. All refutation methods are in the causal_refuters subpackage.
 
         :returns: an instance of the RefuteResult class
@@ -418,7 +419,7 @@ def refute_estimate(self, estimand, estimate, method_name=None, **kwargs):
             estimate=estimate,
             **kwargs
         )
-        res = refuter.refute_estimate()
+        res = refuter.refute_estimate(show_progress_bar)
         return res
 
     def view_model(self, layout="dot", size=(8, 6), file_name="causal_model"):

diff --git a/dowhy/causal_refuter.py b/dowhy/causal_refuter.py
@@ -214,7 +214,7 @@ def perform_normal_distribution_test(self, estimate, simulations):
 
         return p_value
 
-    def refute_estimate(self):
+    def refute_estimate(self, show_progress_bar=False):
         raise NotImplementedError
 
 

diff --git a/dowhy/causal_refuters/add_unobserved_common_cause.py b/dowhy/causal_refuters/add_unobserved_common_cause.py
@@ -4,6 +4,9 @@
 import pandas as pd
 import scipy.stats
 
+import tqdm
+from tqdm.auto import tqdm  # picks the notebook widget or the console bar depending on the environment
+
 import math
 import statsmodels.api as sm
 from sklearn.preprocessing import StandardScaler
@@ -179,7 +182,7 @@ def infer_default_kappa_y(self, len_kappa_y = 10):
         else:
             return np.arange(min_coeff, max_coeff, step)
 
-    def refute_estimate(self):
+    def refute_estimate(self, show_progress_bar=False):
         """
         This function attempts to add an unobserved common cause to the outcome and the treatment. At present, we have implemented the behavior for one dimensional behaviors for continuous
         and binary variables. This function can either take single valued inputs or a range of inputs. The function then looks at the data type of the input and then decides on the course of
@@ -229,7 +232,8 @@ def refute_estimate(self):
 
                 results_matrix = np.random.rand(len(self.kappa_t),len(self.kappa_y)) # Matrix to hold all the results of NxM
                 orig_data = copy.deepcopy(self._data)
-                for i in range(len(self.kappa_t)):
+
+                for i in tqdm(range(len(self.kappa_t)), colour='green', disable = not show_progress_bar, desc="Refuting Estimates: "):
                     for j in range(len(self.kappa_y)):
                         new_data = self.include_confounders_effect(orig_data, self.kappa_t[i], self.kappa_y[j])
                         new_estimator = CausalEstimator.get_estimator_object(new_data, self._target_estimand, self._estimate)
@@ -282,7 +286,7 @@ def refute_estimate(self):
                 outcomes = np.random.rand(len(self.kappa_t))
                 orig_data = copy.deepcopy(self._data)
 
-                for i in range(0,len(self.kappa_t)):
+                for i in tqdm(range(0,len(self.kappa_t)), colour='green', disable = not show_progress_bar, desc="Refuting Estimates: "):
                     new_data = self.include_confounders_effect(orig_data, self.kappa_t[i], self.kappa_y)
                     new_estimator = CausalEstimator.get_estimator_object(new_data, self._target_estimand, self._estimate)
                     new_effect = new_estimator.estimate_effect()
@@ -316,7 +320,7 @@ def refute_estimate(self):
                 outcomes = np.random.rand(len(self.kappa_y))
                 orig_data = copy.deepcopy(self._data)
 
-                for i in range(0, len(self.kappa_y)):
+                for i in tqdm(range(0,len(self.kappa_y)), colour='green', disable = not show_progress_bar, desc="Refuting Estimates: "):
                     new_data = self.include_confounders_effect(orig_data, self.kappa_t, self.kappa_y[i])
                     new_estimator = CausalEstimator.get_estimator_object(new_data, self._target_estimand, self._estimate)
                     new_effect = new_estimator.estimate_effect()

diff --git a/dowhy/causal_refuters/data_subset_refuter.py b/dowhy/causal_refuters/data_subset_refuter.py
@@ -2,6 +2,9 @@
 import logging
 from joblib import Parallel, delayed
 
+import tqdm
+from tqdm.auto import tqdm  # picks the notebook widget or the console bar depending on the environment
+
 from dowhy.causal_refuter import CausalRefuter, CausalRefutation
 from dowhy.causal_estimator import CausalEstimator
 
@@ -37,7 +40,7 @@ def __init__(self, *args, **kwargs):
 
         self.logger = logging.getLogger(__name__)
 
-    def refute_estimate(self):
+    def refute_estimate(self, show_progress_bar=False):
 
         sample_estimates = np.zeros(self._num_simulations)
         self.logger.info("Refutation over {} simulated datasets of size {} each"
@@ -60,7 +63,7 @@ def refute_once():
         sample_estimates = Parallel(
             n_jobs=self._n_jobs,
             verbose=self._verbose
-        )(delayed(refute_once)() for _ in range(self._num_simulations))
+        )(delayed(refute_once)() for _ in tqdm(range(self._num_simulations), colour='green', disable = not show_progress_bar, desc="Refuting Estimates: "))
         sample_estimates = np.array(sample_estimates)
 
         refute = CausalRefutation(

diff --git a/dowhy/causal_refuters/placebo_treatment_refuter.py b/dowhy/causal_refuters/placebo_treatment_refuter.py
@@ -5,6 +5,9 @@
 import logging
 from joblib import Parallel, delayed
 
+import tqdm
+from tqdm.auto import tqdm  # picks the notebook widget or the console bar depending on the environment
+
 
 from dowhy.causal_refuter import CausalRefutation
 from dowhy.causal_refuter import CausalRefuter
@@ -52,7 +55,7 @@ def __init__(self, *args, **kwargs):
         self.logger = logging.getLogger(__name__)
 
 
-    def refute_estimate(self):
+    def refute_estimate(self, show_progress_bar=False):
         # only permute is supported for iv methods
         if self._target_estimand.identifier_method.startswith("iv"):
             if self._placebo_type != "permute":
@@ -145,7 +148,9 @@ def refute_once():
         sample_estimates = Parallel(
             n_jobs=self._n_jobs, 
             verbose=self._verbose
-        )(delayed(refute_once)() for _ in range(self._num_simulations))
+        )(delayed(refute_once)() for _ in tqdm(range(self._num_simulations), disable = not show_progress_bar, colour='green', desc="Refuting Estimates: "))
+
+        # NOTE(review): tqdm wraps the task generator above, so the bar presumably tracks task dispatch rather than completion - confirm.
         sample_estimates = np.array(sample_estimates)
 
         # Restoring the value of iv_instrument_name

diff --git a/dowhy/causal_refuters/random_common_cause.py b/dowhy/causal_refuters/random_common_cause.py
@@ -4,6 +4,9 @@
 import logging
 from joblib import Parallel, delayed
 
+import tqdm
+from tqdm.auto import tqdm  # picks the notebook widget or the console bar depending on the environment
+
 from dowhy.causal_refuter import CausalRefutation
 from dowhy.causal_refuter import CausalRefuter
 from dowhy.causal_estimator import CausalEstimator
@@ -33,7 +36,7 @@ def __init__(self, *args, **kwargs):
 
         self.logger = logging.getLogger(__name__)
 
-    def refute_estimate(self):
+    def refute_estimate(self, show_progress_bar=False):
         num_rows = self._data.shape[0]
         self.logger.info("Refutation over {} simulated datasets, each with a random common cause added"
                          .format(self._num_simulations))
@@ -58,7 +61,7 @@ def refute_once():
         sample_estimates = Parallel(
             n_jobs=self._n_jobs,
             verbose=self._verbose
-        )(delayed(refute_once)() for _ in range(self._num_simulations))
+        )(delayed(refute_once)() for _ in tqdm(range(self._num_simulations), colour='green', disable = not show_progress_bar, desc="Refuting Estimates: "))
         sample_estimates = np.array(sample_estimates)
 
         refute = CausalRefutation(

diff --git a/requirements.txt b/requirements.txt
@@ -6,4 +6,5 @@ networkx>=2.0
 sympy>=1.4
 scikit-learn
 pydot>=1.4
-joblib>=1.0.0
+joblib>=1.0.0
+tqdm>=4.64.0