Skip to content

Commit

Permalink
Progress bar for refutation tests (#567)
Browse files Browse the repository at this point in the history
* fixed warnings and progress bar improvements

Fixed the warnings arising in propensity score estimators and added optional progress bars for refuters

Signed-off-by: Amey Varhade <ameyvarhade@gmail.com>

* code structure improvements

Signed-off-by: Amey Varhade <ameyvarhade@gmail.com>

* removed vscode references

* Added progress bar for dummy outcome refuter

Signed-off-by: Amey Varhade <ameyvarhade@gmail.com>

Signed-off-by: Amey Varhade <ameyvarhade@gmail.com>
Co-authored-by: Amit Sharma <amit_sharma@live.com>
  • Loading branch information
yemaedahrav and amit-sharma committed Aug 19, 2022
1 parent a38a03f commit f946386
Show file tree
Hide file tree
Showing 9 changed files with 42 additions and 22 deletions.
12 changes: 6 additions & 6 deletions docs/source/example_notebooks/dowhy_simple_example.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@
"metadata": {},
"outputs": [],
"source": [
"res_random=model.refute_estimate(identified_estimand, estimate, method_name=\"random_common_cause\")\n",
"res_random=model.refute_estimate(identified_estimand, estimate, method_name=\"random_common_cause\", show_progress_bar=True)\n",
"print(res_random)"
]
},
Expand All @@ -348,7 +348,7 @@
"outputs": [],
"source": [
"res_placebo=model.refute_estimate(identified_estimand, estimate,\n",
" method_name=\"placebo_treatment_refuter\", placebo_type=\"permute\")\n",
" method_name=\"placebo_treatment_refuter\", show_progress_bar=True, placebo_type=\"permute\")\n",
"print(res_placebo)"
]
},
Expand All @@ -366,7 +366,7 @@
"outputs": [],
"source": [
"res_subset=model.refute_estimate(identified_estimand, estimate,\n",
" method_name=\"data_subset_refuter\", subset_fraction=0.9)\n",
" method_name=\"data_subset_refuter\", show_progress_bar=True, subset_fraction=0.9)\n",
"print(res_subset)"
]
},
Expand All @@ -388,7 +388,7 @@
"outputs": [],
"source": [
"res_subset=model.refute_estimate(identified_estimand, estimate,\n",
" method_name=\"data_subset_refuter\", subset_fraction=0.9, random_seed = 1, n_jobs=-1, verbose=10)\n",
" method_name=\"data_subset_refuter\", show_progress_bar=True, subset_fraction=0.9, random_seed = 1, n_jobs=-1, verbose=10)\n",
"print(res_subset)"
]
},
Expand Down Expand Up @@ -488,7 +488,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.7.10 ('venvrl')",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -502,7 +502,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.10"
"version": "3.8.13"
},
"toc": {
"base_numbering": 1,
Expand Down
7 changes: 6 additions & 1 deletion dowhy/causal_estimators/propensity_score_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,15 @@ def __init__(self, *args, propensity_score_model=None,
raise Exception(error_msg)

def _refresh_propensity_score(self):
'''
A custom estimator based on the way the propensity score estimates are to be used.
Invoked from the '_estimate_effect' method of various propensity score subclasses when the propensity score is not pre-computed.
'''
if self.recalculate_propensity_score is True:
if self.propensity_score_model is None:
self.propensity_score_model = linear_model.LogisticRegression()
self.propensity_score_model.fit(self._observed_common_causes, self._treatment)
treatment_reshaped = np.ravel(self._treatment)
self.propensity_score_model.fit(self._observed_common_causes, treatment_reshaped)
self._data[self.propensity_score_column] = self.propensity_score_model.predict_proba(self._observed_common_causes)[:, 1]
else:
# check if user provides the propensity score column
Expand Down
5 changes: 3 additions & 2 deletions dowhy/causal_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,7 @@ def do(self, x, identified_estimand, method_name=None,
raise NotImplementedError
return estimate

def refute_estimate(self, estimand, estimate, method_name=None, **kwargs):
def refute_estimate(self, estimand, estimate, method_name=None, show_progress_bar=False, **kwargs):
"""Refute an estimated causal effect.
If method_name is provided, uses the provided method. In the future, we may support automatic selection of suitable refutation tests. Following refutation methods are supported.
Expand All @@ -399,6 +399,7 @@ def refute_estimate(self, estimand, estimate, method_name=None, **kwargs):
:param estimand: target estimand, an instance of the IdentifiedEstimand class (typically, the output of identify_effect)
:param estimate: estimate to be refuted, an instance of the CausalEstimate class (typically, the output of estimate_effect)
:param method_name: name of the refutation method
:param show_progress_bar: Boolean flag on whether to show a progress bar
:param kwargs: (optional) additional arguments that are passed directly to the refutation method. Can specify a random seed here to ensure reproducible results ('random_seed' parameter). For method-specific parameters, consult the documentation for the specific method. All refutation methods are in the causal_refuters subpackage.
:returns: an instance of the RefuteResult class
Expand All @@ -418,7 +419,7 @@ def refute_estimate(self, estimand, estimate, method_name=None, **kwargs):
estimate=estimate,
**kwargs
)
res = refuter.refute_estimate()
res = refuter.refute_estimate(show_progress_bar)
return res

def view_model(self, layout="dot", size=(8, 6), file_name="causal_model"):
Expand Down
3 changes: 2 additions & 1 deletion dowhy/causal_refuter.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ class CausalRefuter:
"""
# Default value for the number of simulations to be conducted
DEFAULT_NUM_SIMULATIONS = 100
PROGRESS_BAR_COLOR = 'green'

def __init__(self, data, identified_estimand, estimate, **kwargs):
self._data = data
Expand Down Expand Up @@ -214,7 +215,7 @@ def perform_normal_distribution_test(self, estimate, simulations):

return p_value

def refute_estimate(self):
def refute_estimate(self, show_progress_bar=False):
raise NotImplementedError


Expand Down
11 changes: 7 additions & 4 deletions dowhy/causal_refuters/add_unobserved_common_cause.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import pandas as pd
import scipy.stats

from tqdm.auto import tqdm

import math
import statsmodels.api as sm
from sklearn.preprocessing import StandardScaler
Expand Down Expand Up @@ -179,7 +181,7 @@ def infer_default_kappa_y(self, len_kappa_y = 10):
else:
return np.arange(min_coeff, max_coeff, step)

def refute_estimate(self):
def refute_estimate(self, show_progress_bar=False):
"""
This function attempts to add an unobserved common cause to the outcome and the treatment. At present, we have implemented the behavior for one dimensional behaviors for continuous
and binary variables. This function can either take single valued inputs or a range of inputs. The function then looks at the data type of the input and then decides on the course of
Expand Down Expand Up @@ -229,7 +231,8 @@ def refute_estimate(self):

results_matrix = np.random.rand(len(self.kappa_t),len(self.kappa_y)) # Matrix to hold all the results of NxM
orig_data = copy.deepcopy(self._data)
for i in range(len(self.kappa_t)):

for i in tqdm(range(len(self.kappa_t)), colour=CausalRefuter.PROGRESS_BAR_COLOR, disable = not show_progress_bar, desc="Refuting Estimates: "):
for j in range(len(self.kappa_y)):
new_data = self.include_confounders_effect(orig_data, self.kappa_t[i], self.kappa_y[j])
new_estimator = CausalEstimator.get_estimator_object(new_data, self._target_estimand, self._estimate)
Expand Down Expand Up @@ -282,7 +285,7 @@ def refute_estimate(self):
outcomes = np.random.rand(len(self.kappa_t))
orig_data = copy.deepcopy(self._data)

for i in range(0,len(self.kappa_t)):
for i in tqdm(range(0,len(self.kappa_t)), colour=CausalRefuter.PROGRESS_BAR_COLOR, disable = not show_progress_bar, desc="Refuting Estimates: "):
new_data = self.include_confounders_effect(orig_data, self.kappa_t[i], self.kappa_y)
new_estimator = CausalEstimator.get_estimator_object(new_data, self._target_estimand, self._estimate)
new_effect = new_estimator.estimate_effect()
Expand Down Expand Up @@ -316,7 +319,7 @@ def refute_estimate(self):
outcomes = np.random.rand(len(self.kappa_y))
orig_data = copy.deepcopy(self._data)

for i in range(0, len(self.kappa_y)):
for i in tqdm(range(0,len(self.kappa_y)), colour=CausalRefuter.PROGRESS_BAR_COLOR, disable = not show_progress_bar, desc="Refuting Estimates: "):
new_data = self.include_confounders_effect(orig_data, self.kappa_t, self.kappa_y[i])
new_estimator = CausalEstimator.get_estimator_object(new_data, self._target_estimand, self._estimate)
new_effect = new_estimator.estimate_effect()
Expand Down
6 changes: 4 additions & 2 deletions dowhy/causal_refuters/data_subset_refuter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import logging
from joblib import Parallel, delayed

from tqdm.auto import tqdm

from dowhy.causal_refuter import CausalRefuter, CausalRefutation
from dowhy.causal_estimator import CausalEstimator

Expand Down Expand Up @@ -37,7 +39,7 @@ def __init__(self, *args, **kwargs):

self.logger = logging.getLogger(__name__)

def refute_estimate(self):
def refute_estimate(self, show_progress_bar=False):

sample_estimates = np.zeros(self._num_simulations)
self.logger.info("Refutation over {} simulated datasets of size {} each"
Expand All @@ -60,7 +62,7 @@ def refute_once():
sample_estimates = Parallel(
n_jobs=self._n_jobs,
verbose=self._verbose
)(delayed(refute_once)() for _ in range(self._num_simulations))
)(delayed(refute_once)() for _ in tqdm(range(self._num_simulations), colour=CausalRefuter.PROGRESS_BAR_COLOR, disable = not show_progress_bar, desc="Refuting Estimates: "))
sample_estimates = np.array(sample_estimates)

refute = CausalRefutation(
Expand Down
7 changes: 5 additions & 2 deletions dowhy/causal_refuters/dummy_outcome_refuter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import pandas as pd
import logging
import pdb

from tqdm.auto import tqdm
from collections import OrderedDict, namedtuple
from dowhy.causal_refuter import CausalRefutation
from dowhy.causal_refuter import CausalRefuter
Expand Down Expand Up @@ -214,7 +216,7 @@ def __init__(self, *args, **kwargs):
self._outcome_name_str = self._outcome_name[0]
self.logger = logging.getLogger(__name__)

def refute_estimate(self):
def refute_estimate(self, show_progress_bar=False):

# We need to change the identified estimand
# We thus, make a copy. This is done as we don't want
Expand All @@ -238,7 +240,8 @@ def refute_estimate(self):
# Train and the Validation Datasets. Thus, we run the simulation loop followed by the training and the validation
# loops. Thus, we can get different values every time we get the estimator.

for _ in range( self._num_simulations ):
# for _ in range( self._num_simulations ):
for _ in tqdm(range(self._num_simulations), colour=CausalRefuter.PROGRESS_BAR_COLOR, disable = not show_progress_bar, desc="Refuting Estimates: "):
estimates = []

if estimator_present == False:
Expand Down
7 changes: 5 additions & 2 deletions dowhy/causal_refuters/placebo_treatment_refuter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import logging
from joblib import Parallel, delayed

from tqdm.auto import tqdm

from dowhy.causal_refuter import CausalRefutation
from dowhy.causal_refuter import CausalRefuter
Expand Down Expand Up @@ -52,7 +53,7 @@ def __init__(self, *args, **kwargs):
self.logger = logging.getLogger(__name__)


def refute_estimate(self):
def refute_estimate(self, show_progress_bar=False):
# only permute is supported for iv methods
if self._target_estimand.identifier_method.startswith("iv"):
if self._placebo_type != "permute":
Expand Down Expand Up @@ -145,7 +146,9 @@ def refute_once():
sample_estimates = Parallel(
n_jobs=self._n_jobs,
verbose=self._verbose
)(delayed(refute_once)() for _ in range(self._num_simulations))
)(delayed(refute_once)() for _ in tqdm(range(self._num_simulations), disable = not show_progress_bar, colour=CausalRefuter.PROGRESS_BAR_COLOR, desc="Refuting Estimates: "))

# for _ in range(self._num_simulations))
sample_estimates = np.array(sample_estimates)

# Restoring the value of iv_instrument_name
Expand Down
6 changes: 4 additions & 2 deletions dowhy/causal_refuters/random_common_cause.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import logging
from joblib import Parallel, delayed

from tqdm.auto import tqdm

from dowhy.causal_refuter import CausalRefutation
from dowhy.causal_refuter import CausalRefuter
from dowhy.causal_estimator import CausalEstimator
Expand Down Expand Up @@ -33,7 +35,7 @@ def __init__(self, *args, **kwargs):

self.logger = logging.getLogger(__name__)

def refute_estimate(self):
def refute_estimate(self, show_progress_bar=False):
num_rows = self._data.shape[0]
self.logger.info("Refutation over {} simulated datasets, each with a random common cause added"
.format(self._num_simulations))
Expand All @@ -58,7 +60,7 @@ def refute_once():
sample_estimates = Parallel(
n_jobs=self._n_jobs,
verbose=self._verbose
)(delayed(refute_once)() for _ in range(self._num_simulations))
)(delayed(refute_once)() for _ in tqdm(range(self._num_simulations), colour=CausalRefuter.PROGRESS_BAR_COLOR, disable = not show_progress_bar, desc="Refuting Estimates: "))
sample_estimates = np.array(sample_estimates)

refute = CausalRefutation(
Expand Down

0 comments on commit f946386

Please sign in to comment.