Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fixed warnings and progress bar improvements #567

Merged
merged 6 commits into from
Aug 19, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
13 changes: 9 additions & 4 deletions docs/source/example_notebooks/dowhy_simple_example.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@
"outputs": [],
"source": [
"res_subset=model.refute_estimate(identified_estimand, estimate,\n",
" method_name=\"data_subset_refuter\", subset_fraction=0.9)\n",
" method_name=\"data_subset_refuter\", show_progress_bar=True, subset_fraction=0.9)\n",
amit-sharma marked this conversation as resolved.
Show resolved Hide resolved
"print(res_subset)"
]
},
Expand All @@ -388,7 +388,7 @@
"outputs": [],
"source": [
"res_subset=model.refute_estimate(identified_estimand, estimate,\n",
" method_name=\"data_subset_refuter\", subset_fraction=0.9, random_seed = 1, n_jobs=-1, verbose=10)\n",
" method_name=\"data_subset_refuter\", show_progress_bar=True, subset_fraction=0.9, random_seed = 1, n_jobs=-1, verbose=10)\n",
"print(res_subset)"
]
},
Expand Down Expand Up @@ -488,7 +488,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.7.10 ('venvrl')",
"display_name": "Python 3.8.13 ('base')",
"language": "python",
"name": "python3"
},
Expand All @@ -502,7 +502,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.10"
"version": "3.8.13"
},
"toc": {
"base_numbering": 1,
Expand All @@ -516,6 +516,11 @@
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
},
amit-sharma marked this conversation as resolved.
Show resolved Hide resolved
"vscode": {
"interpreter": {
"hash": "f1b38bb04d81fd42ee2e2f288098ef35128fc088ebd616248212bf67c262eb73"
}
}
},
"nbformat": 4,
Expand Down
7 changes: 6 additions & 1 deletion dowhy/causal_estimators/propensity_score_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,15 @@ def __init__(self, *args, propensity_score_model=None,
raise Exception(error_msg)

def _refresh_propensity_score(self):
'''
A custom estimator based on the way the propensity score estimates are to be used.
Invoked from the '_estimate_effect' method of various propensity score subclasses when the propensity score is not pre-computed.
'''
if self.recalculate_propensity_score is True:
if self.propensity_score_model is None:
self.propensity_score_model = linear_model.LogisticRegression()
self.propensity_score_model.fit(self._observed_common_causes, self._treatment)
treatment_reshaped = np.ravel(self._treatment)
self.propensity_score_model.fit(self._observed_common_causes, treatment_reshaped)
self._data[self.propensity_score_column] = self.propensity_score_model.predict_proba(self._observed_common_causes)[:, 1]
else:
# check if user provides the propensity score column
Expand Down
5 changes: 3 additions & 2 deletions dowhy/causal_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,7 @@ def do(self, x, identified_estimand, method_name=None,
raise NotImplementedError
return estimate

def refute_estimate(self, estimand, estimate, method_name=None, **kwargs):
def refute_estimate(self, estimand, estimate, method_name=None, show_progress_bar=False, **kwargs):
"""Refute an estimated causal effect.

If method_name is provided, uses the provided method. In the future, we may support automatic selection of suitable refutation tests. Following refutation methods are supported.
Expand All @@ -399,6 +399,7 @@ def refute_estimate(self, estimand, estimate, method_name=None, **kwargs):
:param estimand: target estimand, an instance of the IdentifiedEstimand class (typically, the output of identify_effect)
:param estimate: estimate to be refuted, an instance of the CausalEstimate class (typically, the output of estimate_effect)
:param method_name: name of the refutation method
:param show_progress_bar: Boolean flag on whether to show a progress bar
:param kwargs: (optional) additional arguments that are passed directly to the refutation method. Can specify a random seed here to ensure reproducible results ('random_seed' parameter). For method-specific parameters, consult the documentation for the specific method. All refutation methods are in the causal_refuters subpackage.

:returns: an instance of the RefuteResult class
Expand All @@ -418,7 +419,7 @@ def refute_estimate(self, estimand, estimate, method_name=None, **kwargs):
estimate=estimate,
**kwargs
)
res = refuter.refute_estimate()
res = refuter.refute_estimate(show_progress_bar)
return res

def view_model(self, layout="dot", size=(8, 6), file_name="causal_model"):
Expand Down
2 changes: 1 addition & 1 deletion dowhy/causal_refuter.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ def perform_normal_distribution_test(self, estimate, simulations):

return p_value

def refute_estimate(self):
def refute_estimate(self, show_progress_bar=False):
raise NotImplementedError


Expand Down
12 changes: 8 additions & 4 deletions dowhy/causal_refuters/add_unobserved_common_cause.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
import pandas as pd
import scipy.stats

import tqdm
from tqdm.notebook import tqdm
amit-sharma marked this conversation as resolved.
Show resolved Hide resolved

import math
import statsmodels.api as sm
from sklearn.preprocessing import StandardScaler
Expand Down Expand Up @@ -179,7 +182,7 @@ def infer_default_kappa_y(self, len_kappa_y = 10):
else:
return np.arange(min_coeff, max_coeff, step)

def refute_estimate(self):
def refute_estimate(self, show_progress_bar=False):
"""
This function attempts to add an unobserved common cause to the outcome and the treatment. At present, we have implemented the behavior for one dimensional behaviors for continuous
and binary variables. This function can either take single valued inputs or a range of inputs. The function then looks at the data type of the input and then decides on the course of
Expand Down Expand Up @@ -229,7 +232,8 @@ def refute_estimate(self):

results_matrix = np.random.rand(len(self.kappa_t),len(self.kappa_y)) # Matrix to hold all the results of NxM
orig_data = copy.deepcopy(self._data)
for i in range(len(self.kappa_t)):

for i in tqdm(range(len(self.kappa_t)), colour='green', disable = not show_progress_bar, desc="Refuting Estimates: "):
for j in range(len(self.kappa_y)):
new_data = self.include_confounders_effect(orig_data, self.kappa_t[i], self.kappa_y[j])
new_estimator = CausalEstimator.get_estimator_object(new_data, self._target_estimand, self._estimate)
Expand Down Expand Up @@ -282,7 +286,7 @@ def refute_estimate(self):
outcomes = np.random.rand(len(self.kappa_t))
orig_data = copy.deepcopy(self._data)

for i in range(0,len(self.kappa_t)):
for i in tqdm(range(0,len(self.kappa_t)), colour='green', disable = not show_progress_bar, desc="Refuting Estimates: "):
new_data = self.include_confounders_effect(orig_data, self.kappa_t[i], self.kappa_y)
new_estimator = CausalEstimator.get_estimator_object(new_data, self._target_estimand, self._estimate)
new_effect = new_estimator.estimate_effect()
Expand Down Expand Up @@ -316,7 +320,7 @@ def refute_estimate(self):
outcomes = np.random.rand(len(self.kappa_y))
orig_data = copy.deepcopy(self._data)

for i in range(0, len(self.kappa_y)):
for i in tqdm(range(0,len(self.kappa_y)), colour='green', disable = not show_progress_bar, desc="Refuting Estimates: "):
new_data = self.include_confounders_effect(orig_data, self.kappa_t, self.kappa_y[i])
new_estimator = CausalEstimator.get_estimator_object(new_data, self._target_estimand, self._estimate)
new_effect = new_estimator.estimate_effect()
Expand Down
7 changes: 5 additions & 2 deletions dowhy/causal_refuters/data_subset_refuter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
import logging
from joblib import Parallel, delayed

import tqdm
from tqdm.notebook import tqdm

from dowhy.causal_refuter import CausalRefuter, CausalRefutation
from dowhy.causal_estimator import CausalEstimator

Expand Down Expand Up @@ -37,7 +40,7 @@ def __init__(self, *args, **kwargs):

self.logger = logging.getLogger(__name__)

def refute_estimate(self):
def refute_estimate(self, show_progress_bar=False):

sample_estimates = np.zeros(self._num_simulations)
self.logger.info("Refutation over {} simulated datasets of size {} each"
Expand All @@ -60,7 +63,7 @@ def refute_once():
sample_estimates = Parallel(
n_jobs=self._n_jobs,
verbose=self._verbose
)(delayed(refute_once)() for _ in range(self._num_simulations))
)(delayed(refute_once)() for _ in tqdm(range(self._num_simulations), colour='green', disable = not show_progress_bar, desc="Refuting Estimates: "))
amit-sharma marked this conversation as resolved.
Show resolved Hide resolved
sample_estimates = np.array(sample_estimates)

refute = CausalRefutation(
Expand Down
9 changes: 7 additions & 2 deletions dowhy/causal_refuters/placebo_treatment_refuter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
import logging
from joblib import Parallel, delayed

import tqdm
from tqdm.notebook import tqdm


from dowhy.causal_refuter import CausalRefutation
from dowhy.causal_refuter import CausalRefuter
Expand Down Expand Up @@ -52,7 +55,7 @@ def __init__(self, *args, **kwargs):
self.logger = logging.getLogger(__name__)


def refute_estimate(self):
def refute_estimate(self, show_progress_bar=False):
# only permute is supported for iv methods
if self._target_estimand.identifier_method.startswith("iv"):
if self._placebo_type != "permute":
Expand Down Expand Up @@ -145,7 +148,9 @@ def refute_once():
sample_estimates = Parallel(
n_jobs=self._n_jobs,
verbose=self._verbose
)(delayed(refute_once)() for _ in range(self._num_simulations))
)(delayed(refute_once)() for _ in tqdm(range(self._num_simulations), disable = not show_progress_bar, colour='green', desc="Refuting Estimates: "))

# for _ in range(self._num_simulations))
sample_estimates = np.array(sample_estimates)

# Restoring the value of iv_instrument_name
Expand Down
7 changes: 5 additions & 2 deletions dowhy/causal_refuters/random_common_cause.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
import logging
from joblib import Parallel, delayed

import tqdm
from tqdm.notebook import tqdm

from dowhy.causal_refuter import CausalRefutation
from dowhy.causal_refuter import CausalRefuter
from dowhy.causal_estimator import CausalEstimator
Expand Down Expand Up @@ -33,7 +36,7 @@ def __init__(self, *args, **kwargs):

self.logger = logging.getLogger(__name__)

def refute_estimate(self):
def refute_estimate(self, show_progress_bar=False):
num_rows = self._data.shape[0]
self.logger.info("Refutation over {} simulated datasets, each with a random common cause added"
.format(self._num_simulations))
Expand All @@ -58,7 +61,7 @@ def refute_once():
sample_estimates = Parallel(
n_jobs=self._n_jobs,
verbose=self._verbose
)(delayed(refute_once)() for _ in range(self._num_simulations))
)(delayed(refute_once)() for _ in tqdm(range(self._num_simulations), colour='green', disable = not show_progress_bar, desc="Refuting Estimates: "))
sample_estimates = np.array(sample_estimates)

refute = CausalRefutation(
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ networkx>=2.0
sympy>=1.4
scikit-learn
pydot>=1.4
joblib>=1.0.0
joblib>=1.0.0
tqdm>=4.64.0