From f6b7ef334bca59dfca48d7a5e395ce249ecd2830 Mon Sep 17 00:00:00 2001 From: Grace Sng Date: Sat, 29 Mar 2025 20:54:05 -0500 Subject: [PATCH 1/4] Test classes for validation suggester and tuebingen model suggester. Signed-off-by: Grace Sng --- .../suggesters/tuebingen_model_suggester.py | 2 +- pywhyllm/suggesters/validation_suggester.py | 315 ++++++------------ ...tuebingen_model_suggester_data_provider.py | 25 ++ .../validation_suggester_data_provider.py | 56 ++++ .../test_tuebingen_model_suggester.py | 56 ++++ .../test_validation_suggester.py | 101 ++++++ 6 files changed, 349 insertions(+), 206 deletions(-) create mode 100644 pywhyllm/tests/model_suggester/data_providers/tuebingen_model_suggester_data_provider.py create mode 100644 pywhyllm/tests/model_suggester/data_providers/validation_suggester_data_provider.py create mode 100644 pywhyllm/tests/model_suggester/test_tuebingen_model_suggester.py create mode 100644 pywhyllm/tests/model_suggester/test_validation_suggester.py diff --git a/pywhyllm/suggesters/tuebingen_model_suggester.py b/pywhyllm/suggesters/tuebingen_model_suggester.py index 195879d..0e73c72 100644 --- a/pywhyllm/suggesters/tuebingen_model_suggester.py +++ b/pywhyllm/suggesters/tuebingen_model_suggester.py @@ -17,7 +17,7 @@ class Strategy(Enum): class TuebingenModelSuggester(ModelSuggester): - def __init__(self, llm): + def __init__(self, llm=None): super().__init__(llm) def suggest_description( diff --git a/pywhyllm/suggesters/validation_suggester.py b/pywhyllm/suggesters/validation_suggester.py index 0431429..7a7fa7e 100644 --- a/pywhyllm/suggesters/validation_suggester.py +++ b/pywhyllm/suggesters/validation_suggester.py @@ -13,9 +13,10 @@ class ValidationSuggester(IdentifierProtocol): CONTEXT: str = """causal mechanisms""" - def __init__(self, llm): - if llm == 'gpt-4': - self.llm = guidance.models.OpenAI('gpt-4') + def __init__(self, llm=None): + if llm is not None: + if llm == 'gpt-4': + self.llm = guidance.models.OpenAI('gpt-4') def suggest_negative_controls( self, @@ -23,7 +24,7 @@ def suggest_negative_controls( outcome: str, factors_list: list(), expertise_list: list(), - analysis_context: list() = CONTEXT, + analysis_context=CONTEXT, stakeholders: list() = None ): expert_list: List[str] = list() @@ -41,32 +42,14 @@ def suggest_negative_controls( if factors_list[i] != treatment and factors_list[i] != outcome: edited_factors_list.append(factors_list[i]) - if len(expert_list) > 1: - for expert in expert_list: - ( - negative_controls_counter, - negative_controls_list, - ) = self.request_negative_controls( - treatment=treatment, - outcome=outcome, - factors_list=edited_factors_list, - negative_controls_counter=negative_controls_counter, - domain_expertise=expert, - analysis_context=analysis_context - ) - for m in negative_controls_list: - if m not in negative_controls: - negative_controls.append(m) - else: - ( - negative_controls_counter, - negative_controls_list, - ) = self.request_negative_controls( + for expert in expert_list: + negative_controls_counter, + negative_controls_list = self.request_negative_controls( treatment=treatment, outcome=outcome, factors_list=edited_factors_list, negative_controls_counter=negative_controls_counter, - domain_expertise=expert_list[0], + domain_expertise=expert, analysis_context=analysis_context ) for m in negative_controls_list: @@ -82,7 +65,7 @@ def request_negative_controls( factors_list: list(), negative_controls_counter: list(), domain_expertise: str, - analysis_context: list() = CONTEXT + analysis_context: list = CONTEXT ): negative_controls_list: List[str] = list() @@ -93,29 +76,29 @@ def request_negative_controls( with system(): lm += f"""You are an expert in the {domain_expertise} and are - studying the {analysis_context}. You are using your domain knowledge to help understand the negative - controls for a causal model that contains all the assumptions about the {analysis_context}. Where a causal - model is a conceptual model that describes the causal mechanisms of a system. You will do this by answering - questions about cause and effect using your domain knowledge in the {domain_expertise}.""" + studying the {analysis_context}. You are using your domain knowledge to help understand the negative + controls for a causal model that contains all the assumptions about the {analysis_context}. Where a causal + model is a conceptual model that describes the causal mechanisms of a system. You will do this by answering + questions about cause and effect using your domain knowledge in the {domain_expertise}.""" with user(): lm += f"""factor_names: {factors_list} From your - perspective as an expert in the {domain_expertise}, what factor(s) from the list of factors, relevant to - the {analysis_context}, should see zero treatment effect when changing the {treatment}? Which factor(s) - from the list of factors, if any at all, relevant to the {analysis_context}, are negative controls on the - causal mechanisms that affect the {outcome} when changing {treatment}? Using your domain knowledge, - which factor(s) from the list of factors, if any at all, relevant to the {analysis_context}, - should we expect to be unaffected by any changes in {treatment}? Which factor(s) from the list of factors, - if any at all, would be surprising if affected by a change in {treatment}? Be concise and keep your - thoughts under two paragraphs. Then provide your step by step chain of thoughts within the tags - . Once you have thought things through, wrap the name of the factor(s) from the list of - factors, that has/have a high likelihood of being negative controls on the causal mechanisms that affect {outcome} - when changing {treatment}, within the tags factor_name. Wrap the name - of the factor(s) from the list of factors, that has/have a high likelihood of being unaffected when - changing {treatment}, within the tags factor_name. Where factor_name - is one of the items within the factor_names list. If a factor does not have a high likelihood of being a - negative control relevant to the {analysis_context}, then do not wrap the factor with any tags. Provide - your step by step answer as an expert in the {domain_expertise}:""" + perspective as an expert in the {domain_expertise}, what factor(s) from the list of factors, relevant to + the {analysis_context}, should see zero treatment effect when changing the {treatment}? Which factor(s) + from the list of factors, if any at all, relevant to the {analysis_context}, are negative controls on the + causal mechanisms that affect the {outcome} when changing {treatment}? Using your domain knowledge, + which factor(s) from the list of factors, if any at all, relevant to the {analysis_context}, + should we expect to be unaffected by any changes in {treatment}? Which factor(s) from the list of factors, + if any at all, would be surprising if affected by a change in {treatment}? Be concise and keep your + thoughts under two paragraphs. Then provide your step by step chain of thoughts within the tags + . Once you have thought things through, wrap the name of the factor(s) from the list of + factors, that has/have a high likelihood of being negative controls on the causal mechanisms that affect {outcome} + when changing {treatment}, within the tags factor_name. Wrap the name + of the factor(s) from the list of factors, that has/have a high likelihood of being unaffected when + changing {treatment}, within the tags factor_name. Where factor_name + is one of the items within the factor_names list. If a factor does not have a high likelihood of being a + negative control relevant to the {analysis_context}, then do not wrap the factor with any tags. Provide + your step by step answer as an expert in the {domain_expertise}:""" with assistant(): lm += gen("output") @@ -132,9 +115,7 @@ def request_negative_controls( and factor not in negative_controls_list ): negative_controls_list.append(factor) - success = True - else: - success = False + success = True except KeyError: success = False @@ -152,7 +133,7 @@ def suggest_latent_confounders( treatment: str, outcome: str, expertise_list: list(), - analysis_context: list() = CONTEXT, + analysis_context=CONTEXT, stakeholders: list() = None ): expert_list: List[str] = list() @@ -165,31 +146,15 @@ def suggest_latent_confounders( latent_confounders_counter: Dict[str, int] = dict() latent_confounders: List[str, str] = list() - if len(expert_list) > 1: - for expert in expert_list: - ( - latent_confounders_counter, - latent_confounders_list, - ) = self.request_latent_confounders( - treatment=treatment, - outcome=outcome, - latent_confounders_counter=latent_confounders_counter, - domain_expertise=expert, - analysis_context=analysis_context, - ) - for m in latent_confounders_list: - if m not in latent_confounders: - latent_confounders.append(m) - else: - ( - latent_confounders_counter, - latent_confounders_list, - ) = self.request_latent_confounders( + for expert in expert_list: + latent_confounders_counter, + latent_confounders_list = self.request_latent_confounders( treatment=treatment, outcome=outcome, latent_confounders_counter=latent_confounders_counter, - domain_expertise=expert_list[0], - analysis_context=analysis_context) + domain_expertise=expert, + analysis_context=analysis_context, + ) for m in latent_confounders_list: if m not in latent_confounders: latent_confounders.append(m) @@ -202,7 +167,7 @@ def request_latent_confounders( outcome: str, latent_confounders_counter: list(), domain_expertise: str, - analysis_context: list() = CONTEXT + analysis_context=CONTEXT ): latent_confounders_list: List[str] = list() @@ -212,26 +177,26 @@ def request_latent_confounders( lm = self.llm with system(): lm += f"""You are an expert in the {domain_expertise} and are - studying the {analysis_context}. You are using your knowledge to help build a causal model that contains - all the assumptions about the {domain_expertise}. Where a causal model is a conceptual model that describes - the causal mechanisms of a system. You will do this by by answering questions about cause and effect and - using your domain knowledge in the {domain_expertise}.""" + studying the {analysis_context}. You are using your knowledge to help build a causal model that contains + all the assumptions about the {domain_expertise}. Where a causal model is a conceptual model that describes + the causal mechanisms of a system. You will do this by by answering questions about cause and effect and + using your domain knowledge in the {domain_expertise}.""" with user(): lm += f"""(1) From your perspective as - an expert in the {domain_expertise}, think step by step as you consider the factors that may interact - between the {treatment} and the {outcome}. Use your knowledge as an expert in the {domain_expertise} to - describe the confounders, if there are any at all, between the {treatment} and the {outcome}. Be concise - and keep your thinking within two paragraphs. Then provide your step by step chain of thoughts within the - tags . (2) From your perspective as an expert in the {domain_expertise}, which factor( - s), if any at all, has/have a high likelihood of directly influencing and causing both the assignment of the - {treatment} and the {outcome}? Which factor(s), if any at all, have a causal chain that links the {treatment} - to the {outcome}? Which factor(s), if any at all, are a confounder to the causal relationship - between the {treatment} and the {outcome}? Be concise and keep your thinking within two paragraphs. Then - provide your step by step chain of thoughts within the tags . Wrap the name of the - factor(s), if any at all, that has/have a high likelihood of directly influencing and causing both the - {treatment} and the {outcome}, within the tags factor_name. If a - factor does not have a high likelihood of directly confounding, then do not wrap the factor with any tags. - Your step by step answer as an expert in the {domain_expertise}:""" + an expert in the {domain_expertise}, think step by step as you consider the factors that may interact + between the {treatment} and the {outcome}. Use your knowledge as an expert in the {domain_expertise} to + describe the confounders, if there are any at all, between the {treatment} and the {outcome}. Be concise + and keep your thinking within two paragraphs. Then provide your step by step chain of thoughts within the + tags . (2) From your perspective as an expert in the {domain_expertise}, which factor( + s), if any at all, has/have a high likelihood of directly influencing and causing both the assignment of the + {treatment} and the {outcome}? Which factor(s), if any at all, have a causal chain that links the {treatment} + to the {outcome}? Which factor(s), if any at all, are a confounder to the causal relationship + between the {treatment} and the {outcome}? Be concise and keep your thinking within two paragraphs. Then + provide your step by step chain of thoughts within the tags . Wrap the name of the + factor(s), if any at all, that has/have a high likelihood of directly influencing and causing both the + {treatment} and the {outcome}, within the tags factor_name. If a + factor does not have a high likelihood of directly confounding, then do not wrap the factor with any tags. + Your step by step answer as an expert in the {domain_expertise}:""" with assistant(): lm += gen("output") @@ -244,9 +209,7 @@ def request_latent_confounders( if latent_confounders: for factor in latent_confounders: latent_confounders_list.append(factor) - success = True - else: - success = False + success = True except KeyError: success = False @@ -261,12 +224,11 @@ def request_latent_confounders( def request_parent_critique( self, - analysis_context, factor, factors_list, - domain_expertise + domain_expertise, + analysis_context=CONTEXT ): - edited_factors_list: List[str] = [] for i in range(len(factors_list)): @@ -282,16 +244,16 @@ def request_parent_critique( lm = self.llm with system(): lm += f"""You are a helpful causal assistant and expert in {domain_expertise}, - studying {analysis_context}. Task: identify factors causing {factor}.""" + studying {analysis_context}. Task: identify factors causing {factor}.""" with user(): lm += f"""Steps: (1) - Analyze potential factors [{factors_list}] for factors directly influencing/causing/affecting { + Analyze potential factors [{factors_list}] for factors directly influencing/causing/affecting { factor}. Is relationship direct? Ignore feedback mechanisms/factors not in list. Keep thoughts within - tags. (2) Use prior thoughts to answer: how {factor} influenced/caused/affected by [ - {factors_list}]? Is relationship direct? Ignore feedback mechanisms/factors not in list. Wrap - factors highly likely directly influencing/causing/affecting {factor} in - tags. No tags for low likelihood factors. Ignore feedback - mechanisms/factors not in list. Answer as {domain_expertise} expert.""" + tags. (2) Use prior thoughts to answer: how {factor} influenced/caused/affected by [ + {factors_list}]? Is relationship direct? Ignore feedback mechanisms/factors not in list. Wrap + factors highly likely directly influencing/causing/affecting {factor} in + tags. No tags for low likelihood factors. Ignore feedback + mechanisms/factors not in list. Answer as {domain_expertise} expert.""" with assistant(): lm += gen("output") @@ -303,9 +265,7 @@ def request_parent_critique( for factor in influencing_factors: if factor in edited_factors_list and factor not in parents: parents.append(factor) - success = True - else: - success = False + success = True except KeyError: success = False @@ -315,12 +275,11 @@ def request_parent_critique( def request_children_critique( self, - analysis_context, factor, factors_list, - domain_expertise + domain_expertise, + analysis_context=CONTEXT ): - edited_factors_list: List[str] = [] for i in range(len(factors_list)): @@ -337,33 +296,30 @@ def request_children_critique( with system(): lm += f"""You are a helpful causal assistant and expert in {domain_expertise}, - studying {analysis_context}. Task: identify factors caused by {factor}.""" + studying {analysis_context}. Task: identify factors caused by {factor}.""" with user(): lm += f"""Steps: ( - 1) Analyze potential factors [{factors_list}] for factors directly influenced/caused/affected by - {factor}. Is relationship direct? Ignore feedback mechanisms/factors not in list. Keep thoughts within - tags. (2) Use prior thoughts to answer: how {factor} influences/causes/affects [{ + 1) Analyze potential factors [{factors_list}] for factors directly influenced/caused/affected by + {factor}. Is relationship direct? Ignore feedback mechanisms/factors not in list. Keep thoughts within + tags. (2) Use prior thoughts to answer: how {factor} influences/causes/affects [{ factors_list}]? Is relationship direct? Ignore feedback mechanisms/factors not in list. Wrap - factors highly likely directly influenced/caused/affected by {factor} in - tags. No tags for low likelihood factors. Ignore feedback - mechanisms/factors not in list. Answer as {domain_expertise} expert.""" + factors highly likely directly influenced/caused/affected by {factor} in + tags. No tags for low likelihood factors. Ignore feedback + mechanisms/factors not in list. Answer as {domain_expertise} expert.""" with assistant(): lm += gen("output") output = lm["output"] - influencing_factors = re.findall( + influenced_factors = re.findall( r"(.*?)", output) - if influencing_factors: - for factor in influencing_factors: + if influenced_factors: + for factor in influenced_factors: if factor in edited_factors_list and factor not in children: children.append(factor) - - success = True - else: - success = False + success = True except KeyError: success = False @@ -378,7 +334,6 @@ def request_pairwise_critique( factor_b: str, analysis_context: str = CONTEXT ): - success: bool = False while not success: @@ -387,15 +342,15 @@ def request_pairwise_critique( with system(): lm += f"""You are a helpful causal assistant, expert in {domain_expertise}, - studying {analysis_context}. Task: identify relationship between {factor_a} and {factor_b}.""" + studying {analysis_context}. Task: identify relationship between {factor_a} and {factor_b}.""" with user(): lm += f"""Steps: (1) Does {factor_a} influence/cause/affect {factor_b}? Is relationship direct? Does {factor_b} influence/cause/affect - {factor_a}? Is relationship direct? Ignore feedback mechanisms/factors not in list. Keep thoughts within - tags. (2) Use prior thoughts to select likely answer: (A) {factor_a} influences {factor_b} (B) { + {factor_a}? Is relationship direct? Ignore feedback mechanisms/factors not in list. Keep thoughts within + tags. (2) Use prior thoughts to select likely answer: (A) {factor_a} influences {factor_b} (B) { factor_b} influences {factor_a} (C) Neither. Wrap answer in . e.g. A, - B, C. No tags for low likelihood factors. Ignore feedback - mechanisms/factors not in list. Answer as {domain_expertise} expert.""" + B, C. No tags for low likelihood factors. Ignore feedback + mechanisms/factors not in list. Answer as {domain_expertise} expert.""" with assistant(): lm += gen("output") @@ -429,9 +384,9 @@ def critique_graph( factors_list: List[str], edges: Dict[Tuple[str, str], int], experts: list(), + relationship_strategy: RelationshipStrategy = RelationshipStrategy.Parent, analysis_context: str = CONTEXT, stakeholders: list() = None, - relationship_strategy: RelationshipStrategy = RelationshipStrategy.Parent, ): expert_list: List[str] = list() for elements in experts: @@ -446,32 +401,18 @@ def critique_graph( parent_edges: Dict[Tuple[str, str], int] = dict() for factor in factors_list: - if len(expert_list) > 1: - for expert in expert_list: - suggested_parent = self.request_parent_critique( - analysis_context=analysis_context, - factor=factor, - factors_list=factors_list, - domain_expertise=expert - ) - for element in suggested_parent: - if ( - element, - factor, - ) in parent_edges and element in factors_list: - parent_edges[(element, factor)] += 1 - else: - parent_edges[(element, factor)] = 1 - else: + for expert in expert_list: suggested_parent = self.request_parent_critique( analysis_context=analysis_context, factor=factor, factors_list=factors_list, - domain_expertise=expert_list[0] + domain_expertise=expert ) - for element in suggested_parent: - if (element, factor) in parent_edges: + if ( + element, + factor, + ) in parent_edges and element in factors_list: parent_edges[(element, factor)] += 1 else: parent_edges[(element, factor)] = 1 @@ -484,36 +425,15 @@ def critique_graph( critiqued_children_edges: Dict[Tuple[str, str], int] = dict() for factor in factors_list: - if len(expert_list) > 1: - for expert in expert_list: - suggested_children = self.request_children_critique( - analysis_context=analysis_context, - factor=factor, - factors_list=factors_list, - domain_expertise=expert - ) - for element in suggested_children: - if ( - ( - element, - factor, - ) - in critiqued_children_edges - and element in factors_list - ): - critiqued_children_edges[(element, factor)] += 1 - else: - critiqued_children_edges[(element, factor)] = 1 - else: + for expert in expert_list: suggested_children = self.request_children_critique( - analysis_context=analysis_context, factor=factor, factors_list=factors_list, - domain_expertise=expert_list[0] + domain_expertise=expert, + analysis_context=analysis_context ) - for element in suggested_children: - if (element, factor) in critiqued_children_edges: + if (element, factor) in critiqued_children_edges and element in factors_list: critiqued_children_edges[(element, factor)] += 1 else: critiqued_children_edges[(element, factor)] = 1 @@ -526,33 +446,18 @@ def critique_graph( critiqued_pairwise_edges: Dict[Tuple[str, str], int] = dict() for (factor_a, factor_b) in itertools.combinations(factors_list, 2): - if factor_a != factor_b: - if len(expert_list) > 1: - for expert in expert_list: - suggested_edge = self.request_pairwise_critique( - analysis_context=analysis_context, - factor_a=factor_a, - factor_b=factor_b, - domain_expertise=expert - ) - - if suggested_edge is not None: - if suggested_edge in critiqued_pairwise_edges: - critiqued_pairwise_edges[suggested_edge] += 1 - else: - critiqued_pairwise_edges[suggested_edge] = 1 - else: - suggested_edge = self.request_pairwise_critique( - analysis_context=analysis_context, - factor_a=factor_a, - factor_b=factor_b, - domain_expertise=expert_list[0] - ) - - if suggested_edge is not None: - if suggested_edge in critiqued_pairwise_edges: - critiqued_pairwise_edges[suggested_edge] += 1 - else: - critiqued_pairwise_edges[suggested_edge] = 1 + for expert in expert_list: + suggested_edge = self.request_pairwise_critique( + factor_a=factor_a, + factor_b=factor_b, + domain_expertise=expert, + analysis_context=analysis_context + ) + + if suggested_edge is not None: + if suggested_edge in critiqued_pairwise_edges: + critiqued_pairwise_edges[suggested_edge] += 1 + else: + critiqued_pairwise_edges[suggested_edge] = 1 return edges, critiqued_pairwise_edges diff --git a/pywhyllm/tests/model_suggester/data_providers/tuebingen_model_suggester_data_provider.py b/pywhyllm/tests/model_suggester/data_providers/tuebingen_model_suggester_data_provider.py new file mode 100644 index 0000000..7d77171 --- /dev/null +++ b/pywhyllm/tests/model_suggester/data_providers/tuebingen_model_suggester_data_provider.py @@ -0,0 +1,25 @@ +# TESTS +variable = "water" +variable_a = "water intake" +description_a = "the amount of water a person drinks per day" +variable_b = "hydration level" +description_b = "the level of hydration in the body" +domain = "biology" + +# MOCK_RESPONSES +test_suggest_description_expected_response = "Water is a transparent, tasteless, odorless, nearly colorless liquid that is essential for all life forms and covers approximately 71% of Earth's surface, also existing in solid (ice) and gas (vapor) states." +test_suggest_onesided_relationship_expected_response = "A" +test_suggest_relationship_expected_response = "Yes Popkin, Barry M., Kristen E. D\'Anci, and Irwin H. Rosenberg. \"Water, hydration and health.\" Nutrition reviews 68.8 (2010): 439-458." +# ASSERTIONS +test_suggest_description_expected_result = [ + "Water is a transparent, tasteless, odorless, nearly colorless liquid that is essential for all life forms and covers approximately 71% of Earth's surface, also existing in solid (ice) and gas (vapor) states."] +test_suggest_onesided_relationship_expected_result = 1 +test__build_description_program_expected_result = { + 'system': 'You are a helpful assistant for writing concise and peer-reviewed descriptions. Your goal \n is to provide factual and succinct description of the given concept.', + 'user': " Describe the concept of water.\n In one sentence, provide a factual and succinct description of water\n Let's think step-by-step to make sure that we have a proper and clear description. Then provide \n your final answer within the tags, ."} +test_suggest_relationship_expected_result = (1, + [ + 'Popkin, Barry M., Kristen E. D\'Anci, and Irwin H. Rosenberg. "Water, hydration and health." Nutrition reviews 68.8 (2010): 439-458.']) +test__build_relationship_program_expected_result = { + 'system': 'You are a helpful assistant on causal reasoning and biology. Your goal is to answer \n questions about cause and effect in a factual and concise way.', + 'user': "can changing water intake change hydration level? Answer Yes or No.At each step, each expert include a reference to a research paper that supports \n their argument. They will provide a one sentence summary of the paper and how it supports their argument. \n Then they will answer whether a change in water intake changes hydration level. Answer Yes or No.\n When consensus is reached, thinking carefully and factually, explain the council's answer. Provide \n the answer within the tags, Yes/No, and the most influential reference within \n the tags Author, Title, Year of publication.\n \n\n\n----------------\n\n\nYes\nAuthor, Title, Year of \n publication\n\n\n----------------\n\n\nNo {~/user}"} diff --git a/pywhyllm/tests/model_suggester/data_providers/validation_suggester_data_provider.py b/pywhyllm/tests/model_suggester/data_providers/validation_suggester_data_provider.py new file mode 100644 index 0000000..b8638d1 --- /dev/null +++ b/pywhyllm/tests/model_suggester/data_providers/validation_suggester_data_provider.py @@ -0,0 +1,56 @@ +# TESTS +test_vars = ["smoking", "lung cancer", "exercise habits", "air pollution exposure"] +domain_expertises = ['Epidemiology'] + +# MOCK RESPONSES +test_latent_confounders_expected_response = "socio-economic status mental health" +test_negative_controls_expected_response = "exercise habits" +test_parent_critique_expected_response = "None" +test_children_critique_expected_response = "lung cancer" +test_pairwise_critique_expected_response = "The answer is A" +test_critique_graph_parent_expected_response = ["None", + "smoking air pollution exposure", + "air pollution exposure", + "None"] +test_critique_graph_children_expected_response = ["lung cancer", + "exercise habits", + "lung cancer", + "lung cancer exercise habits"] +test_critique_graph_pairwise_expected_response = ["A", "A", "C", + "B", "B", "B"] + +# ASSERTIONS +test_latent_confounders_expected_results = ({'mental health': 1, 'socio-economic status': 1}, + ['socio-economic status', 'mental health']) +test_negative_controls_expected_results = ({'exercise habits': 1}, ['exercise habits']) +test_parent_critique_expected_results = [] +test_children_critique_expected_results = ['lung cancer'] +test_pairwise_critique_expected_results = ('smoking', 'lung cancer') +test_critique_graph_parent_expected_results = ({('air pollution exposure', 'exercise habits'): 1, + ('air pollution exposure', 'lung cancer'): 1, + ('air pollution exposure', 'smoking'): 1, + ('smoking', 'lung cancer'): 1}, + {('air pollution exposure', 'exercise habits'): 1, + ('air pollution exposure', 'lung cancer'): 1, + ('smoking', 'lung cancer'): 1}) +test_critique_graph_children_expected_results = ({('air pollution exposure', 'smoking'): 1, + ('exercise habits', 'air pollution exposure'): 1, + ('exercise habits', 'smoking'): 1, + ('lung cancer', 'air pollution exposure'): 1, + ('lung cancer', 'exercise habits'): 1, + ('lung cancer', 'smoking'): 1}, + {('exercise habits', 'air pollution exposure'): 1, + ('exercise habits', 'lung cancer'): 1, + ('lung cancer', 'air pollution exposure'): 1, + ('lung cancer', 'exercise habits'): 1, + ('lung cancer', 'smoking'): 1}) +test_critique_graph_pairwise_expected_results = ({('air pollution exposure', 'exercise habits'): 1, + ('exercise habits', 'lung cancer'): 1, + ('smoking', 'air pollution exposure'): 1, + ('smoking', 'exercise habits'): 1, + ('smoking', 'lung cancer'): 1}, + {('smoking', 'lung cancer'): 1, + ('smoking', 'exercise habits'): 1, + ('exercise habits', 'lung cancer'): 1, + ('air pollution exposure', 'lung cancer'): 1, + ('air pollution exposure', 'exercise habits'): 1}) diff --git a/pywhyllm/tests/model_suggester/test_tuebingen_model_suggester.py b/pywhyllm/tests/model_suggester/test_tuebingen_model_suggester.py new file mode 100644 index 0000000..f1bbf96 --- /dev/null +++ b/pywhyllm/tests/model_suggester/test_tuebingen_model_suggester.py @@ -0,0 +1,56 @@ +import unittest +from unittest.mock import MagicMock +from guidance.models._openai import OpenAI + +from pywhyllm.suggesters.tuebingen_model_suggester import TuebingenModelSuggester, Strategy +from pywhyllm.tests.model_suggester.data_providers.tuebingen_model_suggester_data_provider import * + + +class TestTuebingenModelSuggester(unittest.TestCase): + def test_suggest_description(self): + modeler = TuebingenModelSuggester() + mock_llm = MagicMock(spec=OpenAI) + modeler.llm = mock_llm + + mock_llm.__add__ = MagicMock(return_value=mock_llm) + mock_llm.__getitem__ = MagicMock(return_value=test_suggest_description_expected_response) + result = modeler.suggest_description(variable) + assert result == test_suggest_description_expected_result + + def test_suggest_onesided_relationship(self): + modeler = TuebingenModelSuggester() + mock_llm = MagicMock(spec=OpenAI) + modeler.llm = mock_llm + + mock_llm.__add__ = MagicMock(return_value=mock_llm) + mock_llm.__getitem__ = MagicMock(return_value=test_suggest_onesided_relationship_expected_response) + result = modeler.suggest_onesided_relationship(variable_a, description_a, variable_b, description_b) + assert result == test_suggest_onesided_relationship_expected_result + + def test__build_description_program(self): + modeler = TuebingenModelSuggester() + mock_llm = MagicMock(spec=OpenAI) + modeler.llm = mock_llm + + result = modeler._build_description_program(variable) + assert result == test__build_description_program_expected_result + + def test_suggest_relationship(self): + modeler = TuebingenModelSuggester() + mock_llm = MagicMock(spec=OpenAI) + modeler.llm = mock_llm + + mock_llm.__add__ = MagicMock(return_value=mock_llm) + mock_llm.__getitem__ = MagicMock(return_value=test_suggest_relationship_expected_response) + result = modeler.suggest_relationship(variable_a, variable_b, description_a, description_b, domain, + strategy=Strategy.ToT_Single, ask_reference=True) + assert result == test_suggest_relationship_expected_result + + def test__build_relationship_program(self): + modeler = TuebingenModelSuggester() + mock_llm = MagicMock(spec=OpenAI) + modeler.llm = mock_llm + + result = modeler._build_relationship_program(variable_a, description_a, variable_b, description_b, domain, + use_description=False, ask_reference=True) + assert result == test__build_relationship_program_expected_result diff --git a/pywhyllm/tests/model_suggester/test_validation_suggester.py b/pywhyllm/tests/model_suggester/test_validation_suggester.py new file mode 100644 index 0000000..7b99b3d --- /dev/null +++ b/pywhyllm/tests/model_suggester/test_validation_suggester.py @@ -0,0 +1,101 @@ +import unittest +from typing import Dict +from unittest.mock import MagicMock +from guidance.models._openai import OpenAI + +from pywhyllm.suggesters.validation_suggester import ValidationSuggester +from pywhyllm.tests.model_suggester.data_providers.validation_suggester_data_provider import * +from pywhyllm.tests.model_suggester.data_providers.model_suggester_data_provider import * +from pywhyllm.helpers import RelationshipStrategy + + +class TestValidationSuggester(unittest.TestCase): + def test_request_latent_confounders_expected_response(self): + modeler = ValidationSuggester() + mock_llm = MagicMock(spec=OpenAI) + modeler.llm = mock_llm + + mock_llm.__add__ = MagicMock(return_value=mock_llm) + + mock_llm.__getitem__ = MagicMock(return_value=test_latent_confounders_expected_response) + + latent_confounders_counter: Dict[str, int] = dict() + result = modeler.request_latent_confounders(test_vars[0], test_vars[1], latent_confounders_counter, + domain_expertises[0]) + + assert result == test_latent_confounders_expected_results + + def test_request_negative_controls_expected_response(self): + modeler = ValidationSuggester() + mock_llm = MagicMock(spec=OpenAI) + modeler.llm = mock_llm + + mock_llm.__add__ = MagicMock(return_value=mock_llm) + + mock_llm.__getitem__ = MagicMock(return_value=test_negative_controls_expected_response) + + negative_controls_counter: Dict[str, int] = dict() + result = modeler.request_negative_controls(test_vars[0], test_vars[1], test_vars, negative_controls_counter, + domain_expertises[0]) + + assert result == test_negative_controls_expected_results + + def test_request_parent_critique_expected_response(self): + modeler = ValidationSuggester() + mock_llm = MagicMock(spec=OpenAI) + modeler.llm = mock_llm + + mock_llm.__add__ = MagicMock(return_value=mock_llm) + + mock_llm.__getitem__ = MagicMock(return_value=test_parent_critique_expected_response) + + result = modeler.request_parent_critique(test_vars[0], test_vars, domain_expertises[0]) + + assert result == test_parent_critique_expected_results + + def test_request_children_critique_expected_response(self): + modeler = ValidationSuggester() + mock_llm = MagicMock(spec=OpenAI) + modeler.llm = mock_llm + + mock_llm.__add__ = MagicMock(return_value=mock_llm) + + mock_llm.__getitem__ = MagicMock(return_value=test_children_critique_expected_response) + + result = modeler.request_children_critique(test_vars[0], test_vars, domain_expertises[0]) + + assert result == test_children_critique_expected_results + + def test_pairwise_critique_expected_response(self): + modeler = ValidationSuggester() + mock_llm = MagicMock(spec=OpenAI) + modeler.llm = mock_llm + + mock_llm.__add__ = MagicMock(return_value=mock_llm) + mock_llm.__getitem__ = MagicMock(return_value=test_pairwise_critique_expected_response) + result = modeler.request_pairwise_critique(domain_expertises[0], test_vars[0], test_vars[1]) + assert result == test_pairwise_critique_expected_results + + def test_critique_graph(self): + modeler = ValidationSuggester() + mock_llm = MagicMock(spec=OpenAI) + modeler.llm = mock_llm + + mock_llm.__add__ = MagicMock(return_value=mock_llm) + # parent + mock_llm.__getitem__ = MagicMock(side_effect=test_critique_graph_parent_expected_response) + result = modeler.critique_graph(test_vars, test_suggest_relationships_parent_expected_results, + domain_expertises, RelationshipStrategy.Parent) + + assert result == test_critique_graph_parent_expected_results + + mock_llm.__getitem__ = MagicMock(side_effect=test_critique_graph_children_expected_response) + result = modeler.critique_graph(test_vars, test_suggest_relationships_child_expected_results, + domain_expertises, RelationshipStrategy.Child) + + assert result == test_critique_graph_children_expected_results + + mock_llm.__getitem__ = MagicMock(side_effect=test_critique_graph_pairwise_expected_response) + result = modeler.critique_graph(test_vars, test_suggest_relationships_pairwise_expected_results, + domain_expertises, RelationshipStrategy.Pairwise) + assert result == test_critique_graph_pairwise_expected_results From 0a620f78be81ba4d3b9eaf847fdd09abc2e8f416 Mon Sep 17 00:00:00 2001 From: Grace Sng Date: Sun, 30 Mar 2025 23:44:46 -0500 Subject: [PATCH 2/4] Test classes for validation suggester and tuebingen model suggester. Signed-off-by: Grace Sng --- .../suggesters/tuebingen_model_suggester.py | 6 +- pywhyllm/suggesters/validation_suggester.py | 2 +- ...tuebingen_model_suggester_data_provider.py | 59 +++++++++++++++---- .../validation_suggester_data_provider.py | 11 +++- .../test_identification_suggester.py | 11 +++- .../test_tuebingen_model_suggester.py | 39 +++++++++--- .../test_validation_suggester.py | 40 ++++++++++--- 7 files changed, 132 insertions(+), 36 deletions(-) diff --git a/pywhyllm/suggesters/tuebingen_model_suggester.py b/pywhyllm/suggesters/tuebingen_model_suggester.py index 0e73c72..528d938 100644 --- a/pywhyllm/suggesters/tuebingen_model_suggester.py +++ b/pywhyllm/suggesters/tuebingen_model_suggester.py @@ -21,7 +21,7 @@ def __init__(self, llm=None): super().__init__(llm) def suggest_description( - self, variable, context=None, ask_reference=False + self, variable, ask_reference=False ): generate_description = self._build_description_program(variable) @@ -255,11 +255,11 @@ def _build_relationship_program( the answer within the tags, Yes/No, and the most influential reference within the tags Author, Title, Year of publication. \n\n\n----------------\n\n\nYes\nAuthor, Title, Year of - publication\n\n\n----------------\n\n\nNo {{~/user}}""" + publication\n\n\n----------------\n\n\nNo""" else: query["user"] += """When consensus is reached, thinking carefully and factually, explain the council's answer. Provide the answer within the tags, Yes/No. - \n\n\n----------------\n\n\nYes\n\n\n----------------\n\n\nNo {{~/user}}""" + \n\n\n----------------\n\n\nYes\n\n\n----------------\n\n\nNo""" elif use_strategy == Strategy.CoT: if use_description: diff --git a/pywhyllm/suggesters/validation_suggester.py b/pywhyllm/suggesters/validation_suggester.py index 7a7fa7e..3c07225 100644 --- a/pywhyllm/suggesters/validation_suggester.py +++ b/pywhyllm/suggesters/validation_suggester.py @@ -65,7 +65,7 @@ def request_negative_controls( factors_list: list(), negative_controls_counter: list(), domain_expertise: str, - analysis_context: list = CONTEXT + analysis_context = CONTEXT ): negative_controls_list: List[str] = list() diff --git a/pywhyllm/tests/model_suggester/data_providers/tuebingen_model_suggester_data_provider.py b/pywhyllm/tests/model_suggester/data_providers/tuebingen_model_suggester_data_provider.py index 7d77171..2008ba4 100644 --- a/pywhyllm/tests/model_suggester/data_providers/tuebingen_model_suggester_data_provider.py +++ b/pywhyllm/tests/model_suggester/data_providers/tuebingen_model_suggester_data_provider.py @@ -8,18 +8,55 @@ # MOCK_RESPONSES test_suggest_description_expected_response = "Water is a transparent, tasteless, odorless, nearly colorless liquid that is essential for all life forms and covers approximately 71% of Earth's surface, also existing in solid (ice) and gas (vapor) states." -test_suggest_onesided_relationship_expected_response = "A" -test_suggest_relationship_expected_response = "Yes Popkin, Barry M., Kristen E. D\'Anci, and Irwin H. Rosenberg. \"Water, hydration and health.\" Nutrition reviews 68.8 (2010): 439-458." +test_suggest_onesided_relationship_a_cause_b_expected_response = "A" +test_suggest_onesided_relationship_a_not_cause_b_expected_response = "B" +test_suggest_relationship_a_cause_b_expected_response = "Yes Popkin, Barry M., Kristen E. D\'Anci, and Irwin H. Rosenberg. \"Water, hydration and health.\" Nutrition reviews 68.8 (2010): 439-458." +test_suggest_relationship_a_not_cause_b_expected_response = "No Popkin, Barry M., Kristen E. D\'Anci, and Irwin H. Rosenberg. \"Water, hydration and health.\" Nutrition reviews 68.8 (2010): 439-458." + # ASSERTIONS -test_suggest_description_expected_result = [ - "Water is a transparent, tasteless, odorless, nearly colorless liquid that is essential for all life forms and covers approximately 71% of Earth's surface, also existing in solid (ice) and gas (vapor) states."] -test_suggest_onesided_relationship_expected_result = 1 -test__build_description_program_expected_result = { +test_suggest_description_expected_result = ([ + "Water is a transparent, tasteless, odorless, nearly colorless liquid that is essential for all life forms and covers approximately 71% of Earth's surface, also existing in solid (ice) and gas (vapor) states."], + []) +test_suggest_onesided_relationship_a_cause_b_expected_result = 1 +test_suggest_onesided_relationship_a_not_cause_b_expected_result = 0 +test__build_description_program_no_context_no_reference_expected_result = { 'system': 'You are a helpful assistant for writing concise and peer-reviewed descriptions. Your goal \n is to provide factual and succinct description of the given concept.', 'user': " Describe the concept of water.\n In one sentence, provide a factual and succinct description of water\n Let's think step-by-step to make sure that we have a proper and clear description. Then provide \n your final answer within the tags, ."} -test_suggest_relationship_expected_result = (1, - [ - 'Popkin, Barry M., Kristen E. D\'Anci, and Irwin H. Rosenberg. "Water, hydration and health." Nutrition reviews 68.8 (2010): 439-458.']) +test__build_description_program_no_context_with_reference_expected_result = { + 'system': 'You are a helpful assistant for writing concise and peer-reviewed descriptions. Your goal \n is to provide factual and succinct description of the given concept.', + 'user': ' Describe the concept of water.\n In one sentence, provide a factual and succinct description of water"\n Then provide two research papers that support your description.\n Let\'s think step-by-step to make sure that we have a proper and clear description. Then provide \n your final answer within the tags, , and each research paper within the \n tags .'} +test__build_description_program_with_context_with_reference_expected_result = { + 'system': 'You are a helpful assistant for writing concise and peer-reviewed descriptions. Your goal is \n to provide factual and succinct descriptions related to the given concept and context.', + 'user': "Using this context about the particular variable, describe the concept of water.\n In one sentence, provide a factual and succinct description of waterThen provide two research papers that support your description.\n Let's think step-by-step to make sure that we have a proper and clear description. Then provide your final \n answer within the tags, , and each research paper within the tags ."} +test__build_description_program_with_context_no_reference_expected_result = { + 'system': 'You are a helpful assistant for writing concise and peer-reviewed descriptions. Your goal is \n to provide factual and succinct descriptions related to the given concept and context.', + 'user': "Using this context about the particular variable, describe the concept of water.\n In one sentence, provide a factual and succinct description of water\n Let's think step-by-step to make sure that we have a proper and clear description. Then provide your final \n answer within the tags, ."} +test_suggest_relationship_a_cause_b_expected_result = (1, + [ + 'Popkin, Barry M., Kristen E. D\'Anci, and Irwin H. Rosenberg. "Water, hydration and health." Nutrition reviews 68.8 (2010): 439-458.']) +test_suggest_relationship_a_not_cause_b_expected_result = (0, + [ + 'Popkin, Barry M., Kristen E. D\'Anci, and Irwin H. Rosenberg. "Water, hydration and health." Nutrition reviews 68.8 (2010): 439-458.']) test__build_relationship_program_expected_result = { - 'system': 'You are a helpful assistant on causal reasoning and biology. Your goal is to answer \n questions about cause and effect in a factual and concise way.', - 'user': "can changing water intake change hydration level? Answer Yes or No.At each step, each expert include a reference to a research paper that supports \n their argument. They will provide a one sentence summary of the paper and how it supports their argument. \n Then they will answer whether a change in water intake changes hydration level. Answer Yes or No.\n When consensus is reached, thinking carefully and factually, explain the council's answer. Provide \n the answer within the tags, Yes/No, and the most influential reference within \n the tags Author, Title, Year of publication.\n \n\n\n----------------\n\n\nYes\nAuthor, Title, Year of \n publication\n\n\n----------------\n\n\nNo {~/user}"} + 'system': 'You are a helpful assistant on causal reasoning and biology. Your ' + 'goal is to answer \n' + ' questions about cause and effect in a factual and ' + 'concise way.', + 'user': 'can changing water intake change hydration level? Answer Yes or ' + 'No.When consensus is reached, thinking carefully and factually, ' + "explain the council's answer. \n" + ' Provide the answer within the tags, ' + 'Yes/No.\n' + ' \n' + '\n' + '\n' + '----------------\n' + '\n' + '\n' + 'Yes\n' + '\n' + '\n' + '----------------\n' + '\n' + '\n' + 'No'} diff --git a/pywhyllm/tests/model_suggester/data_providers/validation_suggester_data_provider.py b/pywhyllm/tests/model_suggester/data_providers/validation_suggester_data_provider.py index b8638d1..0780157 100644 --- a/pywhyllm/tests/model_suggester/data_providers/validation_suggester_data_provider.py +++ b/pywhyllm/tests/model_suggester/data_providers/validation_suggester_data_provider.py @@ -20,9 +20,14 @@ "B", "B", "B"] # ASSERTIONS -test_latent_confounders_expected_results = ({'mental health': 1, 'socio-economic status': 1}, - ['socio-economic status', 'mental health']) -test_negative_controls_expected_results = ({'exercise habits': 1}, ['exercise habits']) +test_suggest_latent_confounders_expected_results = ({'mental health': 1, 'socio-economic status': 1}, + [{'mental health': 1, 'socio-economic status': 1}, + ['socio-economic status', 'mental health']]) +test_request_latent_confounders_expected_results = ({'mental health': 1, 'socio-economic status': 1}, + ['socio-economic status', 'mental health']) +test_suggest_negative_controls_expected_results = ( +{'exercise habits': 1}, [{'exercise habits': 1}, ['exercise habits']]) +test_request_negative_controls_expected_results = ({'exercise habits': 1}, ['exercise habits']) test_parent_critique_expected_results = [] test_children_critique_expected_results = ['lung cancer'] test_pairwise_critique_expected_results = ('smoking', 'lung cancer') diff --git a/pywhyllm/tests/model_suggester/test_identification_suggester.py b/pywhyllm/tests/model_suggester/test_identification_suggester.py index 1040452..5515db4 100644 --- a/pywhyllm/tests/model_suggester/test_identification_suggester.py +++ b/pywhyllm/tests/model_suggester/test_identification_suggester.py @@ -3,13 +3,20 @@ from guidance.models._openai import OpenAI from pywhyllm.suggesters.identification_suggester import IdentificationSuggester +from pywhyllm.suggesters.model_suggester import ModelSuggester from pywhyllm.tests.model_suggester.data_providers.model_suggester_data_provider import * from pywhyllm.tests.model_suggester.data_providers.identification_suggester_data_provider import * -from pywhyllm.tests.model_suggester.test_model_suggester import TestModelSuggester class TestIdentificationSuggester(unittest.TestCase): def test_suggest_backdoor(self): - return TestModelSuggester().test_suggest_confounders() + modeler = IdentificationSuggester() + mock_llm = MagicMock(spec=OpenAI) + modeler.llm = mock_llm + mock_model_suggester = MagicMock(spec=ModelSuggester) + modeler.model_suggester = mock_model_suggester + mock_model_suggester.suggest_confounders = MagicMock(return_value=test_suggest_confounders_expected_results) + result = modeler.suggest_backdoor(test_vars[0], test_vars[1], test_vars, test_domain_expertises_expected_result) + assert result == test_suggest_confounders_expected_results def test_suggest_mediators(self): modeler = IdentificationSuggester() diff --git a/pywhyllm/tests/model_suggester/test_tuebingen_model_suggester.py b/pywhyllm/tests/model_suggester/test_tuebingen_model_suggester.py index f1bbf96..c90e308 100644 --- a/pywhyllm/tests/model_suggester/test_tuebingen_model_suggester.py +++ b/pywhyllm/tests/model_suggester/test_tuebingen_model_suggester.py @@ -14,7 +14,7 @@ def test_suggest_description(self): mock_llm.__add__ = MagicMock(return_value=mock_llm) mock_llm.__getitem__ = MagicMock(return_value=test_suggest_description_expected_response) - result = modeler.suggest_description(variable) + result = modeler.suggest_description(variable, True) assert result == test_suggest_description_expected_result def test_suggest_onesided_relationship(self): @@ -23,17 +23,32 @@ def test_suggest_onesided_relationship(self): modeler.llm = mock_llm mock_llm.__add__ = MagicMock(return_value=mock_llm) - mock_llm.__getitem__ = MagicMock(return_value=test_suggest_onesided_relationship_expected_response) + #Given the two variables and their descriptions, variable a causes variable b + mock_llm.__getitem__ = MagicMock(return_value=test_suggest_onesided_relationship_a_cause_b_expected_response) result = modeler.suggest_onesided_relationship(variable_a, description_a, variable_b, description_b) - assert result == test_suggest_onesided_relationship_expected_result + assert result == test_suggest_onesided_relationship_a_cause_b_expected_result + + #Given the two variables and their descriptions, variable a does not cause variable b + mock_llm.__getitem__ = MagicMock(return_value=test_suggest_onesided_relationship_a_not_cause_b_expected_response) + result = modeler.suggest_onesided_relationship(variable_a, description_a, variable_b, description_b) + assert result == test_suggest_onesided_relationship_a_not_cause_b_expected_result def test__build_description_program(self): modeler = TuebingenModelSuggester() mock_llm = MagicMock(spec=OpenAI) modeler.llm = mock_llm - - result = modeler._build_description_program(variable) - assert result == test__build_description_program_expected_result + #Test no context, no reference + result = modeler._build_description_program(variable, False, False) + assert result == test__build_description_program_no_context_no_reference_expected_result + #Test no context, with reference + result = modeler._build_description_program(variable, False, True) + assert result == test__build_description_program_no_context_with_reference_expected_result + #Test with context, no reference + result = modeler._build_description_program(variable, True, False) + assert result == test__build_description_program_with_context_no_reference_expected_result + #Test with context, with reference + result = modeler._build_description_program(variable, True, True) + assert result == test__build_description_program_with_context_with_reference_expected_result def test_suggest_relationship(self): modeler = TuebingenModelSuggester() @@ -41,10 +56,16 @@ def test_suggest_relationship(self): modeler.llm = mock_llm mock_llm.__add__ = MagicMock(return_value=mock_llm) - mock_llm.__getitem__ = MagicMock(return_value=test_suggest_relationship_expected_response) + #Given the two variables and their descriptions, variable a causes variable b + mock_llm.__getitem__ = MagicMock(return_value=test_suggest_relationship_a_cause_b_expected_response) + result = modeler.suggest_relationship(variable_a, variable_b, description_a, description_b, domain, + strategy=Strategy.ToT_Single, ask_reference=True) + assert result == test_suggest_relationship_a_cause_b_expected_result + #Given the two variables and their descriptions, variable a does not cause variable b + mock_llm.__getitem__ = MagicMock(return_value=test_suggest_relationship_a_not_cause_b_expected_response) result = modeler.suggest_relationship(variable_a, variable_b, description_a, description_b, domain, strategy=Strategy.ToT_Single, ask_reference=True) - assert result == test_suggest_relationship_expected_result + assert result == test_suggest_relationship_a_not_cause_b_expected_result def test__build_relationship_program(self): modeler = TuebingenModelSuggester() @@ -52,5 +73,5 @@ def test__build_relationship_program(self): modeler.llm = mock_llm result = modeler._build_relationship_program(variable_a, description_a, variable_b, description_b, domain, - use_description=False, ask_reference=True) + use_description=False, ask_reference=False) assert result == test__build_relationship_program_expected_result diff --git a/pywhyllm/tests/model_suggester/test_validation_suggester.py b/pywhyllm/tests/model_suggester/test_validation_suggester.py index 7b99b3d..a7bef47 100644 --- a/pywhyllm/tests/model_suggester/test_validation_suggester.py +++ b/pywhyllm/tests/model_suggester/test_validation_suggester.py @@ -10,7 +10,20 @@ class TestValidationSuggester(unittest.TestCase): - def test_request_latent_confounders_expected_response(self): + def test_suggest_latent_confounders(self): + modeler = ValidationSuggester() + mock_llm = MagicMock(spec=OpenAI) + modeler.llm = mock_llm + + mock_llm.__add__ = MagicMock(return_value=mock_llm) + + mock_llm.__getitem__ = MagicMock(return_value=test_latent_confounders_expected_response) + + result = modeler.suggest_latent_confounders(test_vars[0], test_vars[1], domain_expertises) + + assert result == test_suggest_latent_confounders_expected_results + + def test_request_latent_confounders(self): modeler = ValidationSuggester() mock_llm = MagicMock(spec=OpenAI) modeler.llm = mock_llm @@ -23,9 +36,22 @@ def test_request_latent_confounders_expected_response(self): result = modeler.request_latent_confounders(test_vars[0], test_vars[1], latent_confounders_counter, domain_expertises[0]) - assert result == test_latent_confounders_expected_results + assert result == test_request_latent_confounders_expected_results + + def test_suggest_negative_controls(self): + modeler = ValidationSuggester() + mock_llm = MagicMock(spec=OpenAI) + modeler.llm = mock_llm + + mock_llm.__add__ = MagicMock(return_value=mock_llm) + + mock_llm.__getitem__ = MagicMock(return_value=test_negative_controls_expected_response) + + result = modeler.suggest_negative_controls(test_vars[0], test_vars[1], test_vars, domain_expertises) + + assert result == test_suggest_negative_controls_expected_results - def test_request_negative_controls_expected_response(self): + def test_request_negative_controls(self): modeler = ValidationSuggester() mock_llm = MagicMock(spec=OpenAI) modeler.llm = mock_llm @@ -38,9 +64,9 @@ def test_request_negative_controls_expected_response(self): result = modeler.request_negative_controls(test_vars[0], test_vars[1], test_vars, negative_controls_counter, domain_expertises[0]) - assert result == test_negative_controls_expected_results + assert result == test_request_negative_controls_expected_results - def test_request_parent_critique_expected_response(self): + def test_request_parent_critique(self): modeler = ValidationSuggester() mock_llm = MagicMock(spec=OpenAI) modeler.llm = mock_llm @@ -53,7 +79,7 @@ def test_request_parent_critique_expected_response(self): assert result == test_parent_critique_expected_results - def test_request_children_critique_expected_response(self): + def test_request_children_critique(self): modeler = ValidationSuggester() mock_llm = MagicMock(spec=OpenAI) modeler.llm = mock_llm @@ -66,7 +92,7 @@ def test_request_children_critique_expected_response(self): assert result == test_children_critique_expected_results - def test_pairwise_critique_expected_response(self): + def test_request_pairwise_critique(self): modeler = ValidationSuggester() mock_llm = MagicMock(spec=OpenAI) modeler.llm = mock_llm From 4d9ee64b1cd79fea00aab92eb3269f54307c113d Mon Sep 17 00:00:00 2001 From: Grace Sng Date: Sun, 30 Mar 2025 23:51:36 -0500 Subject: [PATCH 3/4] Edited comments. Signed-off-by: Grace Sng --- pywhyllm/tests/model_suggester/test_validation_suggester.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pywhyllm/tests/model_suggester/test_validation_suggester.py b/pywhyllm/tests/model_suggester/test_validation_suggester.py index a7bef47..dd3a014 100644 --- a/pywhyllm/tests/model_suggester/test_validation_suggester.py +++ b/pywhyllm/tests/model_suggester/test_validation_suggester.py @@ -114,13 +114,13 @@ def test_critique_graph(self): domain_expertises, RelationshipStrategy.Parent) assert result == test_critique_graph_parent_expected_results - + # child mock_llm.__getitem__ = MagicMock(side_effect=test_critique_graph_children_expected_response) result = modeler.critique_graph(test_vars, test_suggest_relationships_child_expected_results, domain_expertises, RelationshipStrategy.Child) assert result == test_critique_graph_children_expected_results - + # pairwise mock_llm.__getitem__ = MagicMock(side_effect=test_critique_graph_pairwise_expected_response) result = modeler.critique_graph(test_vars, test_suggest_relationships_pairwise_expected_results, domain_expertises, RelationshipStrategy.Pairwise) From 0b268284edd165c2e520d129a31fc22281f734f5 Mon Sep 17 00:00:00 2001 From: Grace Sng Date: Mon, 31 Mar 2025 18:00:18 -0500 Subject: [PATCH 4/4] Fixed spacing issues. Signed-off-by: Grace Sng --- pywhyllm/suggesters/validation_suggester.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/pywhyllm/suggesters/validation_suggester.py b/pywhyllm/suggesters/validation_suggester.py index 3c07225..57518ed 100644 --- a/pywhyllm/suggesters/validation_suggester.py +++ b/pywhyllm/suggesters/validation_suggester.py @@ -65,7 +65,7 @@ def request_negative_controls( factors_list: list(), negative_controls_counter: list(), domain_expertise: str, - analysis_context = CONTEXT + analysis_context=CONTEXT ): negative_controls_list: List[str] = list() @@ -82,8 +82,7 @@ def request_negative_controls( questions about cause and effect using your domain knowledge in the {domain_expertise}.""" with user(): - lm += f"""factor_names: {factors_list} From your - perspective as an expert in the {domain_expertise}, what factor(s) from the list of factors, relevant to + lm += f"""factor_names: {factors_list} From your perspective as an expert in the {domain_expertise}, what factor(s) from the list of factors, relevant to the {analysis_context}, should see zero treatment effect when changing the {treatment}? Which factor(s) from the list of factors, if any at all, relevant to the {analysis_context}, are negative controls on the causal mechanisms that affect the {outcome} when changing {treatment}? Using your domain knowledge, @@ -176,14 +175,12 @@ def request_latent_confounders( try: lm = self.llm with system(): - lm += f"""You are an expert in the {domain_expertise} and are - studying the {analysis_context}. You are using your knowledge to help build a causal model that contains + lm += f"""You are an expert in the {domain_expertise} and are studying the {analysis_context}. You are using your knowledge to help build a causal model that contains all the assumptions about the {domain_expertise}. Where a causal model is a conceptual model that describes the causal mechanisms of a system. You will do this by by answering questions about cause and effect and using your domain knowledge in the {domain_expertise}.""" with user(): - lm += f"""(1) From your perspective as - an expert in the {domain_expertise}, think step by step as you consider the factors that may interact + lm += f"""(1) From your perspective as an expert in the {domain_expertise}, think step by step as you consider the factors that may interact between the {treatment} and the {outcome}. Use your knowledge as an expert in the {domain_expertise} to describe the confounders, if there are any at all, between the {treatment} and the {outcome}. Be concise and keep your thinking within two paragraphs. Then provide your step by step chain of thoughts within the