Test classes for suggesters #2 #39

Merged
merged 4 commits on Apr 2, 2025

Changes from 3 commits
8 changes: 4 additions & 4 deletions pywhyllm/suggesters/tuebingen_model_suggester.py
@@ -17,11 +17,11 @@ class Strategy(Enum):


 class TuebingenModelSuggester(ModelSuggester):
-    def __init__(self, llm):
+    def __init__(self, llm=None):
         super().__init__(llm)

     def suggest_description(
-            self, variable, context=None, ask_reference=False
+            self, variable, ask_reference=False
     ):
         generate_description = self._build_description_program(variable)

@@ -255,11 +255,11 @@ def _build_relationship_program(
                 the answer within the tags, <answer>Yes/No</answer>, and the most influential reference within
                 the tags <reference>Author, Title, Year of publication</reference>.
                 \n\n\n----------------\n\n\n<answer>Yes</answer>\n<reference>Author, Title, Year of
-                publication</reference>\n\n\n----------------\n\n\n<answer>No</answer> {{~/user}}"""
+                publication</reference>\n\n\n----------------\n\n\n<answer>No</answer>"""
         else:
             query["user"] += """When consensus is reached, thinking carefully and factually, explain the council's answer.
                 Provide the answer within the tags, <answer>Yes/No</answer>.
-                \n\n\n----------------\n\n\n<answer>Yes</answer>\n\n\n----------------\n\n\n<answer>No</answer> {{~/user}}"""
+                \n\n\n----------------\n\n\n<answer>Yes</answer>\n\n\n----------------\n\n\n<answer>No</answer>"""

         elif use_strategy == Strategy.CoT:
             if use_description:
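
Reviewer note: the `llm=None` default is what lets the new unit tests construct a suggester without a real client and attach a mock afterwards. A minimal sketch of the pattern used throughout the tests in this PR (the dunder stubs mirror how a guidance model is driven via `lm + prompt` and `lm[key]`; the canned response string here is illustrative only):

    from unittest.mock import MagicMock
    from guidance.models._openai import OpenAI
    from pywhyllm.suggesters.tuebingen_model_suggester import TuebingenModelSuggester

    suggester = TuebingenModelSuggester()    # no LLM required at construction time
    mock_llm = MagicMock(spec=OpenAI)
    mock_llm.__add__ = MagicMock(return_value=mock_llm)   # `lm + prompt` yields the mock again
    mock_llm.__getitem__ = MagicMock(return_value="<answer>Yes</answer>")  # canned completion
    suggester.llm = mock_llm
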
315 changes: 110 additions & 205 deletions pywhyllm/suggesters/validation_suggester.py

Large diffs are not rendered by default.

62 changes: 62 additions & 0 deletions pywhyllm/tests/model_suggester/data_providers/tuebingen_model_suggester_data_provider.py
@@ -0,0 +1,62 @@
# TESTS
variable = "water"
variable_a = "water intake"
description_a = "the amount of water a person drinks per day"
variable_b = "hydration level"
description_b = "the level of hydration in the body"
domain = "biology"

# MOCK_RESPONSES
test_suggest_description_expected_response = "<description>Water is a transparent, tasteless, odorless, nearly colorless liquid that is essential for all life forms and covers approximately 71% of Earth's surface, also existing in solid (ice) and gas (vapor) states.</description>"
test_suggest_onesided_relationship_a_cause_b_expected_response = "<answer>A</answer>"
test_suggest_onesided_relationship_a_not_cause_b_expected_response = "<answer>B</answer>"
test_suggest_relationship_a_cause_b_expected_response = "<answer>Yes</answer> <reference>Popkin, Barry M., Kristen E. D\'Anci, and Irwin H. Rosenberg. \"Water, hydration and health.\" Nutrition reviews 68.8 (2010): 439-458.</reference>"
test_suggest_relationship_a_not_cause_b_expected_response = "<answer>No</answer> <reference>Popkin, Barry M., Kristen E. D\'Anci, and Irwin H. Rosenberg. \"Water, hydration and health.\" Nutrition reviews 68.8 (2010): 439-458.</reference>"

# ASSERTIONS
test_suggest_description_expected_result = ([
"Water is a transparent, tasteless, odorless, nearly colorless liquid that is essential for all life forms and covers approximately 71% of Earth's surface, also existing in solid (ice) and gas (vapor) states."],
[])
test_suggest_onesided_relationship_a_cause_b_expected_result = 1
test_suggest_onesided_relationship_a_not_cause_b_expected_result = 0
test__build_description_program_no_context_no_reference_expected_result = {
'system': 'You are a helpful assistant for writing concise and peer-reviewed descriptions. Your goal \n is to provide factual and succinct description of the given concept.',
'user': " Describe the concept of water.\n In one sentence, provide a factual and succinct description of water\n Let's think step-by-step to make sure that we have a proper and clear description. Then provide \n your final answer within the tags, <description></description>."}
test__build_description_program_no_context_with_reference_expected_result = {
'system': 'You are a helpful assistant for writing concise and peer-reviewed descriptions. Your goal \n is to provide factual and succinct description of the given concept.',
'user': ' Describe the concept of water.\n In one sentence, provide a factual and succinct description of water"\n Then provide two research papers that support your description.\n Let\'s think step-by-step to make sure that we have a proper and clear description. Then provide \n your final answer within the tags, <description></description>, and each research paper within the \n tags <paper></paper>.'}
test__build_description_program_with_context_with_reference_expected_result = {
'system': 'You are a helpful assistant for writing concise and peer-reviewed descriptions. Your goal is \n to provide factual and succinct descriptions related to the given concept and context.',
'user': "Using this context about the particular variable, describe the concept of water.\n In one sentence, provide a factual and succinct description of waterThen provide two research papers that support your description.\n Let's think step-by-step to make sure that we have a proper and clear description. Then provide your final \n answer within the tags, <description></description>, and each research paper within the tags <reference></reference>."}
test__build_description_program_with_context_no_reference_expected_result = {
'system': 'You are a helpful assistant for writing concise and peer-reviewed descriptions. Your goal is \n to provide factual and succinct descriptions related to the given concept and context.',
'user': "Using this context about the particular variable, describe the concept of water.\n In one sentence, provide a factual and succinct description of water\n Let's think step-by-step to make sure that we have a proper and clear description. Then provide your final \n answer within the tags, <description></description>."}
test_suggest_relationship_a_cause_b_expected_result = (1,
[
'Popkin, Barry M., Kristen E. D\'Anci, and Irwin H. Rosenberg. "Water, hydration and health." Nutrition reviews 68.8 (2010): 439-458.'])
test_suggest_relationship_a_not_cause_b_expected_result = (0,
[
'Popkin, Barry M., Kristen E. D\'Anci, and Irwin H. Rosenberg. "Water, hydration and health." Nutrition reviews 68.8 (2010): 439-458.'])
test__build_relationship_program_expected_result = {
'system': 'You are a helpful assistant on causal reasoning and biology. Your '
'goal is to answer \n'
' questions about cause and effect in a factual and '
'concise way.',
'user': 'can changing water intake change hydration level? Answer Yes or '
'No.When consensus is reached, thinking carefully and factually, '
"explain the council's answer. \n"
' Provide the answer within the tags, '
'<answer>Yes/No</answer>.\n'
' \n'
'\n'
'\n'
'----------------\n'
'\n'
'\n'
'<answer>Yes</answer>\n'
'\n'
'\n'
'----------------\n'
'\n'
'\n'
'<answer>No</answer>'}
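
How the relationship fixtures map to the assertions: the suggester pulls the tagged spans out of the raw response and converts Yes/No into 1/0. A hedged sketch of equivalent parsing (the real extraction lives in tuebingen_model_suggester.py and may differ in detail):

    import re

    response = test_suggest_relationship_a_cause_b_expected_response
    answer = re.search(r"<answer>(.*?)</answer>", response).group(1)   # "Yes"
    references = re.findall(r"<reference>(.*?)</reference>", response)
    parsed = (1 if answer == "Yes" else 0, references)
    assert parsed == test_suggest_relationship_a_cause_b_expected_result
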
61 changes: 61 additions & 0 deletions pywhyllm/tests/model_suggester/data_providers/validation_suggester_data_provider.py
@@ -0,0 +1,61 @@
# TESTS
test_vars = ["smoking", "lung cancer", "exercise habits", "air pollution exposure"]
domain_expertises = ['Epidemiology']

# MOCK RESPONSES
test_latent_confounders_expected_response = "<confounding_factor>socio-economic status</confounding_factor> <confounding_factor>mental health</confounding_factor>"
test_negative_controls_expected_response = "<negative_control>exercise habits</negative_control>"
test_parent_critique_expected_response = "None"
test_children_critique_expected_response = "<influenced_factor>lung cancer</influenced_factor>"
test_pairwise_critique_expected_response = "The answer is <answer>A</answer>"
test_critique_graph_parent_expected_response = ["None",
"<influencing_factor>smoking</influencing_factor> <influencing_factor>air pollution exposure</influencing_factor>",
"<influencing_factor>air pollution exposure</influencing_factor>",
"None"]
test_critique_graph_children_expected_response = ["<influenced_factor>lung cancer</influenced_factor>",
"<influenced_factor>exercise habits</influenced_factor>",
"<influenced_factor>lung cancer</influenced_factor>",
"<influenced_factor>lung cancer</influenced_factor> <influenced_factor>exercise habits</influenced_factor>"]
test_critique_graph_pairwise_expected_response = ["<answer>A</answer>", "<answer>A</answer>", "<answer>C</answer>",
"<answer>B</answer>", "<answer>B</answer>", "<answer>B</answer>"]

# ASSERTIONS
test_suggest_latent_confounders_expected_results = ({'mental health': 1, 'socio-economic status': 1},
[{'mental health': 1, 'socio-economic status': 1},
['socio-economic status', 'mental health']])
test_request_latent_confounders_expected_results = ({'mental health': 1, 'socio-economic status': 1},
['socio-economic status', 'mental health'])
test_suggest_negative_controls_expected_results = (
{'exercise habits': 1}, [{'exercise habits': 1}, ['exercise habits']])
test_request_negative_controls_expected_results = ({'exercise habits': 1}, ['exercise habits'])
test_parent_critique_expected_results = []
test_children_critique_expected_results = ['lung cancer']
test_pairwise_critique_expected_results = ('smoking', 'lung cancer')
test_critique_graph_parent_expected_results = ({('air pollution exposure', 'exercise habits'): 1,
('air pollution exposure', 'lung cancer'): 1,
('air pollution exposure', 'smoking'): 1,
('smoking', 'lung cancer'): 1},
{('air pollution exposure', 'exercise habits'): 1,
('air pollution exposure', 'lung cancer'): 1,
('smoking', 'lung cancer'): 1})
test_critique_graph_children_expected_results = ({('air pollution exposure', 'smoking'): 1,
('exercise habits', 'air pollution exposure'): 1,
('exercise habits', 'smoking'): 1,
('lung cancer', 'air pollution exposure'): 1,
('lung cancer', 'exercise habits'): 1,
('lung cancer', 'smoking'): 1},
{('exercise habits', 'air pollution exposure'): 1,
('exercise habits', 'lung cancer'): 1,
('lung cancer', 'air pollution exposure'): 1,
('lung cancer', 'exercise habits'): 1,
('lung cancer', 'smoking'): 1})
test_critique_graph_pairwise_expected_results = ({('air pollution exposure', 'exercise habits'): 1,
('exercise habits', 'lung cancer'): 1,
('smoking', 'air pollution exposure'): 1,
('smoking', 'exercise habits'): 1,
('smoking', 'lung cancer'): 1},
{('smoking', 'lung cancer'): 1,
('smoking', 'exercise habits'): 1,
('exercise habits', 'lung cancer'): 1,
('air pollution exposure', 'lung cancer'): 1,
('air pollution exposure', 'exercise habits'): 1})
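
Reading the pairwise fixtures: lining the six mock answers up against the critiqued-edge dict suggests the encoding <answer>A</answer> = the first variable of the pair causes the second, B = the reverse, C = no edge. A sketch of that correspondence (the pair ordering is an assumption, taken from enumerating test_vars pairwise in order):

    from itertools import combinations

    answers = ["A", "A", "C", "B", "B", "B"]
    edges = {}
    for (x, y), a in zip(combinations(test_vars, 2), answers):
        if a == "A":
            edges[(x, y)] = 1
        elif a == "B":
            edges[(y, x)] = 1    # "C" contributes no edge
    assert edges == test_critique_graph_pairwise_expected_results[1]
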
11 changes: 9 additions & 2 deletions pywhyllm/tests/model_suggester/test_identification_suggester.py
@@ -3,13 +3,20 @@
 from guidance.models._openai import OpenAI

 from pywhyllm.suggesters.identification_suggester import IdentificationSuggester
+from pywhyllm.suggesters.model_suggester import ModelSuggester
 from pywhyllm.tests.model_suggester.data_providers.model_suggester_data_provider import *
 from pywhyllm.tests.model_suggester.data_providers.identification_suggester_data_provider import *
-from pywhyllm.tests.model_suggester.test_model_suggester import TestModelSuggester

 class TestIdentificationSuggester(unittest.TestCase):
     def test_suggest_backdoor(self):
-        return TestModelSuggester().test_suggest_confounders()
+        modeler = IdentificationSuggester()
+        mock_llm = MagicMock(spec=OpenAI)
+        modeler.llm = mock_llm
+        mock_model_suggester = MagicMock(spec=ModelSuggester)
+        modeler.model_suggester = mock_model_suggester
+        mock_model_suggester.suggest_confounders = MagicMock(return_value=test_suggest_confounders_expected_results)
+        result = modeler.suggest_backdoor(test_vars[0], test_vars[1], test_vars, test_domain_expertises_expected_result)
+        assert result == test_suggest_confounders_expected_results

     def test_suggest_mediators(self):
         modeler = IdentificationSuggester()
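
The rewritten test_suggest_backdoor no longer piggybacks on TestModelSuggester: it stubs ModelSuggester.suggest_confounders on the IdentificationSuggester and asserts that suggest_backdoor returns the stub's value unchanged, i.e. that backdoor-set suggestion is a straight delegation to confounder suggestion.
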
77 changes: 77 additions & 0 deletions pywhyllm/tests/model_suggester/test_tuebingen_model_suggester.py
@@ -0,0 +1,77 @@
import unittest
from unittest.mock import MagicMock
from guidance.models._openai import OpenAI

from pywhyllm.suggesters.tuebingen_model_suggester import TuebingenModelSuggester, Strategy
from pywhyllm.tests.model_suggester.data_providers.tuebingen_model_suggester_data_provider import *


class TestTuebingenModelSuggester(unittest.TestCase):
    def test_suggest_description(self):
        modeler = TuebingenModelSuggester()
        mock_llm = MagicMock(spec=OpenAI)
        modeler.llm = mock_llm

        mock_llm.__add__ = MagicMock(return_value=mock_llm)
        mock_llm.__getitem__ = MagicMock(return_value=test_suggest_description_expected_response)
        result = modeler.suggest_description(variable, True)
        assert result == test_suggest_description_expected_result

    def test_suggest_onesided_relationship(self):
        modeler = TuebingenModelSuggester()
        mock_llm = MagicMock(spec=OpenAI)
        modeler.llm = mock_llm

        mock_llm.__add__ = MagicMock(return_value=mock_llm)
        # Given the two variables and their descriptions, variable a causes variable b
        mock_llm.__getitem__ = MagicMock(return_value=test_suggest_onesided_relationship_a_cause_b_expected_response)
        result = modeler.suggest_onesided_relationship(variable_a, description_a, variable_b, description_b)
        assert result == test_suggest_onesided_relationship_a_cause_b_expected_result

        # Given the two variables and their descriptions, variable a does not cause variable b
        mock_llm.__getitem__ = MagicMock(return_value=test_suggest_onesided_relationship_a_not_cause_b_expected_response)
        result = modeler.suggest_onesided_relationship(variable_a, description_a, variable_b, description_b)
        assert result == test_suggest_onesided_relationship_a_not_cause_b_expected_result

    def test__build_description_program(self):
        modeler = TuebingenModelSuggester()
        mock_llm = MagicMock(spec=OpenAI)
        modeler.llm = mock_llm
        # Test no context, no reference
        result = modeler._build_description_program(variable, False, False)
        assert result == test__build_description_program_no_context_no_reference_expected_result
        # Test no context, with reference
        result = modeler._build_description_program(variable, False, True)
        assert result == test__build_description_program_no_context_with_reference_expected_result
        # Test with context, no reference
        result = modeler._build_description_program(variable, True, False)
        assert result == test__build_description_program_with_context_no_reference_expected_result
        # Test with context, with reference
        result = modeler._build_description_program(variable, True, True)
        assert result == test__build_description_program_with_context_with_reference_expected_result

    def test_suggest_relationship(self):
        modeler = TuebingenModelSuggester()
        mock_llm = MagicMock(spec=OpenAI)
        modeler.llm = mock_llm

        mock_llm.__add__ = MagicMock(return_value=mock_llm)
        # Given the two variables and their descriptions, variable a causes variable b
        mock_llm.__getitem__ = MagicMock(return_value=test_suggest_relationship_a_cause_b_expected_response)
        result = modeler.suggest_relationship(variable_a, variable_b, description_a, description_b, domain,
                                              strategy=Strategy.ToT_Single, ask_reference=True)
        assert result == test_suggest_relationship_a_cause_b_expected_result
        # Given the two variables and their descriptions, variable a does not cause variable b
        mock_llm.__getitem__ = MagicMock(return_value=test_suggest_relationship_a_not_cause_b_expected_response)
        result = modeler.suggest_relationship(variable_a, variable_b, description_a, description_b, domain,
                                              strategy=Strategy.ToT_Single, ask_reference=True)
        assert result == test_suggest_relationship_a_not_cause_b_expected_result

    def test__build_relationship_program(self):
        modeler = TuebingenModelSuggester()
        mock_llm = MagicMock(spec=OpenAI)
        modeler.llm = mock_llm

        result = modeler._build_relationship_program(variable_a, description_a, variable_b, description_b, domain,
                                                     use_description=False, ask_reference=False)
        assert result == test__build_relationship_program_expected_result
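
Since these are plain unittest.TestCase classes, the suite can be run without extra tooling; for example, from the repository root (assuming dependencies are installed):

    python -m unittest pywhyllm.tests.model_suggester.test_tuebingen_model_suggester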