1
1
import os
2
+
2
3
import pytest
4
+ from anthropic .types import MessageParam , TextBlock , Usage
3
5
4
6
from evals .record import DummyRecorder
7
+ from evals .solvers .providers .anthropic .anthropic_solver import AnthropicSolver , anth_to_openai_usage
5
8
from evals .task_state import Message , TaskState
6
- from evals .solvers .providers .anthropic .anthropic_solver import (
7
- AnthropicSolver ,
8
- anth_to_openai_usage ,
9
- )
10
-
11
- from anthropic .types import ContentBlock , MessageParam , Usage
12
9
13
10
IN_GITHUB_ACTIONS = os .getenv ("GITHUB_ACTIONS" ) == "true"
14
11
MODEL_NAME = "claude-instant-1.2"
@@ -32,9 +29,7 @@ def dummy_recorder():
32
29
yield recorder
33
30
34
31
35
- @pytest .mark .skipif (
36
- IN_GITHUB_ACTIONS , reason = "API tests are wasteful to run on every commit."
37
- )
32
+ @pytest .mark .skipif (IN_GITHUB_ACTIONS , reason = "API tests are wasteful to run on every commit." )
38
33
def test_solver (dummy_recorder , anthropic_solver ):
39
34
"""
40
35
Test that the solver generates a response coherent with the message history
@@ -55,9 +50,7 @@ def test_solver(dummy_recorder, anthropic_solver):
55
50
)
56
51
57
52
solver_res = solver (task_state = task_state )
58
- assert (
59
- solver_res .output == answer
60
- ), f"Expected '{ answer } ', but got { solver_res .output } "
53
+ assert solver_res .output == answer , f"Expected '{ answer } ', but got { solver_res .output } "
61
54
62
55
63
56
def test_message_format ():
@@ -71,9 +64,7 @@ def test_message_format():
71
64
msgs = [
72
65
Message (role = "user" , content = "What is 2 + 2?" ),
73
66
Message (role = "system" , content = "reason step by step" ),
74
- Message (
75
- role = "assistant" , content = "I don't need to reason for this, 2+2 is just 4"
76
- ),
67
+ Message (role = "assistant" , content = "I don't need to reason for this, 2+2 is just 4" ),
77
68
Message (role = "system" , content = "now, given your reasoning, provide the answer" ),
78
69
]
79
70
anth_msgs = AnthropicSolver ._convert_msgs_to_anthropic_format (msgs )
@@ -82,24 +73,20 @@ def test_message_format():
82
73
MessageParam (
83
74
role = "user" ,
84
75
content = [
85
- ContentBlock (text = "What is 2 + 2?" , type = "text" ),
86
- ContentBlock (text = "reason step by step" , type = "text" ),
76
+ TextBlock (text = "What is 2 + 2?" , type = "text" ),
77
+ TextBlock (text = "reason step by step" , type = "text" ),
87
78
],
88
79
),
89
80
MessageParam (
90
81
role = "assistant" ,
91
82
content = [
92
- ContentBlock (
93
- text = "I don't need to reason for this, 2+2 is just 4" , type = "text"
94
- ),
83
+ TextBlock (text = "I don't need to reason for this, 2+2 is just 4" , type = "text" ),
95
84
],
96
85
),
97
86
MessageParam (
98
87
role = "user" ,
99
88
content = [
100
- ContentBlock (
101
- text = "now, given your reasoning, provide the answer" , type = "text"
102
- ),
89
+ TextBlock (text = "now, given your reasoning, provide the answer" , type = "text" ),
103
90
],
104
91
),
105
92
]
@@ -126,6 +113,4 @@ def test_anth_to_openai_usage_zero_tokens():
126
113
"prompt_tokens" : 0 ,
127
114
"total_tokens" : 0 ,
128
115
}
129
- assert (
130
- anth_to_openai_usage (usage ) == expected
131
- ), "Zero token cases are not handled correctly."
116
+ assert anth_to_openai_usage (usage ) == expected , "Zero token cases are not handled correctly."
0 commit comments