|
9 | 9 | - unlabeled
|
10 | 10 |
|
11 | 11 | env:
|
12 |
| - EVAL_MODELS: "gpt-4o,gpt-4o-mini,claude-3-5-sonnet-latest" |
| 12 | + EVAL_MODELS: "gpt-4.1,gemini-2.0-flash,claude-3-5-sonnet-latest" |
13 | 13 | EVAL_CATEGORIES: "observe,act,combination,extract,text_extract,targeted_extract"
|
14 | 14 |
|
15 | 15 | concurrency:
|
@@ -159,6 +159,7 @@ jobs:
|
159 | 159 | env:
|
160 | 160 | OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
161 | 161 | ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
| 162 | + GOOGLE_GENERATIVE_AI_API_KEY: ${{ secrets.GOOGLE_GENERATIVE_AI_API_KEY }} |
162 | 163 | BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }}
|
163 | 164 | BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }}
|
164 | 165 | HEADLESS: true
|
@@ -196,6 +197,7 @@ jobs:
|
196 | 197 | env:
|
197 | 198 | OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
198 | 199 | ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
| 200 | + GOOGLE_GENERATIVE_AI_API_KEY: ${{ secrets.GOOGLE_GENERATIVE_AI_API_KEY }} |
199 | 201 | BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
|
200 | 202 | BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }}
|
201 | 203 | BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }}
|
@@ -248,6 +250,7 @@ jobs:
|
248 | 250 | env:
|
249 | 251 | OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
250 | 252 | ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
| 253 | + GOOGLE_GENERATIVE_AI_API_KEY: ${{ secrets.GOOGLE_GENERATIVE_AI_API_KEY }} |
251 | 254 | BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
|
252 | 255 | BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }}
|
253 | 256 | BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }}
|
@@ -313,6 +316,7 @@ jobs:
|
313 | 316 | env:
|
314 | 317 | OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
315 | 318 | ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
| 319 | + GOOGLE_GENERATIVE_AI_API_KEY: ${{ secrets.GOOGLE_GENERATIVE_AI_API_KEY }} |
316 | 320 | BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
|
317 | 321 | BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }}
|
318 | 322 | BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }}
|
@@ -381,6 +385,7 @@ jobs:
|
381 | 385 | env:
|
382 | 386 | OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
383 | 387 | ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
| 388 | + GOOGLE_GENERATIVE_AI_API_KEY: ${{ secrets.GOOGLE_GENERATIVE_AI_API_KEY }} |
384 | 389 | BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
|
385 | 390 | BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }}
|
386 | 391 | BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }}
|
@@ -466,6 +471,7 @@ jobs:
|
466 | 471 | env:
|
467 | 472 | OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
468 | 473 | ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
| 474 | + GOOGLE_GENERATIVE_AI_API_KEY: ${{ secrets.GOOGLE_GENERATIVE_AI_API_KEY }} |
469 | 475 | BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
|
470 | 476 | BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }}
|
471 | 477 | BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }}
|
@@ -535,6 +541,7 @@ jobs:
|
535 | 541 | env:
|
536 | 542 | OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
537 | 543 | ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
| 544 | + GOOGLE_GENERATIVE_AI_API_KEY: ${{ secrets.GOOGLE_GENERATIVE_AI_API_KEY }} |
538 | 545 | BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
|
539 | 546 | BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }}
|
540 | 547 | BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }}
|
@@ -603,6 +610,7 @@ jobs:
|
603 | 610 | env:
|
604 | 611 | OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
605 | 612 | ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
| 613 | + GOOGLE_GENERATIVE_AI_API_KEY: ${{ secrets.GOOGLE_GENERATIVE_AI_API_KEY }} |
606 | 614 | BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
|
607 | 615 | BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }}
|
608 | 616 | BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }}
|
|
0 commit comments