docs/reference.md: 10 changes (8 additions, 2 deletions)
@@ -648,10 +648,16 @@ vf.print_prompt_completions_sample(outputs: GenerateOutputs, n: int = 3)
 Pretty-print sample rollouts.

 ```python
-vf.setup_logging(level: str = "INFO")
+vf.setup_logging(
+    level: str = "INFO",
+    log_format: str | None = None,
+    date_format: str | None = None,
+    log_file: str | None = None,
+    log_file_level: str | None = None,
+)
 ```

-Configure verifiers logging. Set `VF_LOG_LEVEL` env var to change default.
+Configure verifiers logging. Set `VF_LOG_LEVEL` env var to change default. Optionally specify `log_file` to write logs to a file in addition to stderr. Use `log_file_level` to set a different log level for the file handler.

 ```python
 vf.log_level(level: str | int)
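For orientation, a minimal sketch of how the extended signature above might be used. Only the parameters documented in the hunk are assumed to exist; the log path is hypothetical.

```python
import verifiers as vf

# Console logging at INFO, plus a file handler capturing more detail.
# "logs/eval.log" is a hypothetical path used only for illustration.
vf.setup_logging(
    level="INFO",
    log_file="logs/eval.log",
    log_file_level="DEBUG",
)
```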
pyproject.toml: 1 change (1 addition, 0 deletions)
@@ -32,6 +32,7 @@ dependencies = [
     "jinja2>=3.1.6",
     "math-verify>=0.8.0",
     "mcp>=1.14.1",
+    "msgpack>=1.1.2",
     "nest-asyncio>=1.6.0", # for jupyter notebooks
     "openai>=1.108.1",
     "openai-agents>=0.0.7",
verifiers/envs/env_group.py: 15 changes (6 additions, 9 deletions)
@@ -1,11 +1,10 @@
 import time
-from typing import TYPE_CHECKING, AsyncContextManager, Mapping, final
+from typing import TYPE_CHECKING, Mapping, final

 from datasets import Dataset, concatenate_datasets
-from openai import AsyncOpenAI

 import verifiers as vf
-from verifiers.types import RolloutInput, SamplingArgs
+from verifiers.types import ClientConfig, RolloutInput, SamplingArgs

 if TYPE_CHECKING:
     pass
@@ -37,7 +36,6 @@ def _get_reward_func_names(self) -> list[str]:
     async def score_rollout(
         self,
         state: vf.State,
-        score_sem: AsyncContextManager,
     ) -> None:
         """
         Evaluate all reward functions in-place for a single rollout.
@@ -56,7 +54,7 @@ async def score_rollout(
             state["metrics"] = metrics
             return

-        await env.rubric.score_rollout(state, score_sem=score_sem)
+        await env.rubric.score_rollout(state)
         env_reward = state.get("reward", 0.0)
         env_metrics = state.get("metrics", {}).copy() if state.get("metrics") else {}

@@ -71,7 +69,6 @@ async def score_rollout(
     async def score_group(
         self,
         states: list[vf.State],
-        score_sem: AsyncContextManager,
     ) -> None:
         """
         Score a group of rollouts, routing to appropriate environment rubrics based on task.
@@ -94,7 +91,7 @@ async def score_group(
             return

         # Score all states using the environment's rubric
-        await env.rubric.score_group(states, score_sem=score_sem)
+        await env.rubric.score_group(states)

         # Initialize metrics dict with all reward function names
         aggregated_metrics: dict[str, list[float]] = {
@@ -266,12 +263,12 @@ def add_example_id(example, i):
     async def rollout(
         self,
         input: RolloutInput,
-        client: AsyncOpenAI,
+        client_config: ClientConfig,
         model: str,
         sampling_args: SamplingArgs | None = None,
     ) -> vf.State:
         env = self.get_env_for_task(input["task"])
-        return await env.rollout(input, client, model, sampling_args)
+        return await env.rollout(input, client_config, model, sampling_args)

     def get_env_for_task(self, task: str) -> vf.Environment:
         return self.env_map.get(task, self.envs[0])
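For illustration, a hedged sketch of a caller adapting to the updated `rollout` signature; the `EnvGroup` class name and the idea that callers already hold a `ClientConfig` are assumptions inferred from this diff, not confirmed by it.

```python
import verifiers as vf
from verifiers.types import ClientConfig, RolloutInput

async def run_one(
    env_group: "vf.EnvGroup",      # assumed public name for the class in env_group.py
    rollout_input: RolloutInput,
    client_config: ClientConfig,   # replaces the former AsyncOpenAI client argument
    model: str,
) -> vf.State:
    # rollout() routes on rollout_input["task"] and forwards the ClientConfig
    # to the selected sub-environment, per the signature in the hunk above.
    return await env_group.rollout(rollout_input, client_config, model)
```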