docs/reference.md: 10 changes (8 additions, 2 deletions)
@@ -648,10 +648,16 @@ vf.print_prompt_completions_sample(outputs: GenerateOutputs, n: int = 3)
 Pretty-print sample rollouts.

 ```python
-vf.setup_logging(level: str = "INFO")
+vf.setup_logging(
+    level: str = "INFO",
+    log_format: str | None = None,
+    date_format: str | None = None,
+    log_file: str | None = None,
+    log_file_level: str | None = None,
+)
 ```

-Configure verifiers logging. Set `VF_LOG_LEVEL` env var to change default.
+Configure verifiers logging. Set `VF_LOG_LEVEL` env var to change default. Optionally specify `log_file` to write logs to a file in addition to stderr. Use `log_file_level` to set a different log level for the file handler.

 ```python
 vf.log_level(level: str | int)
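For orientation, a minimal sketch of how the extended signature above might be used. Only the parameters documented in the hunk are assumed to exist; the log path is hypothetical.

```python
import verifiers as vf

# Console logging at INFO, plus a file handler capturing more detail.
# "logs/eval.log" is a hypothetical path used only for illustration.
vf.setup_logging(
    level="INFO",
    log_file="logs/eval.log",
    log_file_level="DEBUG",
)
```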
pyproject.toml: 1 change (1 addition, 0 deletions)
@@ -32,6 +32,7 @@ dependencies = [
     "jinja2>=3.1.6",
     "math-verify>=0.8.0",
     "mcp>=1.14.1",
+    "msgpack>=1.1.2",
     "nest-asyncio>=1.6.0", # for jupyter notebooks
     "openai>=1.108.1",
     "openai-agents>=0.0.7",
verifiers/envs/env_group.py: 15 changes (6 additions, 9 deletions)
@@ -1,11 +1,10 @@
 import time
-from typing import TYPE_CHECKING, AsyncContextManager, Mapping, final
+from typing import TYPE_CHECKING, Mapping, final

 from datasets import Dataset, concatenate_datasets
-from openai import AsyncOpenAI

 import verifiers as vf
-from verifiers.types import RolloutInput, SamplingArgs
+from verifiers.types import ClientConfig, RolloutInput, SamplingArgs

 if TYPE_CHECKING:
     pass
@@ -37,7 +36,6 @@ def _get_reward_func_names(self) -> list[str]:
     async def score_rollout(
         self,
         state: vf.State,
-        score_sem: AsyncContextManager,
     ) -> None:
         """
         Evaluate all reward functions in-place for a single rollout.
@@ -56,7 +54,7 @@ async def score_rollout(
             state["metrics"] = metrics
             return

-        await env.rubric.score_rollout(state, score_sem=score_sem)
+        await env.rubric.score_rollout(state)
         env_reward = state.get("reward", 0.0)
         env_metrics = state.get("metrics", {}).copy() if state.get("metrics") else {}

@@ -71,7 +69,6 @@ async def score_rollout(
     async def score_group(
         self,
         states: list[vf.State],
-        score_sem: AsyncContextManager,
     ) -> None:
         """
         Score a group of rollouts, routing to appropriate environment rubrics based on task.
@@ -94,7 +91,7 @@ async def score_group(
             return

         # Score all states using the environment's rubric
-        await env.rubric.score_group(states, score_sem=score_sem)
+        await env.rubric.score_group(states)

         # Initialize metrics dict with all reward function names
         aggregated_metrics: dict[str, list[float]] = {
@@ -266,12 +263,12 @@ def add_example_id(example, i):
     async def rollout(
         self,
         input: RolloutInput,
-        client: AsyncOpenAI,
+        client_config: ClientConfig,
         model: str,
         sampling_args: SamplingArgs | None = None,
     ) -> vf.State:
         env = self.get_env_for_task(input["task"])
-        return await env.rollout(input, client, model, sampling_args)
+        return await env.rollout(input, client_config, model, sampling_args)

     def get_env_for_task(self, task: str) -> vf.Environment:
         return self.env_map.get(task, self.envs[0])
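For illustration, a hedged sketch of a caller adapting to the updated `rollout` signature; the `EnvGroup` class name and the idea that callers already hold a `ClientConfig` are assumptions inferred from this diff, not confirmed by it.

```python
import verifiers as vf
from verifiers.types import ClientConfig, RolloutInput

async def run_one(
    env_group: "vf.EnvGroup",      # assumed public name for the class in env_group.py
    rollout_input: RolloutInput,
    client_config: ClientConfig,   # replaces the former AsyncOpenAI client argument
    model: str,
) -> vf.State:
    # rollout() routes on rollout_input["task"] and forwards the ClientConfig
    # to the selected sub-environment, per the signature in the hunk above.
    return await env_group.rollout(rollout_input, client_config, model)
```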