143 changes: 142 additions & 1 deletion .github/workflows/test.yml
@@ -107,6 +107,23 @@ jobs:
OPTILLM_API_KEY=optillm python tests/test_n_parameter.py
OPTILLM_API_KEY=optillm python -m pytest tests/test_api_compatibility.py -v --tb=short || echo "API compatibility tests require pytest"
OPTILLM_API_KEY=optillm python tests/test.py --approaches none --single-test "Simple Math Problem" || echo "Main test completed"

# Run SSL config tests (no server needed but requires proper env setup)
echo "Running SSL config tests..."
python -m pytest tests/test_ssl_config.py -v --tb=short

# Run MARS tests
echo "Running MARS parallel tests..."
OPTILLM_API_KEY=optillm python -m pytest tests/test_mars_parallel.py -v --tb=short

# Run deepconf tests
echo "Running deepconf tests..."
OPTILLM_API_KEY=optillm python -m pytest tests/test_deepconf.py -v --tb=short

# Run conversation logger unit tests (no server needed)
echo "Running conversation logger tests..."
python -m pytest tests/test_conversation_logger.py -v --tb=short

echo "All integration tests completed successfully!"
exit 0
env:
@@ -125,4 +142,128 @@ jobs:
pkill -f "python.*optillm" 2>/dev/null || true
sleep 2
echo "Server shutdown completed"
exit 0
exit 0

conversation-logging-tests:
runs-on: ubuntu-latest
needs: unit-tests
strategy:
matrix:
python-version: ['3.12']

steps:
- uses: actions/checkout@v4

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}

- name: Cache pip packages
uses: actions/cache@v3
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
restore-keys: |
${{ runner.os }}-pip-

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install -r tests/requirements.txt
pip install -e .

- name: Start optillm server with conversation logging
run: |
echo "Starting optillm server with conversation logging..."
mkdir -p /tmp/optillm_conversations
OPTILLM_API_KEY=optillm python optillm.py \
--model google/gemma-3-270m-it \
--port 8000 \
--log-conversations \
--conversation-log-dir /tmp/optillm_conversations &
echo $! > server.pid

# Wait for server to be ready
echo "Waiting for server to start..."
sleep 20

# Test server health
curl -s http://localhost:8000/health || echo "Server health check failed"
env:
OPTILLM_API_KEY: optillm
HF_TOKEN: ${{ secrets.HF_TOKEN }}

- name: Run conversation logging tests
run: |
echo "Running conversation logging approach tests..."
OPTILLM_API_KEY=optillm python -m pytest tests/test_conversation_logging_approaches.py -v --tb=short

echo "Running conversation logging server tests..."
OPTILLM_API_KEY=optillm OPTILLM_CONVERSATION_LOG_DIR=/tmp/optillm_conversations python -m pytest tests/test_conversation_logging_server.py -v --tb=short

echo "All conversation logging tests completed successfully!"
env:
OPTILLM_API_KEY: optillm
OPTILLM_CONVERSATION_LOG_DIR: /tmp/optillm_conversations
HF_TOKEN: ${{ secrets.HF_TOKEN }}

- name: Stop optillm server
if: always()
run: |
echo "Stopping optillm server..."
if [ -f server.pid ]; then
kill $(cat server.pid) 2>/dev/null || true
rm -f server.pid
fi
pkill -f "python.*optillm" 2>/dev/null || true
sleep 2
echo "Server shutdown completed"
exit 0

mcp-tests:
runs-on: ubuntu-latest
needs: unit-tests
if: github.event_name == 'push' && github.ref == 'refs/heads/main' # Only run on main branch pushes (secrets available)
strategy:
matrix:
python-version: ['3.12']

steps:
- uses: actions/checkout@v4

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}

- name: Set up Node.js
uses: actions/setup-node@v4
with:
node-version: '20'

- name: Cache pip packages
uses: actions/cache@v3
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
restore-keys: |
${{ runner.os }}-pip-

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install -r tests/requirements.txt
pip install -e .

- name: Run MCP plugin tests
run: |
echo "Running MCP plugin tests..."
python -m pytest tests/test_mcp_plugin.py -v --tb=short
echo "MCP tests completed successfully!"
env:
OPTILLM_API_KEY: optillm
GITHUB_TOKEN: ${{ secrets.GH_TOKEN }}
HF_TOKEN: ${{ secrets.HF_TOKEN }}
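
For local debugging of the new conversation-logging job, here is a minimal sketch that mirrors the server startup step above; the flags, model, and port are copied from the workflow, and the log directory is just the same temporary path the CI uses:

```bash
# Sketch of the conversation-logging startup the workflow performs (not part of the diff).
mkdir -p /tmp/optillm_conversations

OPTILLM_API_KEY=optillm python optillm.py \
  --model google/gemma-3-270m-it \
  --port 8000 \
  --log-conversations \
  --conversation-log-dir /tmp/optillm_conversations &

# Give the server time to load the model, then confirm it is reachable.
sleep 20
curl -s http://localhost:8000/health
```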
6 changes: 3 additions & 3 deletions README.md
@@ -216,15 +216,15 @@ You can then run the optillm proxy as follows.
```bash
python optillm.py
2024-09-06 07:57:14,191 - INFO - Starting server with approach: auto
2024-09-06 07:57:14,191 - INFO - Server configuration: {'approach': 'auto', 'mcts_simulations': 2, 'mcts_exploration': 0.2, 'mcts_depth': 1, 'best_of_n': 3, 'model': 'gpt-4o-mini', 'rstar_max_depth': 3, 'rstar_num_rollouts': 5, 'rstar_c': 1.4, 'base_url': ''}
2024-09-06 07:57:14,191 - INFO - Server configuration: {'approach': 'auto', 'mcts_simulations': 2, 'mcts_exploration': 0.2, 'mcts_depth': 1, 'best_of_n': 3, 'model': 'gpt-4o-mini', 'rstar_max_depth': 3, 'rstar_num_rollouts': 5, 'rstar_c': 1.4, 'base_url': '', 'host': '127.0.0.1'}
* Serving Flask app 'optillm'
* Debug mode: off
2024-09-06 07:57:14,212 - INFO - WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
* Running on all addresses (0.0.0.0)
* Running on http://127.0.0.1:8000
* Running on http://192.168.10.48:8000
2024-09-06 07:57:14,212 - INFO - Press CTRL+C to quit
```

> **Security Note**: By default, optillm binds to `127.0.0.1` (localhost only) for security. To allow external connections (e.g., for Docker or remote access), use `--host 0.0.0.0`. Only do this on trusted networks or with proper authentication configured via `--optillm-api-key`.
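
As an illustration of that note, a minimal sketch of exposing the proxy beyond localhost while keeping authentication on; the key value here is a placeholder, not a real secret:

```bash
# Only bind to all interfaces on a trusted network, and require clients to send a key.
# "sk-optillm-local" is a placeholder value for this sketch.
python optillm.py --host 0.0.0.0 --port 8000 --optillm-api-key sk-optillm-local
```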
## Usage

Once the proxy is running, you can use it as a drop-in replacement for an OpenAI client by setting the `base_url` to `http://localhost:8000/v1`.
2 changes: 1 addition & 1 deletion optillm/__init__.py
@@ -1,5 +1,5 @@
# Version information
__version__ = "0.3.11"
__version__ = "0.3.12"

# Import from server module
from .server import (
36 changes: 26 additions & 10 deletions optillm/server.py
@@ -94,10 +94,10 @@ def get_config():
API_KEY = os.environ.get("OPENAI_API_KEY")
base_url = server_config['base_url']
if base_url != "":
default_client = OpenAI(api_key=API_KEY, base_url=base_url)
default_client = OpenAI(api_key=API_KEY, base_url=base_url, http_client=http_client)
logger.info(f"Created OpenAI client with base_url: {base_url}")
else:
default_client = OpenAI(api_key=API_KEY)
default_client = OpenAI(api_key=API_KEY, http_client=http_client)
logger.info("Created OpenAI client without base_url")
elif os.environ.get("AZURE_OPENAI_API_KEY"):
API_KEY = os.environ.get("AZURE_OPENAI_API_KEY")
@@ -189,6 +189,7 @@ def count_reasoning_tokens(text: str, tokenizer=None) -> int:
'base_url': '',
'optillm_api_key': '',
'return_full_response': False,
'host': '127.0.0.1', # Default to localhost for security; use 0.0.0.0 to allow external connections
'port': 8000,
'log': 'info',
'ssl_verify': True,
@@ -396,9 +397,9 @@ def execute_single_approach(approach, system_prompt, initial_query, client, mode
if approach == 'none':
# Use the request_config that was already prepared and passed to this function
kwargs = request_config.copy() if request_config else {}

# Remove items that are handled separately by the framework
kwargs.pop('n', None) # n is handled by execute_n_times
# Note: 'n' is NOT removed - the none_approach passes it to the client which handles multiple completions
kwargs.pop('stream', None) # stream is handled by proxy()

# Reconstruct original messages from system_prompt and initial_query
@@ -408,6 +409,7 @@
if initial_query:
messages.append({"role": "user", "content": initial_query})

logger.debug(f"none_approach kwargs: {kwargs}")
response = none_approach(original_messages=messages, client=client, model=model, request_id=request_id, **kwargs)
# For none approach, we return the response and a token count of 0
# since the full token count is already in the response
@@ -546,17 +548,29 @@ def execute_n_times(n: int, approaches, operation: str, system_prompt: str, init
return responses, total_tokens

def generate_streaming_response(final_response, model):
# Yield the final response
# Generate a unique response ID
response_id = f"chatcmpl-{int(time.time()*1000)}"
created = int(time.time())

# Yield the final response with OpenAI-compatible format
if isinstance(final_response, list):
for index, response in enumerate(final_response):
# First chunk includes role
yield "data: " + json.dumps({
"choices": [{"delta": {"content": response}, "index": index, "finish_reason": "stop"}],
"id": response_id,
"object": "chat.completion.chunk",
"created": created,
"model": model,
"choices": [{"delta": {"role": "assistant", "content": response}, "index": index, "finish_reason": "stop"}],
}) + "\n\n"
else:
# First chunk includes role
yield "data: " + json.dumps({
"choices": [{"delta": {"content": final_response}, "index": 0, "finish_reason": "stop"}],
"id": response_id,
"object": "chat.completion.chunk",
"created": created,
"model": model,
"choices": [{"delta": {"role": "assistant", "content": final_response}, "index": 0, "finish_reason": "stop"}],
}) + "\n\n"

# Yield the final message to indicate the stream has ended
@@ -987,6 +1001,7 @@ def parse_args():
("--rstar-c", "OPTILLM_RSTAR_C", float, 1.4, "Exploration constant for rStar algorithm"),
("--n", "OPTILLM_N", int, 1, "Number of final responses to be returned"),
("--return-full-response", "OPTILLM_RETURN_FULL_RESPONSE", bool, False, "Return the full response including the CoT with <thinking> tags"),
("--host", "OPTILLM_HOST", str, "127.0.0.1", "Host address to bind the server to (use 0.0.0.0 to allow external connections)"),
("--port", "OPTILLM_PORT", int, 8000, "Specify the port to run the proxy"),
("--log", "OPTILLM_LOG", str, "info", "Specify the logging level", list(logging_levels.keys())),
("--launch-gui", "OPTILLM_LAUNCH_GUI", bool, False, "Launch a Gradio chat interface"),
@@ -1263,7 +1278,8 @@ def process_batch_requests(batch_requests):
import gradio as gr
# Start server in a separate thread
import threading
server_thread = threading.Thread(target=app.run, kwargs={'host': '0.0.0.0', 'port': port})
host = server_config['host']
server_thread = threading.Thread(target=app.run, kwargs={'host': host, 'port': port})
server_thread.daemon = True
server_thread.start()

@@ -1310,12 +1326,12 @@ def chat_with_optillm(message, history):
description=f"Connected to OptILLM proxy at {base_url}"
)
demo.queue() # Enable queue to handle long operations properly
demo.launch(server_name="0.0.0.0", share=False)
demo.launch(server_name=host, share=False)
except ImportError:
logger.error("Gradio is required for GUI. Install it with: pip install gradio")
return

app.run(host='0.0.0.0', port=port)
app.run(host=server_config['host'], port=port)

if __name__ == "__main__":
main()
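
Because `generate_streaming_response` now emits OpenAI-style chunk objects (`id`, `object`, `created`, `model`, and a `role` in the delta), a quick way to inspect the new shape is a streaming request against a locally running proxy. This is a sketch, assuming the proxy listens on port 8000 and uses the `optillm` API key seen in the CI jobs:

```bash
# Sketch: stream a completion through the proxy and watch the SSE chunks.
curl -N http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer optillm" \
  -d '{
        "model": "gpt-4o-mini",
        "stream": true,
        "messages": [{"role": "user", "content": "Say hello"}]
      }'
# Each "data:" line should now include id, object, created, model, and a delta carrying the role.
```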
38 changes: 23 additions & 15 deletions optillm/z3_solver.py
@@ -15,8 +15,16 @@
class TimeoutException(Exception):
pass

def prepare_safe_globals():
safe_globals = {
def prepare_execution_globals():
"""
Prepare globals dictionary for Z3/SymPy code execution.

WARNING: This is NOT a security sandbox. The name "execution_globals" reflects
that this simply provides the execution environment for solver code, not a
security boundary. The code is executed via exec() with access to z3, sympy,
and math libraries. Only execute trusted code.
"""
execution_globals = {
'print': print,
'__builtins__': {
'True': True,
@@ -35,7 +43,7 @@ def prepare_safe_globals():
}

# Add common math functions
safe_globals.update({
execution_globals.update({
'log': math.log,
'log2': math.log2,
'sqrt': math.sqrt,
@@ -48,10 +56,10 @@
})

# Add complex number support
safe_globals['I'] = complex(0, 1)
safe_globals['Complex'] = complex
execution_globals['I'] = complex(0, 1)
execution_globals['Complex'] = complex

return safe_globals
return execution_globals

def execute_code_in_process(code: str):
import z3
@@ -60,18 +68,18 @@ def execute_code_in_process(code: str):
import itertools
from fractions import Fraction

safe_globals = prepare_safe_globals()
execution_globals = prepare_execution_globals()

# Add Z3 specific functions
z3_whitelist = set(dir(z3))
safe_globals.update({name: getattr(z3, name) for name in z3_whitelist})
execution_globals.update({name: getattr(z3, name) for name in z3_whitelist})

# Add SymPy specific functions
sympy_whitelist = set(dir(sympy))
safe_globals.update({name: getattr(sympy, name) for name in sympy_whitelist})
execution_globals.update({name: getattr(sympy, name) for name in sympy_whitelist})

# Ensure key Z3 and SymPy components are available
safe_globals.update({
execution_globals.update({
'z3': z3,
'sympy': sympy,
'Solver': z3.Solver,
@@ -112,22 +120,22 @@ def as_numerical(x):
return x.approx(20)
return float(x)

safe_globals['as_numerical'] = as_numerical
execution_globals['as_numerical'] = as_numerical

def Mod(x, y):
return x % y

safe_globals['Mod'] = Mod
execution_globals['Mod'] = Mod

def Rational(numerator, denominator=1):
return z3.Real(str(Fraction(numerator, denominator)))

safe_globals['Rational'] = Rational
execution_globals['Rational'] = Rational

output_buffer = io.StringIO()
with contextlib.redirect_stdout(output_buffer):
try:
exec(code, safe_globals, {})
exec(code, execution_globals, {})
except Exception:
return ("error", traceback.format_exc())
return ("success", output_buffer.getvalue())
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "optillm"
version = "0.3.11"
version = "0.3.12"
description = "An optimizing inference proxy for LLMs."
readme = "README.md"
license = "Apache-2.0"