143 changes: 142 additions & 1 deletion .github/workflows/test.yml
@@ -107,6 +107,23 @@ jobs:
OPTILLM_API_KEY=optillm python tests/test_n_parameter.py
OPTILLM_API_KEY=optillm python -m pytest tests/test_api_compatibility.py -v --tb=short || echo "API compatibility tests require pytest"
OPTILLM_API_KEY=optillm python tests/test.py --approaches none --single-test "Simple Math Problem" || echo "Main test completed"

# Run SSL config tests (no server needed but requires proper env setup)
echo "Running SSL config tests..."
python -m pytest tests/test_ssl_config.py -v --tb=short

# Run MARS tests
echo "Running MARS parallel tests..."
OPTILLM_API_KEY=optillm python -m pytest tests/test_mars_parallel.py -v --tb=short

# Run deepconf tests
echo "Running deepconf tests..."
OPTILLM_API_KEY=optillm python -m pytest tests/test_deepconf.py -v --tb=short

# Run conversation logger unit tests (no server needed)
echo "Running conversation logger tests..."
python -m pytest tests/test_conversation_logger.py -v --tb=short

echo "All integration tests completed successfully!"
exit 0
env:
@@ -125,4 +142,128 @@ jobs:
pkill -f "python.*optillm" 2>/dev/null || true
sleep 2
echo "Server shutdown completed"
exit 0
exit 0

conversation-logging-tests:
runs-on: ubuntu-latest
needs: unit-tests
strategy:
matrix:
python-version: ['3.12']

steps:
- uses: actions/checkout@v4

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}

- name: Cache pip packages
uses: actions/cache@v3
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
restore-keys: |
${{ runner.os }}-pip-

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install -r tests/requirements.txt
pip install -e .

- name: Start optillm server with conversation logging
run: |
echo "Starting optillm server with conversation logging..."
mkdir -p /tmp/optillm_conversations
OPTILLM_API_KEY=optillm python optillm.py \
--model google/gemma-3-270m-it \
--port 8000 \
--log-conversations \
--conversation-log-dir /tmp/optillm_conversations &
echo $! > server.pid

# Wait for server to be ready
echo "Waiting for server to start..."
sleep 20

# Test server health
curl -s http://localhost:8000/health || echo "Server health check failed"
env:
OPTILLM_API_KEY: optillm
HF_TOKEN: ${{ secrets.HF_TOKEN }}

- name: Run conversation logging tests
run: |
echo "Running conversation logging approach tests..."
OPTILLM_API_KEY=optillm python -m pytest tests/test_conversation_logging_approaches.py -v --tb=short

echo "Running conversation logging server tests..."
OPTILLM_API_KEY=optillm OPTILLM_CONVERSATION_LOG_DIR=/tmp/optillm_conversations python -m pytest tests/test_conversation_logging_server.py -v --tb=short

echo "All conversation logging tests completed successfully!"
env:
OPTILLM_API_KEY: optillm
OPTILLM_CONVERSATION_LOG_DIR: /tmp/optillm_conversations
HF_TOKEN: ${{ secrets.HF_TOKEN }}

- name: Stop optillm server
if: always()
run: |
echo "Stopping optillm server..."
if [ -f server.pid ]; then
kill $(cat server.pid) 2>/dev/null || true
rm -f server.pid
fi
pkill -f "python.*optillm" 2>/dev/null || true
sleep 2
echo "Server shutdown completed"
exit 0

mcp-tests:
runs-on: ubuntu-latest
needs: unit-tests
if: github.event_name == 'push' && github.ref == 'refs/heads/main' # Only run on main branch pushes (secrets available)
strategy:
matrix:
python-version: ['3.12']

steps:
- uses: actions/checkout@v4

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}

- name: Set up Node.js
uses: actions/setup-node@v4
with:
node-version: '20'

- name: Cache pip packages
uses: actions/cache@v3
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
restore-keys: |
${{ runner.os }}-pip-

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install -r tests/requirements.txt
pip install -e .

- name: Run MCP plugin tests
run: |
echo "Running MCP plugin tests..."
python -m pytest tests/test_mcp_plugin.py -v --tb=short
echo "MCP tests completed successfully!"
env:
OPTILLM_API_KEY: optillm
GITHUB_TOKEN: ${{ secrets.GH_TOKEN }}
HF_TOKEN: ${{ secrets.HF_TOKEN }}
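
For local debugging of the new conversation-logging job, here is a minimal sketch that mirrors the server startup step above; the flags, model, and port are copied from the workflow, and the log directory is just the same temporary path the CI uses:

```bash
# Sketch of the conversation-logging startup the workflow performs (not part of the diff).
mkdir -p /tmp/optillm_conversations

OPTILLM_API_KEY=optillm python optillm.py \
  --model google/gemma-3-270m-it \
  --port 8000 \
  --log-conversations \
  --conversation-log-dir /tmp/optillm_conversations &

# Give the server time to load the model, then confirm it is reachable.
sleep 20
curl -s http://localhost:8000/health
```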
6 changes: 3 additions & 3 deletions README.md
@@ -216,15 +216,15 @@ You can then run the optillm proxy as follows.
```bash
python optillm.py
2024-09-06 07:57:14,191 - INFO - Starting server with approach: auto
2024-09-06 07:57:14,191 - INFO - Server configuration: {'approach': 'auto', 'mcts_simulations': 2, 'mcts_exploration': 0.2, 'mcts_depth': 1, 'best_of_n': 3, 'model': 'gpt-4o-mini', 'rstar_max_depth': 3, 'rstar_num_rollouts': 5, 'rstar_c': 1.4, 'base_url': ''}
2024-09-06 07:57:14,191 - INFO - Server configuration: {'approach': 'auto', 'mcts_simulations': 2, 'mcts_exploration': 0.2, 'mcts_depth': 1, 'best_of_n': 3, 'model': 'gpt-4o-mini', 'rstar_max_depth': 3, 'rstar_num_rollouts': 5, 'rstar_c': 1.4, 'base_url': '', 'host': '127.0.0.1'}
* Serving Flask app 'optillm'
* Debug mode: off
2024-09-06 07:57:14,212 - INFO - WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
* Running on all addresses (0.0.0.0)
* Running on http://127.0.0.1:8000
* Running on http://192.168.10.48:8000
2024-09-06 07:57:14,212 - INFO - Press CTRL+C to quit
```

> **Security Note**: By default, optillm binds to `127.0.0.1` (localhost only) for security. To allow external connections (e.g., for Docker or remote access), use `--host 0.0.0.0`. Only do this on trusted networks or with proper authentication configured via `--optillm-api-key`.
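
As an illustration of that note, a minimal sketch of exposing the proxy beyond localhost while keeping authentication on; the key value here is a placeholder, not a real secret:

```bash
# Only bind to all interfaces on a trusted network, and require clients to send a key.
# "sk-optillm-local" is a placeholder value for this sketch.
python optillm.py --host 0.0.0.0 --port 8000 --optillm-api-key sk-optillm-local
```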
## Usage

Once the proxy is running, you can use it as a drop-in replacement for an OpenAI client by setting the `base_url` to `http://localhost:8000/v1`.
2 changes: 1 addition & 1 deletion optillm/__init__.py
@@ -1,5 +1,5 @@
# Version information
__version__ = "0.3.11"
__version__ = "0.3.12"

# Import from server module
from .server import (
36 changes: 26 additions & 10 deletions optillm/server.py
@@ -94,10 +94,10 @@ def get_config():
API_KEY = os.environ.get("OPENAI_API_KEY")
base_url = server_config['base_url']
if base_url != "":
default_client = OpenAI(api_key=API_KEY, base_url=base_url)
default_client = OpenAI(api_key=API_KEY, base_url=base_url, http_client=http_client)
logger.info(f"Created OpenAI client with base_url: {base_url}")
else:
default_client = OpenAI(api_key=API_KEY)
default_client = OpenAI(api_key=API_KEY, http_client=http_client)
logger.info("Created OpenAI client without base_url")
elif os.environ.get("AZURE_OPENAI_API_KEY"):
API_KEY = os.environ.get("AZURE_OPENAI_API_KEY")
@@ -189,6 +189,7 @@ def count_reasoning_tokens(text: str, tokenizer=None) -> int:
'base_url': '',
'optillm_api_key': '',
'return_full_response': False,
'host': '127.0.0.1', # Default to localhost for security; use 0.0.0.0 to allow external connections
'port': 8000,
'log': 'info',
'ssl_verify': True,
@@ -396,9 +397,9 @@ def execute_single_approach(approach, system_prompt, initial_query, client, mode
if approach == 'none':
# Use the request_config that was already prepared and passed to this function
kwargs = request_config.copy() if request_config else {}

# Remove items that are handled separately by the framework
kwargs.pop('n', None) # n is handled by execute_n_times
# Note: 'n' is NOT removed - the none_approach passes it to the client which handles multiple completions
kwargs.pop('stream', None) # stream is handled by proxy()

# Reconstruct original messages from system_prompt and initial_query
@@ -408,6 +409,7 @@
if initial_query:
messages.append({"role": "user", "content": initial_query})

logger.debug(f"none_approach kwargs: {kwargs}")
response = none_approach(original_messages=messages, client=client, model=model, request_id=request_id, **kwargs)
# For none approach, we return the response and a token count of 0
# since the full token count is already in the response
@@ -546,17 +548,29 @@ def execute_n_times(n: int, approaches, operation: str, system_prompt: str, init
return responses, total_tokens

def generate_streaming_response(final_response, model):
# Yield the final response
# Generate a unique response ID
response_id = f"chatcmpl-{int(time.time()*1000)}"
created = int(time.time())

# Yield the final response with OpenAI-compatible format
if isinstance(final_response, list):
for index, response in enumerate(final_response):
# First chunk includes role
yield "data: " + json.dumps({
"choices": [{"delta": {"content": response}, "index": index, "finish_reason": "stop"}],
"id": response_id,
"object": "chat.completion.chunk",
"created": created,
"model": model,
"choices": [{"delta": {"role": "assistant", "content": response}, "index": index, "finish_reason": "stop"}],
}) + "\n\n"
else:
# First chunk includes role
yield "data: " + json.dumps({
"choices": [{"delta": {"content": final_response}, "index": 0, "finish_reason": "stop"}],
"id": response_id,
"object": "chat.completion.chunk",
"created": created,
"model": model,
"choices": [{"delta": {"role": "assistant", "content": final_response}, "index": 0, "finish_reason": "stop"}],
}) + "\n\n"

# Yield the final message to indicate the stream has ended
@@ -987,6 +1001,7 @@ def parse_args():
("--rstar-c", "OPTILLM_RSTAR_C", float, 1.4, "Exploration constant for rStar algorithm"),
("--n", "OPTILLM_N", int, 1, "Number of final responses to be returned"),
("--return-full-response", "OPTILLM_RETURN_FULL_RESPONSE", bool, False, "Return the full response including the CoT with <thinking> tags"),
("--host", "OPTILLM_HOST", str, "127.0.0.1", "Host address to bind the server to (use 0.0.0.0 to allow external connections)"),
("--port", "OPTILLM_PORT", int, 8000, "Specify the port to run the proxy"),
("--log", "OPTILLM_LOG", str, "info", "Specify the logging level", list(logging_levels.keys())),
("--launch-gui", "OPTILLM_LAUNCH_GUI", bool, False, "Launch a Gradio chat interface"),
@@ -1263,7 +1278,8 @@ def process_batch_requests(batch_requests):
import gradio as gr
# Start server in a separate thread
import threading
server_thread = threading.Thread(target=app.run, kwargs={'host': '0.0.0.0', 'port': port})
host = server_config['host']
server_thread = threading.Thread(target=app.run, kwargs={'host': host, 'port': port})
server_thread.daemon = True
server_thread.start()

@@ -1310,12 +1326,12 @@ def chat_with_optillm(message, history):
description=f"Connected to OptILLM proxy at {base_url}"
)
demo.queue() # Enable queue to handle long operations properly
demo.launch(server_name="0.0.0.0", share=False)
demo.launch(server_name=host, share=False)
except ImportError:
logger.error("Gradio is required for GUI. Install it with: pip install gradio")
return

app.run(host='0.0.0.0', port=port)
app.run(host=server_config['host'], port=port)

if __name__ == "__main__":
main()
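
Because `generate_streaming_response` now emits OpenAI-style chunk objects (`id`, `object`, `created`, `model`, and a `role` in the delta), a quick way to inspect the new shape is a streaming request against a locally running proxy. This is a sketch, assuming the proxy listens on port 8000 and uses the `optillm` API key seen in the CI jobs:

```bash
# Sketch: stream a completion through the proxy and watch the SSE chunks.
curl -N http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer optillm" \
  -d '{
        "model": "gpt-4o-mini",
        "stream": true,
        "messages": [{"role": "user", "content": "Say hello"}]
      }'
# Each "data:" line should now include id, object, created, model, and a delta carrying the role.
```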
38 changes: 23 additions & 15 deletions optillm/z3_solver.py
@@ -15,8 +15,16 @@
class TimeoutException(Exception):
pass

def prepare_safe_globals():
safe_globals = {
def prepare_execution_globals():
"""
Prepare globals dictionary for Z3/SymPy code execution.

WARNING: This is NOT a security sandbox. The name "execution_globals" reflects
that this simply provides the execution environment for solver code, not a
security boundary. The code is executed via exec() with access to z3, sympy,
and math libraries. Only execute trusted code.
"""
execution_globals = {
'print': print,
'__builtins__': {
'True': True,
@@ -35,7 +43,7 @@ def prepare_safe_globals():
}

# Add common math functions
safe_globals.update({
execution_globals.update({
'log': math.log,
'log2': math.log2,
'sqrt': math.sqrt,
@@ -48,10 +56,10 @@
})

# Add complex number support
safe_globals['I'] = complex(0, 1)
safe_globals['Complex'] = complex
execution_globals['I'] = complex(0, 1)
execution_globals['Complex'] = complex

return safe_globals
return execution_globals

def execute_code_in_process(code: str):
import z3
@@ -60,18 +68,18 @@ def execute_code_in_process(code: str):
import itertools
from fractions import Fraction

safe_globals = prepare_safe_globals()
execution_globals = prepare_execution_globals()

# Add Z3 specific functions
z3_whitelist = set(dir(z3))
safe_globals.update({name: getattr(z3, name) for name in z3_whitelist})
execution_globals.update({name: getattr(z3, name) for name in z3_whitelist})

# Add SymPy specific functions
sympy_whitelist = set(dir(sympy))
safe_globals.update({name: getattr(sympy, name) for name in sympy_whitelist})
execution_globals.update({name: getattr(sympy, name) for name in sympy_whitelist})

# Ensure key Z3 and SymPy components are available
safe_globals.update({
execution_globals.update({
'z3': z3,
'sympy': sympy,
'Solver': z3.Solver,
@@ -112,22 +120,22 @@ def as_numerical(x):
return x.approx(20)
return float(x)

safe_globals['as_numerical'] = as_numerical
execution_globals['as_numerical'] = as_numerical

def Mod(x, y):
return x % y

safe_globals['Mod'] = Mod
execution_globals['Mod'] = Mod

def Rational(numerator, denominator=1):
return z3.Real(str(Fraction(numerator, denominator)))

safe_globals['Rational'] = Rational
execution_globals['Rational'] = Rational

output_buffer = io.StringIO()
with contextlib.redirect_stdout(output_buffer):
try:
exec(code, safe_globals, {})
exec(code, execution_globals, {})
except Exception:
return ("error", traceback.format_exc())
return ("success", output_buffer.getvalue())
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "optillm"
version = "0.3.11"
version = "0.3.12"
description = "An optimizing inference proxy for LLMs."
readme = "README.md"
license = "Apache-2.0"