LM with a REPL
| """ | |
| An LM with a REPL | |
| Gives an LLM a Python REPL: the model can write ```repl``` code blocks, | |
| which get executed, with stdout/stderr fed back into the conversation. | |
| Requires a running mlx_lm.server: | |
| mlx_lm.server | |
| """ | |
| import re | |
| import sys | |
| import traceback | |
| from io import StringIO | |
| from openai import OpenAI | |
| SYSTEM_PROMPT = """\ | |
| You have access to a Python REPL. To run code, wrap it in a ```repl``` block: | |
| ```repl | |
| x = 2 + 2 | |
| print(x) | |
| ``` | |
| You will see the output (stdout and stderr) from each code block. Variables \ | |
| persist between blocks. | |
| Use the REPL to compute your answer rather than guessing.\ | |
| """ | |
| def extract_repl_block(text): | |
| """Extract code from the first ```repl``` fenced block, or None.""" | |
| m = re.search(r"```repl\n(.*?)```", text, re.DOTALL) | |
| return m.group(1) if m else None | |
| def execute_code(code, namespace): | |
| """ | |
| Execute code in the given namespace. | |
| Returns (stdout, stderr). | |
| """ | |
| old_stdout, old_stderr = sys.stdout, sys.stderr | |
| sys.stdout, sys.stderr = StringIO(), StringIO() | |
| try: | |
| exec(code, namespace) | |
| except Exception: | |
| traceback.print_exc(file=sys.stderr) | |
| finally: | |
| stdout = sys.stdout.getvalue() | |
| stderr = sys.stderr.getvalue() | |
| sys.stdout, sys.stderr = old_stdout, old_stderr | |
| return stdout, stderr | |
| def rlm(prompt, base_url="http://localhost:8080/v1", max_iterations=10): | |
| """ | |
| Run the RLM loop. | |
| Args: | |
| prompt: The user's question or task. | |
| base_url: The mlx_lm.server endpoint. | |
| max_iterations: Max REPL turns before giving up. | |
| Returns: | |
| The last assistant response. | |
| """ | |
| client = OpenAI(base_url=base_url, api_key="not-needed") | |
| messages = [ | |
| {"role": "system", "content": SYSTEM_PROMPT}, | |
| {"role": "user", "content": prompt}, | |
| ] | |
| namespace = {"__builtins__": __builtins__} | |
| for i in range(max_iterations): | |
| response = client.chat.completions.create( | |
| messages=messages, | |
| max_tokens=2048, | |
| temperature=0.0, | |
| model="mlx-community/Qwen3-Coder-Next-6bit" | |
| ) | |
| assistant_text = response.choices[0].message.content | |
| messages.append({"role": "assistant", "content": assistant_text}) | |
| print(f"\n--- Iteration {i + 1} ---") | |
| print(assistant_text) | |
| block = extract_repl_block(assistant_text) | |
| if not block: | |
| return assistant_text | |
| stdout, stderr = execute_code(block, namespace) | |
| result = "" | |
| if stdout: | |
| result += stdout | |
| if stderr: | |
| result += f"[stderr]\n{stderr}" | |
| if not result: | |
| result = "(no output)" | |
| messages.append({"role": "user", "content": result}) | |
| print(f"\nREPL output:\n{result}") | |
| return assistant_text | |
| if __name__ == "__main__": | |
| import argparse | |
| parser = argparse.ArgumentParser(description="LLM with a Python REPL") | |
| parser.add_argument("prompt", help="The prompt to send to the model") | |
| parser.add_argument( | |
| "--base-url", | |
| default="http://localhost:8080/v1", | |
| help="mlx_lm.server base URL (default: http://localhost:8080/v1)", | |
| ) | |
| parser.add_argument( | |
| "--max-iterations", | |
| type=int, | |
| default=10, | |
| help="Max REPL iterations (default: 10)", | |
| ) | |
| args = parser.parse_args() | |
| result = rlm(args.prompt, base_url=args.base_url, max_iterations=args.max_iterations) |
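For a quick smoke test, the loop can also be driven as a library. A minimal sketch: the filename `rlm.py` and the example prompt are my assumptions, and the `--model` flag mirrors the model name hard-coded in the script above.

```python
# Assumes this gist is saved as rlm.py and that an mlx_lm.server instance is
# already running, e.g.:
#   mlx_lm.server --model mlx-community/Qwen3-Coder-Next-6bit
from rlm import rlm

answer = rlm("What is the sum of the first 100 primes?", max_iterations=5)
print(answer)
```

The same loop is available from the command line: `python rlm.py "your prompt" --base-url http://localhost:8080/v1 --max-iterations 10`.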
I have been hacking on this concept for the past few days: https://github.com/kulesh/recurgent. Besides security, there are a bunch of interesting problems to solve around consistency and reliable reproduction of code, etc. Happy to hear your feedback.
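On the security point: the script above `exec()`s model-written code in-process, so a buggy or hostile block can do anything the Python process can, including hanging the loop. One common mitigation is to run each block in a fresh subprocess with a timeout. A minimal, hypothetical sketch (not part of the gist, and it gives up the persistent namespace between blocks):

```python
import subprocess
import sys


def execute_code_sandboxed(code, timeout=10):
    """Hypothetical alternative to execute_code: run the block in a subprocess.

    Trades variable persistence for isolation: an infinite loop, sys.exit(),
    or a hard crash in model-written code no longer takes down the driver.
    """
    try:
        proc = subprocess.run(
            [sys.executable, "-c", code],
            capture_output=True,
            text=True,
            timeout=timeout,
        )
        return proc.stdout, proc.stderr
    except subprocess.TimeoutExpired:
        return "", f"(timed out after {timeout}s)"
```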