主函数定义在 rllm/tools/code_tools/python_interpreter.py 中:
def _init_backend(self):"""初始化沙箱"""# 默认使用LCBPythonInterpreterif self.backend_type == "local":self.backend: LCBPythonInterpreter | E2BPythonInterpreter | TogetherCodeTool = LCBPythonInterpreter()elif self.backend_type == "e2b":self.backend = E2BPythonInterpreter(n_sandboxes=self.n_sandboxes, api_key=self.api_key)elif self.backend_type == "together":self.backend = TogetherCodeTool(api_key=self.api_key)else:raise ValueError(f"Unsupported backend type: {self.backend_type}")def forward(self, code: str, timeout: int = 12, **kwargs) -> CodeToolOutput:"""在沙箱中执行codeArgs:code: Python code to executetimeout: Maximum execution time in seconds**kwargs: Additional parameters specific to the backend implementationReturns:CodeToolOutput containing execution results, stdout, and stderr"""return self.backend.forward(code=code, timeout=timeout, **kwargs)
LCB 解释器定义在 rllm/tools/code_tools/lcb_tool.py 中。可以看到,这个本地沙箱主要提供以下两项防护:
- 防止有害操作(例如对本地文件的读写等)
- 进行超时处理
import ast
import faulthandler
import multiprocessing
import queue
import signal
import traceback

from rllm.rewards.code_utils.livecodebench import (
    Capturing,
    clean_if_name,
    compile_code,
    get_function,
    make_function,
    reliability_guard,
    timeout_handler,
)
from rllm.tools.code_tools.code_tool import CodeTool, CodeToolOutput


def ensure_return_value(code):
    """Ensure the code ends with a return statement for its last expression.

    Only the final statement is touched, and only when it is a bare
    expression; it is then rewritten as ``return <expression>``.

    Args:
        code (str): Python code to process

    Returns:
        str: Modified code with a return statement if needed. The original
        text is returned unchanged when it is blank or cannot be processed.
    """
    if not code.strip():
        return code

    try:
        tree = ast.parse(code)
        body = tree.body

        # If the last statement is an expression, turn it into a return.
        if body and isinstance(body[-1], ast.Expr):
            value = body[-1].value
            body[-1] = ast.Return(value=value)

            # Fill in location info for the newly created node.
            ast.fix_missing_locations(tree)

        # Unparse the (possibly modified) AST back to source text.
        return ast.unparse(tree)
    except SyntaxError:
        # Code with syntax errors is passed through untouched.
        return code
    except Exception as e:
        # Report other unexpected errors but still return the original code.
        print(f"Warning: Could not process code: {e}")
        return code


def _describe_failure(exc):
    """Map an exception to the stderr text reported by ``execute_code``.

    Must be called from inside an ``except`` block, because the non-timeout
    case relies on ``traceback.format_exc()``.
    """
    if "timeoutexception" in repr(exc).lower():
        return "Time Limit Exceeded."
    return traceback.format_exc()


def execute_code(code, timeout):
    """Execute the provided code with safety measures and timeout handling.

    Installs ``timeout_handler`` for SIGALRM, calls ``reliability_guard()``,
    wraps the code into ``wrapped_function`` and runs it under an alarm.

    Args:
        code (str): Python code to execute
        timeout (int): Maximum execution time in seconds

    Returns:
        tuple: (stdout, stderr, result) containing execution output and result
    """
    signal.signal(signal.SIGALRM, timeout_handler)
    stdout, stderr, result = None, None, None
    # Disable functionalities that can make destructive changes to the test
    # (restricts read/write operations).
    reliability_guard()
    signal.alarm(timeout)
    try:
        code = clean_if_name(code)
        # Wrap the given code inside a function.
        code = make_function(code)
        compiled_sol = compile_code(code, timeout)
        if compiled_sol is None:
            stderr = "Failed to compile code"
            return stdout, stderr, result
        method = get_function(compiled_sol, "wrapped_function")
        if method is None:
            stderr = "Failed to get function 'wrapped_function'"
            return stdout, stderr, result

        faulthandler.enable()
        # Restart the alarm so the run itself gets the full time budget.
        signal.alarm(timeout)
        # Capture the output produced while the code runs.
        with Capturing() as captured_output:
            try:
                try:
                    result = method()
                except SystemExit as e:
                    # SystemExit is not an Exception subclass; record it here.
                    stderr = f"SystemExit: {e}"
                # Reset the alarm.
                signal.alarm(0)
            except Exception as e:
                signal.alarm(0)
                stderr = _describe_failure(e)
            finally:
                signal.alarm(0)
                faulthandler.disable()
        stdout = captured_output[0] if captured_output else ""
        return stdout, stderr, result
    except Exception as e:
        # Failures outside the captured run (e.g. the alarm firing during
        # compilation) were previously swallowed with stderr left as None;
        # report them unless a more specific message was already recorded.
        if stderr is None:
            stderr = _describe_failure(e)
        return stdout, stderr, result
    finally:
        signal.alarm(0)


def _wrapper_exec_fn(sample, timeout, result_queue):
    """Helper function to execute code and put results in the queue"""
    res = execute_code(sample, timeout=timeout)
    result_queue.put(res)


def lcb_sandbox(code, timeout):
    """Run code in a separate process with timeout handling.

    This is the main entry point for executing code: the code is
    preprocessed with ``ensure_return_value`` and executed by
    ``execute_code`` in a child process.

    Args:
        code (str): Python code to execute
        timeout (int): Maximum execution time in seconds

    Returns:
        tuple: (stdout, stderr, result) containing the execution output and
        result, or ``("Timeout", "", "")`` when the child produced no result.
    """
    # Preprocess the code to ensure the last expression is returned.
    code = ensure_return_value(code)

    # The context manager shuts the manager's server process down on exit.
    with multiprocessing.Manager() as manager:
        result_queue = manager.Queue()

        # Use a separate process to isolate code execution.
        p = multiprocessing.Process(
            target=_wrapper_exec_fn,
            args=(code, timeout, result_queue),
        )
        p.start()

        # Wait for the process to complete with additional buffer time.
        p.join(timeout=(timeout + 1) + 5)

        try:
            # Non-blocking read: a blocking get() would hang forever when the
            # child hung or died without ever putting a result on the queue.
            return result_queue.get_nowait()
        except queue.Empty:
            # Return timeout message if no result is available.
            return "Timeout", "", ""
        finally:
            # Ensure the process is terminated if still running.
            if p.is_alive():
                p.terminate()
                p.join(timeout=1)
                if p.is_alive():
                    p.kill()


class LCBPythonInterpreter(CodeTool):
    """A tool for executing Python code in a sandboxed environment.

    This tool provides a safe way to execute Python code with timeout protection
    and isolation from the main process, using the LiveCodeBench execution environment.
    """

    def __init__(self):
        """Initialize the Python interpreter tool with appropriate settings."""
        super().__init__(
            name="python",
            description="Execute python code in the same environment as the LiveCodeBench benchmark.",
            n_sandboxes=-1,
        )

    def forward(self, code: str, timeout: int = 12, **kwargs) -> CodeToolOutput:
        """Execute Python code using the LiveCodeBench sandbox environment.

        Args:
            code (str): Python code to execute
            timeout (int): Maximum execution time in seconds, defaults to 12
            **kwargs: Additional parameters (unused but kept for compatibility)

        Returns:
            CodeToolOutput: Contains execution results with stdout, stderr, and result fields
        """
        try:
            stdout, stderr, result = lcb_sandbox(code, timeout=timeout)
            return CodeToolOutput(name=self.name or "python", stdout=stdout, stderr=stderr, output=result)
        except Exception as e:
            return CodeToolOutput(
                name=self.name or "python",
                error=f"Sandbox Error: {type(e).__name__} - {str(e)}",
            )


if __name__ == "__main__":
    # Create a Python interpreter instance
    interpreter = LCBPythonInterpreter()

    # Example code to execute
    test_code = """
# Generate a large amount of code
result = 0
for i in range(1000):
    exec(f"var_{i} = {i}")
    result += i

# Final expression after lots of code
result # Should be converted to return
"""

    # Run code
    print(interpreter(code=test_code))