RepoMaster’s repository exploration system provides intelligent code analysis through hierarchical understanding and tool-assisted exploration. The system builds a comprehensive model of code structure, dependencies, and semantics.
def _setup_tool_library(self):
    """Initialize the code-exploration tool library and the module summary.

    Creates a CodeExplorerTools instance when a local repository path is
    configured (None otherwise), then asks its builder for an LLM-generated
    summary of the repository's important modules unless ``repo_init`` was
    explicitly disabled in ``self.args``.
    """
    if self.local_repo_path:
        self.code_library = CodeExplorerTools(
            self.local_repo_path,
            work_dir=self.work_dir,
            docker_work_dir=self.docker_path_prefix,
        )
    else:
        # No repository available: downstream code must tolerate None here.
        self.code_library = None

    # Only build the (potentially slow) important-modules summary when a
    # repository exists and initialization was requested.
    wants_init = self.args.get("repo_init", True)
    if self.local_repo_path and wants_init:
        builder = self.code_library.builder
        self.code_importance = builder.generate_llm_important_modules(
            max_tokens=8000
        )
    else:
        self.code_importance = ""
Initialization Time: The tool library initialization includes tree-sitter parsing and important module detection, which can take 10-30 seconds for large repositories.
def judge_file_is_important(code_list: list[dict]) -> list[dict]:
    """Judge whether files are important for understanding the repository.

    Args:
        code_list: Candidate files; each dict must contain a ``file_path``
            key (other keys, e.g. file content, are passed through untouched).

    Returns:
        The entries of ``code_list`` that the model judged important,
        ordered by the model's importance ranking.
    """
    judge_prompt = f"""
    You are an assistant that helps developers understand code repositories.
    Judge whether the current file is important.
    Rules:
    1. README.md with repository description - very important
    2. Configuration files, test files, example files - very important
    3. Files with information important for understanding - very important
    4. Duplicate file contents - keep only one
    Return JSON list (sorted by importance):
    [
        {{
            "file_path": "path",
            "is_important": "yes" or "no"
        }}
    ]
    """
    # BUG FIX: `messages` was referenced below but never defined, raising a
    # NameError on every call. Build the chat payload from the judge prompt
    # plus the serialized candidate-file list.
    import json  # local import: only needed to serialize the file list

    messages = [
        {"role": "system", "content": judge_prompt},
        # default=str guards against non-JSON-serializable values in the dicts
        {
            "role": "user",
            "content": json.dumps(code_list, ensure_ascii=False, default=str),
        },
    ]
    response_dict = AzureGPT4Chat().chat_with_message(
        messages, json_format=True
    )

    # Group files by path once so the filter runs in O(n + m) instead of the
    # original nested-comprehension O(n * m); output order still follows the
    # model's importance ranking, as before.
    files_by_path: dict = {}
    for file in code_list:
        files_by_path.setdefault(file["file_path"], []).append(file)

    out_list = [
        file
        for judge_result in response_dict
        if str(judge_result.get("is_important", "")).lower() == "yes"
        for file in files_by_path.get(judge_result["file_path"], [])
    ]
    return out_list
def get_readme_summary(code_content: str, history_summary: dict):
    """Summarize a repository from its README and key documentation files.

    Args:
        code_content: Concatenated README / important-documentation text.
        history_summary: Previously generated summaries whose content the
            model is told not to duplicate.

    Returns:
        The model's JSON-formatted response.
    """
    system_prompt = """
    You are an assistant that helps developers understand code repositories.
    Generate summary based on README and documentation files.
    Rules:
    1. Focus on main functions, architecture, and usage
    2. Use <cite>referenced content</cite> for important code blocks
    3. Keep summary concise, comprehensive, and informative
    4. Include installation methods, dependencies, example usage
    5. Ignore disclaimers and unimportant content
    6. Avoid duplicating content from history_summary
    """
    user_prompt = f"""
    README and important documents:
    <code_content>
    {code_content}
    </code_content>
    Previous summaries:
    <history_summary>
    {history_summary}
    </history_summary>
    """
    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    return AzureGPT4Chat().chat_with_message(chat_messages, json_format=True)
Web-Based Problem Solving (agent_code_explore.py:209-233):
async def issue_solution_search(
    self,
    issue_description: Annotated[str, "Description of programming issue"],
) -> str:
    """Search the web for solutions to a specific programming issue or error.

    Sources include GitHub, Stack Overflow, official docs, and forums.

    Args:
        issue_description: Free-text description of the issue to resolve.

    Returns:
        Solution information as readable text: a concise summary, the
        source URL, and the source name (e.g. "Stack Overflow",
        "GitHub Issue") for up to three solutions.
    """
    # Build the full search instruction around the user's issue description.
    search_request = f"""
    Please search for solutions to the following programming issue:
    <issue_description>
    {issue_description}
    </issue_description>
    Steps:
    1. Search the web for solutions, code snippets, or discussions
    2. Prioritize well-explained, reputable, highly-rated solutions
    3. Select up to 3 most relevant solutions
    4. For each solution, provide summary, source URL, source name
    5. Present findings as clear, readable text (use markdown)
    If no relevant solutions found, indicate that.
    """
    # Delegate the actual multi-source search to the web agent.
    return await self.issue_searcher.a_web_agent_answer(search_request)
Smart Search: The issue solution search automatically queries multiple sources and ranks results by relevance and reputation.
def token_limit_termination(self, msg):
    """Decide whether the chat should terminate, flagging a restart if the
    conversation exceeded the token budget.

    Args:
        msg: The latest chat message dict (may be None).

    Returns:
        True to terminate the conversation, False to continue. Side effects:
        sets ``self.is_restart`` (and bumps ``self.chat_turns`` on a
        token-limit restart).
    """
    # BUG FIX: the None guard originally ran *after* msg.get(...), so a None
    # message raised AttributeError before it could be handled. Guard first.
    if msg is None:
        return False

    def check_tool_call(m):
        # True when the message carries a tool invocation or a tool response.
        if m.get("tool_calls", []):
            return True
        if m.get("tool_response", []):
            return True
        return False

    content = msg.get("content", "")
    if isinstance(content, str):
        content = content.strip()

    # A message with neither content nor tool activity terminates the chat.
    if (not check_tool_call(msg)) and (not content):
        return True

    # Original termination: the content effectively ends with a TERMINATE /
    # <TERMINATE> marker (only a couple of characters may follow it).
    original_termination = (
        content
        and (
            len(content.split("TERMINATE")[-1]) < 3
            or len(content.split("<TERMINATE>")[-1]) < 2
        )
    )
    if (original_termination
            and check_code_block(content) is None
            and not check_tool_call(msg)):
        self.is_restart = False
        return True

    # Token budget: sum the token counts of the whole conversation so far.
    messages = self.executor.chat_messages.get(self.explore, [])
    total_tokens = sum(
        get_code_abs_token(str(m.get("content", ""))) for m in messages
    )
    # Over the limit (e.g. 80000 tokens): terminate and mark for restart.
    if total_tokens > self.limit_restart_tokens:
        self.is_restart = True
        self.chat_turns += len(messages) - 1
        return True

    return False
Token Limits:
Token limit for summary: 2000 tokens (agent_code_explore.py:54)
# BUG FIX: the prompt string had lost its internal newlines, so list items and
# sentences ran together in the text sent to the model (e.g.
# "task:{task_description}As a programming assistant"). Restore line structure.
enhanced_task = f"""You are a general programming assistant. Please help with the following task:

{task_description}

As a programming assistant, you can:
- Write and execute code to solve problems
- Provide programming guidance and explanations
- Create practical examples and demonstrations
- Debug and troubleshoot issues
- Implement algorithms and data structures
- Explain programming concepts
- Create utility scripts and tools

Working directory: {work_dir}

Please provide comprehensive help including code examples, explanations, and practical solutions."""
Understanding Guide: ['Read README.md to understand basic functionality']#### File Paths- Input file paths and descriptions:{input_data}- Output file directory: Results must be saved in the {output_dir_path} directory.#### Additional Notes**Core Objective**: Quickly understand and analyze the code repository, generate and execute necessary code to efficiently complete user tasks."""
# When a remote repository is in use and no local virtualenv was requested,
# route code execution through a Docker-based executor.
if self.remote_repo_path and not self.use_venv:
    executor = EnhancedDockerCommandLineCodeExecutor(
        image="whc_docker",  # PyTorch + CUDA support
        timeout=self.timeout,
        work_dir=self.work_dir,
        # keep_same_path presumably mirrors the host work_dir path inside the
        # container; network_mode="host" shares the host network stack —
        # NOTE(review): confirm against the executor's documentation.
        keep_same_path=True,
        network_mode="host"
    )
    # Register the Docker executor as the code-execution backend.
    self.code_execution_config = {"executor": executor}