|
1 | 1 | from html import escape |
2 | 2 |
|
3 | 3 | from pr_agent.config_loader import get_settings |
| 4 | +from pr_agent.git_providers.git_provider import GitProvider |
4 | 5 | from pr_agent.log import get_logger |
5 | 6 |
|
# Stand-in line appended where a file's remaining content is cut to fit the line budget.
TRUNCATION_MARKER = "...(truncated)..."
# Opening sentence of every rendered instruction-files section.
INSTRUCTION_FILES_INTRO = (
    "You are being given instruction files. Follow them as project-specific guidance when reviewing code."
)
# Five-backtick fence that opens and closes each file's markdown block.
MARKDOWN_FENCE = "`````"
# Provider classes already warned about for lacking repository file fetching,
# so that warning is logged only once per class.
_unsupported_repo_context_provider_classes: set[type] = set()
| 13 | + |
6 | 14 |
|
def render_instruction_files(files: dict[str, str]) -> str:
    """Render instruction files as a single prompt section.

    The section opens with the shared intro sentence and wraps every file in
    a ``<file>`` element whose ``path`` and ``scope`` attributes are
    HTML-escaped. Each file body is stripped of trailing whitespace and placed
    inside a markdown fence.

    Args:
        files: Mapping of file path to file content.

    Returns:
        The rendered section, with its lines joined by newlines.
    """
    rendered = [INSTRUCTION_FILES_INTRO, "<instruction_files>"]

    for file_path, body in files.items():
        # Everything before the last "/" is the scope; paths without a "/"
        # are reported as "repo-root".
        directory, slash, _ = file_path.rpartition("/")
        file_scope = directory if slash else "repo-root"
        path_attr = escape(file_path, quote=True)
        scope_attr = escape(file_scope, quote=True)
        rendered += [
            f'<file path="{path_attr}" scope="{scope_attr}">',
            f"{MARKDOWN_FENCE}markdown",
            body.rstrip(),
            MARKDOWN_FENCE,
            "</file>",
            "",
        ]

    rendered.append("</instruction_files>")
    return "\n".join(rendered)
24 | 32 |
|
25 | 33 |
|
def render_instruction_files_with_line_budget(files: dict[str, str], max_lines: int) -> str:
    """Render instruction files without exceeding a total line budget.

    Files are rendered in mapping order, in the same ``<file>`` / markdown
    fence layout as ``render_instruction_files``. Every emitted ``<file>``
    element is closed properly, so the output stays well-formed even when
    content is cut. Rendering stops at the first file that does not fit:

    * if at least one content line is still available, that file is emitted
      with its leading lines followed by ``TRUNCATION_MARKER``;
    * otherwise the file is omitted entirely, because even its wrapper lines
      would push the output past ``max_lines``.

    The intro line and the ``<instruction_files>`` open/close tags are always
    emitted, so the output is never shorter than three lines, even when
    ``max_lines`` is smaller than that.

    Args:
        files: Mapping of file path to file content.
        max_lines: Maximum number of lines the rendered output should have.

    Returns:
        The rendered section, with its lines joined by newlines.
    """
    parts = [
        INSTRUCTION_FILES_INTRO,
        "<instruction_files>",
    ]

    for path, content in files.items():
        scope = path.rsplit("/", 1)[0] if "/" in path else "repo-root"
        file_header = [
            f'<file path="{escape(path, quote=True)}" scope="{escape(scope, quote=True)}">',
            f"{MARKDOWN_FENCE}markdown",
        ]
        file_footer = [
            MARKDOWN_FENCE,
            "</file>",
            "",
        ]
        content_lines = content.rstrip().splitlines()
        # Lines that must still fit besides the content: this file's wrapper
        # plus the final "</instruction_files>" tag.
        reserved_closing_lines = len(file_header) + len(file_footer) + 1
        available_content_lines = max_lines - len(parts) - reserved_closing_lines

        if available_content_lines >= len(content_lines):
            parts.extend(file_header)
            parts.extend(content_lines)
            parts.extend(file_footer)
            continue

        # The file does not fit. Emit a truncated copy only when the marker
        # line itself fits; emitting the wrapper with no room left would
        # overshoot the budget.
        if available_content_lines >= 1:
            parts.extend(file_header)
            parts.extend(content_lines[: available_content_lines - 1])
            parts.append(TRUNCATION_MARKER)
            parts.extend(file_footer)
        break

    parts.append("</instruction_files>")
    return "\n".join(parts).strip()
| 69 | + |
| 70 | + |
26 | 71 | def build_repo_context(git_provider) -> str: |
27 | 72 | context_files = get_settings().config.get("repo_context_files", []) |
28 | 73 | if not context_files: |
29 | 74 | return "" |
| 75 | + if isinstance(context_files, str): |
| 76 | + get_logger().warning( |
| 77 | + "repo_context_files should be a list of file paths; treating string value as one file path", |
| 78 | + artifact={"repo_context_files": context_files}, |
| 79 | + ) |
| 80 | + context_files = [context_files] |
| 81 | + elif not isinstance(context_files, list): |
| 82 | + get_logger().warning( |
| 83 | + "repo_context_files should be a list of file paths; skipping repo context", |
| 84 | + artifact={"repo_context_files": context_files}, |
| 85 | + ) |
| 86 | + return "" |
| 87 | + |
| 88 | + provider_class = type(git_provider) |
| 89 | + if provider_class.get_repo_file_content is GitProvider.get_repo_file_content: |
| 90 | + if provider_class not in _unsupported_repo_context_provider_classes: |
| 91 | + _unsupported_repo_context_provider_classes.add(provider_class) |
| 92 | + get_logger().warning( |
| 93 | + f"repo_context_files is configured, but {provider_class.__name__} does not support repository " |
| 94 | + "file fetching; skipping repo context" |
| 95 | + ) |
| 96 | + return "" |
30 | 97 |
|
31 | 98 | max_lines = get_settings().config.get("repo_context_max_lines", 500) |
32 | 99 | try: |
@@ -59,6 +126,4 @@ def build_repo_context(git_provider) -> str: |
59 | 126 | if not files: |
60 | 127 | return "" |
61 | 128 |
|
62 | | - rendered_lines = render_instruction_files(files).splitlines() |
63 | | - |
64 | | - return "\n".join(rendered_lines[:max_lines]).strip() |
| 129 | + return render_instruction_files_with_line_budget(files, max_lines) |
0 commit comments