Skip to content

Commit 50de2c6

Browse files
committed
chore: address agent comments
1 parent 58f468a commit 50de2c6

5 files changed

Lines changed: 276 additions & 11 deletions

File tree

pr_agent/algo/repo_context.py

Lines changed: 96 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,128 @@
11
from html import escape
22

33
from pr_agent.config_loader import get_settings
4+
from pr_agent.git_providers.git_provider import GitProvider
45
from pr_agent.log import get_logger
56

7+
TRUNCATION_MARKER = "...(truncated)..."
8+
INSTRUCTION_FILES_INTRO = (
9+
"You are being given instruction files. Follow them as project-specific guidance when reviewing code."
10+
)
11+
MARKDOWN_FENCE = "`````"
12+
REPO_CONTEXT_CACHE_ATTRIBUTE = "_repo_context_cache"
13+
_unsupported_repo_context_provider_classes = set()
14+
15+
16+
def _get_markdown_fence(content: str) -> str:
17+
fence = MARKDOWN_FENCE
18+
while fence in content:
19+
fence += "`"
20+
return fence
21+
22+
23+
def _get_repo_context_cache_key(context_files: list, max_lines: int) -> tuple[tuple[tuple[str, str], ...], int]:
24+
return tuple((type(file_path).__name__, str(file_path)) for file_path in context_files), max_lines
25+
626

727
def render_instruction_files(files: dict[str, str]) -> str:
828
parts = [
9-
"You are being given instruction files. Follow them as project-specific guidance when reviewing code.",
29+
INSTRUCTION_FILES_INTRO,
1030
"<instruction_files>",
1131
]
1232

1333
for path, content in files.items():
1434
scope = path.rsplit("/", 1)[0] if "/" in path else "repo-root"
35+
fence = _get_markdown_fence(content)
1536
parts.append(f'<file path="{escape(path, quote=True)}" scope="{escape(scope, quote=True)}">')
16-
parts.append("`````markdown")
37+
parts.append(f"{fence}markdown")
1738
parts.append(content.rstrip())
18-
parts.append("`````")
39+
parts.append(fence)
1940
parts.append("</file>")
2041
parts.append("")
2142

2243
parts.append("</instruction_files>")
2344
return "\n".join(parts)
2445

2546

47+
def render_instruction_files_with_line_budget(files: dict[str, str], max_lines: int) -> str:
48+
parts = [
49+
INSTRUCTION_FILES_INTRO,
50+
"<instruction_files>",
51+
]
52+
53+
for path, content in files.items():
54+
scope = path.rsplit("/", 1)[0] if "/" in path else "repo-root"
55+
fence = _get_markdown_fence(content)
56+
file_header = [
57+
f'<file path="{escape(path, quote=True)}" scope="{escape(scope, quote=True)}">',
58+
f"{fence}markdown",
59+
]
60+
file_footer = [
61+
fence,
62+
"</file>",
63+
"",
64+
]
65+
content_lines = content.rstrip().splitlines()
66+
reserved_closing_lines = len(file_header) + len(file_footer) + 1
67+
available_content_lines = max_lines - len(parts) - reserved_closing_lines
68+
69+
parts.extend(file_header)
70+
if available_content_lines >= len(content_lines):
71+
parts.extend(content_lines)
72+
else:
73+
if available_content_lines > 1:
74+
parts.extend(content_lines[: available_content_lines - 1])
75+
parts.append(TRUNCATION_MARKER)
76+
parts.extend(file_footer)
77+
break
78+
79+
parts.extend(file_footer)
80+
81+
parts.append("</instruction_files>")
82+
return "\n".join(parts).strip()
83+
84+
2685
def build_repo_context(git_provider) -> str:
2786
context_files = get_settings().config.get("repo_context_files", [])
2887
if not context_files:
2988
return ""
89+
if isinstance(context_files, str):
90+
get_logger().warning(
91+
"repo_context_files should be a list of file paths; treating string value as one file path",
92+
artifact={"repo_context_files": context_files},
93+
)
94+
context_files = [context_files]
95+
elif not isinstance(context_files, list):
96+
get_logger().warning(
97+
"repo_context_files should be a list of file paths; skipping repo context",
98+
artifact={"repo_context_files": context_files},
99+
)
100+
return ""
101+
102+
provider_class = type(git_provider)
103+
if provider_class.get_repo_file_content is GitProvider.get_repo_file_content:
104+
if provider_class not in _unsupported_repo_context_provider_classes:
105+
_unsupported_repo_context_provider_classes.add(provider_class)
106+
get_logger().warning(
107+
f"repo_context_files is configured, but {provider_class.__name__} does not support repository "
108+
"file fetching; skipping repo context"
109+
)
110+
return ""
30111

31112
max_lines = get_settings().config.get("repo_context_max_lines", 500)
32113
try:
33114
max_lines = max(0, int(max_lines))
34115
except (TypeError, ValueError):
35116
max_lines = 500
36117

118+
cache_key = _get_repo_context_cache_key(context_files, max_lines)
119+
repo_context_cache = getattr(git_provider, REPO_CONTEXT_CACHE_ATTRIBUTE, None)
120+
if repo_context_cache is None:
121+
repo_context_cache = {}
122+
setattr(git_provider, REPO_CONTEXT_CACHE_ATTRIBUTE, repo_context_cache)
123+
if cache_key in repo_context_cache:
124+
return repo_context_cache[cache_key]
125+
37126
files = {}
38127
for file_path in context_files:
39128
if not isinstance(file_path, str) or not file_path.strip():
@@ -57,8 +146,9 @@ def build_repo_context(git_provider) -> str:
57146
files[file_path] = str(content).rstrip()
58147

59148
if not files:
149+
repo_context_cache[cache_key] = ""
60150
return ""
61151

62-
rendered_lines = render_instruction_files(files).splitlines()
63-
64-
return "\n".join(rendered_lines[:max_lines]).strip()
152+
repo_context = render_instruction_files_with_line_budget(files, max_lines)
153+
repo_context_cache[cache_key] = repo_context
154+
return repo_context

pr_agent/git_providers/gitlab_provider.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -796,6 +796,17 @@ def get_repo_settings(self):
796796
except Exception:
797797
return ""
798798

799+
def get_repo_file_content(self, file_path: str):
800+
try:
801+
project = self.gl.projects.get(self.id_project)
802+
contents = project.files.get(file_path=file_path, ref=project.default_branch).decode()
803+
return decode_if_bytes(contents)
804+
except GitlabGetError:
805+
return ""
806+
except Exception as e:
807+
get_logger().warning(f"Failed to load repo file: {file_path}, error: {e}")
808+
return ""
809+
799810
def get_workspace_name(self):
800811
return self.id_project.split('/')[0]
801812

pr_agent/settings/configuration.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ ai_timeout=120 # 2 minutes
2525
skip_keys = []
2626
custom_reasoning_model = false # when true, disables system messages and temperature controls for models that don't support chat-style inputs
2727
response_language="en-US" # Language locales code for PR responses in ISO 3166 and ISO 639 format (e.g., "en-US", "it-IT", "zh-CN", ...)
28-
repo_context_files = [] # Explicit repository-relative files to include as AI prompt context, e.g. ["AGENTS.md"]
28+
repo_context_files = [] # Explicit repository-relative files to include as AI prompt context; use a list, e.g. ["AGENTS.md"]
2929
repo_context_max_lines = 500 # Maximum total lines rendered from configured repo context files
3030
# token limits
3131
max_description_tokens = 500

tests/unittest/test_gitlab_provider.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,27 @@ def test_get_pr_file_content_other_exception(self, gitlab_provider, mock_project
7373

7474
assert content == ""
7575

76+
def test_get_repo_file_content_loads_from_default_branch(self, gitlab_provider, mock_gitlab_client, mock_project):
77+
mock_project.default_branch = "main"
78+
mock_file = MagicMock(ProjectFile)
79+
mock_file.decode.return_value = b"repo context"
80+
mock_project.files.get.return_value = mock_file
81+
82+
content = gitlab_provider.get_repo_file_content("AGENTS.md")
83+
84+
assert content == "repo context"
85+
mock_gitlab_client.projects.get.assert_called_with("test/repo")
86+
mock_project.files.get.assert_called_once_with(file_path="AGENTS.md", ref="main")
87+
mock_file.decode.assert_called_once()
88+
89+
def test_get_repo_file_content_treats_missing_file_as_empty(self, gitlab_provider, mock_project):
90+
mock_project.default_branch = "main"
91+
mock_project.files.get.side_effect = GitlabGetError("404 Not Found")
92+
93+
content = gitlab_provider.get_repo_file_content("AGENTS.md")
94+
95+
assert content == ""
96+
7697
def test_create_or_update_pr_file_create_new(self, gitlab_provider, mock_project):
7798
mock_project.files.get.side_effect = GitlabGetError("404 Not Found")
7899
mock_file = MagicMock()

0 commit comments

Comments
 (0)