11from html import escape
22
33from pr_agent .config_loader import get_settings
4+ from pr_agent .git_providers .git_provider import GitProvider
45from pr_agent .log import get_logger
56
7+ TRUNCATION_MARKER = "...(truncated)..."
8+ INSTRUCTION_FILES_INTRO = (
9+ "You are being given instruction files. Follow them as project-specific guidance when reviewing code."
10+ )
11+ MARKDOWN_FENCE = "`````"
12+ REPO_CONTEXT_CACHE_ATTRIBUTE = "_repo_context_cache"
13+ _unsupported_repo_context_provider_classes = set ()
14+
15+
def _get_markdown_fence(content: str) -> str:
    """Return a backtick fence that does not occur anywhere in ``content``.

    The candidate starts as ``MARKDOWN_FENCE`` and grows by one backtick at a
    time until it is no longer a substring of ``content``, so the returned
    fence can wrap ``content`` without being closed early by the content itself.

    Args:
        content: Text that will be placed between the opening and closing fence.

    Returns:
        ``MARKDOWN_FENCE`` followed by the smallest number of extra backticks
        needed for the fence to be absent from ``content``.
    """
    extra_backticks = 0
    while MARKDOWN_FENCE + "`" * extra_backticks in content:
        extra_backticks += 1
    return MARKDOWN_FENCE + "`" * extra_backticks
21+
22+
23+ def _get_repo_context_cache_key (context_files : list , max_lines : int ) -> tuple [tuple [tuple [str , str ], ...], int ]:
24+ return tuple ((type (file_path ).__name__ , str (file_path )) for file_path in context_files ), max_lines
25+
626
def render_instruction_files(files: dict[str, str]) -> str:
    """Render instruction files as a single ``<instruction_files>`` text block.

    The block starts with ``INSTRUCTION_FILES_INTRO``. Every file becomes a
    ``<file>`` element carrying its HTML-escaped path and scope, with the
    right-stripped content wrapped in a markdown fence chosen so that it does
    not collide with backtick runs inside the content.

    Args:
        files: Mapping of file path to file content, rendered in mapping order.

    Returns:
        The rendered block, lines joined with newlines. No line limit is applied.
    """
    rendered = [INSTRUCTION_FILES_INTRO, "<instruction_files>"]

    for path, content in files.items():
        # The scope is the directory part of the path; files without one belong to the repo root.
        directory, separator, _ = path.rpartition("/")
        scope = directory if separator else "repo-root"
        fence = _get_markdown_fence(content)
        rendered += [
            f'<file path="{escape(path, quote=True)}" scope="{escape(scope, quote=True)}">',
            f"{fence} markdown",
            content.rstrip(),
            fence,
            "</file>",
            "",
        ]

    rendered.append("</instruction_files>")
    return "\n".join(rendered)
2445
2546
def render_instruction_files_with_line_budget(files: dict[str, str], max_lines: int) -> str:
    """Render instruction files as an ``<instruction_files>`` block of at most ``max_lines`` lines.

    Files are rendered in mapping order, each as a ``<file>`` element whose
    content is wrapped in a collision-free markdown fence. Before a file is
    emitted, the lines needed for its wrapper and for the closing
    ``</instruction_files>`` tag are reserved, so the block is always well
    formed:

    * a file that fits is emitted in full;
    * a file that fits only partially is cut and ends with ``TRUNCATION_MARKER``,
      and rendering stops there;
    * a file for which not even the truncation marker fits is not emitted at
      all, and rendering stops there.

    Args:
        files: Mapping of file path to file content.
        max_lines: Maximum number of lines the returned text may contain.

    Returns:
        The rendered block, or ``""`` when the budget is too small to render
        anything useful (no file fits, or the enclosing tags alone exceed it).
    """
    parts = [INSTRUCTION_FILES_INTRO, "<instruction_files>"]
    rendered_file_count = 0

    for path, content in files.items():
        scope = path.rsplit("/", 1)[0] if "/" in path else "repo-root"
        fence = _get_markdown_fence(content)
        file_header = [
            f'<file path="{escape(path, quote=True)}" scope="{escape(scope, quote=True)}">',
            f"{fence} markdown",
        ]
        file_footer = [fence, "</file>", ""]
        content_lines = content.rstrip().splitlines()

        # Lines that must still follow the content: this file's wrapper plus the closing tag.
        reserved_lines = len(file_header) + len(file_footer) + 1
        available_content_lines = max_lines - len(parts) - reserved_lines

        fits_completely = available_content_lines >= len(content_lines)
        if not fits_completely and available_content_lines < 1:
            # Not even the truncation marker fits; emitting the wrapper would overrun the budget.
            break

        parts.extend(file_header)
        rendered_file_count += 1
        if fits_completely:
            parts.extend(content_lines)
            parts.extend(file_footer)
            continue

        # Keep one line of the remaining budget for the truncation marker itself.
        parts.extend(content_lines[: available_content_lines - 1])
        parts.append(TRUNCATION_MARKER)
        parts.extend(file_footer)
        break

    if files and rendered_file_count == 0:
        # An intro with no instruction files behind it would only be noise.
        return ""

    parts.append("</instruction_files>")
    if len(parts) > max_lines:
        # Only reachable with no files and a budget smaller than the enclosing tags.
        return ""
    return "\n".join(parts).strip()
83+
84+
2685def build_repo_context (git_provider ) -> str :
2786 context_files = get_settings ().config .get ("repo_context_files" , [])
2887 if not context_files :
2988 return ""
89+ if isinstance (context_files , str ):
90+ get_logger ().warning (
91+ "repo_context_files should be a list of file paths; treating string value as one file path" ,
92+ artifact = {"repo_context_files" : context_files },
93+ )
94+ context_files = [context_files ]
95+ elif not isinstance (context_files , list ):
96+ get_logger ().warning (
97+ "repo_context_files should be a list of file paths; skipping repo context" ,
98+ artifact = {"repo_context_files" : context_files },
99+ )
100+ return ""
101+
102+ provider_class = type (git_provider )
103+ if provider_class .get_repo_file_content is GitProvider .get_repo_file_content :
104+ if provider_class not in _unsupported_repo_context_provider_classes :
105+ _unsupported_repo_context_provider_classes .add (provider_class )
106+ get_logger ().warning (
107+ f"repo_context_files is configured, but { provider_class .__name__ } does not support repository "
108+ "file fetching; skipping repo context"
109+ )
110+ return ""
30111
31112 max_lines = get_settings ().config .get ("repo_context_max_lines" , 500 )
32113 try :
33114 max_lines = max (0 , int (max_lines ))
34115 except (TypeError , ValueError ):
35116 max_lines = 500
36117
118+ cache_key = _get_repo_context_cache_key (context_files , max_lines )
119+ repo_context_cache = getattr (git_provider , REPO_CONTEXT_CACHE_ATTRIBUTE , None )
120+ if repo_context_cache is None :
121+ repo_context_cache = {}
122+ setattr (git_provider , REPO_CONTEXT_CACHE_ATTRIBUTE , repo_context_cache )
123+ if cache_key in repo_context_cache :
124+ return repo_context_cache [cache_key ]
125+
37126 files = {}
38127 for file_path in context_files :
39128 if not isinstance (file_path , str ) or not file_path .strip ():
@@ -57,8 +146,9 @@ def build_repo_context(git_provider) -> str:
57146 files [file_path ] = str (content ).rstrip ()
58147
59148 if not files :
149+ repo_context_cache [cache_key ] = ""
60150 return ""
61151
62- rendered_lines = render_instruction_files (files ). splitlines ( )
63-
64- return " \n " . join ( rendered_lines [: max_lines ]). strip ()
152+ repo_context = render_instruction_files_with_line_budget (files , max_lines )
153+ repo_context_cache [ cache_key ] = repo_context
154+ return repo_context
0 commit comments