diff --git a/pr_review/src/gpt/bot.py b/pr_review/src/gpt/bot.py index d5cd1f6ca9bb994b87bbcb8a059a189f8d416bb0..e4039c7d4c48381243a25deab1e2d2f9831cc1b6 100644 --- a/pr_review/src/gpt/bot.py +++ b/pr_review/src/gpt/bot.py @@ -4,117 +4,53 @@ import tiktoken from loguru import logger +default_message = ''' + You are `@coder_review[bot]`, a language model trained by OpenAI. + Your purpose is to act as a highly experienced software engineer and provide a thorough review of the code hunks and suggest code snippets to improve key areas such as: + - Logic + - Security + - Performance + - Data races + - Consistency + - Error handling + - Maintainability + - Modularity + - Complexity + - Optimization + - Best practices: DRY, SOLID, KISS + + Do not comment on minor code style issues, missing comments/documentation. Identify and resolve significant concerns to improve overall code quality while deliberately disregarding minor issues. + ''' + + class Gpt: max_token_length = 0 encoding_name = '' url = '' limit = 5 prompt = '' + system_message = '' auth_url = '' app_id = '' app_secret = '' @staticmethod def init_config_attr(config): - Gpt.max_token_length = config["gpt"]["max_token_length"] - Gpt.encoding_name = config["gpt"]["encoding_name"] - Gpt.url = config["gpt"]["url"] - Gpt.limit = config["gpt"]["limit"] - Gpt.prompt = config["gpt"]["prompt"] + Gpt.max_token_length = config["gpt"].get("max_token_length", 1024) + Gpt.encoding_name = config["gpt"].get("encoding_name", "cl100k_base") + Gpt.url = config["gpt"].get("url") + Gpt.limit = config["gpt"].get("limit", 5) + Gpt.prompt = config["gpt"].get("prompt") + Gpt.system_message = config["gpt"].get("system_message", default_message) - Gpt.auth_url = config["auth"]["auth_url"] - Gpt.app_id = config["auth"]["app_id"] - Gpt.app_secret = config["auth"]["app_secret"] + Gpt.auth_url = config["auth"].get("auth_url") + Gpt.app_id = config["auth"].get("app_id") + Gpt.app_secret = config["auth"].get("app_secret") class Bot(Gpt): def __init__(self): - self.system_message = ''' - Input: New hunks annotated with line numbers and old hunks (replaced code). Hunks represent incomplete code fragments. - Additional Context: PR title, description, summaries and comment chains. - Task: Review new hunks for substantive issues using provided context and respond with comments if necessary. - Output: Review comments in markdown with exact line number ranges in new hunks. Start and end line numbers must be within the same hunk. For single-line comments, start=end line number. Must use example response format below. - Use fenced code blocks using the relevant language identifier where applicable. - Don't annotate code snippets with line numbers. Format and indent code correctly. - Do not use `suggestion` code blocks. - For fixes, use `diff` code blocks, marking changes with `+` or `-`. The line number range for comments with fix snippets must exactly match the range to replace in the new hunk. - - - Do NOT provide general feedback, summaries, explanations of changes, or praises - for making good additions. - - Focus solely on offering specific, objective insights based on the - given context and refrain from making broad comments about potential impacts on - the system or question intentions behind the changes. - - If there are no issues found on a line range, you MUST respond with the - text `LGTM!` for that line range in the review section. - - ## Example 1 - - ### Example changes - - ---new_hunk--- - ``` - z = x / y - return z - - def add(x, y): - z = x + y - retrn z - ``` - - ---old_hunk--- - ``` - z = x / y - return z - - def add(x, y): - return x + y - ``` - - ---comment_chains--- - ``` - Please review this change. - ``` - - ---end_change_section--- - - ### Example response - - 这里有语法错误 - ```diff - - retrn z - + return z - ``` - - ## Example 2 - - ### Example changes - - ---new_hunk--- - ``` - def add(x, y): - z = x + y - retrn z - ``` - - ---old_hunk--- - ``` - def add(x, y): - return x + y - ``` - - ---comment_chains--- - ``` - Please review this change. - ``` - - ---end_change_section--- - - ### Example response - - LGTM - ''' - + pass def get_token(self): params = { @@ -133,7 +69,14 @@ class Bot(Gpt): def chat(self, prompt): data = { + "model": "gpt-4", + "temperature": 0.05, + "top_p": 1, "messages": [ + { + "role": "system", + "content": self.system_message + }, { "role": "user", "content": prompt @@ -155,7 +98,14 @@ class Bot(Gpt): logger.error(f"Failed to get token") return data = { + "model": "gpt-4", + "temperature": 0.05, + "top_p": 1, "messages": [ + { + "role": "system", + "content": self.system_message + }, { "role": "user", "content": prompt diff --git a/pr_review/src/handle/pull_request.py b/pr_review/src/handle/pull_request.py index 1b552e01700a3894910739fa9237e8cc94eb7cfd..3139f4826c74df5b9a75856df5cfd59e4ae6f7bd 100644 --- a/pr_review/src/handle/pull_request.py +++ b/pr_review/src/handle/pull_request.py @@ -2,7 +2,7 @@ from loguru import logger from review_code.review_task import review_task -comment_method = {"@PRReviewAI reivew": review_task} +comment_method = {"@pr-review": review_task} def merge_request_hooks(data): diff --git a/pr_review/src/review_code/prompts.py b/pr_review/src/review_code/prompts.py index c9d9401a66dc354cc482b9de85a436999aa55329..a931eafa8ea3a47525089cb73bddaa55482d5420 100644 --- a/pr_review/src/review_code/prompts.py +++ b/pr_review/src/review_code/prompts.py @@ -17,7 +17,8 @@ class Prompts: self.triageFileDiff = ''' Please triagle the diff as \'NEEDS_REVIEW\' or \'APPROVED\'. ''' - self.reviewFileDiffOld = ''' + + self.reviewFileDiff = '''## GitHub PR Title Input: New hunks annotated with line numbers and old hunks (replaced code). Hunks represent incomplete code fragments. Additional Context: PR title, description, summaries and comment chains. Task: Review new hunks for substantive issues using provided context and respond with comments if necessary. @@ -88,7 +89,7 @@ class Prompts: LGTM! --- - ## Changes made to `$filename` for your review + ## Changes made to \`$filename\` for your review $patches ''' diff --git a/pr_review/src/review_code/review.py b/pr_review/src/review_code/review.py index 43f687a25f45aa074dda62bc7a287d946106e464..01269317f8c3689203db5204f264c5601ad5ea88 100644 --- a/pr_review/src/review_code/review.py +++ b/pr_review/src/review_code/review.py @@ -119,6 +119,7 @@ class CodeReview: self.commits = commits filteredFiles = [] + file_comment_line = {} # 把patch切割成hunk for aFile in filterSelectedFiles: if not self.giteeApi.pr_number: @@ -134,11 +135,10 @@ class CodeReview: logger.warning('failed to get file contents: %s'%(e)) fileDiff = aFile.get('patch', '') - diff_lines = fileDiff.splitlines() + file_comment_line[aFile.get('filename')] = self.get_diff_new_line_dic(file_patches=fileDiff) patches = [] diff_num = 0 for patch in self.splitPatch(aFile.get('patch', '')): - patch = patch.replace('\\ No newline at end of file', '') diff_num += 1 patchLines = self.patchStartEndLine(patch) if not patchLines: @@ -148,8 +148,7 @@ class CodeReview: continue hunksStr = '''---new_hunk---\n\'\'\'\n%s\n\'\'\'\n---old_hunk---\n\'\'\'\n%s\n\'\'\''''%(hunks.get('newHunk', None), hunks.get('oldHunk', None)) - comment_diff_line = self.get_patch_diff_line(diff_num, diff_lines, fileDiff) - patches.append([patchLines.get('newHunk', None).get('startLine', None), patchLines.get('newHunk', None).get('endLine', None), hunksStr, comment_diff_line]) + patches.append([patchLines.get('newHunk', None).get('startLine', None), patchLines.get('newHunk', None).get('endLine', None), hunksStr]) if len(patches) > 0: filteredFiles.append([aFile.get('filename', None), fileContent, fileDiff, patches]) @@ -162,9 +161,9 @@ class CodeReview: filesAndChangesReview = filesAndChanges for filename, fileContent, _, patches in filesAndChangesReview: - lgtm_num = self.do_review(filename, self.input, patches) + lgtm_num = self.do_review(filename, self.input, patches, file_comment_line[filename]) if lgtm_num == len(patches): - self.giteeApi.submit_review(body = "ok", commitId = self.commits[0], filename=filename, line=patches[-1][-1]) + self.giteeApi.submit_review(body = "There is no issue found", commitId = self.commits[0], filename=filename, line=file_comment_line[filename].get(patches[-1][1])) def get_patch_diff_line(self, diff_num, diff_lines, file_diff): line_no = 0 @@ -243,45 +242,52 @@ class CodeReview: return None def parsePatch(self, patch): - hunkInfo = self.patchStartEndLine(patch) - if not hunkInfo: + hunk_info = self.patchStartEndLine(patch) + if not hunk_info: return - oldHunkLines = [] - newHunkLines = [] - newLine = hunkInfo.get('newHunk', None).get('startLine', None) - # 去除第一行@@ - lines = patch.split('\n') [1:] - # 去除最后一行空格 - if lines[-1] == '': - lines = lines[:-1] - skipStart = 3 - skipEnd = 3 - currentLine = 0 - - # reamovalOnly=True代表只删除内容,没有新增内容 - removalOnly = True - for line in lines: - if line.startswith('+'): - removalOnly = False - break - + + old_hunk_lines = [] + new_hunk_lines = [] + + new_line = hunk_info["newHunk"]["startLine"] + + lines = patch.split('\n')[1:] + + # Remove the last line if it's empty + if lines[-1] == '': + lines.pop() + + # Skip annotations for the first 3 and last 3 lines + skip_start = 3 + skip_end = 3 + + current_line = 0 + + removal_only = not any(line.startswith('+') for line in lines) + for line in lines: - currentLine += 1 + if line == '\\ No newline at end of file': + continue + current_line += 1 if line.startswith('-'): - oldHunkLines.append(line[1:]) + old_hunk_lines.append(line[1:]) elif line.startswith('+'): - newHunkLines.append(line[1:]) - newLine += 1 + new_hunk_lines.append(f"{new_line}: {line[1:]}") + new_line += 1 else: - oldHunkLines.append(line) - if removalOnly or (currentLine > skipStart and currentLine <= len(lines) - skipEnd): - newHunkLines.append(line) + old_hunk_lines.append(line) + if removal_only or (current_line > skip_start and current_line <= len(lines) - skip_end): + new_hunk_lines.append(f"{new_line}: {line}") else: - newHunkLines.append(line) - newLine += 1 - return {"oldHunk": '\n'.join(oldHunkLines), "newHunk": '\n'.join(newHunkLines)} + new_hunk_lines.append(line) + new_line += 1 - def do_review(self, filename, input, patches): + return { + "oldHunk": '\n'.join(old_hunk_lines), + "newHunk": '\n'.join(new_hunk_lines) + } + + def do_review(self, filename, input, patches, diff_new_line_dic): logger.info('reviewing: {}'.format(filename)) ins = copy.deepcopy(input) ins.filename = filename @@ -292,7 +298,7 @@ class CodeReview: # 计算有多少个hunkstr可以放入prompt # 当 prompt 的 token > max_token_length 时, 需要将 patch 切割吗? patchesToPack = 0 - for _, _, patch, _ in patches: + for _, _, patch in patches: patchTokens = self.bot.get_token_count(patch) if tokens + patchTokens > self.bot.max_token_length: logger.info('only packing {}/{} patches, tokens: {}/{}'.format(patchesToPack, len(patches), tokens, self.bot.max_token_length)) @@ -302,7 +308,7 @@ class CodeReview: patchesPacked = 0 - for startLine, endLine, patch, comment_line in patches: + for startLine, endLine, patch in patches: if patchesPacked >= patchesToPack: logger.info('unable to pack more patches into this request, packed: {}, total patches: {}, skipping'.format(patchesPacked, len(patches))) if self.options.debug: @@ -324,18 +330,87 @@ class CodeReview: if commentChain: ins.patches += '---comment_chains---\n\'\'\'{}\'\'\'---end_change_section---'.format(commentChain) if patchesPacked > 0: - messages = self.prompts.renderReviewFileDiff(ins, self.bot.prompt) - logger.info(messages) + messages = self.prompts.renderReviewFileDiff(ins, self.prompts.reviewFileDiff) + logger.info(f"Messages :\n{messages}") res = self.bot.chat(messages) if res.status_code != 200: logger.info('review: nothing obtained from openai') return '{} (no response)'.format(filename) - ans = res.json() - if ('LGTM' not in ans): - self.giteeApi.submit_review(body = ans, commitId = self.commits[0], filename=filename, line=comment_line) - - else: + answer = res.json() + logger.info(f"Answer :\n{answer}") + if self.parse_result(diff_new_line_dic, filename, answer): lgtm_num += 1 + return lgtm_num + + def parse_result(self, diff_new_line_dic, filename, answer): + lgtm = True + ans = answer.split('---') + for patch in ans: + lines = patch.split(':') + line = lines[0].split('-')[-1] + comment_line = diff_new_line_dic[int(line)] + + # There is issue found on a line range + if ('LGTM' not in lines[1]): + lgtm = False + self.giteeApi.submit_review(body = lines[1], commitId = self.commits[0], filename=filename, line=comment_line) + return lgtm + + def delta(self, line_numbers, step): + res = {} + for key in line_numbers: + res.update({key: line_numbers[key] + step}) + return res + + + def parse(self, diff, patch_lines): + lines = diff.split('\n') + + line_numbers = {} + + current_line_number = 0 + new_line = 0 + + line_numbers.update({patch_lines['newHunk']['startLine']: 2}) + + for line in lines: + current_line_number += 1 + if line.startswith('@@'): + current_line_number += 1 + continue + if line.startswith('-'): + continue + else: + # Context line, increment line number + new_line += 1 + line_numbers.update({patch_lines['newHunk']['startLine'] + new_line: current_line_number}) + + return line_numbers + + def get_diff_new_line_dic(self, file_patches): + i = 0 + step = [0] + diff_new_line_dic = {} + patches = self.splitPatch(file_patches) + try: + for patch in patches: + patch_lines = self.patchStartEndLine(patch) + diff_new_line_dic.update(self.delta(self.parse(patch, patch_lines), step[i])) + end_line = patch_lines['newHunk']['startLine'] + patch_lines['newHunk']['endLine'] + step.append(diff_new_line_dic.get(end_line)) + i += 1 + except Exception as e: + logger.info(e) + + # If there are modifications in the first four lines, + # the first line "@@" will not be displayed, + # all line numbers need to be decremented by 1. + if self.modify_line(file_patches): + for key in diff_new_line_dic: + diff_new_line_dic.update({key: diff_new_line_dic[key] - 1}) + + logger.info(f'file_line : diff_line ====> {diff_new_line_dic}') + return diff_new_line_dic