From b4937d422329f21e8e453b1e4a5dff9554e29d1e Mon Sep 17 00:00:00 2001 From: kaede10 Date: Sat, 28 Oct 2023 17:09:13 +0800 Subject: [PATCH] update pr_review --- {prreivew => pr_reivew}/Dockerfile | 0 {prreivew => pr_reivew}/README.md | 0 {prreivew => pr_reivew}/config.yaml | 10 +- {prreivew => pr_reivew}/requirements.txt | 0 {prreivew => pr_reivew}/src/app.py | 0 .../src/config/init_config.py | 17 +- .../src/gitee/gitee_api.py | 93 ++++-- pr_reivew/src/gpt/bot.py | 120 +++++++ .../src/handle/pull_request.py | 5 +- {prreivew => pr_reivew}/src/handle/task.py | 0 {prreivew => pr_reivew}/src/main.py | 2 - .../src/review_code}/Untitled-1.txt | 0 .../src/review_code}/__init__.py | 0 .../src/review_code}/commenter.py | 76 ++--- .../src/review_code}/input.py | 0 .../src/review_code}/options.py | 2 +- .../src/review_code}/prompts.py | 10 +- pr_reivew/src/review_code/review.py | 297 ++++++++++++++++ pr_reivew/src/review_code/review_task.py | 9 + {prreivew => pr_reivew}/src/router/router.py | 9 - .../src/utils/background_task.py | 0 .../src/utils/utile_tool.py | 0 prreivew/src/gitee/gitee_api.py | 47 --- prreivew/src/gpt/chat_gpt.py | 92 ----- prreivew/src/gpt/gpt.py | 28 -- prreivew/src/gpt/gpt_class_factory.py | 14 - prreivew/src/gpt/my_gpt.py | 56 ---- prreivew/src/handle/comment_command.py | 78 ----- prreivew/src/handle/diff.py | 93 ------ prreivew/src/reviewCode/bot.py | 14 - prreivew/src/reviewCode/main.py | 15 - prreivew/src/reviewCode/review.py | 316 ------------------ prreivew/src/reviewCode/tokenizer.py | 9 - 33 files changed, 537 insertions(+), 875 deletions(-) rename {prreivew => pr_reivew}/Dockerfile (100%) rename {prreivew => pr_reivew}/README.md (100%) rename {prreivew => pr_reivew}/config.yaml (30%) rename {prreivew => pr_reivew}/requirements.txt (100%) rename {prreivew => pr_reivew}/src/app.py (100%) rename {prreivew => pr_reivew}/src/config/init_config.py (40%) rename prreivew/src/reviewCode/giteeApi.py => pr_reivew/src/gitee/gitee_api.py (34%) create mode 100644 pr_reivew/src/gpt/bot.py rename {prreivew => pr_reivew}/src/handle/pull_request.py (84%) rename {prreivew => pr_reivew}/src/handle/task.py (100%) rename {prreivew => pr_reivew}/src/main.py (83%) rename {prreivew/src/reviewCode => pr_reivew/src/review_code}/Untitled-1.txt (100%) rename {prreivew/src/reviewCode => pr_reivew/src/review_code}/__init__.py (100%) rename {prreivew/src/reviewCode => pr_reivew/src/review_code}/commenter.py (69%) rename {prreivew/src/reviewCode => pr_reivew/src/review_code}/input.py (100%) rename {prreivew/src/reviewCode => pr_reivew/src/review_code}/options.py (92%) rename {prreivew/src/reviewCode => pr_reivew/src/review_code}/prompts.py (94%) create mode 100644 pr_reivew/src/review_code/review.py create mode 100644 pr_reivew/src/review_code/review_task.py rename {prreivew => pr_reivew}/src/router/router.py (55%) rename {prreivew => pr_reivew}/src/utils/background_task.py (100%) rename {prreivew => pr_reivew}/src/utils/utile_tool.py (100%) delete mode 100644 prreivew/src/gitee/gitee_api.py delete mode 100644 prreivew/src/gpt/chat_gpt.py delete mode 100644 prreivew/src/gpt/gpt.py delete mode 100644 prreivew/src/gpt/gpt_class_factory.py delete mode 100644 prreivew/src/gpt/my_gpt.py delete mode 100644 prreivew/src/handle/comment_command.py delete mode 100644 prreivew/src/handle/diff.py delete mode 100644 prreivew/src/reviewCode/bot.py delete mode 100644 prreivew/src/reviewCode/main.py delete mode 100644 prreivew/src/reviewCode/review.py delete mode 100644 prreivew/src/reviewCode/tokenizer.py diff --git a/prreivew/Dockerfile b/pr_reivew/Dockerfile similarity index 100% rename from prreivew/Dockerfile rename to pr_reivew/Dockerfile diff --git a/prreivew/README.md b/pr_reivew/README.md similarity index 100% rename from prreivew/README.md rename to pr_reivew/README.md diff --git a/prreivew/config.yaml b/pr_reivew/config.yaml similarity index 30% rename from prreivew/config.yaml rename to pr_reivew/config.yaml index 97b6f41..9274029 100644 --- a/prreivew/config.yaml +++ b/pr_reivew/config.yaml @@ -1,5 +1,11 @@ access_token: gitee_host: -max_token_length: -encoding_name: \ No newline at end of file + +gpt: + max_token_length: + encoding_name: + url: + limit: + + diff --git a/prreivew/requirements.txt b/pr_reivew/requirements.txt similarity index 100% rename from prreivew/requirements.txt rename to pr_reivew/requirements.txt diff --git a/prreivew/src/app.py b/pr_reivew/src/app.py similarity index 100% rename from prreivew/src/app.py rename to pr_reivew/src/app.py diff --git a/prreivew/src/config/init_config.py b/pr_reivew/src/config/init_config.py similarity index 40% rename from prreivew/src/config/init_config.py rename to pr_reivew/src/config/init_config.py index acacb82..c29f102 100644 --- a/prreivew/src/config/init_config.py +++ b/pr_reivew/src/config/init_config.py @@ -1,27 +1,24 @@ -import os + import yaml -from gitee.gitee_api import GiteeApiCaller -from gpt.gpt import Gpt -from handle.diff import Diff_Prompt +from gitee.gitee_api import GiteeCaller +from gpt.bot import Gpt + def init_config(path): - print(os.getcwd()) with open(path, "r", encoding="utf-8") as f: config = yaml.safe_load(f) - GiteeApiCaller.init_config_attr(config["access_token"], config["gitee_host"]) + GiteeCaller.init_config_attr(config["access_token"], config["gitee_host"]) Gpt.init_config_attr( - config["gpt"]["use"], config["gpt"]["max_token_length"], config["gpt"]["encoding_name"], - config["gpt"]["host"], - config["gpt"]["Authorization"], + config["gpt"]["url"], + config["gpt"]["limit"], ) - Diff_Prompt.init_config_attr() diff --git a/prreivew/src/reviewCode/giteeApi.py b/pr_reivew/src/gitee/gitee_api.py similarity index 34% rename from prreivew/src/reviewCode/giteeApi.py rename to pr_reivew/src/gitee/gitee_api.py index 18063f8..aae6808 100644 --- a/prreivew/src/reviewCode/giteeApi.py +++ b/pr_reivew/src/gitee/gitee_api.py @@ -2,9 +2,17 @@ from loguru import logger import requests import json -class GiteeApi: + +class GiteeCaller: + access_token = '' + gitee_url = '' + + def init_config_attr(access_token, gitee_url): + GiteeCaller.access_token = access_token + GiteeCaller.gitee_url = gitee_url + +class GiteeApi(GiteeCaller): def __init__(self, data): - self.token = None self.cookies = None self.owner = None self.repo = None @@ -13,49 +21,60 @@ class GiteeApi: self.owner = project.get('namespace', None) self.repo = project.get('path', None) self.pr = data.get('pull_request', None) - self.prNumber = self.pr.get('number', None) - if (not self.owner) or (not self.repo) or (not self.pr) or (not self.prNumber): + self.pr_number = self.pr.get('number', None) + if (not self.owner) or (not self.repo) or (not self.pr) or (not self.pr_number): logger.error('not giteeApi param') - def listComments(self): - # 此处用的是pr中的comment,而不是issue中的comment + def list_comments(self): page = 1 - allComments = [] + all_comments = [] while True: - url = 'https://gitee.com/api/v5/repos/{}/{}/pulls/{}/comments?access_token={}&page={}&per_page=100&direction=desc'. \ - format(self.owner, self.repo, self.prNumber, self.token, page) - res = json.loads(requests.get(url = url).content.decode('utf-8')) - # res = requests.get(url = url).json() - allComments.extend(res) + url = f'{self.gitee_url}/{self.owner}/{self.repo}/pulls/{self.pr_number}/comments' + params = { + 'access_token': self.access_token, + 'page': page, + 'per_page': 100, + 'direction': 'desc' + } + res = json.loads(requests.get(url=url, params=params).content.decode('utf-8')) + all_comments.extend(res) page += 1 if not res or len(res) < 100: break - return allComments + return all_comments - def getAllCommitIds(self): - url = 'https://gitee.com/api/v5/repos/{}/{}/pulls/{}/commits?access_token={}'. \ - format(self.owner, self.repo, self.prNumber, self.token) - return json.loads(requests.get(url = url).content.decode('utf-8')) + def get_all_commit_ids(self): + url = f'{self.gitee_url}/{self.owner}/{self.repo}/pulls/{self.pr_number}/commits' + params = { + 'access_token': self.access_token + } + return json.loads(requests.get(url = url, params=params).content.decode('utf-8')) - def listReviewComments(self, prNumber): + def list_review_comments(self): page = 1 - allComments = [] + all_comments = [] while True: - url = 'https://gitee.com/api/v5/repos/{}/{}/pulls/{}/comments?access_token={}&page={}&per_page=100'. \ - format(self.owner, self.repo, prNumber, self.token, page) - res = json.loads(requests.get(url = url).content.decode('utf-8')) - allComments.extend(res) + url = f'{self.gitee_url}/{self.owner}/{self.repo}/pulls/{self.pr_number}/comments' + params = { + 'access_token': self.access_token, + 'page': page, + 'per_page': 100, + } + res = json.loads(requests.get(url = url, params=params).content.decode('utf-8')) + all_comments.extend(res) page += 1 if not res or len(res) < 100: break - return allComments + return all_comments - def submitReview(self, body, commitId): - url = 'https://gitee.com/api/v5/repos/{}/{}/pulls/{}/comments'.format(self.owner, self.repo, self.prNumber) + def submit_review(self, body, commitId, filename, line): + url = f'{self.gitee_url}/{self.owner}/{self.repo}/pulls/{self.pr_number}/comments' data = { - 'access_token': self.token, + 'access_token': self.access_token, 'body': body, - 'commit_id': commitId + 'commit_id': commitId, + "path": filename, + "position": line } res = requests.post(url = url, data = data) if res.status_code != 201: @@ -65,16 +84,18 @@ class GiteeApi: else: logger.info('post to gitee succeed') - def fetchPR(self): - url = 'https://gitee.com/api/v5/repos/{}/{}/pulls/{}?access_token={}'. \ - format(self.owner, self.repo, self.prNumber, self.token) + def fetch_pr(self): + url = f'{self.gitee_url}/{self.owner}/{self.repo}/pulls/{self.pr_number}?access_token={self.access_token}' return json.loads(requests.get(url = url).content.decode('utf-8')) def compare(self, formerSha, latterSha): - url = 'https://gitee.com/api/v5/repos/{}/{}/compare/{}...{}?access_token={}&straight=true'. \ - format(self.owner, self.repo, formerSha, latterSha, self.token) - return json.loads(requests.get(url = url).content.decode('utf-8')) + url = f'{self.gitee_url}/{self.owner}/{self.repo}/compare/{formerSha}...{latterSha}' + params = { + 'access_token': self.access_token, + 'straight': True, + } + return json.loads(requests.get(url = url, params=params).content.decode('utf-8')) - def fetchFileContent(self, rawUrl): - url = '{}?access_token={}'.format(rawUrl, self.token) + def fetch_file_content(self, rawUrl): + url = f'{rawUrl}?access_token={self.access_token}' return requests.get(url, cookies = self.cookies).content.decode('utf-8') diff --git a/pr_reivew/src/gpt/bot.py b/pr_reivew/src/gpt/bot.py new file mode 100644 index 0000000..1cb89d8 --- /dev/null +++ b/pr_reivew/src/gpt/bot.py @@ -0,0 +1,120 @@ +import requests +import tiktoken +from loguru import logger + + +class Gpt: + max_token_length = 0 + encoding_name = '' + url = '' + limit = 5 + + def init_config_attr(max_token_length, encoding_name, url, limit): + Gpt.max_token_length = max_token_length + Gpt.encoding_name = encoding_name + Gpt.url = url + Gpt.limit = limit + + +class Bot(Gpt): + def __init__(self): + self.system_message = ''' + Input: New hunks annotated with line numbers and old hunks (replaced code). Hunks represent incomplete code fragments. + Additional Context: PR title, description, summaries and comment chains. + Task: Review new hunks for substantive issues using provided context and respond with comments if necessary. + Output: Review comments in markdown with exact line number ranges in new hunks. Start and end line numbers must be within the same hunk. For single-line comments, start=end line number. Must use example response format below. + Use fenced code blocks using the relevant language identifier where applicable. + Don't annotate code snippets with line numbers. Format and indent code correctly. + Do not use `suggestion` code blocks. + For fixes, use `diff` code blocks, marking changes with `+` or `-`. The line number range for comments with fix snippets must exactly match the range to replace in the new hunk. + + - Do NOT provide general feedback, summaries, explanations of changes, or praises + for making good additions. + - Focus solely on offering specific, objective insights based on the + given context and refrain from making broad comments about potential impacts on + the system or question intentions behind the changes. + + If there are no issues found on a line range, you MUST respond with the + text `LGTM!` for that line range in the review section. + + ## Example + + ### Example changes + + ---new_hunk--- + ``` + z = x / y + return z + + 20: def add(x, y): + 21: z = x + y + 22: retrn z + 23: + 24: def multiply(x, y): + 25: return x * y + + def subtract(x, y): + z = x - y + ``` + + ---old_hunk--- + ``` + z = x / y + return z + + def add(x, y): + return x + y + + def subtract(x, y): + z = x - y + ``` + + ---comment_chains--- + ``` + Please review this change. + ``` + + ---end_change_section--- + + ### Example response + + 22-22: + 这里有语法错误 + ```diff + - retrn z + + return z + ``` + ''' + + + def chat(self, prompt): + data = { + "model": "gpt-3.5-turbo", + "messages": [ + { + "role": "system", + "content": self.system_message + }, + { + "role": "user", + "content": prompt + } + ], + "temperature": 0.7, + } + response = requests.post( + self.url, json=data + ) + if response.status_code != 200: + logger.info("get answer error") + logger.info(response.status_code) + + return response + + def get_token_count(self, content): + encoding = tiktoken.get_encoding(self.encoding_name) + tokens = encoding.encode(content) + return len(tokens) + + def get_max_prompt_length(self): + return self.max_token_length \ No newline at end of file diff --git a/prreivew/src/handle/pull_request.py b/pr_reivew/src/handle/pull_request.py similarity index 84% rename from prreivew/src/handle/pull_request.py rename to pr_reivew/src/handle/pull_request.py index c1bd92f..c3f716e 100644 --- a/prreivew/src/handle/pull_request.py +++ b/pr_reivew/src/handle/pull_request.py @@ -1,9 +1,8 @@ from loguru import logger -from handle.comment_command import * -from reviewCode.main import reviewMain +from review_code.review_task import review_task -comment_method = {"/summary-message": summary_message, "@PRReviewAI reivew": reviewMain} +comment_method = {"@PRReviewAI reivew": review_task} def merge_request_hooks(data): diff --git a/prreivew/src/handle/task.py b/pr_reivew/src/handle/task.py similarity index 100% rename from prreivew/src/handle/task.py rename to pr_reivew/src/handle/task.py diff --git a/prreivew/src/main.py b/pr_reivew/src/main.py similarity index 83% rename from prreivew/src/main.py rename to pr_reivew/src/main.py index 72365c7..ac01ca7 100644 --- a/prreivew/src/main.py +++ b/pr_reivew/src/main.py @@ -6,9 +6,7 @@ from config import init_config @click.command() @click.option("--config", default="config.yaml", help="config file path") def main(config): - # Init config from yaml file init_config.init_config(config) - # Set up routing router.start_router() diff --git a/prreivew/src/reviewCode/Untitled-1.txt b/pr_reivew/src/review_code/Untitled-1.txt similarity index 100% rename from prreivew/src/reviewCode/Untitled-1.txt rename to pr_reivew/src/review_code/Untitled-1.txt diff --git a/prreivew/src/reviewCode/__init__.py b/pr_reivew/src/review_code/__init__.py similarity index 100% rename from prreivew/src/reviewCode/__init__.py rename to pr_reivew/src/review_code/__init__.py diff --git a/prreivew/src/reviewCode/commenter.py b/pr_reivew/src/review_code/commenter.py similarity index 69% rename from prreivew/src/reviewCode/commenter.py rename to pr_reivew/src/review_code/commenter.py index a199a16..fbe49ee 100644 --- a/prreivew/src/reviewCode/commenter.py +++ b/pr_reivew/src/review_code/commenter.py @@ -1,8 +1,6 @@ from loguru import logger -import requests -import chardet -import json -from reviewCode.giteeApi import GiteeApi +from gitee.gitee_api import GiteeApi + class Commenter: COMMENT_GREETING = '(PRReviewAI COMMENT_GREETING)' @@ -35,44 +33,18 @@ class Commenter: def __init__(self, giteeApi: GiteeApi): self.giteeApi = giteeApi - - # def comment(self, message, tag, mode): - # target = -1 - # pr_number = gitee_hook.get('number', None) - # if pr_number is not None: - # target = pr_number - # else: - # logger.error('not pull request number') - - # if tag is None: - # tag = Commenter.COMMENT_GREETING - - # body = '''%s\n%s\n%s'''%(Commenter.COMMENT_GREETING, message, tag) - - # if mode == 'create': - # self.create(body, target) - # elif mode == 'replace': - # self.replace(body, tag, target) - # else: - # logger.warning('unknown mode: %s, use \'replace\' instead') - # self.replace(body, tag, target) - def listComments(self, prNumber): + def list_comments(self): # 返回当前prnumber的所有commentR - # res = requests.get('https://gitee.com/api/v5/repos/ggzzll1/temp/pulls/2/comments?access_token=7e68411eb68f4f52834ed8510a0656ce&page=1&per_page=100&direction=desc') - # return json.loads(res.content.decode('utf-8') - return self.giteeApi.listComments() - - - - def findCommentWithTag(self, tag, target): - comments = self.listComments(target) + return self.giteeApi.list_comments() + + def findCommentWithTag(self, tag): + comments = self.list_comments() for comment in comments: if comment.get('body', None) and tag in comment.get('body', None): return comment return '' - - + def getContentWithinTags(self, content: str, startTag: str, endTag: str) -> str: start = content.find(startTag) end = content.find(endTag) @@ -92,8 +64,8 @@ class Commenter: return '' return commentBody[start : end + len(Commenter.COMMIT_ID_END_TAG)] - def getAllCommitIds(self): - commits = self.giteeApi.getAllCommitIds() + def get_all_commit_ids(self): + commits = self.giteeApi.get_all_commit_ids() allCommits = [] for commit in commits: allCommits.append(commit.get('sha', None)) @@ -113,8 +85,8 @@ class Commenter: ids = commentBody[start + len(Commenter.COMMIT_ID_START_TAG): end] return ids.split(',') - def getCommentChainsWithinRange(self, prNumber, path, startLine, endLine, tag = ''): - existingComments = self.getCommentsWithinRange(prNumber, path, startLine, endLine) + def getCommentChainsWithinRange(self, path, startLine, endLine, tag = ''): + existingComments = self.getCommentsWithinRange(path, startLine, endLine) topLevelComments = [] for comment in existingComments: if not comment.get('in_reply_to_id', None): @@ -135,17 +107,25 @@ class Commenter: conversationChain.append('{}: {}'.format(comment.get('user', None).get('login', None), comment.get('body', None))) return '\n---\n'.join(conversationChain) - def getCommentsWithinRange(self, prNumber, path, startLine, endLine): - comments = self.listReviewComments(prNumber) + def getCommentsWithinRange(self, path, startLine, endLine): + comments = self.list_review_comments() requiredComments = [] - for comment in comments: - if comment.get('path', None) == path and comment.get('body', None) and \ - comment.get('comment_type', None) == 'diff_comment' and startLine <= comment.get('new_line', None) <= endLine: - requiredComments.append(comment) + try: + for comment in comments: + if comment.get('path', None) == path and \ + comment.get('body', None) and \ + comment.get('comment_type', None) == 'diff_comment' and \ + comment.get('new_line') and \ + comment.get('new_line') >= startLine and \ + comment.get('line') and \ + comment.get('line') <= endLine: + requiredComments.append(comment) + except Exception as e: + logger.info('*******************************getCommentsWithinRange error = ', e) return requiredComments - def listReviewComments(self, prNumber): - return self.giteeApi.listReviewComments(prNumber) + def list_review_comments(self): + return self.giteeApi.list_review_comments() def addReviewedCommitId(self, commentBody, commitId): start = commentBody.find(Commenter.COMMIT_ID_START_TAG) diff --git a/prreivew/src/reviewCode/input.py b/pr_reivew/src/review_code/input.py similarity index 100% rename from prreivew/src/reviewCode/input.py rename to pr_reivew/src/review_code/input.py diff --git a/prreivew/src/reviewCode/options.py b/pr_reivew/src/review_code/options.py similarity index 92% rename from prreivew/src/reviewCode/options.py rename to pr_reivew/src/review_code/options.py index 654c936..09823ff 100644 --- a/prreivew/src/reviewCode/options.py +++ b/pr_reivew/src/review_code/options.py @@ -6,7 +6,7 @@ class Options: self.maxFiles = 1000 self.pathFilters= '' self.TokenLimits = 1024 - self.rules = {'*.txt': False, '*.py': False} + self.rules = {'*.txt': False, '*.py': False, '*.md': False} self.debug = False def checkPath(self, path): diff --git a/prreivew/src/reviewCode/prompts.py b/pr_reivew/src/review_code/prompts.py similarity index 94% rename from prreivew/src/reviewCode/prompts.py rename to pr_reivew/src/review_code/prompts.py index e8e4a8e..b41331a 100644 --- a/prreivew/src/reviewCode/prompts.py +++ b/pr_reivew/src/review_code/prompts.py @@ -1,4 +1,4 @@ -from reviewCode.input import Input +from review_code.input import Input class Prompts: @@ -17,7 +17,7 @@ class Prompts: self.triageFileDiff = ''' Please triagle the diff as \'NEEDS_REVIEW\' or \'APPROVED\'. ''' - self.reviewFileDiff = ''' + self.reviewFileDiffOld = ''' Input: New hunks annotated with line numbers and old hunks (replaced code). Hunks represent incomplete code fragments. Additional Context: PR title, description, summaries and comment chains. Task: Review new hunks for substantive issues using provided context and respond with comments if necessary. @@ -92,7 +92,13 @@ class Prompts: $patches ''' + + self.reviewFileDiff = ''' + ## 请用中文对 `$filename` 的语法错误进行 review,并给出修改意见 + $patches + ''' + def renderSummarizeFileDiff(self, inputIn, reviewSimpleChanges): prompt = self.summarizeFileDiff if not reviewSimpleChanges: diff --git a/pr_reivew/src/review_code/review.py b/pr_reivew/src/review_code/review.py new file mode 100644 index 0000000..f8d991e --- /dev/null +++ b/pr_reivew/src/review_code/review.py @@ -0,0 +1,297 @@ +import copy +import re +from loguru import logger +from review_code.prompts import Prompts +from review_code.options import Options +from gitee.gitee_api import GiteeApi +from gpt.bot import Bot +from review_code.input import Input +from review_code.commenter import Commenter + + +IGNORE_KEYWORD = '@PRReviewAI: ignore' + + +class CodeReview: + def __init__(self, data): + self.options = Options() + self.prompts = Prompts() + self.bot = Bot() + self.giteeApi = GiteeApi(data) + self.input = Input(data) + self.commenter = Commenter(self.giteeApi) + self.commits = [] + + def code_review(self): + # 如果body中包含IGNORE_KEYWORD,跳过本次reivew + if (IGNORE_KEYWORD in self.input.description): + logger.info("skipped: body contains ignore_keyword") + return + + # 已经在pr中出现的comment + existingSummarizeComment = self.commenter.findCommentWithTag(Commenter.SUMMARIZE_TAG) + + existingCommitIdsBlock = '' + existingSummarizeCommentBody = '' + if existingSummarizeComment: + existingSummarizeCommentBody = existingSummarizeComment.get('body', None) + self.input.rawSummary = self.commenter.getRawSummary(existingSummarizeCommentBody) + self.input.shortSummary = self.commenter.getShortSummary(existingSummarizeCommentBody) + existingCommitIdsBlock = self.commenter.getReviewedCommitIdsBlock(existingSummarizeCommentBody) + + allCommitIds = self.commenter.get_all_commit_ids() + highrestReviewedCommitId = '' + if existingCommitIdsBlock: + highrestReviewedCommitId = self.commenter.getHighestReviewedCommitId(allCommitIds, self.commenter.getReviewedCommitIds(existingCommitIdsBlock)) + + # 获取PR + prs = self.giteeApi.fetch_pr() + # head:补丁分支,作者修改代码后提交PR对应的commit + # base:基准分支,接受修改的分支 + # PR:将补丁分支head中的代码合入基准分支base + headSha = None + baseSha = None + if prs.get('head', None): + headSha = prs.get('head', None).get('sha', None) + if prs.get('base', None): + baseSha = prs.get('base', None).get('sha', None) + + if (not highrestReviewedCommitId) or highrestReviewedCommitId == headSha: + logger.info('will review from the base commit: {}'.format(baseSha)) + highrestReviewedCommitId = baseSha + else: + logger.info('will review from commit: {}'.format(highrestReviewedCommitId)) + + # 比较highrestReviewedCommitId及headSha + incrementalDiff = self.giteeApi.compare(highrestReviewedCommitId, headSha) + targetBranchDiff = self.giteeApi.compare(baseSha, headSha) + + incrementalFiles = incrementalDiff.get('files', None) + targetBranchFiles = targetBranchDiff.get('files', None) + if (not incrementalFiles) and (not targetBranchFiles): + logger.warning('skipped: files data is missing') + return + + incrementalFilesNames = [] + for incrementalFile in incrementalFiles: + if incrementalFile.get('filename', None): + incrementalFilesNames.append(incrementalFile.get('filename')) + + files = [] + for targetBranchFile in targetBranchFiles: + if targetBranchFile.get('filename', None) in incrementalFilesNames: + files.append(targetBranchFile) + if len(files) == 0: + logger.warning('skipped: files is null') + return + + filterSelectedFiles = [] + filterIgnoredFiles = [] + for aFile in files: + if self.options.checkPath(aFile.get('filename', None)): + filterSelectedFiles.append(aFile) + else: + logger.info('skip for excluded path: %s'%(aFile.get('filename', None))) + filterIgnoredFiles.append(aFile) + if len(filterSelectedFiles) == 0: + logger.warning('skipped: filterSelectedFiles is null') + return + + # 获取本次pr的所有commitid + commits = [] + if incrementalDiff.get('commits', None): + for commit in incrementalDiff.get('commits', None): + commits.append(commit.get('sha', None)) + + if not commits: + logger.warning('skipped: commits is null') + return + self.commits = commits + + filteredFiles = [] + # 把patch切割成hunk + for aFile in filterSelectedFiles: + if not self.giteeApi.pr_number: + logger('skipped: pr is null') + continue + + fileContent = '' + try: + rawUrl = aFile.get('raw_url', None) + if rawUrl: + fileContent = self.giteeApi.fetch_file_content(rawUrl) + except Exception as e: + logger.warning('failed to get file contents: %s'%(e)) + + fileDiff = aFile.get('patch', '') + diff_lines = fileDiff.splitlines() + + patches = [] + diff_num = 0 + for patch in self.splitPatch(aFile.get('patch', '')): + diff_num += 1 + patchLines = self.patchStartEndLine(patch) + if not patchLines: + continue + hunks = self.parsePatch(patch) + if not hunks: + continue + + hunksStr = '''---new_hunk---\n\'\'\'\n%s\n\'\'\'\n---old_hunk---\n\'\'\'\n%s\n\'\'\''''%(hunks.get('newHunk', None), hunks.get('oldHunk', None)) + comment_diff_line = self.get_patch_diff_line(diff_num, diff_lines) + patches.append([patchLines.get('newHunk', None).get('startLine', None), patchLines.get('newHunk', None).get('endLine', None), hunksStr, comment_diff_line]) + if len(patches) > 0: + filteredFiles.append([aFile.get('filename', None), fileContent, fileDiff, patches]) + + filesAndChanges = filteredFiles + if len(filesAndChanges) == 0: + logger.error('skipped: no files to review') + return + + # 跳过summary,直接review + filesAndChangesReview = filesAndChanges + + for filename, fileContent, _, patches in filesAndChangesReview: + lgtm_num = self.do_review(filename, self.input, patches) + if lgtm_num == len(patches): + self.giteeApi.submit_review(body = "/lgtm", commitId = self.commits[0], filename=filename, line=patches[-1][-1]) + + def get_patch_diff_line(self, diff_num, diff_lines): + line_no = 0 + hit_no = 0 + for line in diff_lines: + if line.startswith('@@'): + if hit_no == diff_num: + return line_no + hit_no += 1 + line_no += 1 + return line_no + + def splitPatch(self, patch): + if not patch: + return [] + results = [] + splitLines = patch.split('\n') + # 去掉最后一行空格 + splitLines = splitLines[:-1] + lastLine = -1 + for iLine in range(len(splitLines)): + # 当前行数据格式是否满足:@@ -0,0 +0,0 @@ + reSplit = re.split('^@@ -(\d+),(\d+) \+(\d+),(\d+) @@', splitLines[iLine]) + if len(reSplit) > 1: + if lastLine == -1: + lastLine = iLine + else: + results.append('\n'.join(splitLines[lastLine: iLine])) + lastLine = iLine + if lastLine != -1: + results.append('\n'.join(splitLines[lastLine:])) + return results + + def patchStartEndLine(self, patch): + reSplit = re.split('^@@ -(\d+),(\d+) \+(\d+),(\d+) @@', patch) + if len(reSplit) > 1: + oldBegin = int(reSplit[1]) + oldDiff = int(reSplit[2]) + newBegin = int(reSplit[3]) + newDiff = int(reSplit[4]) + return {'oldHunk': {'startLine': oldBegin, 'endLine': oldDiff}, 'newHunk': {'startLine': newBegin, 'endLine': newDiff}} + else: + return None + + def parsePatch(self, patch): + hunkInfo = self.patchStartEndLine(patch) + if not hunkInfo: + return + oldHunkLines = [] + newHunkLines = [] + newLine = hunkInfo.get('newHunk', None).get('startLine', None) + # 去除第一行@@ + lines = patch.split('\n') [1:] + # 去除最后一行空格 + if lines[-1] == '': + lines = lines[:-1] + skipStart = 3 + skipEnd = 3 + currentLine = 0 + + # reamovalOnly=True代表只删除内容,没有新增内容 + removalOnly = True + for line in lines: + if line.startswith('+'): + removalOnly = False + break + + for line in lines: + currentLine += 1 + if line.startswith('-'): + oldHunkLines.append(line[1:]) + elif line.startswith('+'): + newHunkLines.append(line[1:]) + newLine += 1 + else: + oldHunkLines.append(line) + if removalOnly or (currentLine > skipStart and currentLine <= len(lines) - skipEnd): + newHunkLines.append(str(newLine) + ': ' + line) + else: + newHunkLines.append(line) + newLine += 1 + return {"oldHunk": '\n'.join(oldHunkLines), "newHunk": '\n'.join(newHunkLines)} + + def do_review(self, filename, input, patches): + logger.info('reviewing: {}'.format(filename)) + ins = copy.deepcopy(input) + ins.filename = filename + lgtm_num = 0 + + tokens = self.bot.get_token_count(self.bot.system_message) + + # 计算有多少个hunkstr可以放入prompt + # 当 prompt 的 token > max_token_length 时, 需要将 patch 切割吗? + patchesToPack = 0 + for _, _, patch, _ in patches: + patchTokens = self.bot.get_token_count(patch) + if tokens + patchTokens > self.bot.max_token_length: + logger.info('only packing {}/{} patches, tokens: {}/{}'.format(patchesToPack, len(patches), tokens, self.bot.max_token_length)) + break + # tokens += patchTokens + patchesToPack += 1 + + + patchesPacked = 0 + for startLine, endLine, patch, comment_line in patches: + if patchesPacked >= patchesToPack: + logger.info('unable to pack more patches into this request, packed: {}, total patches: {}, skipping'.format(patchesPacked, len(patches))) + if self.options.debug: + logger.info('prompt so far: {}'.format(self.prompts.renderReviewFileDiff(ins))) + break + patchesPacked += 1 + commentChain = '' + allChians = self.commenter.getCommentChainsWithinRange(filename, startLine, endLine, self.commenter.COMMENT_REPLY_TAG) + if len(allChians) > 0: + logger.info('Found comment chains: {} for {}'.format(allChians, filename)) + commentChain = allChians + commentChainTokens = self.bot.get_token_count(commentChain) + if tokens + commentChainTokens > self.bot.max_token_length: + commentChain = '' + else: + tokens += commentChainTokens + + ins.patches = patch + if commentChain: + ins.patches += '---comment_chains---\n\'\'\'{}\'\'\'---end_change_section---'.format(commentChain) + if patchesPacked > 0: + messages = self.prompts.renderReviewFileDiff(ins) + res = self.bot.chat(messages) + + if res.status_code != 200: + logger.info('review: nothing obtained from openai') + return '{} (no response)'.format(filename) + + ans = res.json() + if ('LGTM' not in ans): + self.giteeApi.submit_review(body = ans, commitId = self.commits[0], filename=filename, line=comment_line) + + else: + lgtm_num += 1 + return lgtm_num diff --git a/pr_reivew/src/review_code/review_task.py b/pr_reivew/src/review_code/review_task.py new file mode 100644 index 0000000..2e974e4 --- /dev/null +++ b/pr_reivew/src/review_code/review_task.py @@ -0,0 +1,9 @@ + +from review_code.review import CodeReview + + +def review_task(data): + + if data.get('noteable_type', None) == 'PullRequest': + codeReview = CodeReview(data) + codeReview.code_review() diff --git a/prreivew/src/router/router.py b/pr_reivew/src/router/router.py similarity index 55% rename from prreivew/src/router/router.py rename to pr_reivew/src/router/router.py index 86da142..8221e8c 100644 --- a/prreivew/src/router/router.py +++ b/pr_reivew/src/router/router.py @@ -20,15 +20,6 @@ def analyze(): @app.before_request def before_request(): headers = request.headers - # if headers.get("User-Agent") != "Robot-Gitee-Access": - # return "Bad Request: unknown User-Agent Header", 400 - - # if headers.get("X-Gitee-Event") == "": - # return "Bad Request: Missing X-Gitee-Event Header", 400 - - # uuid = headers.get("X-Gitee-Timestamp") - # if uuid == "": - # return "Bad Request: Missing X-Gitee-Timestamp Header", 400 def start_router(): diff --git a/prreivew/src/utils/background_task.py b/pr_reivew/src/utils/background_task.py similarity index 100% rename from prreivew/src/utils/background_task.py rename to pr_reivew/src/utils/background_task.py diff --git a/prreivew/src/utils/utile_tool.py b/pr_reivew/src/utils/utile_tool.py similarity index 100% rename from prreivew/src/utils/utile_tool.py rename to pr_reivew/src/utils/utile_tool.py diff --git a/prreivew/src/gitee/gitee_api.py b/prreivew/src/gitee/gitee_api.py deleted file mode 100644 index aa7866d..0000000 --- a/prreivew/src/gitee/gitee_api.py +++ /dev/null @@ -1,47 +0,0 @@ -import requests -from loguru import logger - - -class GiteeApiCaller: - access_token = "" - gitee_host = "" - - def init_config_attr(access_token, gitee_host): - GiteeApiCaller.access_token = access_token - GiteeApiCaller.gitee_host = gitee_host - - -class PullRequestComments(GiteeApiCaller): - submit_pull_request_comments_url_template = ( - "{host}/api/v5/repos/{owner}/{repo}/pulls/{number}/comments" - ) - - def __init__(self, owner, repo, number, body, commit_id, path, position): - self.owner = owner - self.repo = repo - self.number = number - self.body = body - self.commit_id = commit_id - self.path = path - self.position = position - - def submit_pull_request_comments(self): - url = self.submit_pull_request_comments_url_template.format( - host=self.gitee_host, owner=self.owner, repo=self.repo, number=self.number - ) - - form_data = { - "access_token": self.access_token, - "body": self.body, - "commit_id": self.commit_id, - "path": self.path, - "position": self.position, - } - response = requests.post(url, data=form_data) - - if response.status_code == 201: - logger.info("post to gitee success") - else: - logger.info("post to gitee failed") - logger.info(response.status_code) - logger.info(response.text) diff --git a/prreivew/src/gpt/chat_gpt.py b/prreivew/src/gpt/chat_gpt.py deleted file mode 100644 index d84cf44..0000000 --- a/prreivew/src/gpt/chat_gpt.py +++ /dev/null @@ -1,92 +0,0 @@ -import requests -import tiktoken - -from gpt.gpt import Gpt - - -class ChatGpt(Gpt): - def get_answer(prompt): - url = "{openai_host}/v1/chat/completions".format(openai_host=ChatGpt.host) - - data = { - "model": "gpt-3.5-turbo", - "messages": [ - { - "role": "system", - "content": ( - "您将充当 git 中提交消息的作者。" - "您的任务是在传统git提交中创建清晰且全面的提交消息,详细清晰的解释更改内容。 我将向您发送“git diff --staged”命令的输出,然后您将其转换为提交消息。" - "行长度不得超过 74 个字符。" - "用中文回答。" - "使用如下模板:" - "修改了那个文件\n" - "- 修改细节1\n" - "- 修改细节2\n" - ), - }, - { - "role": "user", - "content": prompt, - } - ], - "temperature": 0.7, - } - - - response = requests.post( - url, json=data, headers={"Authorization": "Bearer " + ChatGpt.Authorization} - ) - - if response.status_code != 200: - print("get answer error") - print(response.status_code) - - pr = response.json() - - return pr["choices"][0]["message"]["content"] - - - - def get_summary(content): - url = "{openai_host}/v1/chat/completions".format(openai_host=ChatGpt.host) - data = { - "model": "gpt-3.5-turbo", - "messages": [ - { - "role": "system", - "content": ( - "您的任务是高度概括总结我给您的输入内容。" - "用中文回答。" - ), - }, - { - "role": "user", - "content": content, - } - ], - "temperature": 0.7, - } - - response = requests.post( - url, json=data, headers={"Authorization": "Bearer " + ChatGpt.Authorization} - ) - - if response.status_code != 200: - print("get answer error") - print(response.status_code) - - pr = response.json() - - return pr["choices"][0]["message"]["content"] - - - - - def num_tokens_from_string(string: str) -> int: - encoding = tiktoken.get_encoding(ChatGpt.encoding_name) - tokens = encoding.encode(string) - num_tokens = len(tokens) - return num_tokens - - def get_max_prompt_length(): - return ChatGpt.max_token_length diff --git a/prreivew/src/gpt/gpt.py b/prreivew/src/gpt/gpt.py deleted file mode 100644 index 6d7cbc4..0000000 --- a/prreivew/src/gpt/gpt.py +++ /dev/null @@ -1,28 +0,0 @@ -from abc import ABCMeta, abstractmethod - - -class Gpt(metaclass=ABCMeta): - use = "" - max_token_length = 0 - encoding_name = "" - host = "" - Authorization = "" - - def init_config_attr(use, max_token_length, encoding_name, host, Authorization): - Gpt.use = use - Gpt.max_token_length = max_token_length - Gpt.encoding_name = encoding_name - Gpt.host = host - Gpt.Authorization = Authorization - - @abstractmethod - def get_answer(prompt): - pass - - @abstractmethod - def num_tokens_from_string(string: str) -> int: - pass - - @abstractmethod - def get_max_prompt_length(): - pass diff --git a/prreivew/src/gpt/gpt_class_factory.py b/prreivew/src/gpt/gpt_class_factory.py deleted file mode 100644 index 5a131c5..0000000 --- a/prreivew/src/gpt/gpt_class_factory.py +++ /dev/null @@ -1,14 +0,0 @@ -from gpt.chat_gpt import ChatGpt -from gpt.gpt import Gpt -from gpt.my_gpt import MyGpt - - -class GptClassFactory: - @staticmethod - def create_class(): - if Gpt.use == "my_gpt": - return MyGpt - elif Gpt.use == "open_ai": - return ChatGpt - else: - raise ValueError("Invalid class name") diff --git a/prreivew/src/gpt/my_gpt.py b/prreivew/src/gpt/my_gpt.py deleted file mode 100644 index dc16cbf..0000000 --- a/prreivew/src/gpt/my_gpt.py +++ /dev/null @@ -1,56 +0,0 @@ -import re -import requests -import tiktoken -from loguru import logger - -from gpt.gpt import Gpt - - -class MyGpt(Gpt): - question = ( - "You are to act as the author of a commit message in git." - "Your mission is to create clean and comprehensive commit messages in the conventional commit convention and explain WHAT were the changes and WHY the changes were done. I'll send you an output of 'git diff --staged' command, and you convert it into a commit message." - "Do not preface the commit with anything." - "Don't add any descriptions to the commit, only commit message." - "Use the present tense. Lines must not be longer than 74 characters." - "Use Chinese to answer." - "The diff is: {diff_content}" - ) - - def get_answer(prompt): - url = "{host}/hcstream".format(host=MyGpt.host) - - data = {"question": MyGpt.question.format(diff_content=prompt), "history": []} - response = requests.post(url, json=data, stream=True) - if response.status_code != 200: - logger.error("get answer error") - logger.error(response.status_code) - return - - data_list = [] - pattern = r'"answer":\s+"([^"]+)"' - for line in response.iter_lines(): - if line: - line_text = line.decode("utf-8") - match = re.search(pattern, line_text) - if match: - result = match.group(1) - data_list.append(result) - - if len(data_list) < 2: - logger.info("no answer") - return - data_list.pop() - - combined_result = "".join(data_list) - - return combined_result - - def num_tokens_from_string(string: str) -> int: - encoding = tiktoken.get_encoding(MyGpt.encoding_name) - tokens = encoding.encode(string) - num_tokens = len(tokens) - return num_tokens - - def get_max_prompt_length(): - return MyGpt.max_token_length - MyGpt.num_tokens_from_string(MyGpt.question) diff --git a/prreivew/src/handle/comment_command.py b/prreivew/src/handle/comment_command.py deleted file mode 100644 index 4d6e08b..0000000 --- a/prreivew/src/handle/comment_command.py +++ /dev/null @@ -1,78 +0,0 @@ -import time -from loguru import logger -import requests -from gitee.gitee_api import PullRequestComments -from gpt.gpt_class_factory import GptClassFactory - -from handle.diff import Diff_Prompt, handle_diff - - -def summary_message(data): - pr = data.get("pull_request", None) - if pr is None: - logger.error("no pull_request") - return - - diff_url = pr.get("diff_url", None) - if diff_url is None: - logger.error("no diff") - return - - diff = requests.get(diff_url) - if diff.status_code != 200: - logger.error("get diff error") - return - - diff.encoding = "utf-8" - diff_text = diff.text - - results = handle_diff(diff_text) - if results is None: - logger.error("can't get prompts") - return - - comment_list = [] - - - for result in results: - answer = GptClassFactory.create_class().get_answer(result) - if answer is None: - continue - comment_list.append(answer) - - time.sleep(10) - - comment = "" - - for single_comment in comment_list: - single_comment = str(single_comment).strip() - if single_comment == "": - continue - comment += single_comment + "\n\n" - - # summarize = GptClassFactory.create_class().get_summary(comment) - # comment += summarize - - project = data.get("project", None) - if project is None: - logger.error("no project") - return - - owner = project.get("namespace", None) - if owner is None: - logger.error("no owner") - return - - repo = project.get("path", None) - if repo is None: - logger.error("no repo") - return - - number = pr.get("number", None) - if number is None: - logger.error("no number") - return - - pr = PullRequestComments(owner, repo, number, comment, None, None, None) - - pr.submit_pull_request_comments() diff --git a/prreivew/src/handle/diff.py b/prreivew/src/handle/diff.py deleted file mode 100644 index da509bf..0000000 --- a/prreivew/src/handle/diff.py +++ /dev/null @@ -1,93 +0,0 @@ -import re -from loguru import logger -from gpt.gpt_class_factory import GptClassFactory - -from utils.utile_tool import split_string - - -class Diff_Prompt: - max_template_token_length = 1024 - - def init_config_attr(): - Diff_Prompt.max_template_token_length = ( - GptClassFactory.create_class().get_max_prompt_length() - ) - - -def diff_content_out_of_length(diff_content): - return ( - GptClassFactory.create_class().num_tokens_from_string(diff_content) - >= Diff_Prompt.max_template_token_length - ) - - -def handle_diff(diff): - prompt_list = [] - - if diff_content_out_of_length(diff): - diff_array = cut_diff_by_file_diffs(diff) - - for single_diff in diff_array: - if diff_content_out_of_length(single_diff): - result_array = cut_single_diff(single_diff) - prompt_list.extend(result_array) - else: - prompt_list.append(single_diff) - - return prompt_list - - -def cut_diff_by_file_diffs(diff): - separator = "diff --git" - - diff_array = diff.split(separator) - diff_array.pop(0) - - diff_array = [separator + diff for diff in diff_array] - - return diff_array - - -def cut_one_diff_by_change(diff): - separator = "@@ -" - - change_array = diff.split(separator) - diff_title = change_array.pop(0) - - change_array = [separator + diff for diff in change_array] - - change_array[0] = diff_title + change_array[0] - - return change_array - - -def cut_single_diff(diff): - result_array = [] - change_array = cut_one_diff_by_change(diff) - - index = 0 - while index < len(change_array): - if diff_content_out_of_length(change_array[index]): - result_array.extend(cut_change(change_array[index])) - index += 1 - continue - - merge_change = change_array[index] - for i in range(index + 1, len(change_array)): - if diff_content_out_of_length(merge_change + change_array[i]): - index = i - break - else: - index = i + 1 - merge_change += change_array[i] - - result_array.append(merge_change) - - if index >= len(change_array) - 1: - break - - return result_array - - -def cut_change(change): - return split_string(change, Diff_Prompt.max_template_token_length) diff --git a/prreivew/src/reviewCode/bot.py b/prreivew/src/reviewCode/bot.py deleted file mode 100644 index 7a4f47c..0000000 --- a/prreivew/src/reviewCode/bot.py +++ /dev/null @@ -1,14 +0,0 @@ -import requests -from loguru import logger - -class Bot: - def __init__(self): - self.answer = '' - - - def chat(self, prompt): - url = 'https://modelapi.osinfra.cn/hcstream' - data = {'question': prompt, 'history': []} - response = requests.post(url, json = data, stream = True) - return response - \ No newline at end of file diff --git a/prreivew/src/reviewCode/main.py b/prreivew/src/reviewCode/main.py deleted file mode 100644 index c4afc11..0000000 --- a/prreivew/src/reviewCode/main.py +++ /dev/null @@ -1,15 +0,0 @@ -from reviewCode.review import codeReview -from reviewCode.options import Options -from reviewCode.prompts import Prompts -from reviewCode.giteeApi import GiteeApi -from reviewCode.bot import Bot - -def reviewMain(data): - # options = Options() - # prompts = Prompts() - # bot = Bot() - # giteeApi = GiteeApi() - - - if data.get('noteable_type', None) == 'PullRequest': - codeReview(data) diff --git a/prreivew/src/reviewCode/review.py b/prreivew/src/reviewCode/review.py deleted file mode 100644 index 6d620d5..0000000 --- a/prreivew/src/reviewCode/review.py +++ /dev/null @@ -1,316 +0,0 @@ -from loguru import logger -import json -import requests -# from handle.diff import Diff_prompt, handle_diff -from gpt.gpt import Gpt -import time -# from gitee.gitee_api import pull_request_comments -from diff import diff -from git.repo import Repo -import base64 -import re -import copy -from reviewCode.prompts import Prompts -from reviewCode.options import Options -from reviewCode.prompts import Prompts -from reviewCode.giteeApi import GiteeApi -from reviewCode.bot import Bot -from reviewCode.input import Input -from reviewCode.commenter import Commenter -from reviewCode.tokenizer import getTokenCount - -IGNORE_KEYWORD = '@PRReviewAI: ignore' - -def codeReview(data): - options = Options() - prompts = Prompts() - bot = Bot() - giteeApi = GiteeApi(data) - input = Input(data) - commenter = Commenter(giteeApi) - # 如果body中包含IGNORE_KEYWORD,跳过本次reivew - if (IGNORE_KEYWORD in input.description): - logger.info("skipped: body contains ignore_keyword") - return - - # 已经在pr中出现的comment - existingSummarizeComment = commenter.findCommentWithTag(Commenter.SUMMARIZE_TAG, giteeApi.prNumber) - existingCommitIdsBlock = '' - existingSummarizeCommentBody = '' - if existingSummarizeComment: - existingSummarizeCommentBody = existingSummarizeComment.get('body', None) - input.rawSummary = commenter.getRawSummary(existingSummarizeCommentBody) - input.shortSummary = commenter.getShortSummary(existingSummarizeCommentBody) - existingCommitIdsBlock = commenter.getReviewedCommitIdsBlock(existingSummarizeCommentBody) - - allCommitIds = commenter.getAllCommitIds() - highrestReviewedCommitId = '' - if existingCommitIdsBlock: - highrestReviewedCommitId = commenter.getHighestReviewedCommitId(allCommitIds, commenter.getReviewedCommitIds(existingCommitIdsBlock)) - - # 获取PR - prs = giteeApi.fetchPR() - # head:补丁分支,作者修改代码后提交PR对应的commit - # base:基准分支,接受修改的分支 - # PR:将补丁分支head中的代码合入基准分支base - headSha = None - baseSha = None - if prs.get('head', None): - headSha = prs.get('head', None).get('sha', None) - if prs.get('base', None): - baseSha = prs.get('base', None).get('sha', None) - - if (not highrestReviewedCommitId) or highrestReviewedCommitId == headSha: - logger.info('will review from the base commit: {}'.format(baseSha)) - highrestReviewedCommitId = baseSha - else: - logger.info('will review from commit: {}'.format(highrestReviewedCommitId)) - - # 比较highrestReviewedCommitId及headSha - incrementalDiff = giteeApi.compare(highrestReviewedCommitId, headSha) - targetBranchDiff = giteeApi.compare(baseSha, headSha) - - incrementalFiles = incrementalDiff.get('files', None) - targetBranchFiles = targetBranchDiff.get('files', None) - if (not incrementalFiles) and (not targetBranchFiles): - logger.warning('skipped: files data is missing') - return - - incrementalFilesNames = [] - for incrementalFile in incrementalFiles: - if incrementalFile.get('filename', None): - incrementalFilesNames.append(incrementalFile.get('filename')) - files = [] - for targetBranchFile in targetBranchFiles: - if targetBranchFile.get('filename', None) in incrementalFilesNames: - files.append(targetBranchFile) - if len(files) == 0: - logger.warning('skipped: files is null') - return - - filterSelectedFiles = [] - filterIgnoredFiles = [] - for aFile in files: - if options.checkPath(aFile.get('filename', None)): - filterSelectedFiles.append(aFile) - else: - logger.info('skip for excluded path: %s'%(aFile.get('filename', None))) - filterIgnoredFiles.append(aFile) - if len(filterSelectedFiles) == 0: - logger.warning('skipped: filterSelectedFiles is null') - return - - # 获取本次pr的所有commitid - commits = [] - if incrementalDiff.get('commits', None): - for commit in incrementalDiff.get('commits', None): - commits.append(commit.get('sha', None)) - - if not commits: - logger.warning('skipped: commits is null') - return - - filteredFiles = [] - # 把patch切割成hunk - for aFile in filterSelectedFiles: - if not giteeApi.prNumber: - logger('skipped: pr is null') - continue - - fileContent = '' - try: - # contentUrl = aFile.get('content_url', None) - # contents = json.loads(requests.get(contentUrl).content.decode('utf-8')) - # if contents and contents.get('type', None) == 'file' and contents.get('content', None): - # fileContent = base64.b64decode(contents.get('content', None)).decode('utf-8') - # content_url没有结果,换成raw_url - rawUrl = aFile.get('raw_url', None) - if rawUrl: - fileContent = giteeApi.fetchFileContent(rawUrl) - except Exception as e: - logger.warning('failed to get file contents: %s'%(e)) - - fileDiff = aFile.get('patch', '') - patches = [] - for patch in splitPatch(aFile.get('patch', '')): - patchLines = patchStartEndLine(patch) - if not patchLines: - continue - hunks = parsePatch(patch) - if not hunks: - continue - hunksStr = '''---new_hunk---\n\'\'\'\n%s\n\'\'\'\n---old_hunk---\n\'\'\'\n%s\n\'\'\''''%(hunks.get('newHunk', None), hunks.get('oldHunk', None)) - patches.append([patchLines.get('newHunk', None).get('startLine', None), patchLines.get('newHunk', None).get('endLine', None), hunksStr]) - if len(patches) > 0: - filteredFiles.append([aFile.get('filename', None), fileContent, fileDiff, patches]) - - filesAndChanges = filteredFiles - if len(filesAndChanges) == 0: - logger.error('skipped: no files to review') - return - - statusMsg = { - 'highrestReviewedCommitId': highrestReviewedCommitId, - 'headCommitId': allCommitIds[0], - 'filesAndChanges': filesAndChanges, - 'filterIgnoredFiles': filterIgnoredFiles, - } - - # 跳过summary,直接review - filesAndChangesReview = filesAndChanges - reviewsSkipped = [] - reviewsFailed = [] - skippedFiles = [] - reviewContent = [] - - def doReview(filename, fileContent, patches): - logger.info('reviewing: {}'.format(filename)) - ins = copy.deepcopy(input) - ins.filename = filename - ins.fileDiff = fileDiff - - tokens = getTokenCount(prompts.renderReviewFileDiff(ins)) - - # 计算有多少个hunkstr可以放入prompt - patchesToPack = 0 - for _, _, patch in patches: - patchTokens = getTokenCount(patch) - if tokens + patchTokens > options.TokenLimits: - logger.info('only packing {}/{} patches, tokens: {}/{}'.format(patchesToPack, len(patches), tokens, options.requestTokens)) - break - tokens += patchTokens - patchesToPack += 1 - - patchesPacked = 0 - for startLine, endLine, patch in patches: - if patchesPacked >= patchesToPack: - logger.info('unable to pack more patches into this request, packed: {}, total patches: {}, skipping'.format(patchesPacked, len(patches))) - if options.debug: - logger.info('prompt so far: {}'.format(prompts.renderReviewFileDiff(ins))) - break - patchesPacked += 1 - commentChain = '' - allChians = commenter.getCommentChainsWithinRange(giteeApi.prNumber, filename, startLine, endLine, commenter.COMMENT_REPLY_TAG) - if len(allChians) > 0: - logger.info('Found comment chains: {} for {}'.format(allChians, filename)) - commentChain = allChians - commentChainTokens = getTokenCount(commentChain) - if tokens + commentChainTokens > options.TokenLimits: - commentChain = '' - else: - tokens += commentChainTokens - - ins.patches += patch - if commentChain: - ins.patches += '---comment_chains---\n\'\'\'{}\'\'\'---end_change_section---'.format(commentChain) - if patchesPacked > 0: - print(prompts.renderReviewFileDiff(ins)) - exit() - res = bot.chat(prompts.renderReviewFileDiff(ins)) - - if res.status_code != 200: - logger.info('review: nothing obtained from openai') - reviewsFailed.append('{} (no response)'.format(filename)) - return - ans = parseReview(res) - print('-\n'*5) - print(ans) - reviewContent.append(ans) - - for filename, fileContent, _, patches in filesAndChangesReview: - doReview(filename, fileContent, patches) - - # 添加headSha到COMMIT_ID_TAG中 - summarizeComment = '' - summarizeComment += commenter.addReviewedCommitId(existingCommitIdsBlock, headSha) - - - giteeApi.submitReview(body = reviewContent, commitId = commits[-1]) - giteeApi.submitReview(body = summarizeComment, commitId = commits[-1]) - -def parseReview(response): - data_list = [] - pattern = r'"answer":\s+"([^"]+)"' - for line in response.iter_lines(): - if line: - line_text = line.decode("utf-8") - match = re.search(pattern, line_text) - if match: - result = match.group(1) - data_list.append(result) - - if len(data_list) < 2: - logger.info("no answer") - return - data_list.pop() - combined_result = "".join(data_list) - return combined_result - -def splitPatch(patch): - if not patch: - return [] - results = [] - splitLines = patch.split('\n') - # 去掉最后两行,一行是空格,一行是 \ No newline at end of file - splitLines = splitLines[:-2] - lastLine = -1 - for iLine in range(len(splitLines)): - # 当前行数据格式是否满足:@@ -0,0 +0,0 @@ - reSplit = re.split('^@@ -(\d+),(\d+) \+(\d+),(\d+) @@', splitLines[iLine]) - if len(reSplit) > 1: - if lastLine == -1: - lastLine = iLine - else: - results.append('\n'.join(splitLines[lastLine: iLine])) - lastLine = iLine - if lastLine != -1: - results.append('\n'.join(splitLines[lastLine:])) - return results - -def patchStartEndLine(patch): - reSplit = re.split('^@@ -(\d+),(\d+) \+(\d+),(\d+) @@', patch) - if len(reSplit) > 1: - oldBegin = int(reSplit[1]) - oldDiff = int(reSplit[2]) - newBegin = int(reSplit[3]) - newDiff = int(reSplit[4]) - return {'oldHunk': {'startLine': oldBegin, 'endLine': oldDiff}, 'newHunk': {'startLine': newBegin, 'endLine': newDiff}} - else: - return None - -def parsePatch(patch): - hunkInfo = patchStartEndLine(patch) - if not hunkInfo: - return - oldHunkLines = [] - newHunkLines = [] - newLine = hunkInfo.get('newHunk', None).get('startLine', None) - lines = patch.split('\n') [1:] # 去除第一行@@ - if lines[-1] == '': # 去除最后一行空格 - lines = lines[:-1] - skipStart = 3 - skipEnd = 3 - currentLine = 0 - - # reamovalOnly=True代表只删除内容,没有新增内容 - removalOnly = True - for line in lines: - if line.startswith('+'): - removalOnly = False - break - - for line in lines: - currentLine += 1 - if line.startswith('-'): - oldHunkLines.append(line[1:]) - elif line.startswith('+'): - newHunkLines.append(line[1:]) - newLine += 1 - else: - oldHunkLines.append(line) - if removalOnly or (currentLine > skipStart and currentLine <= len(lines) - skipEnd): - newHunkLines.append(str(newLine) + ': ' + line) - else: - newHunkLines.append(line) - newLine += 1 - return {"oldHunk": '\n'.join(oldHunkLines), "newHunk": '\n'.join(newHunkLines)} \ No newline at end of file diff --git a/prreivew/src/reviewCode/tokenizer.py b/prreivew/src/reviewCode/tokenizer.py deleted file mode 100644 index 3823f06..0000000 --- a/prreivew/src/reviewCode/tokenizer.py +++ /dev/null @@ -1,9 +0,0 @@ -import tiktoken - - -def getTokenCount(strIn): - encoding = tiktoken.get_encoding('cl100k_base') - tokens = encoding.encode(strIn) - return len(tokens) - - -- Gitee