From 150875f196c3bfe0c3b7ee964cb13b00b5658b8a Mon Sep 17 00:00:00 2001 From: kaede10 Date: Sun, 4 Feb 2024 17:32:04 +0800 Subject: [PATCH 1/2] pr-review adapt to the github platform --- plugins/src/backend/drivers/driver_adaptor.py | 4 +- .../src/backend/drivers/driver_interface.py | 4 +- plugins/src/backend/drivers/gitee.py | 2 + plugins/src/backend/drivers/github.py | 218 ++++++++++++++++-- plugins/src/common/configs/project_args.py | 26 +-- plugins/src/plugins/pr_review/pr_review.py | 47 ++-- plugins/src/task/manage.py | 6 + 7 files changed, 250 insertions(+), 57 deletions(-) diff --git a/plugins/src/backend/drivers/driver_adaptor.py b/plugins/src/backend/drivers/driver_adaptor.py index 854f0a0..db324a3 100644 --- a/plugins/src/backend/drivers/driver_adaptor.py +++ b/plugins/src/backend/drivers/driver_adaptor.py @@ -11,7 +11,6 @@ class BaseDriver(ABC): self.driver_url = driver_args.driver_base_url self.owner = project_args.owner self.repo = project_args.repo - self.pr_number = project_args.pull_request_number self.rules = driver_args.path_rules self.rules = self._init_rules() @@ -62,6 +61,9 @@ class BaseDriver(ABC): pass def check_path(self, path): + ''' + True: file needs to be review + ''' if len(self.rules) == 0: return True included = False diff --git a/plugins/src/backend/drivers/driver_interface.py b/plugins/src/backend/drivers/driver_interface.py index 3ef178a..0da1af7 100644 --- a/plugins/src/backend/drivers/driver_interface.py +++ b/plugins/src/backend/drivers/driver_interface.py @@ -4,6 +4,7 @@ from abc import abstractmethod from common.configs.driver_args import DriverArguments from backend.drivers.gitee import GiteeDriver +from backend.drivers.github import GithubDriver from backend.drivers.driver_adaptor import BaseDriver class DriverInterface(): @@ -14,7 +15,8 @@ class DriverInterface(): @classmethod def create_driver(cls, project_args, driver_args): driver_mapping = { - 'gitee': GiteeDriver + 'gitee': GiteeDriver, + 'github': GithubDriver } driver_type = driver_args.driver_type if driver_type in driver_mapping: diff --git a/plugins/src/backend/drivers/gitee.py b/plugins/src/backend/drivers/gitee.py index 5c4ed6a..80ffd9d 100644 --- a/plugins/src/backend/drivers/gitee.py +++ b/plugins/src/backend/drivers/gitee.py @@ -9,6 +9,8 @@ class GiteeDriver(BaseDriver): def __init__(self, project_args, driver_args): super().__init__(project_args, driver_args) self.pr_diff = project_args.pull_request.get("diff_url", None) + self.pull_request_state = project_args.pull_request.get("state", None) + self.pr_number = project_args.pull_request.get("number", None) def list_comments(self): page = 1 diff --git a/plugins/src/backend/drivers/github.py b/plugins/src/backend/drivers/github.py index 1e92b5c..31d78fb 100644 --- a/plugins/src/backend/drivers/github.py +++ b/plugins/src/backend/drivers/github.py @@ -1,4 +1,19 @@ - +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Copyright 2020 The community Authors. +# A-Tune is licensed under the Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +# PURPOSE. +# See the Mulan PSL v2 for more details. +# Create: 2024-02 +# +import json +import requests from loguru import logger from backend.drivers.driver_adaptor import BaseDriver @@ -6,35 +21,200 @@ from backend.drivers.driver_adaptor import BaseDriver class GithubDriver(BaseDriver): def __init__(self, project_args, driver_args): - self.access_token = driver_args.driver_token - self.base_url = driver_args.driver_base_url - # TODO Added logic + super().__init__(project_args, driver_args) + self.pr_diff = project_args.issue.get('pull_request').get("diff_url", None) + self.pull_request_state = project_args.issue.get("state", None) + self.pr_number = project_args.issue.get("number", None) + + self.token_list = self.access_token.split(',') + self.used_tokens = [] + self.headers = { + 'Content-Type': 'application/json', + 'Authorization': 'Bearer ' + self.token_list[0], + 'Accept': 'application/vnd.github+json' + } + self.session = requests.Session() + self.retry_cnt = 0 + self.retry_times = 5 + def list_comments(self): - all_comments = [] - # TODO Added list comment - return all_comments + ''' + https://api.github.com/repos/OWNER/REPO/pulls/PULL_NUMBER/comments + ''' + url = f'{self.driver_url}/{self.owner}/{self.repo}/pulls/{self.pr_number}/comments' + params = { + 'per_page': 100, + 'direction': 'desc' + } + response = [] + self.get_data(url=url, params=params, current_page=1, datas=response) + return response def get_all_commit_ids_by_pr(self): - # TODO Added - return + ''' + https://api.github.com/repos/OWNER/REPO/pulls/PULL_NUMBER/commits + ''' + url = f'{self.driver_url}/{self.owner}/{self.repo}/pulls/{self.pr_number}/commits' + params = { + 'per_page': 100, + 'direction': 'desc' + } + response = [] + self.get_data(url=url, params=params, current_page=1, datas=response) + return response + + def get_all_commit_ids(self): + ''' + https://api.github.com/repos/OWNER/REPO/pulls/PULL_NUMBER/commits + ''' + url = f'{self.driver_url}/{self.owner}/{self.repo}/pulls/{self.pr_number}/commits' + params = { + 'per_page': 100, + 'direction': 'desc' + } + response = [] + self.get_data(url=url, params=params, current_page=1, datas=response) + return response + + def get_driver_type(self): + pass def list_review_comments(self): - # TODO Added - return + url = f'{self.driver_url}/{self.owner}/{self.repo}/pulls/{self.pr_number}/comments' + params = { + 'per_page': 100, + 'direction': 'desc' + } + response = [] + self.get_data(url=url, params=params, current_page=1, datas=response) + return response def submit_comment_to_pr(self, body, commitId, filename, line): - # TODO Added + ''' + https://docs.github.com/en/rest/pulls/comments?apiVersion=2022-11-28#create-a-review-comment-for-a-pull-request + data = { + owner: 'OWNER', + repo: 'REPO', + pull_number: 'PULL_NUMBER', + body: 'Great stuff!', + commit_id: '6dcb09b5b57875f334f61aebed695e2e4193db5e', + path: 'file1.txt', + start_line: 1, + start_side: 'RIGHT', + line: 2, + side: 'RIGHT', + headers: { + 'X-GitHub-Api-Version': '2022-11-28' + } + } + ''' + url = f'{self.driver_url}/{self.owner}/{self.repo}/pulls/{self.pr_number}/comments' + data = { + 'body': body, + 'commit_id': commitId, + "path": filename, + "line": line + } + res = self.api_request(url=url, params=json.dumps(data), method='POST') + if res.status_code != 201: + logger.error(f'post to github failed: {filename}') + logger.error(res.text) + logger.error(res.status_code) + else: + logger.info(f'post to github succeed: {filename}') return + def compare(self, base, head): + url = f'{self.driver_url}/{self.owner}/{self.repo}/compare/{base}...{head}' + response = self.api_request(url=url, method='GET') + return response.json() + def fetch_pr(self): - # TODO Added - return + url = f'{self.driver_url}/{self.owner}/{self.repo}/pulls/{self.pr_number}' + params = { + 'per_page': 100, + 'direction': 'desc' + } + response = self.api_request(url=url) + return response.json() def fetch_file_content(self, rawUrl): - # TODO Added - return + response = self.api_request(url=rawUrl) + return response.text + + def get_pr_diff(self): + response = self.api_request(url=self.pr_diff) + return response.text + + def get_data(self, url, params, current_page, datas): + logger.info('****** Data page: %i ******' % current_page) + params['page'] = current_page + req = self.http_req(url=url, params=params) + + if req.status_code != 200: + if req.headers.get('X-RateLimit-Used') is not None and req.headers.get( + 'X-RateLimit-Limit') is not None and int(req.headers.get('X-RateLimit-Used')) >= ( + int(req.headers.get('X-RateLimit-Limit')) - 1): + logger.info('Limit exceeded, API: %s, req: %s' % (req.url, req.text)) + self.change_token() + self.get_data(url, params=params, current_page=current_page, datas=datas) + else: + logger.info('Forbidden, API: %s, req: %s' % (req.url, req.text)) + + else: + logger.info('Get success, API: %s' % req.url) + js = req.json() + if type(js) == dict: + datas.append(js) + else: + datas.extend(req.json()) + + if 'next' in req.links: + url_next = req.links['next']['url'] + current_page += 1 + self.get_data(url_next, params=params, current_page=current_page, datas=datas) + + def api_request(self, url, params=None, method=None): + try: + response = self.http_req(url=url, params=params, method=method) + if not (response.status_code == 200 or response.status_code == 201) and self.retry_cnt < self.retry_times: + self.retry_cnt += 1 + logger.info('Http error, API: %s, req: %s, retry: %d' % (response.url, 'response.text', self.retry_cnt)) + self.change_token() + response = self.api_request(url, params, method) + except requests.exceptions.RequestException as e: + while self.retry_cnt < self.retry_times: + try: + self.retry_cnt += 1 + logger.info('Retry ' + str(self.retry_cnt) + ' times: ' + url) + return self.api_request(url, params, method) + finally: + pass + except Exception as e: + raise e + else: + self.retry_cnt = 0 + return response + + def http_req(self, url, params=None, method=None, headers=None): + if headers is None: + headers = self.headers + + if method == 'GET' or not method: + response = self.session.get(url, params=params, headers=headers, timeout=60) + else: + response = self.session.post(url, data=params, headers=headers) + return response + + def change_token(self): + logger.info('Change token') + diff = list(set(self.token_list).difference(set(self.used_tokens))) + if len(diff) == 0: + token = self.used_tokens[0] + self.used_tokens = [] + else: + token = diff[0] + self.used_tokens.append(token) + self.headers["Authorization"] = token - def get_pr_diff(self, diff_url): - # TODO Added - return \ No newline at end of file diff --git a/plugins/src/common/configs/project_args.py b/plugins/src/common/configs/project_args.py index 1da25a4..733723e 100644 --- a/plugins/src/common/configs/project_args.py +++ b/plugins/src/common/configs/project_args.py @@ -11,23 +11,20 @@ import functools class ProjectArguments: action: str pull_request: dict - pull_request_state: str - pull_request_diff: str - pull_request_number: str comment: str - noteable_type: str owner: str repo: str + issue: dict @staticmethod def validate_arguments(structured_data): + ''' + gitee: action -> comment + github: action -> created + ''' validation_rules = { - "action": lambda value: isinstance(value, str) and value != "comment", - "pull_request": lambda value: isinstance(value, dict) and value is None, - "pull_request_state": lambda value: isinstance(value, str) and value != "open", - "pull_request_number": lambda value: isinstance(value, str) and value is None, + "action": lambda value: isinstance(value, str) and value != "comment" and value != "created", "comment": lambda value: isinstance(value, str) and value is None, - "noteable_type": lambda value: isinstance(value, str) and value != "PullRequest", "owner": lambda value: isinstance(value, str) and value is None, "repo": lambda value: isinstance(value, str) and value is None } @@ -47,16 +44,13 @@ class ProjectArguments: try: structured_data = {} structured_data["action"] = data.get("action", None) - structured_data["pull_request"] = data.get("pull_request", None) - structured_data["pull_request_state"] = structured_data["pull_request"].get("state", None) - structured_data["pull_request_diff"] = structured_data["pull_request"].get("state", None) - structured_data["pull_request_number"] = structured_data["pull_request"].get("number", None) + structured_data["pull_request"] = data.get("pull_request", {}) structured_data["comment"] = data.get("comment", None) - structured_data["noteable_type"] = data.get("noteable_type", None) structured_data["repo"] = data.get("repository", None).get("name", None) structured_data["owner"] = data.get("repository", None).get("owner", None).get("login", None) - except: - pass + structured_data["issue"] = data.get("issue", None) + except Exception as e: + logger.error(e) return structured_data \ No newline at end of file diff --git a/plugins/src/plugins/pr_review/pr_review.py b/plugins/src/plugins/pr_review/pr_review.py index d1bbe5c..32f41fe 100644 --- a/plugins/src/plugins/pr_review/pr_review.py +++ b/plugins/src/plugins/pr_review/pr_review.py @@ -227,12 +227,12 @@ class CodeReviewPlugin(BasePlugin): file_comment_line[a_file.get('filename')] = self._get_diff_new_line_dic(file_patches=file_diff) patches = [] diff_num = 0 - for patch in self._split_patch(a_file.get('patch', '')): + for patch in self._split_patch(file_diff): diff_num += 1 patch_lines = self._patch_start_end_line(patch) if not patch_lines: continue - hunks = self._parse_patch(patch) + hunks = self._parse_patch(patch, patch_lines) if not hunks: continue @@ -284,12 +284,12 @@ class CodeReviewPlugin(BasePlugin): def _code_review(self, filtered_files, file_comment_line): hunk_answers = [] - for filename, _, patches in filtered_files: - review_answers =self._do_review(filename, patches, file_comment_line[filename]) - for review_answer in review_answers: - hunk_answer = self.parse_result(review_answer) - if hunk_answer: - hunk_answers.append(hunk_answer) + # for filename, _, patches in filtered_files: + # review_answers =self._do_review(filename, patches, file_comment_line[filename]) + # for review_answer in review_answers: + # hunk_answer = self.parse_result(review_answer) + # if hunk_answer: + # hunk_answers.append(hunk_answer) return hunk_answers def _split_patch(self, patch): @@ -301,33 +301,40 @@ class CodeReviewPlugin(BasePlugin): last_line = -1 for a_line in range(len(split_lines)): # whether current line matches format: @@ -0,0 +0,0 @@ - re_split = re.split('^@@ -(\d+),(\d+) \+(\d+),(\d+) @@', split_lines[a_line]) + re_split = re.split('^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@', split_lines[a_line]) if len(re_split) > 1: if last_line == -1: last_line = a_line else: results.append('\n'.join(split_lines[last_line: a_line])) last_line = a_line - if last_line != -1: - results.append('\n'.join(split_lines[last_line:])) - return results + if last_line != -1: + results.append('\n'.join(split_lines[last_line:])) + return results def _patch_start_end_line(self, patch): - re_split = re.split('^@@ -(\d+),(\d+) \+(\d+),(\d+) @@', patch) + ''' + '^': match the beginning of the line. + '@@': match parts starting with '@@'. + '-': match '-'. + '(\d+)': the first group, indicating the starting line number before the change. + '(?:,(\d+))?': optional group, indicating the range of rows before the change (optional). + '\+': match '+'. + '(\d+)': the second group, indicating the starting line number after the change. + '(?:,(\d+))?': optional group, indicating the range of changed rows (optional). + '@@': match the part ending with '@@'. + ''' + re_split = re.split('^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@', patch) if len(re_split) > 1: old_begin = int(re_split[1]) - old_diff = int(re_split[2]) + old_diff = int(re_split[2]) if re_split[2] else 0 new_begin = int(re_split[3]) - new_diff = int(re_split[4]) + new_diff = int(re_split[4]) if re_split[4] else 0 return {'oldHunk': {'startLine': old_begin, 'endLine': old_diff}, 'newHunk': {'startLine': new_begin, 'endLine': new_diff}} else: return None - def _parse_patch(self, patch): - hunk_info = self._patch_start_end_line(patch) - if not hunk_info: - return - + def _parse_patch(self, patch, hunk_info): old_hunk_lines = [] new_hunk_lines = [] diff --git a/plugins/src/task/manage.py b/plugins/src/task/manage.py index 7a69878..8f806fe 100644 --- a/plugins/src/task/manage.py +++ b/plugins/src/task/manage.py @@ -21,6 +21,12 @@ def init_plugin(data): logger.error("Failed to get webhook arguments.") return driver_inter = DriverInterface.create_driver(project_args, driver_args) + if driver_inter.pull_request_state != 'open': + logger.error("Skip: pr state is open.") + return + if not driver_inter.pr_diff: + logger.error("No diff to review.") + return plugin_inter = PluginInterface.create_plugin( gen_args, project_args, driver_inter, model_inter) return plugin_inter -- Gitee From 3769bd0b764238925f29274a936972cee164686d Mon Sep 17 00:00:00 2001 From: kaede10 Date: Sun, 4 Feb 2024 17:36:37 +0800 Subject: [PATCH 2/2] update --- plugins/src/plugins/pr_review/pr_review.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/plugins/src/plugins/pr_review/pr_review.py b/plugins/src/plugins/pr_review/pr_review.py index 32f41fe..3294c24 100644 --- a/plugins/src/plugins/pr_review/pr_review.py +++ b/plugins/src/plugins/pr_review/pr_review.py @@ -284,12 +284,12 @@ class CodeReviewPlugin(BasePlugin): def _code_review(self, filtered_files, file_comment_line): hunk_answers = [] - # for filename, _, patches in filtered_files: - # review_answers =self._do_review(filename, patches, file_comment_line[filename]) - # for review_answer in review_answers: - # hunk_answer = self.parse_result(review_answer) - # if hunk_answer: - # hunk_answers.append(hunk_answer) + for filename, _, patches in filtered_files: + review_answers =self._do_review(filename, patches, file_comment_line[filename]) + for review_answer in review_answers: + hunk_answer = self.parse_result(review_answer) + if hunk_answer: + hunk_answers.append(hunk_answer) return hunk_answers def _split_patch(self, patch): -- Gitee