diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..160f6569da419fdede9aa8e3c556d0f28a425438 --- /dev/null +++ b/.gitignore @@ -0,0 +1,36 @@ +# 忽略Python字节码文件和缓存文件 +__pycache__/ +*.pyc +*.pyo +*.pyd + +# 忽略编辑器生成的文件和目录 +.idea/ +.vscode/ + +# 忽略由开发环境或工具生成的其他文件和目录 +venv/ +env/ +dist/ +build/ +*.egg-info/ + +# 忽略日志文件和临时文件 +*.log +*.tmp + +# 忽略敏感信息和配置文件 +config.py +secret.txt + +# 忽略文档和报告文件 +docs/ +reports/ + +# 忽略测试相关的文件和目录 +tests/ +test_*.py + +# 忽略其他自定义的文件和目录 +custom_directory/ +custom_file.txt diff --git a/pr-message/Dockerfile b/pr-message/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..5751bbfcd53d5eefa3f4c34d51a1883233f0f98d --- /dev/null +++ b/pr-message/Dockerfile @@ -0,0 +1,36 @@ +FROM openeuler/openeuler:22.03 + +RUN groupadd -g 1001 pr \ + && useradd -u 1001 -g pr -s /bin/bash -m pr + +RUN cd /home/pr + +RUN yum update -y \ + && yum install -y make gcc zlib-devel openssl-devel bzip2-devel ncurses-devel gdbm-devel readline-devel sqlite-devel libffi-devel tk-devel xz-devel \ + && yum install -y openssl-devel openssl \ + && yum install -y wget + +RUN wget https://www.python.org/ftp/python/3.11.0/Python-3.11.0.tgz \ + && tar -xzf Python-3.11.0.tgz \ + && yum install readline-devel \ + && cd Python-3.11.0 \ + && ./configure --prefix=/home/pr/python --with-ssl \ + && make \ + && make install + +ENV PATH="/home/pr/python/bin:${PATH}" + + +RUN python3 -V + +WORKDIR /home/pr/pr-message + +COPY . . 
@app.route("/hook/analyze", methods=["POST"])
def analyze():
    """Receive a webhook POST and hand its JSON payload to ``assgin_task``.

    Returns:
        ``"Processing completed"`` once the payload has been dispatched, or a
        400 response when the request body is not a JSON object.
    """
    # silent=True yields None instead of raising, so every kind of malformed
    # body is handled by the single check below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        # assgin_task reads the payload with data.get(...); a list, string or
        # null body would otherwise fail with AttributeError.
        return "Invalid payload: expected a JSON object", 400

    assgin_task(data)

    return "Processing completed"


def start_router(host="0.0.0.0", port=8080, debug=False):
    """Start Flask's built-in server for the webhook application.

    Args:
        host: Interface to bind; the default listens on all interfaces.
        port: TCP port to listen on.
        debug: Enable Flask debug mode. Off by default because the
            interactive debugger can execute arbitrary Python code and this
            server is reachable from other hosts when bound to 0.0.0.0.
    """
    app.run(host, debug=debug, port=port)
class GiteeApiCaller:
    """Base class holding the Gitee settings shared by all API wrappers.

    The values live on the class itself, so subclasses and their instances
    read them through normal attribute lookup.
    """

    # Token sent as the ``access_token`` form field of API requests.
    access_token = ""
    # Base URL substituted for ``{host}`` in the API URL templates.
    gitee_host = ""

    @staticmethod
    def init_config_attr(access_token, gitee_host):
        """Store the Gitee credentials on ``GiteeApiCaller``.

        Declared as a static method so the call works both on the class and
        on an instance; without the decorator an instance call would pass the
        instance as ``access_token`` and fail with ``TypeError``.

        Args:
            access_token: Gitee access token.
            gitee_host: Base URL of the Gitee site.
        """
        GiteeApiCaller.access_token = access_token
        GiteeApiCaller.gitee_host = gitee_host
def get_answer(prompt):
    """Ask the chat-completions endpoint to turn a diff into a commit message.

    Called on the class (``ChatGpt.get_answer(prompt)``); it takes no
    ``self``.

    Args:
        prompt: Diff text sent as the user message.

    Returns:
        The message content of the first choice, or ``None`` when the
        endpoint does not answer with HTTP 200.
    """
    url = "{openai_host}/v1/chat/completions".format(openai_host=ChatGpt.host)

    system_prompt = (
        "您将充当 git 中提交消息的作者。"
        "您的任务是在传统git提交中创建清晰且全面的提交消息,详细清晰的解释更改内容。 我将向您发送“git diff --staged”命令的输出,然后您将其转换为提交消息。"
        "行长度不得超过 74 个字符。"
        "用中文回答。"
        "使用如下模板:"
        "修改了那个文件\n"
        "- 修改细节1\n"
        "- 修改细节2\n"
    )
    payload = {
        "model": "gpt-3.5-turbo",
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ],
        "temperature": 0.7,
    }

    # (connect, read) timeout: without one a stalled endpoint would block the
    # calling worker forever.
    response = requests.post(
        url,
        json=payload,
        headers={"Authorization": "Bearer " + ChatGpt.Authorization},
        timeout=(10, 120),
    )

    if response.status_code != 200:
        print("get answer error")
        print(response.status_code)
        # An error body has no usable "choices" entry; report "no answer"
        # instead of failing on the lookup below.
        return None

    return response.json()["choices"][0]["message"]["content"]
class Gpt(metaclass=ABCMeta):
    """Shared configuration and interface for the language-model back ends.

    Configuration is stored on ``Gpt`` itself and read by the subclasses
    through attribute lookup. All methods are static: back ends are used as
    classes, never instantiated.
    """

    # Back-end selector; the factory compares it with "my_gpt" / "open_ai".
    use = ""
    # Token budget reported by the back ends as their maximum prompt length.
    max_token_length = 0
    # Name of the tiktoken encoding used for counting tokens.
    encoding_name = ""
    # Base URL of the model service.
    host = ""
    # Credential sent as a Bearer token by the OpenAI back end.
    Authorization = ""

    @staticmethod
    def init_config_attr(use, max_token_length, encoding_name, host, Authorization):
        """Store the model configuration on ``Gpt``.

        Static so that it can be called on the class, a subclass, or an
        instance alike.
        """
        Gpt.use = use
        Gpt.max_token_length = max_token_length
        Gpt.encoding_name = encoding_name
        Gpt.host = host
        Gpt.Authorization = Authorization

    @staticmethod
    @abstractmethod
    def get_answer(prompt):
        """Return the model's answer for *prompt*."""

    @staticmethod
    @abstractmethod
    def num_tokens_from_string(string: str) -> int:
        """Return the number of tokens *string* encodes to."""

    @staticmethod
    @abstractmethod
    def get_max_prompt_length():
        """Return the largest prompt size, in tokens, the back end accepts."""
def get_answer(prompt):
    """Send a diff to the ``/hcstream`` endpoint and assemble the streamed answer.

    Called on the class (``MyGpt.get_answer(prompt)``); it takes no ``self``.

    Args:
        prompt: Diff text substituted into ``MyGpt.question``.

    Returns:
        The concatenated answer fragments, or ``None`` when the endpoint does
        not answer with HTTP 200 or fewer than two fragments were received.
    """
    url = "{host}/hcstream".format(host=MyGpt.host)
    payload = {"question": MyGpt.question.format(diff_content=prompt), "history": []}
    answer_pattern = re.compile(r'"answer":\s+"([^"]+)"')

    # The context manager closes the streamed response on every path, including
    # the early return below where the body is never read. The (connect, read)
    # timeout keeps a stalled stream from blocking the caller forever.
    with requests.post(url, json=payload, stream=True, timeout=(10, 120)) as response:
        if response.status_code != 200:
            logger.error("get answer error")
            logger.error(response.status_code)
            return None

        fragments = []
        for line in response.iter_lines():
            if not line:
                continue
            match = answer_pattern.search(line.decode("utf-8"))
            if match:
                fragments.append(match.group(1))

    if len(fragments) < 2:
        logger.info("no answer")
        return None

    # The final fragment is discarded, as before.
    # NOTE(review): presumably it is an end-of-stream or repeated record —
    # confirm against the service's stream format.
    fragments.pop()

    return "".join(fragments)
def summary_message(data):
    """Summarize a pull request's diff with the model and post it as a comment.

    The payload is validated completely before any network call, then the
    diff is downloaded, split into prompts, each prompt is answered by the
    configured model, and the non-empty answers are posted as one pull
    request comment.

    Args:
        data: Webhook payload; ``pull_request`` (with ``diff_url`` and
            ``number``) and ``project`` (with ``namespace`` and ``path``)
            are read from it.

    Returns:
        None. Every failure is logged and ends the function early.
    """
    pr = data.get("pull_request")
    if pr is None:
        logger.error("no pull_request")
        return

    diff_url = pr.get("diff_url")
    if diff_url is None:
        logger.error("no diff")
        return

    # Resolve the comment target up front: a payload that cannot be answered
    # is rejected before the slow model calls are made.
    project = data.get("project")
    if project is None:
        logger.error("no project")
        return

    owner = project.get("namespace")
    if owner is None:
        logger.error("no owner")
        return

    repo = project.get("path")
    if repo is None:
        logger.error("no repo")
        return

    number = pr.get("number")
    if number is None:
        logger.error("no number")
        return

    # Timeout so an unresponsive host cannot block the worker indefinitely.
    diff = requests.get(diff_url, timeout=30)
    if diff.status_code != 200:
        logger.error("get diff error")
        return

    diff.encoding = "utf-8"

    results = handle_diff(diff.text)
    if not results:
        logger.error("can't get prompts")
        return

    gpt = GptClassFactory.create_class()
    answers = []
    for result in results:
        answer = gpt.get_answer(result)
        if answer is None:
            continue
        answers.append(answer)

        # Pause after each answered prompt.
        # NOTE(review): presumably rate limiting for the model service — confirm.
        time.sleep(10)

    stripped = (str(answer).strip() for answer in answers)
    comment = "".join(text + "\n\n" for text in stripped if text)
    if not comment:
        # Nothing useful came back; do not post an empty comment.
        logger.error("no answer to post")
        return

    # NOTE: an overall summary of the comment via get_summary() is disabled.

    PullRequestComments(
        owner, repo, number, comment, None, None, None
    ).submit_pull_request_comments()
def diff_content_out_of_length(diff_content):
    """Return True when *diff_content* reaches or exceeds the prompt token budget."""
    return (
        GptClassFactory.create_class().num_tokens_from_string(diff_content)
        >= Diff_Prompt.max_template_token_length
    )


def handle_diff(diff, *, is_too_long=None, split_change=None):
    """Turn a pull-request diff into a list of prompts that fit the model.

    A diff that fits is returned as a single prompt. A larger diff is split
    per file, and files that are still too large are split further by
    ``cut_single_diff``.

    Args:
        diff: Full diff text.
        is_too_long: Optional predicate ``str -> bool`` deciding whether a
            piece exceeds the budget; defaults to
            ``diff_content_out_of_length``.
        split_change: Optional function ``str -> list[str]`` used to break up
            a single oversized change; defaults to ``cut_change``.

    Returns:
        List of prompt strings in diff order; empty for a blank diff.
    """
    if is_too_long is None:
        is_too_long = diff_content_out_of_length

    if not is_too_long(diff):
        # The whole diff fits: it is the one and only prompt.
        return [diff] if diff.strip() else []

    prompt_list = []
    # Fall back to the raw text when no "diff --git" header is present, so an
    # oversized diff is never silently discarded.
    for single_diff in cut_diff_by_file_diffs(diff) or [diff]:
        if is_too_long(single_diff):
            prompt_list.extend(
                cut_single_diff(
                    single_diff, is_too_long=is_too_long, split_change=split_change
                )
            )
        else:
            prompt_list.append(single_diff)

    return prompt_list


def cut_diff_by_file_diffs(diff):
    """Split a diff into per-file pieces, each starting with ``diff --git``.

    Text before the first ``diff --git`` marker is dropped.
    """
    separator = "diff --git"

    pieces = diff.split(separator)
    pieces.pop(0)

    return [separator + piece for piece in pieces]


def cut_one_diff_by_change(diff):
    """Split one file's diff into changes, each starting with ``@@ -``.

    The text before the first marker (the file header) is kept at the front
    of the first change. A diff without any marker is returned unsplit as a
    one-element list.
    """
    separator = "@@ -"

    diff_title, *changes = diff.split(separator)
    if not changes:
        # No marker at all: there is nothing to attach the header to.
        return [diff]

    changes = [separator + change for change in changes]
    changes[0] = diff_title + changes[0]

    return changes


def cut_single_diff(diff, *, is_too_long=None, split_change=None):
    """Pack the changes of one file's diff into prompts that fit the budget.

    Consecutive changes are merged greedily while the merged text still
    fits; a change that is too large on its own is broken up with
    *split_change*. Order is preserved and every change ends up in exactly
    one place in the result.

    Args:
        diff: Diff text of a single file.
        is_too_long: Optional predicate ``str -> bool``; defaults to
            ``diff_content_out_of_length``.
        split_change: Optional function ``str -> list[str]``; defaults to
            ``cut_change``.

    Returns:
        List of prompt strings.
    """
    if is_too_long is None:
        is_too_long = diff_content_out_of_length
    if split_change is None:
        split_change = cut_change

    result_array = []
    merged = ""
    for change in cut_one_diff_by_change(diff):
        if is_too_long(change):
            # Flush what has been merged so far to keep the original order,
            # then emit the oversized change in pieces.
            if merged:
                result_array.append(merged)
                merged = ""
            result_array.extend(split_change(change))
        elif merged and is_too_long(merged + change):
            result_array.append(merged)
            merged = change
        else:
            merged += change

    if merged:
        # The trailing group must be emitted too; it holds the last change.
        result_array.append(merged)

    return result_array


def cut_change(change):
    """Split one oversized change into fixed-size pieces.

    The token budget is passed to ``split_string`` as the piece size.
    """
    return split_string(change, Diff_Prompt.max_template_token_length)
def assgin_task(data):
    """Dispatch a webhook payload to the handler registered for its hook name.

    Args:
        data: Webhook payload; its ``hook_name`` selects the handler from
            ``hook_method``.

    Returns:
        None. Unusable payloads are logged and ignored.

    Raises:
        Exception: Whatever the selected handler raises, after it has been
            logged.
    """
    if not isinstance(data, dict):
        # The payload comes from parsed JSON, which may be a list, a scalar
        # or null; only an object carries a hook_name.
        logger.error("payload is not a JSON object")
        return

    hook_name = data.get("hook_name")
    if hook_name is None:
        logger.error("no hook_name")
        return

    handle_fuc = hook_method.get(hook_name)
    if handle_fuc is None:
        logger.error("system not suport this hook_name")
        return

    try:
        handle_fuc(data)
    except Exception:
        # When this function runs as a background task nobody inspects the
        # result, so log the traceback here before propagating.
        logger.exception("handler for hook {} failed", hook_name)
        raise
def split_string(string, length):
    """Split *string* into consecutive pieces of at most *length* characters.

    Args:
        string: Text to split.
        length: Maximum size of each piece; must be positive.

    Returns:
        List of pieces in order; joining them gives back *string*. An empty
        string yields an empty list.

    Raises:
        ValueError: If *length* is not positive. A negative step would
            otherwise produce an empty list and silently drop the text.
    """
    if length <= 0:
        raise ValueError("length must be a positive integer")
    return [string[start:start + length] for start in range(0, len(string), length)]