diff --git "a/\347\275\221\347\273\234\347\210\254\350\231\253\345\210\206\346\236\220/readme.txt" "b/\347\275\221\347\273\234\347\210\254\350\231\253\345\210\206\346\236\220/readme.txt" new file mode 100644 index 0000000000000000000000000000000000000000..86dfc5e33f0c819c8e005b1305f32577d994b295 --- /dev/null +++ "b/\347\275\221\347\273\234\347\210\254\350\231\253\345\210\206\346\236\220/readme.txt" @@ -0,0 +1,2 @@ +简介:利用dify平台集成了jina ai插件,实现网站爬虫分析 +用法:输入问题和网站链接,url以%%分割 \ No newline at end of file diff --git "a/\347\275\221\347\273\234\347\210\254\350\231\253\345\210\206\346\236\220/\347\275\221\347\253\231\347\210\254\350\231\253\345\210\206\346\236\220.yml" "b/\347\275\221\347\273\234\347\210\254\350\231\253\345\210\206\346\236\220/\347\275\221\347\253\231\347\210\254\350\231\253\345\210\206\346\236\220.yml" new file mode 100644 index 0000000000000000000000000000000000000000..51531276a538d823f1a24c0d891741e4dd3d195c --- /dev/null +++ "b/\347\275\221\347\273\234\347\210\254\350\231\253\345\210\206\346\236\220/\347\275\221\347\253\231\347\210\254\350\231\253\345\210\206\346\236\220.yml" @@ -0,0 +1,624 @@ +app: + description: '' + icon: 🤖 + icon_background: '#FFEAD5' + mode: workflow + name: 网站爬虫分析 + use_icon_as_answer_icon: false +dependencies: +- current_identifier: null + type: marketplace + value: + marketplace_plugin_unique_identifier: langgenius/volcengine_maas:0.0.7@f8e44422cfa5b9a6ac1f2d3b43ef1069868efdad1e5cec2590de3f53ceac37b0 +kind: app +version: 0.1.5 +workflow: + conversation_variables: [] + environment_variables: [] + features: + file_upload: + allowed_file_extensions: + - .JPG + - .JPEG + - .PNG + - .GIF + - .WEBP + - .SVG + allowed_file_types: + - image + allowed_file_upload_methods: + - local_file + - remote_url + enabled: false + fileUploadConfig: + audio_file_size_limit: 50 + batch_count_limit: 5 + file_size_limit: 15 + image_file_size_limit: 10 + video_file_size_limit: 100 + workflow_file_upload_limit: 10 + image: + enabled: false + number_limits: 3 + transfer_methods: + - local_file + - remote_url + number_limits: 3 + opening_statement: '' + retriever_resource: + enabled: true + sensitive_word_avoidance: + enabled: false + speech_to_text: + enabled: false + suggested_questions: [] + suggested_questions_after_answer: + enabled: false + text_to_speech: + enabled: false + language: '' + voice: '' + graph: + edges: + - data: + isInIteration: false + isInLoop: false + sourceType: start + targetType: code + id: 1743067816462-source-1743067904827-target + source: '1743067816462' + sourceHandle: source + target: '1743067904827' + targetHandle: target + type: custom + zIndex: 0 + - data: + isInIteration: false + isInLoop: false + sourceType: start + targetType: code + id: 1743067816462-source-1743067945288-target + source: '1743067816462' + sourceHandle: source + target: '1743067945288' + targetHandle: target + type: custom + zIndex: 0 + - data: + isInIteration: false + isInLoop: false + sourceType: code + targetType: iteration + id: 1743067945288-source-1743068017702-target + source: '1743067945288' + sourceHandle: source + target: '1743068017702' + targetHandle: target + type: custom + zIndex: 0 + - data: + isInLoop: false + sourceType: code + targetType: iteration + id: 1743067904827-source-1743068017702-target + source: '1743067904827' + sourceHandle: source + target: '1743068017702' + targetHandle: target + type: custom + zIndex: 0 + - data: + isInIteration: true + isInLoop: false + iteration_id: '1743068017702' + sourceType: iteration-start + targetType: http-request + id: 1743068017702start-source-1743068056719-target + source: 1743068017702start + sourceHandle: source + target: '1743068056719' + targetHandle: target + type: custom + zIndex: 1002 + - data: + isInIteration: false + isInLoop: false + sourceType: iteration + targetType: llm + id: 1743068017702-source-1743070280184-target + source: '1743068017702' + sourceHandle: source + target: '1743070280184' + targetHandle: target + type: custom + zIndex: 0 + - data: + isInIteration: false + isInLoop: false + sourceType: llm + targetType: end + id: 1743070280184-source-1743071122526-target + source: '1743070280184' + sourceHandle: source + target: '1743071122526' + targetHandle: target + type: custom + zIndex: 0 + - data: + isInLoop: false + sourceType: tool + targetType: llm + id: 1743072489803-source-1743070280184-target + source: '1743072489803' + sourceHandle: source + target: '1743070280184' + targetHandle: target + type: custom + zIndex: 0 + - data: + isInLoop: false + sourceType: start + targetType: tool + id: 1743067816462-source-1743072489803-target + source: '1743067816462' + sourceHandle: source + target: '1743072489803' + targetHandle: target + type: custom + zIndex: 0 + nodes: + - data: + desc: '' + selected: false + title: 开始 + type: start + variables: + - label: question + max_length: 256 + options: [] + required: true + type: text-input + variable: question + - label: url以%%分割 + max_length: 256 + options: [] + required: true + type: text-input + variable: url + height: 114 + id: '1743067816462' + position: + x: -173.40901640126066 + y: 308.1620796251141 + positionAbsolute: + x: -173.40901640126066 + y: 308.1620796251141 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 243 + - data: + code: "import urllib.parse\ndef main(arg1: str):\n encoded_string = urllib.parse.quote(arg1)\n\ + \ return {\"result\":encoded_string}\n \n" + code_language: python3 + desc: '' + outputs: + result: + children: null + type: string + selected: false + title: Url编码 + type: code + variables: + - value_selector: + - '1743067816462' + - question + variable: arg1 + height: 52 + id: '1743067904827' + position: + x: 404 + y: 297 + positionAbsolute: + x: 404 + y: 297 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 243 + - data: + code: "def main(arg1: str):\n parts = arg1.split('%%')\n return {\n\ + \ \"result\": parts\n }" + code_language: python3 + desc: '' + outputs: + result: + children: null + type: array[string] + selected: false + title: 搜索目标网站列表 + type: code + variables: + - value_selector: + - '1743067816462' + - url + variable: arg1 + height: 52 + id: '1743067945288' + position: + x: 404 + y: 407 + positionAbsolute: + x: 404 + y: 407 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 243 + - data: + desc: '' + error_handle_mode: terminated + height: 347 + is_parallel: false + iterator_selector: + - '1743067945288' + - result + output_selector: + - '1743068056719' + - body + output_type: array[string] + parallel_nums: 10 + selected: false + start_node_id: 1743068017702start + title: 迭代 + type: iteration + width: 610 + height: 347 + id: '1743068017702' + position: + x: 815.134312519097 + y: 289.1343125190971 + positionAbsolute: + x: 815.134312519097 + y: 289.1343125190971 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 610 + zIndex: 1 + - data: + desc: '' + isInIteration: true + selected: false + title: '' + type: iteration-start + draggable: false + height: 48 + id: 1743068017702start + parentId: '1743068017702' + position: + x: 24 + y: 68 + positionAbsolute: + x: 839.134312519097 + y: 357.1343125190971 + selectable: false + sourcePosition: right + targetPosition: left + type: custom-iteration-start + width: 44 + zIndex: 1002 + - data: + authorization: + config: null + type: no-auth + body: + data: [] + type: none + desc: '' + headers: 'Authorization:Bearer jina_673ac3c809414eaa855de568982a5588-SPeGK8ylOD4CwWe5sqRPJ6aKDUD + + X-Respond-With:no-content + + X-Site:{{#1743068017702.item#}}' + isInIteration: true + isInLoop: false + iteration_id: '1743068017702' + method: get + params: '' + retry_config: + max_retries: 3 + retry_enabled: true + retry_interval: 100 + selected: false + timeout: + max_connect_timeout: 0 + max_read_timeout: 0 + max_write_timeout: 0 + title: HTTP 请求 + type: http-request + url: https://s.jina.ai/?q={{#1743067904827.result#}} + variables: [] + height: 136 + id: '1743068056719' + parentId: '1743068017702' + position: + x: 195.54051990627863 + y: 99 + positionAbsolute: + x: 1010.6748324253756 + y: 388.1343125190971 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 243 + zIndex: 1002 + - data: + context: + enabled: true + variable_selector: + - '1743068017702' + - output + desc: '' + model: + completion_params: + temperature: 0.7 + mode: chat + name: DeepSeek-R1 + provider: langgenius/volcengine_maas/volcengine_maas + prompt_template: + - id: 0e3cccf0-e929-4fef-9085-9f4c6e6c3a21 + role: system + text: '你是一个优秀的归纳总结师,给你的内容是包含了新闻标题、链接、简介。请根据给定内容先按照新闻网站进行分类,再按照时效性、与{{#1743067816462.question#}}的相关性进行总结。 + + 当前时间是:{{#1743072489803.text#}} + + 给定内容:{{#1743068017702.output#}} + + 输出格式: + + #新闻网站# + + 新闻标题、新闻链接、新闻简介 + + 总结时,请以新闻网站为单位,对新闻网站的发文内容进行总结' + selected: true + title: LLM + type: llm + variables: [] + vision: + enabled: false + height: 88 + id: '1743070280184' + position: + x: 1579.7977764086995 + y: 127.13202211601171 + positionAbsolute: + x: 1579.7977764086995 + y: 127.13202211601171 + selected: true + sourcePosition: right + targetPosition: left + type: custom + width: 243 + - data: + desc: '' + outputs: + - value_selector: + - '1743070280184' + - text + variable: text + selected: false + title: 结束 + type: end + height: 88 + id: '1743071122526' + position: + x: 1910.0781298219247 + y: 206.14828216483022 + positionAbsolute: + x: 1910.0781298219247 + y: 206.14828216483022 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 243 + - data: + desc: '' + is_team_authorization: true + output_schema: null + paramSchemas: + - auto_generate: null + default: '%Y-%m-%d %H:%M:%S' + form: form + human_description: + en_US: Time format in strftime standard. + ja_JP: Time format in strftime standard. + pt_BR: Time format in strftime standard. + zh_Hans: strftime 标准的时间格式。 + label: + en_US: Format + ja_JP: Format + pt_BR: Format + zh_Hans: 格式 + llm_description: null + max: null + min: null + name: format + options: [] + placeholder: null + precision: null + required: false + scope: null + template: null + type: string + - auto_generate: null + default: UTC + form: form + human_description: + en_US: Timezone + ja_JP: Timezone + pt_BR: Timezone + zh_Hans: 时区 + label: + en_US: Timezone + ja_JP: Timezone + pt_BR: Timezone + zh_Hans: 时区 + llm_description: null + max: null + min: null + name: timezone + options: + - label: + en_US: UTC + ja_JP: UTC + pt_BR: UTC + zh_Hans: UTC + value: UTC + - label: + en_US: America/New_York + ja_JP: America/New_York + pt_BR: America/New_York + zh_Hans: 美洲/纽约 + value: America/New_York + - label: + en_US: America/Los_Angeles + ja_JP: America/Los_Angeles + pt_BR: America/Los_Angeles + zh_Hans: 美洲/洛杉矶 + value: America/Los_Angeles + - label: + en_US: America/Chicago + ja_JP: America/Chicago + pt_BR: America/Chicago + zh_Hans: 美洲/芝加哥 + value: America/Chicago + - label: + en_US: America/Sao_Paulo + ja_JP: America/Sao_Paulo + pt_BR: América/São Paulo + zh_Hans: 美洲/圣保罗 + value: America/Sao_Paulo + - label: + en_US: Asia/Shanghai + ja_JP: Asia/Shanghai + pt_BR: Asia/Shanghai + zh_Hans: 亚洲/上海 + value: Asia/Shanghai + - label: + en_US: Asia/Ho_Chi_Minh + ja_JP: Asia/Ho_Chi_Minh + pt_BR: Ásia/Ho Chi Minh + zh_Hans: 亚洲/胡志明市 + value: Asia/Ho_Chi_Minh + - label: + en_US: Asia/Tokyo + ja_JP: Asia/Tokyo + pt_BR: Asia/Tokyo + zh_Hans: 亚洲/东京 + value: Asia/Tokyo + - label: + en_US: Asia/Dubai + ja_JP: Asia/Dubai + pt_BR: Asia/Dubai + zh_Hans: 亚洲/迪拜 + value: Asia/Dubai + - label: + en_US: Asia/Kolkata + ja_JP: Asia/Kolkata + pt_BR: Asia/Kolkata + zh_Hans: 亚洲/加尔各答 + value: Asia/Kolkata + - label: + en_US: Asia/Seoul + ja_JP: Asia/Seoul + pt_BR: Asia/Seoul + zh_Hans: 亚洲/首尔 + value: Asia/Seoul + - label: + en_US: Asia/Singapore + ja_JP: Asia/Singapore + pt_BR: Asia/Singapore + zh_Hans: 亚洲/新加坡 + value: Asia/Singapore + - label: + en_US: Europe/London + ja_JP: Europe/London + pt_BR: Europe/London + zh_Hans: 欧洲/伦敦 + value: Europe/London + - label: + en_US: Europe/Berlin + ja_JP: Europe/Berlin + pt_BR: Europe/Berlin + zh_Hans: 欧洲/柏林 + value: Europe/Berlin + - label: + en_US: Europe/Moscow + ja_JP: Europe/Moscow + pt_BR: Europe/Moscow + zh_Hans: 欧洲/莫斯科 + value: Europe/Moscow + - label: + en_US: Australia/Sydney + ja_JP: Australia/Sydney + pt_BR: Australia/Sydney + zh_Hans: 澳大利亚/悉尼 + value: Australia/Sydney + - label: + en_US: Pacific/Auckland + ja_JP: Pacific/Auckland + pt_BR: Pacific/Auckland + zh_Hans: 太平洋/奥克兰 + value: Pacific/Auckland + - label: + en_US: Africa/Cairo + ja_JP: Africa/Cairo + pt_BR: Africa/Cairo + zh_Hans: 非洲/开罗 + value: Africa/Cairo + placeholder: null + precision: null + required: false + scope: null + template: null + type: select + params: + format: '' + timezone: '' + provider_id: time + provider_name: time + provider_type: builtin + selected: false + title: 获取当前时间 + tool_configurations: + format: '%Y-%m-%d ' + timezone: Asia/Shanghai + tool_label: 获取当前时间 + tool_name: current_time + tool_parameters: {} + type: tool + height: 114 + id: '1743072489803' + position: + x: 1059.9509040696312 + y: 76.00005469245286 + positionAbsolute: + x: 1059.9509040696312 + y: 76.00005469245286 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 243 + viewport: + x: -279.36159886754365 + y: 57.442418442218184 + zoom: 0.5325205490838113