From 1364a5dc6f023297d12923117c27ea8170141482 Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Fri, 14 Nov 2025 10:30:29 +0800 Subject: [PATCH] add 25.12 Llama3-8B and qwen2_5vl --- README.md | 98 ++++++++++--------- README_en.md | 98 ++++++++++--------- .../nlp/llm/llama3_8b/llamafactory/README.md | 64 ++++++++++++ models/nlp/llm/qwen2.5-3b/pytorch/README.md | 5 +- .../llm/qwen2.5-vl-7b/llamafactory/README.md | 55 +++++++++++ tests/model_info.json | 96 ++++++++++++++++++ 6 files changed, 326 insertions(+), 90 deletions(-) create mode 100644 models/nlp/llm/llama3_8b/llamafactory/README.md create mode 100644 models/nlp/llm/qwen2.5-vl-7b/llamafactory/README.md diff --git a/README.md b/README.md index 1c18cb2d0..19de91dc1 100644 --- a/README.md +++ b/README.md @@ -19,50 +19,58 @@ DeepSparkHub甄选上百个应用算法和模型,覆盖AI和通用计算各领 ### 大语言模型(LLM) -| Model | Framework | ToolBox | Dataset/Weight | IXUCA SDK | -|-------------------------------------------------------|-----------|--------------------|------------------------|-----------| -| [Aquila2-34B](models/nlp/llm/aquila2-34b/pytorch) | PyTorch | Megatron-DeepSpeed | Bookcorpus | 3.4.0 | -| [Baichuan2-7B](models/nlp/llm/baichuan2-7b/pytorch) | PyTorch | DeepSpeed | baichuan2-7b-base | 3.4.0 | -| [Bloom-7B1](models/nlp/llm/bloom-7b1/pytorch) | PyTorch | Firefly | school_math_0.25M | 3.4.0 | -| [ChatGLM-6B](models/nlp/llm/chatglm-6b/pytorch) | PyTorch | DeepSpeed | ADGEN & chatglm-6b | 3.1.0 | -| [ChatGLM2-6B SFT](models/nlp/llm/chatglm2-6b-sft/pytorch) | PyTorch | DeepSpeed | ADGEN & chatglm2-6b | 3.4.0 | -| [ChatGLM3-6B](models/nlp/llm/chatglm3-6b/pytorch) | PyTorch | DeepSpeed | ADGEN & chatglm3-6b | 4.1.1 | -| [DeepSeekMoE 7B](models/nlp/llm/deepseek_moe_7b/pytorch) | PyTorch | ColossalAI | deepseek-moe-16b-base | 4.1.1 | -| [DeepSeek-LLM-7B](models/nlp/llm/deepseek-llm-7b/verl) | PyTorch | verl | deepseek-llm-7b-chat | dev-only | -| [GLM-4](models/nlp/llm/glm-4/pytorch) | PyTorch | Torchrun | glm-4-9b-chat | 4.2.0 | -| 
[Gemma-2-2B-IT](models/nlp/llm/gemma-2-2b-it/verl) | PyTorch | verl | gemma-2-2b-it | dev-only | -| [Llama-7B](models/nlp/llm/llama-7b/pytorch) | PyTorch | ColossalAI | llama-7b-hf | 3.1.0 | -| [Llama2-7B](models/nlp/llm/llama2-7b/pytorch) | PyTorch | Megatron-DeepSpeed | Bookcorpus | 3.1.0 | -| [Llama2-7B RMF](models/nlp/llm/llama2-7b_reward_sft/pytorch) | PyTorch | DeepSpeed | Dahoas/rm-static | 3.1.1 | -| [Llama2-7B RLHF](models/nlp/llm/llama2-7b_rlhf/pytorch) | PyTorch | Megatron-DeepSpeed | llama2-7b&tiny-llama | 3.4.0 | -| [Llama2-7B SFT](models/nlp/llm/llama2-7b_sft/pytorch) | PyTorch | Megatron-DeepSpeed | GPT Small-117M | 3.1.1 | -| [Llama2-13B](models/nlp/llm/llama2-13b/pytorch) | PyTorch | Megatron-DeepSpeed | Bookcorpus | 3.4.0 | -| [Llama2-34B](models/nlp/llm/llama2-34b/pytorch) | PyTorch | Megatron-DeepSpeed | Bookcorpus | 3.4.0 | -| [Llama3-8B](models/nlp/llm/llama3_8b/pytorch) | PyTorch | Megatron-DeepSpeed | Bookcorpus | 4.1.1 | -| [Llama3-8B](models/nlp/llm/llama3_8b/megatron-lm) | PyTorch | Megatron-LM | GPT Small-117M | 4.3.0 | -| [Llama3-8B SFT](models/nlp/llm/llama3_8b_sft/pytorch) | PyTorch | ColossalAI | school_math_0.25M | 4.1.1 | -| [Llama3-8B SFT](models/nlp/llm/llama3_8b/openrlhf) | PyTorch | OpenRLHF | Meta-Llama-3-8B | 4.3.0 | -| [Llama3-8B PPO](models/nlp/llm/llama3_8b/openrlhf) | PyTorch | OpenRLHF | Llama-3-8b-sft-mixture | 4.2.0 | -| [Llama3-8B DPO](models/nlp/llm/llama3_8b/openrlhf) | PyTorch | OpenRLHF | Llama-3-8b-sft-mixture | 4.2.0 | -| [Llama3-8B KTO](models/nlp/llm/llama3_8b/openrlhf) | PyTorch | OpenRLHF | Llama-3-8b-sft-mixture | 4.2.0 | -| [Mamba-2](models/nlp/llm/mamba-2/pytorch) | PyTorch | Megatron-LM | GPT Small-117M | 4.1.1 | -| [MiniCPM](models/nlp/llm/minicpm/pytorch) | PyTorch | DeepSpeed | MiniCPM-2B-sft-bf16 | 4.2.0 | -| [Mixtral 8x7B](models/nlp/llm/mixtral/pytorch) | PyTorch | Megatron-LM | GPT Small-117M | 4.1.1 | -| [Mixtral 8x7B](models/nlp/llm/mixtral/openrlhf) | PyTorch | OpenRLHF | Mixtral-8x7B-v0.1 | 
4.3.0 | -| [Phi-3](models/nlp/llm/phi-3/pytorch) | PyTorch | Torchrun | Phi-3-mini-4k-instruct | 4.2.0 | -| [QWen-7B](models/nlp/llm/qwen-7b/pytorch) | PyTorch | Firefly | qwen-7b | 3.4.0 | -| [QWen1.5-7B](models/nlp/llm/qwen1.5-7b/pytorch) | PyTorch | Firefly | school_math | 4.1.1 | -| [QWen1.5-14B](models/nlp/llm/qwen1.5-14b/pytorch) | PyTorch | Firefly | school_math | 4.1.1 | -| [Qwen2-7B](models/nlp/llm/qwen2-7b/verl) | PyTorch | verl | qwen2-7b | dev-only | -| [Qwen2.5-7B SFT](models/nlp/llm/qwen2.5-7b/pytorch) | PyTorch | LLaMA-Factory | qwen2.5-7b | 4.1.1 | -| [Qwen2.5-1.5B verl](models/nlp/llm/qwen2.5-1.5b/verl) | PyTorch | verl | qwen2.5-1.5b | 4.2.0 | -| [Qwen2.5-7B verl](models/nlp/llm/qwen2.5-7b/verl) | PyTorch | verl | qwen2.5-7b | 4.2.0 | -| [Qwen2.5-3B](models/nlp/llm/qwen2.5-3b/pytorch) | PyTorch | ColossalAI | qwen2.5-3b | 4.3.0 | -| [Qwen2.5-VL-7B](models/nlp/llm/qwen2.5-vl-7b/verl) | PyTorch | verl | qwen2.5-vl-7b | dev-only | -| [Qwen3-8B](models/nlp/llm/qwen3-8b/verl) | PyTorch | verl | qwen3-8b | dev-only | -| [Yi-6B](models/nlp/llm/yi-6b/pytorch) | PyTorch | DeepSpeed | Yi-6B | 4.2.0 | -| [Yi-1.5-6B](models/nlp/llm/yi-1.5-6b/pytorch) | PyTorch | DeepSpeed | Yi-1.5-6B | 4.2.0 | -| [Yi-VL-6B](models/nlp/llm/yi-vl-6b/pytorch) | PyTorch | LLaMA-Factory | Yi-VL-6B-hf | 4.2.0 | +| Model | Framework | Dataset/Weight | IXUCA SDK | +|-------------------------------------------------------|--------------------|------------------------|-----------| +| [Aquila2-34B](models/nlp/llm/aquila2-34b/pytorch) | Megatron-DeepSpeed | Bookcorpus | 3.4.0 | +| [Baichuan2-7B](models/nlp/llm/baichuan2-7b/pytorch) | DeepSpeed | baichuan2-7b-base | 3.4.0 | +| [Bloom-7B1](models/nlp/llm/bloom-7b1/pytorch) | Firefly | school_math_0.25M | 3.4.0 | +| [ChatGLM-6B](models/nlp/llm/chatglm-6b/pytorch) | DeepSpeed | ADGEN & chatglm-6b | 3.1.0 | +| [ChatGLM2-6B SFT](models/nlp/llm/chatglm2-6b-sft/pytorch) | DeepSpeed | ADGEN & chatglm2-6b | 3.4.0 | +| 
[ChatGLM3-6B](models/nlp/llm/chatglm3-6b/pytorch) | DeepSpeed | ADGEN & chatglm3-6b | 4.1.1 | +| [DeepSeekMoE 7B](models/nlp/llm/deepseek_moe_7b/pytorch) | ColossalAI | deepseek-moe-16b-base | 4.1.1 | +| [DeepSeek-LLM-7B](models/nlp/llm/deepseek-llm-7b/verl) | verl | deepseek-llm-7b-chat | dev-only | +| [GLM-4](models/nlp/llm/glm-4/pytorch) | Torchrun | glm-4-9b-chat | 4.2.0 | +| [Gemma-2-2B-IT](models/nlp/llm/gemma-2-2b-it/verl) | verl | gemma-2-2b-it | dev-only | +| [Llama-7B](models/nlp/llm/llama-7b/pytorch) | ColossalAI | llama-7b-hf | 3.1.0 | +| [Llama2-7B](models/nlp/llm/llama2-7b/pytorch) | Megatron-DeepSpeed | Bookcorpus | 3.1.0 | +| [Llama2-7B RMF](models/nlp/llm/llama2-7b_reward_sft/pytorch) | DeepSpeed | Dahoas/rm-static | 3.1.1 | +| [Llama2-7B RLHF](models/nlp/llm/llama2-7b_rlhf/pytorch) | Megatron-DeepSpeed | llama2-7b&tiny-llama | 3.4.0 | +| [Llama2-7B SFT](models/nlp/llm/llama2-7b_sft/pytorch) | Megatron-DeepSpeed | GPT Small-117M | 3.1.1 | +| [Llama2-13B](models/nlp/llm/llama2-13b/pytorch) | Megatron-DeepSpeed | Bookcorpus | 3.4.0 | +| [Llama2-34B](models/nlp/llm/llama2-34b/pytorch) | Megatron-DeepSpeed | Bookcorpus | 3.4.0 | +| [Llama3-8B](models/nlp/llm/llama3_8b/pytorch) | Megatron-DeepSpeed | Bookcorpus | 4.1.1 | +| [Llama3-8B](models/nlp/llm/llama3_8b/megatron-lm) | Megatron-LM | GPT Small-117M | 4.3.0 | +| [Llama3-8B SFT](models/nlp/llm/llama3_8b_sft/pytorch) | ColossalAI | school_math_0.25M | 4.1.1 | +| [Llama3-8B SFT](models/nlp/llm/llama3_8b/openrlhf) | OpenRLHF | Meta-Llama-3-8B | 4.3.0 | +| [Llama3-8B PPO](models/nlp/llm/llama3_8b/openrlhf) | OpenRLHF | Llama-3-8b-sft-mixture | 4.2.0 | +| [Llama3-8B DPO](models/nlp/llm/llama3_8b/openrlhf) | OpenRLHF | Llama-3-8b-sft-mixture | 4.2.0 | +| [Llama3-8B KTO](models/nlp/llm/llama3_8b/openrlhf) | OpenRLHF | Llama-3-8b-sft-mixture | 4.2.0 | +| [Llama3-8B DPO](models/nlp/llm/llama3_8b/llamafactory) | LLaMA-Factory | Meta-Llama-3-8B-Instruct | 4.3.0 | +| [Llama3-8B Full 
SFT](models/nlp/llm/llama3_8b/llamafactory) | LLaMA-Factory | Meta-Llama-3-8B-Instruct | 4.3.0 | +| [Llama3-8B KTO](models/nlp/llm/llama3_8b/llamafactory) | LLaMA-Factory | Meta-Llama-3-8B-Instruct | 4.3.0 | +| [Llama3-8B Pretrain](models/nlp/llm/llama3_8b/llamafactory) | LLaMA-Factory | Meta-Llama-3-8B-Instruct | 4.3.0 | +| [Llama3-8B Reward](models/nlp/llm/llama3_8b/llamafactory) | LLaMA-Factory | Meta-Llama-3-8B-Instruct | 4.3.0 | +| [Llama3-8B SFT](models/nlp/llm/llama3_8b/llamafactory) | LLaMA-Factory | Meta-Llama-3-8B-Instruct | 4.3.0 | +| [Mamba-2](models/nlp/llm/mamba-2/pytorch) | Megatron-LM | GPT Small-117M | 4.1.1 | +| [MiniCPM](models/nlp/llm/minicpm/pytorch) | DeepSpeed | MiniCPM-2B-sft-bf16 | 4.2.0 | +| [Mixtral 8x7B](models/nlp/llm/mixtral/pytorch) | Megatron-LM | GPT Small-117M | 4.1.1 | +| [Mixtral 8x7B](models/nlp/llm/mixtral/openrlhf) | OpenRLHF | Mixtral-8x7B-v0.1 | 4.3.0 | +| [Phi-3](models/nlp/llm/phi-3/pytorch) | Torchrun | Phi-3-mini-4k-instruct | 4.2.0 | +| [QWen-7B](models/nlp/llm/qwen-7b/pytorch) | Firefly | qwen-7b | 3.4.0 | +| [QWen1.5-7B](models/nlp/llm/qwen1.5-7b/pytorch) | Firefly | school_math | 4.1.1 | +| [QWen1.5-14B](models/nlp/llm/qwen1.5-14b/pytorch) | Firefly | school_math | 4.1.1 | +| [Qwen2-7B](models/nlp/llm/qwen2-7b/verl) | verl | qwen2-7b | dev-only | +| [Qwen2.5-7B SFT](models/nlp/llm/qwen2.5-7b/pytorch) | LLaMA-Factory | qwen2.5-7b | 4.1.1 | +| [Qwen2.5-1.5B verl](models/nlp/llm/qwen2.5-1.5b/verl) | verl | qwen2.5-1.5b | 4.2.0 | +| [Qwen2.5-7B verl](models/nlp/llm/qwen2.5-7b/verl) | verl | qwen2.5-7b | 4.2.0 | +| [Qwen2.5-3B](models/nlp/llm/qwen2.5-3b/pytorch) | ColossalAI | qwen2.5-3b | 4.3.0 | +| [Qwen2.5-VL-7B](models/nlp/llm/qwen2.5-vl-7b/verl) | verl | qwen2.5-vl-7b | dev-only | +| [Qwen2.5-VL-7B DPO](models/nlp/llm/qwen2.5-vl-7b/llamafactory) | LLaMA-Factory | Qwen2.5-VL-7B-Instruct | 4.3.0 | +| [Qwen2.5-VL-7B SFT](models/nlp/llm/qwen2.5-vl-7b/llamafactory) | LLaMA-Factory | Qwen2.5-VL-7B-Instruct | 4.3.0 | +| 
[Qwen3-8B](models/nlp/llm/qwen3-8b/verl) | verl | qwen3-8b | dev-only | +| [Yi-6B](models/nlp/llm/yi-6b/pytorch) | DeepSpeed | Yi-6B | 4.2.0 | +| [Yi-1.5-6B](models/nlp/llm/yi-1.5-6b/pytorch) | DeepSpeed | Yi-1.5-6B | 4.2.0 | +| [Yi-VL-6B](models/nlp/llm/yi-vl-6b/pytorch) | LLaMA-Factory | Yi-VL-6B-hf | 4.2.0 | ### 计算机视觉(CV) @@ -408,6 +416,7 @@ DeepSparkHub甄选上百个应用算法和模型,覆盖AI和通用计算各领 |------------------------------------------------------------------|-------------------|--------------------|-------| | [BART](models/nlp/language_model/bart_fairseq/pytorch) | PyTorch (Fairseq) | RTE | 3.0.0 | | [BERT NER](models/nlp/ner/bert/pytorch) | PyTorch | CoNLL-2003 | 3.0.0 | +| [BERT Pretraining](models/nlp/language_model/bert_sample/pytorch) | PyTorch | bert_mini | 4.3.0 | | [BERT Pretraining](models/nlp/language_model/bert/pytorch) | PyTorch | MLCommon Wikipedia | 2.2.0 | | [BERT Pretraining](models/nlp/language_model/bert/paddlepaddle) | PaddlePaddle | MNLI | 2.3.0 | | [BERT Pretraining](models/nlp/language_model/bert/tensorflow) | TensorFlow | MNLI | 3.0.0 | @@ -447,6 +456,7 @@ DeepSparkHub甄选上百个应用算法和模型,覆盖AI和通用计算各领 | Model | Framework | Dataset | IXUCA SDK | |-----------------------------------------------------------------------------------------|-----------------|----------|-------| | [Conformer](models/audio/speech_recognition/conformer_wenet/pytorch) | PyTorch (WeNet) | AISHELL | 2.2.0 | +| [Conformer](models/audio/speech_recognition/conformer/pytorch) | PyTorch | LibriSpeech | 4.3.0 | | [Efficient Conformer v2](models/audio/speech_recognition/efficient_conformer_v2_wenet/pytorch) | PyTorch (WeNet) | AISHELL | 3.1.0 | | [PP-ASR-Conformer](models/audio/speech_recognition/conformer/paddlepaddle) | PaddlePaddle | AISHELL | 3.1.0 | | [RNN-T](models/audio/speech_recognition/rnnt/pytorch) | PyTorch | LJSpeech | 2.2.0 | diff --git a/README_en.md b/README_en.md index f62f57e33..4a0fae6c4 100644 --- a/README_en.md +++ b/README_en.md @@ -21,50 +21,58 @@ individuals, healthcare, 
education, communication, energy, and more. ### LLM (Large Language Model) -| Model | Framework | ToolBox | Dataset/Weight | IXUCA SDK | -|-------------------------------------------------------|-----------|--------------------|------------------------|-----------| -| [Aquila2-34B](models/nlp/llm/aquila2-34b/pytorch) | PyTorch | Megatron-DeepSpeed | Bookcorpus | 3.4.0 | -| [Baichuan2-7B](models/nlp/llm/baichuan2-7b/pytorch) | PyTorch | DeepSpeed | baichuan2-7b-base | 3.4.0 | -| [Bloom-7B1](models/nlp/llm/bloom-7b1/pytorch) | PyTorch | Firefly | school_math_0.25M | 3.4.0 | -| [ChatGLM-6B](models/nlp/llm/chatglm-6b/pytorch) | PyTorch | DeepSpeed | ADGEN & chatglm-6b | 3.1.0 | -| [ChatGLM2-6B SFT](models/nlp/llm/chatglm2-6b-sft/pytorch) | PyTorch | DeepSpeed | ADGEN & chatglm2-6b | 3.4.0 | -| [ChatGLM3-6B](models/nlp/llm/chatglm3-6b/pytorch) | PyTorch | DeepSpeed | ADGEN & chatglm3-6b | 4.1.1 | -| [DeepSeekMoE 7B](models/nlp/llm/deepseek_moe_7b/pytorch) | PyTorch | ColossalAI | deepseek-moe-16b-base | 4.1.1 | -| [DeepSeek-LLM-7B](models/nlp/llm/deepseek-llm-7b/verl ) | PyTorch | verl | deepseek-llm-7b-chat | dev-only | -| [GLM-4](models/nlp/llm/glm-4/pytorch) | PyTorch | Torchrun | glm-4-9b-chat | 4.2.0 | -| [Gemma-2-2B-IT](models/nlp/llm/gemma-2-2b-it/verl) | PyTorch | verl | gemma-2-2b-it | dev-only | -| [Llama-7B](models/nlp/llm/llama-7b/pytorch) | PyTorch | ColossalAI | llama-7b-hf | 3.1.0 | -| [Llama2-7B](models/nlp/llm/llama2-7b/pytorch) | PyTorch | Megatron-DeepSpeed | Bookcorpus | 3.1.0 | -| [Llama2-7B RMF](models/nlp/llm/llama2-7b_reward_sft/pytorch) | PyTorch | DeepSpeed | Dahoas/rm-static | 3.1.1 | -| [Llama2-7B RLHF](models/nlp/llm/llama2-7b_rlhf/pytorch) | PyTorch | Megatron-DeepSpeed | llama2-7b&tiny-llama | 3.4.0 | -| [Llama2-7B SFT](models/nlp/llm/llama2-7b_sft/pytorch) | PyTorch | Megatron-DeepSpeed | GPT Small-117M | 3.1.1 | -| [Llama2-13B](models/nlp/llm/llama2-13b/pytorch) | PyTorch | Megatron-DeepSpeed | Bookcorpus | 3.4.0 | -| 
[Llama2-34B](models/nlp/llm/llama2-34b/pytorch) | PyTorch | Megatron-DeepSpeed | Bookcorpus | 3.4.0 | -| [Llama3-8B](models/nlp/llm/llama3_8b/pytorch) | PyTorch | Megatron-DeepSpeed | Bookcorpus | 4.1.1 | -| [Llama3-8B](models/nlp/llm/llama3_8b/megatron-lm) | PyTorch | Megatron-LM | GPT Small-117M | 4.3.0 | -| [Llama3-8B SFT](models/nlp/llm/llama3_8b_sft/pytorch) | PyTorch | ColossalAI | school_math_0.25M | 4.1.1 | -| [Llama3-8B SFT](models/nlp/llm/llama3_8b/openrlhf) | PyTorch | OpenRLHF | Meta-Llama-3-8B | 4.3.0 | -| [Llama3-8B PPO](models/nlp/llm/llama3_8b/openrlhf) | PyTorch | OpenRLHF | Llama-3-8b-sft-mixture | 4.2.0 | -| [Llama3-8B DPO](models/nlp/llm/llama3_8b/openrlhf) | PyTorch | OpenRLHF | Llama-3-8b-sft-mixture | 4.2.0 | -| [Llama3-8B KTO](models/nlp/llm/llama3_8b/openrlhf) | PyTorch | OpenRLHF | Llama-3-8b-sft-mixture | 4.2.0 | -| [Mamba-2](models/nlp/llm/mamba-2/pytorch) | PyTorch | Megatron-LM | GPT Small-117M | 4.1.1 | -| [MiniCPM](models/nlp/llm/minicpm/pytorch) | PyTorch | DeepSpeed | MiniCPM-2B-sft-bf16 | 4.2.0 | -| [Mixtral 8x7B](models/nlp/llm/mixtral/pytorch) | PyTorch | Megatron-LM | GPT Small-117M | 4.1.1 | -| [Mixtral 8x7B](models/nlp/llm/mixtral/openrlhf) | PyTorch | OpenRLHF | Mixtral-8x7B-v0.1 | 4.3.0 | -| [Phi-3](models/nlp/llm/phi-3/pytorch) | PyTorch | Torchrun | Phi-3-mini-4k-instruct | 4.2.0 | -| [QWen-7B](models/nlp/llm/qwen-7b/pytorch) | PyTorch | Firefly | qwen-7b | 3.4.0 | -| [QWen1.5-7B](models/nlp/llm/qwen1.5-7b/pytorch) | PyTorch | Firefly | school_math | 4.1.1 | -| [QWen1.5-14B](models/nlp/llm/qwen1.5-14b/pytorch) | PyTorch | Firefly | school_math | 4.1.1 | -| [Qwen2-7B](models/nlp/llm/qwen2-7b/verl) | PyTorch | verl | qwen2-7b | dev-only | -| [Qwen2.5-7B SFT](models/nlp/llm/qwen2.5-7b/pytorch) | PyTorch | LLaMA-Factory | qwen2.5-7b | 4.1.1 | -| [Qwen2.5-1.5B verl](models/nlp/llm/qwen2.5-1.5b/verl) | PyTorch | verl | qwen2.5-1.5b | 4.2.0 | -| [Qwen2.5-7B verl](models/nlp/llm/qwen2.5-7b/verl) | PyTorch | verl | qwen2.5-7b | 
4.2.0 | -| [Qwen2.5-3B](models/nlp/llm/qwen2.5-3b/pytorch) | PyTorch | ColossalAI | qwen2.5-3b | 4.3.0 | -| [Qwen2.5-VL-7B](models/nlp/llm/qwen2.5-vl-7b/verl) | PyTorch | verl | qwen2.5-vl-7b | dev-only | -| [Qwen3-8B](models/nlp/llm/qwen3-8b/verl) | PyTorch | verl | qwen3-8b | dev-only | -| [Yi-6B](models/nlp/llm/yi-6b/pytorch) | PyTorch | DeepSpeed | Yi-6B | 4.2.0 | -| [Yi-1.5-6B](models/nlp/llm/yi-1.5-6b/pytorch) | PyTorch | DeepSpeed | Yi-1.5-6B | 4.2.0 | -| [Yi-VL-6B](models/nlp/llm/yi-vl-6b/pytorch) | PyTorch | LLaMA-Factory | Yi-VL-6B-hf | 4.2.0 | +| Model | Framework | Dataset/Weight | IXUCA SDK | +|-------------------------------------------------------|--------------------|------------------------|-----------| +| [Aquila2-34B](models/nlp/llm/aquila2-34b/pytorch) | Megatron-DeepSpeed | Bookcorpus | 3.4.0 | +| [Baichuan2-7B](models/nlp/llm/baichuan2-7b/pytorch) | DeepSpeed | baichuan2-7b-base | 3.4.0 | +| [Bloom-7B1](models/nlp/llm/bloom-7b1/pytorch) | Firefly | school_math_0.25M | 3.4.0 | +| [ChatGLM-6B](models/nlp/llm/chatglm-6b/pytorch) | DeepSpeed | ADGEN & chatglm-6b | 3.1.0 | +| [ChatGLM2-6B SFT](models/nlp/llm/chatglm2-6b-sft/pytorch) | DeepSpeed | ADGEN & chatglm2-6b | 3.4.0 | +| [ChatGLM3-6B](models/nlp/llm/chatglm3-6b/pytorch) | DeepSpeed | ADGEN & chatglm3-6b | 4.1.1 | +| [DeepSeekMoE 7B](models/nlp/llm/deepseek_moe_7b/pytorch) | ColossalAI | deepseek-moe-16b-base | 4.1.1 | +| [DeepSeek-LLM-7B](models/nlp/llm/deepseek-llm-7b/verl) | verl | deepseek-llm-7b-chat | dev-only | +| [GLM-4](models/nlp/llm/glm-4/pytorch) | Torchrun | glm-4-9b-chat | 4.2.0 | +| [Gemma-2-2B-IT](models/nlp/llm/gemma-2-2b-it/verl) | verl | gemma-2-2b-it | dev-only | +| [Llama-7B](models/nlp/llm/llama-7b/pytorch) | ColossalAI | llama-7b-hf | 3.1.0 | +| [Llama2-7B](models/nlp/llm/llama2-7b/pytorch) | Megatron-DeepSpeed | Bookcorpus | 3.1.0 | +| [Llama2-7B RMF](models/nlp/llm/llama2-7b_reward_sft/pytorch) | DeepSpeed | Dahoas/rm-static | 3.1.1 | +| [Llama2-7B 
RLHF](models/nlp/llm/llama2-7b_rlhf/pytorch) | Megatron-DeepSpeed | llama2-7b&tiny-llama | 3.4.0 | +| [Llama2-7B SFT](models/nlp/llm/llama2-7b_sft/pytorch) | Megatron-DeepSpeed | GPT Small-117M | 3.1.1 | +| [Llama2-13B](models/nlp/llm/llama2-13b/pytorch) | Megatron-DeepSpeed | Bookcorpus | 3.4.0 | +| [Llama2-34B](models/nlp/llm/llama2-34b/pytorch) | Megatron-DeepSpeed | Bookcorpus | 3.4.0 | +| [Llama3-8B](models/nlp/llm/llama3_8b/pytorch) | Megatron-DeepSpeed | Bookcorpus | 4.1.1 | +| [Llama3-8B](models/nlp/llm/llama3_8b/megatron-lm) | Megatron-LM | GPT Small-117M | 4.3.0 | +| [Llama3-8B SFT](models/nlp/llm/llama3_8b_sft/pytorch) | ColossalAI | school_math_0.25M | 4.1.1 | +| [Llama3-8B SFT](models/nlp/llm/llama3_8b/openrlhf) | OpenRLHF | Meta-Llama-3-8B | 4.3.0 | +| [Llama3-8B PPO](models/nlp/llm/llama3_8b/openrlhf) | OpenRLHF | Llama-3-8b-sft-mixture | 4.2.0 | +| [Llama3-8B DPO](models/nlp/llm/llama3_8b/openrlhf) | OpenRLHF | Llama-3-8b-sft-mixture | 4.2.0 | +| [Llama3-8B KTO](models/nlp/llm/llama3_8b/openrlhf) | OpenRLHF | Llama-3-8b-sft-mixture | 4.2.0 | +| [Llama3-8B DPO](models/nlp/llm/llama3_8b/llamafactory) | LLaMA-Factory | Meta-Llama-3-8B-Instruct | 4.3.0 | +| [Llama3-8B Full SFT](models/nlp/llm/llama3_8b/llamafactory) | LLaMA-Factory | Meta-Llama-3-8B-Instruct | 4.3.0 | +| [Llama3-8B KTO](models/nlp/llm/llama3_8b/llamafactory) | LLaMA-Factory | Meta-Llama-3-8B-Instruct | 4.3.0 | +| [Llama3-8B Pretrain](models/nlp/llm/llama3_8b/llamafactory) | LLaMA-Factory | Meta-Llama-3-8B-Instruct | 4.3.0 | +| [Llama3-8B Reward](models/nlp/llm/llama3_8b/llamafactory) | LLaMA-Factory | Meta-Llama-3-8B-Instruct | 4.3.0 | +| [Llama3-8B SFT](models/nlp/llm/llama3_8b/llamafactory) | LLaMA-Factory | Meta-Llama-3-8B-Instruct | 4.3.0 | +| [Mamba-2](models/nlp/llm/mamba-2/pytorch) | Megatron-LM | GPT Small-117M | 4.1.1 | +| [MiniCPM](models/nlp/llm/minicpm/pytorch) | DeepSpeed | MiniCPM-2B-sft-bf16 | 4.2.0 | +| [Mixtral 8x7B](models/nlp/llm/mixtral/pytorch) | Megatron-LM | GPT 
Small-117M | 4.1.1 | +| [Mixtral 8x7B](models/nlp/llm/mixtral/openrlhf) | OpenRLHF | Mixtral-8x7B-v0.1 | 4.3.0 | +| [Phi-3](models/nlp/llm/phi-3/pytorch) | Torchrun | Phi-3-mini-4k-instruct | 4.2.0 | +| [QWen-7B](models/nlp/llm/qwen-7b/pytorch) | Firefly | qwen-7b | 3.4.0 | +| [QWen1.5-7B](models/nlp/llm/qwen1.5-7b/pytorch) | Firefly | school_math | 4.1.1 | +| [QWen1.5-14B](models/nlp/llm/qwen1.5-14b/pytorch) | Firefly | school_math | 4.1.1 | +| [Qwen2-7B](models/nlp/llm/qwen2-7b/verl) | verl | qwen2-7b | dev-only | +| [Qwen2.5-7B SFT](models/nlp/llm/qwen2.5-7b/pytorch) | LLaMA-Factory | qwen2.5-7b | 4.1.1 | +| [Qwen2.5-1.5B verl](models/nlp/llm/qwen2.5-1.5b/verl) | verl | qwen2.5-1.5b | 4.2.0 | +| [Qwen2.5-7B verl](models/nlp/llm/qwen2.5-7b/verl) | verl | qwen2.5-7b | 4.2.0 | +| [Qwen2.5-3B](models/nlp/llm/qwen2.5-3b/pytorch) | ColossalAI | qwen2.5-3b | 4.3.0 | +| [Qwen2.5-VL-7B](models/nlp/llm/qwen2.5-vl-7b/verl) | verl | qwen2.5-vl-7b | dev-only | +| [Qwen2.5-VL-7B DPO](models/nlp/llm/qwen2.5-vl-7b/llamafactory) | LLaMA-Factory | Qwen2.5-VL-7B-Instruct | 4.3.0 | +| [Qwen2.5-VL-7B SFT](models/nlp/llm/qwen2.5-vl-7b/llamafactory) | LLaMA-Factory | Qwen2.5-VL-7B-Instruct | 4.3.0 | +| [Qwen3-8B](models/nlp/llm/qwen3-8b/verl) | verl | qwen3-8b | dev-only | +| [Yi-6B](models/nlp/llm/yi-6b/pytorch) | DeepSpeed | Yi-6B | 4.2.0 | +| [Yi-1.5-6B](models/nlp/llm/yi-1.5-6b/pytorch) | DeepSpeed | Yi-1.5-6B | 4.2.0 | +| [Yi-VL-6B](models/nlp/llm/yi-vl-6b/pytorch) | LLaMA-Factory | Yi-VL-6B-hf | 4.2.0 | ### Computer Vision @@ -410,6 +418,7 @@ individuals, healthcare, education, communication, energy, and more. 
|------------------------------------------------------------------|-------------------|--------------------|-------| | [BART](models/nlp/language_model/bart_fairseq/pytorch) | PyTorch (Fairseq) | RTE | 3.0.0 | | [BERT NER](models/nlp/ner/bert/pytorch) | PyTorch | CoNLL-2003 | 3.0.0 | +| [BERT Pretraining](models/nlp/language_model/bert_sample/pytorch) | PyTorch | bert_mini | 4.3.0 | | [BERT Pretraining](models/nlp/language_model/bert/pytorch) | PyTorch | MLCommon Wikipedia | 2.2.0 | | [BERT Pretraining](models/nlp/language_model/bert/paddlepaddle) | PaddlePaddle | MNLI | 2.3.0 | | [BERT Pretraining](models/nlp/language_model/bert/tensorflow) | TensorFlow | MNLI | 3.0.0 | @@ -449,6 +458,7 @@ individuals, healthcare, education, communication, energy, and more. | Model | Framework | Dataset | IXUCA SDK | |-----------------------------------------------------------------------------------------|-----------------|----------|-------| | [Conformer](models/audio/speech_recognition/conformer_wenet/pytorch) | PyTorch (WeNet) | AISHELL | 2.2.0 | +| [Conformer](models/audio/speech_recognition/conformer/pytorch) | PyTorch | LibriSpeech | 4.3.0 | | [Efficient Conformer v2](models/audio/speech_recognition/efficient_conformer_v2_wenet/pytorch) | PyTorch (WeNet) | AISHELL | 3.1.0 | | [PP-ASR-Conformer](models/audio/speech_recognition/conformer/paddlepaddle) | PaddlePaddle | AISHELL | 3.1.0 | | [RNN-T](models/audio/speech_recognition/rnnt/pytorch) | PyTorch | LJSpeech | 2.2.0 | diff --git a/models/nlp/llm/llama3_8b/llamafactory/README.md b/models/nlp/llm/llama3_8b/llamafactory/README.md new file mode 100644 index 000000000..a69817f0f --- /dev/null +++ b/models/nlp/llm/llama3_8b/llamafactory/README.md @@ -0,0 +1,64 @@ +# Llama3-8B (LLaMA-Factory) + +## Model Description + +Llama3-8B is an advanced auto-regressive language model developed by Meta, featuring 8 billion parameters. 
It utilizes +an optimized transformer architecture with Grouped-Query Attention (GQA) for improved inference efficiency. Trained on +sequences of 8,192 tokens and using a 128K token vocabulary, it excels in various natural language tasks. The model +incorporates supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human +preferences, ensuring both helpfulness and safety in its responses. Llama3-8B offers state-of-the-art performance in +language understanding and generation. + +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +| :----: | :----: | :----: | +| BI-V150 | 4.3.0 | 25.12 | + +## Model Preparation + +### Prepare Resources + +```sh +git clone https://github.com/hiyouga/LLaMA-Factory.git +cd LLaMA-Factory/ +git checkout 8173a88a26a1cfe78738a826047d1ef923cd4ea3 +mkdir -p meta-llama +# download https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct into meta-llama folder +``` + +### Install Dependencies + +Contact the Iluvatar administrator to get the missing packages: +- transformers-4.45.2+corex.4.3.0-py3-none-any.whl +- accelerate-0.34.2+corex.4.3.0-py3-none-any.whl + +```sh +pip install llamafactory==0.9.2 +pip install peft==0.11.1 +``` + +## Model Training + +```sh +# please set val_size with 0.01 in yaml to disable eval +# dpo +llamafactory-cli train examples/train_lora/llama3_lora_dpo.yaml +# kto +llamafactory-cli train examples/train_lora/llama3_lora_kto.yaml +# pretrain +llamafactory-cli train examples/train_lora/llama3_lora_pretrain.yaml +# reward +llamafactory-cli train examples/train_lora/llama3_lora_reward.yaml +# sft +llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml +# full sft +pip install transformers==4.49.0 +llamafactory-cli train examples/train_full/llama3_full_sft.yaml +``` + +## Model Results + +## References + +- 
[LLaMA-Factory](https://github.com/hiyouga/LLaMA-Factory.git)
diff --git a/models/nlp/llm/qwen2.5-3b/pytorch/README.md b/models/nlp/llm/qwen2.5-3b/pytorch/README.md
index f33f70047..0a44d747c 100644
--- a/models/nlp/llm/qwen2.5-3b/pytorch/README.md
+++ b/models/nlp/llm/qwen2.5-3b/pytorch/README.md
@@ -26,13 +26,14 @@ cd applications/ColossalChat/examples/
 # get qwen2.5-3b from https://huggingface.co/Qwen/Qwen2.5-3B and put it in checkpoints/Qwen2.5-3B
 mkdir -p checkpoints
 # get qwedsacf/competition_math dataset and put it in datasets/competition_math/data/train-00000-of-00001-7320a6f3aba8ebd2.parquet
-mkdir -p datasets/competition_math/data
-mkdir -p datasets/competition_math/sft
+mkdir -p dataset/competition_math/data
+mkdir -p dataset/competition_math/sft
 ```
 
 ### Install Dependencies
 
 ```sh
+pip install wheel
 pip install transformers==4.39.3
 
 pip install http://files.deepspark.org.cn:880/deepspark/add-ons/bitsandbytes-0.43.3+corex.4.3.0-cp310-cp310-linux_x86_64.whl
diff --git a/models/nlp/llm/qwen2.5-vl-7b/llamafactory/README.md b/models/nlp/llm/qwen2.5-vl-7b/llamafactory/README.md
new file mode 100644
index 000000000..cde45082a
--- /dev/null
+++ b/models/nlp/llm/qwen2.5-vl-7b/llamafactory/README.md
@@ -0,0 +1,55 @@
+# Qwen2.5-VL-7B (LLaMA-Factory)
+
+## Model Description
+
+Qwen2.5-VL is not only proficient in recognizing common objects such as flowers, birds, fish, and insects, but it is highly capable of analyzing texts, charts, icons, graphics, and layouts within images.
+Directly plays as a visual agent that can reason and dynamically direct tools, which is capable of computer use and phone use. Can comprehend videos of over 1 hour, and this time it has a new ability of capturing events by pinpointing the relevant video segments. Can accurately localize objects in an image by generating bounding boxes or points, and it can provide stable JSON outputs for coordinates and attributes. For data like scans of invoices, forms, tables, etc. 
Qwen2.5-VL supports structured outputs of their contents, benefiting usages in finance, commerce, etc.
+
+## Supported Environments
+
+| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release |
+| :----: | :----: | :----: |
+| BI-V150 | 4.3.0 | 25.12 |
+
+## Model Preparation
+
+### Prepare Resources
+
+```sh
+git clone https://github.com/hiyouga/LLaMA-Factory.git
+cd LLaMA-Factory/
+git checkout 8173a88a26a1cfe78738a826047d1ef923cd4ea3
+mkdir -p Qwen
+# download https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct into Qwen folder
+mkdir -p llamafactory
+# download https://huggingface.co/datasets/llamafactory/RLHF-V into llamafactory folder
+```
+
+### Install Dependencies
+
+Contact the Iluvatar administrator to get the missing packages:
+- accelerate-0.34.2+corex.4.3.0-py3-none-any.whl
+
+```sh
+pip install llamafactory==0.9.2
+pip install peft==0.11.1
+pip install transformers==4.49.0
+```
+
+## Model Training
+
+```bash
+# please set val_size with 0.01 in yaml to disable eval
+# dpo
+llamafactory-cli train examples/train_lora/qwen2_5vl_lora_dpo.yaml
+# sft
+pip install transformers==4.49.0
+llamafactory-cli train examples/train_lora/qwen2_5vl_lora_sft.yaml
+```
+
+## Model Results
+
+## References
+
+- [LLaMA-Factory](https://github.com/hiyouga/LLaMA-Factory.git)
+
diff --git a/tests/model_info.json b/tests/model_info.json
index 7795220a8..fa6bb7fe4 100644
--- a/tests/model_info.json
+++ b/tests/model_info.json
@@ -7583,6 +7583,102 @@
 "github_branch": "",
 "github_path": "",
 "priority": "P4"
+ },
+ {
+ "model_name": "llama3_8b",
+ "framework": "llamafactory",
+ "release_version": "25.12",
+ "release_sdk": "4.3.0",
+ "release_gpgpu": "BI-V150",
+ "latest_sdk": "",
+ "latest_gpgpu": "",
+ "category": "nlp/llm",
+ "toolbox": "LLaMA-Factory",
+ "mdims": "",
+ "dataset": "",
+ "license": "",
+ "model_path": "deepsparkhub/models/nlp/llm/llama3_8b/llamafactory/",
+ "readme_file": "",
+ "bitbucket_repo": "",
+ 
"bitbucket_branch": "",
+ "bitbucket_path": "",
+ "develop_owner": "",
+ "github_repo": "",
+ "github_branch": "",
+ "github_path": "",
+ "priority": "P4"
+ },
+ {
+ "model_name": "qwen2.5-vl-7b",
+ "framework": "llamafactory",
+ "release_version": "25.12",
+ "release_sdk": "4.3.0",
+ "release_gpgpu": "BI-V150",
+ "latest_sdk": "",
+ "latest_gpgpu": "",
+ "category": "nlp/llm",
+ "toolbox": "LLaMA-Factory",
+ "mdims": "",
+ "dataset": "",
+ "license": "",
+ "model_path": "deepsparkhub/models/nlp/llm/qwen2.5-vl-7b/llamafactory/",
+ "readme_file": "",
+ "bitbucket_repo": "",
+ "bitbucket_branch": "",
+ "bitbucket_path": "",
+ "develop_owner": "",
+ "github_repo": "",
+ "github_branch": "",
+ "github_path": "",
+ "priority": "P4"
+ },
+ {
+ "model_name": "bert_sample",
+ "framework": "pytorch",
+ "release_version": "25.12",
+ "release_sdk": "4.3.0",
+ "release_gpgpu": "BI-V150",
+ "latest_sdk": "",
+ "latest_gpgpu": "",
+ "category": "nlp/language_model",
+ "toolbox": "",
+ "mdims": "",
+ "dataset": "",
+ "license": "",
+ "model_path": "deepsparkhub/models/nlp/language_model/bert_sample/pytorch/",
+ "readme_file": "",
+ "bitbucket_repo": "",
+ "bitbucket_branch": "",
+ "bitbucket_path": "",
+ "develop_owner": "",
+ "github_repo": "",
+ "github_branch": "",
+ "github_path": "",
+ "priority": "P0"
+ },
+ {
+ "model_name": "conformer",
+ "framework": "pytorch",
+ "release_version": "25.12",
+ "release_sdk": "4.3.0",
+ "release_gpgpu": "BI-V150",
+ "latest_sdk": "",
+ "latest_gpgpu": "",
+ "category": "audio/speech_recognition",
+ "toolbox": "",
+ "mdims": "",
+ "dataset": "",
+ "license": "",
+ "model_path": "deepsparkhub/models/audio/speech_recognition/conformer/pytorch/",
+ "readme_file": "",
+ "bitbucket_repo": "",
+ "bitbucket_branch": "",
+ "bitbucket_path": "",
+ "develop_owner": "",
+ "github_repo": "",
+ "github_branch": "",
+ "github_path": "",
+ "priority": "P0"
+ }
 ]
} \ No newline at end of file
-- Gitee