From 215fb82ef8b92f74098959c198c51d2f59930e90 Mon Sep 17 00:00:00 2001 From: armstrongttwalker-alt Date: Sat, 23 May 2026 06:24:24 +0000 Subject: [PATCH] Auto-update ModelScope documentation [$(TZ='Asia/Shanghai' date +'%Y-%m-%d %H:%M')] --- docs/flagrelease_en/model_list.txt | 12 +- .../FlagRelease_HY-MT2-1.8B-ascend-FlagOS.md | 134 +++++++++++++++++ .../FlagRelease_HY-MT2-1.8B-nvidia-FlagOS.md | 123 ++++++++++++++++ .../FlagRelease_HY-MT2-1.8B-zhenwu-FlagOS.md | 119 +++++++++++++++ ...lagRelease_HY-MT2-30B-A3B-ascend-FlagOS.md | 122 ++++++++++++++++ ...lagRelease_HY-MT2-30B-A3B-nvidia-FlagOS.md | 124 ++++++++++++++++ ...lagRelease_HY-MT2-30B-A3B-zhenwu-FlagOS.md | 119 +++++++++++++++ .../FlagRelease_HY-MT2-7B-ascend-FlagOS.md | 136 ++++++++++++++++++ .../FlagRelease_HY-MT2-7B-nvidia-FlagOS.md | 123 ++++++++++++++++ .../FlagRelease_HY-MT2-7B-zhenwu-FlagOS.md | 119 +++++++++++++++ ...i-Linear-48B-A3B-Instruct-nvidia-FlagOS.md | 127 ++++++++++++++++ .../FlagRelease_MiniMax-M2.7-hygon-FlagOS.md | 38 ++++- ..._Qwen3.6-35B-A3B-nomtp-kunlunxin-FlagOS.md | 3 +- ..._TeleChat3-36B-Thinking-mthreads-FlagOS.md | 2 +- 14 files changed, 1293 insertions(+), 8 deletions(-) create mode 100644 docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-1.8B-ascend-FlagOS.md create mode 100644 docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-1.8B-nvidia-FlagOS.md create mode 100644 docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-1.8B-zhenwu-FlagOS.md create mode 100644 docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-30B-A3B-ascend-FlagOS.md create mode 100644 docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-30B-A3B-nvidia-FlagOS.md create mode 100644 docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-30B-A3B-zhenwu-FlagOS.md create mode 100644 docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-7B-ascend-FlagOS.md create mode 100644 docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-7B-nvidia-FlagOS.md create mode 100644 
docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-7B-zhenwu-FlagOS.md create mode 100644 docs/flagrelease_en/model_readmes/FlagRelease_Kimi-Linear-48B-A3B-Instruct-nvidia-FlagOS.md diff --git a/docs/flagrelease_en/model_list.txt b/docs/flagrelease_en/model_list.txt index a57e7d5..9ca1bb6 100644 --- a/docs/flagrelease_en/model_list.txt +++ b/docs/flagrelease_en/model_list.txt @@ -27,9 +27,19 @@ FlagRelease/Emu3.5-FlagOS FlagRelease/GLM-4.5-FlagOS FlagRelease/GLM-5-FP8-FlagOS FlagRelease/GLM-5-ascend-FlagOS +FlagRelease/HY-MT2-1.8B-ascend-FlagOS +FlagRelease/HY-MT2-1.8B-nvidia-FlagOS +FlagRelease/HY-MT2-1.8B-zhenwu-FlagOS +FlagRelease/HY-MT2-30B-A3B-ascend-FlagOS +FlagRelease/HY-MT2-30B-A3B-nvidia-FlagOS +FlagRelease/HY-MT2-30B-A3B-zhenwu-FlagOS +FlagRelease/HY-MT2-7B-ascend-FlagOS +FlagRelease/HY-MT2-7B-nvidia-FlagOS +FlagRelease/HY-MT2-7B-zhenwu-FlagOS FlagRelease/Hunyuan-A13B-Instruct-FlagOS FlagRelease/Kimi-K2-Instruct-FlagOS FlagRelease/Kimi-K2-Thinking-FlagOS +FlagRelease/Kimi-Linear-48B-A3B-Instruct-nvidia-FlagOS FlagRelease/MiniCPM-V-4-FlagOS FlagRelease/MiniCPM-V-4-metax-FlagOS FlagRelease/MiniCPM-o-4.5-ascend-FlagOS @@ -105,8 +115,6 @@ FlagRelease/RoboBrain2.5-8B-FlagOS FlagRelease/RoboBrain2.5-8B-ascend-FlagOS FlagRelease/Seed-OSS-36B-Instruct-FlagOS FlagRelease/TeleChat3-36B-Thinking-mthreads-FlagOS -FlagRelease/gemma-3-1b-it-FlagOS -FlagRelease/gemma-3-1b-it-plugin-FlagOS FlagRelease/gpt-oss-120b-FlagOS FlagRelease/grok-2-FlagOS FlagRelease/phi-4-FlagOS diff --git a/docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-1.8B-ascend-FlagOS.md b/docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-1.8B-ascend-FlagOS.md new file mode 100644 index 0000000..7478009 --- /dev/null +++ b/docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-1.8B-ascend-FlagOS.md @@ -0,0 +1,134 @@ +--- +base_model: +- "" +--- +# Introduction +Hy-MT2 is a multilingual translation model series open-sourced by Tencent Hunyuan. 
It includes three sizes — Hy-MT2-1.8B, Hy-MT2-7B, and Hy-MT2-30B-A3B — all supporting translation across 33 languages and 5 Chinese ethnic minority / dialect translation pairs. The 30B-A3B uses a MoE architecture (30B total parameters / 3B activated), while the 1.8B and 7B are dense models. Compared to the previous generation Hy-MT1.5, MT2 brings improvements in domain-specific translation, instruction following, and on-device deployment: + +The 7B and 30B-A3B achieve 96.9% and 98.1% of Gemini 2.5 Pro's performance respectively on the FLORES-200 general translation benchmark, surpassing open-source models such as DeepSeek-V4-Pro and Kimi K2.6; the 1.8B outperforms leading commercial translation APIs overall. +The 30B-A3B achieves a GEMBA score of 99.0% of Gemini 2.5 Pro's on the DomainMTBench benchmark across vertical domains including finance, politics, and education. +Supports translation instructions such as glossary/terminology control, style transformation, and structured output (HTML/JSON), with instruction-following capability exceeding open-source models of the same size. +The 1.8B offers a 1.25-bit quantized version based on the Sherry framework, requiring only ~440 MB of storage, enabling local inference on mobile chips from Apple, Qualcomm, MediaTek, and others. + +### Integrated Deployment +- Out-of-the-box inference scripts with pre-configured hardware and software parameters +- Released **FlagOS-Ascend** container image supporting deployment within minutes +### Consistency Validation +- Rigorously evaluated through benchmark testing: Performance and results from the FlagOS software stack are compared against native stacks on multiple public benchmarks. 
+ + +# Evaluation Results +## Benchmark Result +| Metrics(chrf) | Hy-MT2-1.8B-Nvidia-Origin | Hy-MT2-1.8B-Ascend-FlagOS | +|--------------|----------------------------------|----------------------------------| +| flores_ca | 45.32 | 45.33 | +| wmt16 | 57.2 | 60.2 | + +# User Guide +Environment Setup + +| Item | Version | +|------------------|----------------------| +| Docker Version | 29.4.3 | +| Operating System | Ubuntu 24.04.4 LTS (Noble Numbat) | + +## Operation Steps + +### Download FlagOS Image +```bash +docker pull harbor.baai.ac.cn/flagrelease-public/flagrelease-hy-mt2-1.8b-ascend-tree_none-gems_5.0.2-vllm_0.18.0_empty-plugin_0.1.1_vllm0.13.0.g6c344a5e1.d20260514-cx_0.10.0-python_3.11.14-torch_npu_2.9.0.post1_gitee7ba04-pcp_cann8.5.1-gpu_ascend001-arc_arm64-driver_25.2.3:202605201117 +``` + +### Download Open-source Model Weights +```bash +pip install modelscope +modelscope download --model FlagRelease/HY-MT2-1.8B-ascend-FlagOS --local_dir /data/HY-MT2-1.8B +``` + +### Start the Container +```bash +docker run -d --name flagos \ + --device /dev/davinci5 \ + --device /dev/davinci_manager \ + --device /dev/devmm_svm \ + --device /dev/hisi_hdc \ + -v /usr/local/dcmi:/usr/local/dcmi \ + -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \ + -v /usr/local/Ascend/driver/lib64:/usr/local/Ascend/driver/lib64 \ + -v /data:/data \ + -p 8000:8000 \ + harbor.baai.ac.cn/flagrelease-public/flagrelease-hy-mt2-1.8b-ascend-tree_none-gems_5.0.2-vllm_0.18.0_empty-plugin_0.1.1_vllm0.13.0.g6c344a5e1.d20260514-cx_0.10.0-python_3.11.14-torch_npu_2.9.0.post1_gitee7ba04-pcp_cann8.5.1-gpu_ascend001-arc_arm64-driver_25.2.3:202605201117 + +docker exec -it flagos /bin/bash +``` +### Start the Server +```bash +export TORCH_DEVICE_BACKEND_AUTOLOAD=0 +export VLLM_FL_PREFER_ENABLED=False +export ASCEND_RT_VISIBLE_DEVICES=0 +vllm serve /data/HY-MT2-1.8B --dtype bfloat16 --enforce-eager --port 8000 --served-model-name hy1.8b-ascend-flagos + +``` + +## Service Invocation +### Invocation Script 
+```bash +curl http://localhost:8000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "hy1.8b-ascend-flagos", + "messages": [{"role": "user", "content": "你好"}] + }' +``` + + +### AnythingLLM Integration Guide + +#### 1. Download & Install + +- Visit the official site: https://anythingllm.com/ +- Choose the appropriate version for your OS (Windows/macOS/Linux) +- Follow the installation wizard to complete the setup + +#### 2. Configuration + +- Launch AnythingLLM +- Open settings (bottom left, fourth tab) +- Configure core LLM parameters +- Click "Save Settings" to apply changes + +#### 3. Model Interaction + +- After model loading is complete: +- Click **"New Conversation"** +- Enter your question (e.g., "Explain the basics of quantum computing") +- Click the send button to get a response +# Technical Overview +**FlagOS** is a fully open-source system software stack designed to unify the "model–system–chip" layers and foster an open, collaborative ecosystem. It enables a "develop once, run anywhere" workflow across diverse AI accelerators, unlocking hardware performance, eliminating fragmentation among vendor-specific software stacks, and substantially lowering the cost of porting and maintaining AI workloads. With core technologies such as the **FlagScale**, together with vllm-plugin-fl, distributed training/inference framework, **FlagGems** universal operator library, **FlagCX** communication library, and **FlagTree** unified compiler, the **FlagRelease** platform leverages the **FlagOS** stack to automatically produce and release various combinations of \<chip + open-source model\>. This enables efficient and automated model migration across diverse chips, opening a new chapter for large model deployment and application. +## FlagGems +FlagGems is a high-performance, generic operator library implemented in [Triton](https://github.com/openai/triton) language. 
It is built on a collection of backend-neutral kernels that aims to accelerate LLM (Large-Language Models) training and inference across diverse hardware platforms. +## FlagTree +FlagTree is an open source, unified compiler for multiple AI chips project dedicated to developing a diverse ecosystem of AI chip compilers and related tooling platforms, thereby fostering and strengthening the upstream and downstream Triton ecosystem. Currently in its initial phase, the project aims to maintain compatibility with existing adaptation solutions while unifying the codebase to rapidly implement single-repository multi-backend support. For upstream model users, it provides unified compilation capabilities across multiple backends; for downstream chip manufacturers, it offers examples of Triton ecosystem integration. +## FlagScale and vllm-plugin-fl +Flagscale is a comprehensive toolkit designed to support the entire lifecycle of large models. It builds on the strengths of several prominent open-source projects, including [Megatron-LM](https://github.com/NVIDIA/Megatron-LM) and [vLLM](https://github.com/vllm-project/vllm), to provide a robust, end-to-end solution for managing and scaling large models. +vllm-plugin-fl is a vLLM plugin built on the FlagOS unified multi-chip backend, to help flagscale support multi-chip on vllm framework. +## **FlagCX** +FlagCX is a scalable and adaptive cross-chip communication library. It serves as a platform where developers, researchers, and AI engineers can collaborate on various projects, contribute to the development of cutting-edge AI solutions, and share their work with the global community. + +## **FlagEval Evaluation Framework** + FlagEval is a comprehensive evaluation system and open platform for large models launched in 2023. It aims to establish scientific, fair, and open benchmarks, methodologies, and tools to help researchers assess model and training algorithm performance. 
It features: + - **Multi-dimensional Evaluation**: Supports 800+ model evaluations across NLP, CV, Audio, and Multimodal fields, covering 20+ downstream tasks including language understanding and image-text generation. + - **Industry-Grade Use Cases**: Has completed horizontal evaluations of mainstream large models, providing authoritative benchmarks for chip-model performance validation. + +# Contributing + +We warmly welcome global developers to join us: + +1. Submit Issues to report problems +2. Create Pull Requests to contribute code +3. Improve technical documentation +4. Expand hardware adaptation support +# License +The model weights are derived from Tencent-Hunyuan/HY-MT2-1.8B and are open‑sourced under the Apache License 2.0: https://www.apache.org/licenses/LICENSE-2.0.txt + + diff --git a/docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-1.8B-nvidia-FlagOS.md b/docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-1.8B-nvidia-FlagOS.md new file mode 100644 index 0000000..74d0e55 --- /dev/null +++ b/docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-1.8B-nvidia-FlagOS.md @@ -0,0 +1,123 @@ +--- +base_model: +- "" +--- +# Introduction +Hy-MT2 is a multilingual translation model series open-sourced by Tencent Hunyuan. It includes three sizes — Hy-MT2-1.8B, Hy-MT2-7B, and Hy-MT2-30B-A3B — all supporting translation across 33 languages and 5 Chinese ethnic minority / dialect translation pairs. The 30B-A3B uses a MoE architecture (30B total parameters / 3B activated), while the 1.8B and 7B are dense models. Compared to the previous generation Hy-MT1.5, MT2 brings improvements in domain-specific translation, instruction following, and on-device deployment: + +The 7B and 30B-A3B achieve 96.9% and 98.1% of Gemini 2.5 Pro's performance respectively on the FLORES-200 general translation benchmark, surpassing open-source models such as DeepSeek-V4-Pro and Kimi K2.6; the 1.8B outperforms leading commercial translation APIs overall. 
+The 30B-A3B achieves a GEMBA score of 99.0% of Gemini 2.5 Pro's on the DomainMTBench benchmark across vertical domains including finance, politics, and education. +Supports translation instructions such as glossary/terminology control, style transformation, and structured output (HTML/JSON), with instruction-following capability exceeding open-source models of the same size. +The 1.8B offers a 1.25-bit quantized version based on the Sherry framework, requiring only ~440 MB of storage, enabling local inference on mobile chips from Apple, Qualcomm, MediaTek, and others. + +### Integrated Deployment +- Out-of-the-box inference scripts with pre-configured hardware and software parameters +- Released **FlagOS-Nvidia** container image supporting deployment within minutes +### Consistency Validation +- Rigorously evaluated through benchmark testing: Performance and results from the FlagOS software stack are compared against native stacks on multiple public. + + +# Evaluation Results +## Benchmark Result +| Metrics(chrf) | HY-MT2-1.8B-Nvidia-Origin | HY-MT2-1.8B-Nvidia-FlagOS | +|--------------|---------------------------|---------------------------| +| flores_ca | 45.32 | 45.32 | +| wmt16 | 57.2 | 57.22 | + +# User Guide +Environment Setup + +| Item | Version | +|------------------|----------------------| +| Docker Version | 29.4.3 | +| Operating System | Ubuntu 24.04.4 LTS (Noble Numbat) | + +## Operation Steps + +### Download FlagOS Image +```bash +docker pull harbor.baai.ac.cn/flagrelease-public/flagrelease-hy-mt2-1.8b-nvidia-tree_0.5.0_3.5-gems_5.0.1rc0-vllm_0.20.2-plugin_0.0.0-cx_none-python_3.12.3-torch_2.11.0-pcp_cuda13.2-gpu_nvidia003-arc_amd64-driver_570.158.01:202605191822 +``` + +### Download Open-source Model Weights +```bash +pip install modelscope +modelscope download --model FlagRelease/HY-MT2-1.8B-nvidia-FlagOS --local_dir /data/HY-MT2-1.8B +``` + +### Start the Container +```bash +docker run --init --detach --net=host --uts=host --ipc=host 
--security-opt=seccomp=unconfined --privileged=true --ulimit stack=67108864 --ulimit memlock=-1 --ulimit nofile=1048576:1048576 --shm-size=32G -v /data:/data --gpus all --name flagos harbor.baai.ac.cn/flagrelease-public/flagrelease-hy-mt2-1.8b-nvidia-tree_0.5.0_3.5-gems_5.0.1rc0-vllm_0.20.2-plugin_0.0.0-cx_none-python_3.12.3-torch_2.11.0-pcp_cuda13.2-gpu_nvidia003-arc_amd64-driver_570.158.01:202605191822 sleep infinity +docker exec -it flagos /bin/bash +``` +### Start the Server +```bash +vllm serve /data/HY-MT2-1.8B \ +--host 0.0.0.0 --port 8000 \ +--tensor-parallel-size 1 \ +--served-model-name flagOS \ +--enforce-eager +``` + +## Service Invocation +### Invocation Script +```bash +curl http://localhost:8000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "flagOS", + "messages": [{"role": "user", "content": "你好"}] + }' +``` + + +### AnythingLLM Integration Guide + +#### 1. Download & Install + +- Visit the official site: https://anythingllm.com/ +- Choose the appropriate version for your OS (Windows/macOS/Linux) +- Follow the installation wizard to complete the setup + +#### 2. Configuration + +- Launch AnythingLLM +- Open settings (bottom left, fourth tab) +- Configure core LLM parameters +- Click "Save Settings" to apply changes + +#### 3. Model Interaction + +- After model loading is complete: +- Click **"New Conversation"** +- Enter your question (e.g., "Explain the basics of quantum computing") +- Click the send button to get a response +# Technical Overview +**FlagOS** is a fully open-source system software stack designed to unify the "model–system–chip" layers and foster an open, collaborative ecosystem. It enables a "develop once, run anywhere" workflow across diverse AI accelerators, unlocking hardware performance, eliminating fragmentation among vendor-specific software stacks, and substantially lowering the cost of porting and maintaining AI workloads. 
With core technologies such as the **FlagScale**, together with vllm-plugin-fl, distributed training/inference framework, **FlagGems** universal operator library, **FlagCX** communication library, and **FlagTree** unified compiler, the **FlagRelease** platform leverages the **FlagOS** stack to automatically produce and release various combinations of \. This enables efficient and automated model migration across diverse chips, opening a new chapter for large model deployment and application. +## FlagGems +FlagGems is a high-performance, generic operator library implemented in [Triton](https://github.com/openai/triton) language. It is built on a collection of backend-neutral kernels that aims to accelerate LLM (Large-Language Models) training and inference across diverse hardware platforms. +## FlagTree +FlagTree is an open source, unified compiler for multiple AI chips project dedicated to developing a diverse ecosystem of AI chip compilers and related tooling platforms, thereby fostering and strengthening the upstream and downstream Triton ecosystem. Currently in its initial phase, the project aims to maintain compatibility with existing adaptation solutions while unifying the codebase to rapidly implement single-repository multi-backend support. For upstream model users, it provides unified compilation capabilities across multiple backends; for downstream chip manufacturers, it offers examples of Triton ecosystem integration. +## FlagScale and vllm-plugin-fl +Flagscale is a comprehensive toolkit designed to support the entire lifecycle of large models. It builds on the strengths of several prominent open-source projects, including [Megatron-LM](https://github.com/NVIDIA/Megatron-LM) and [vLLM](https://github.com/vllm-project/vllm), to provide a robust, end-to-end solution for managing and scaling large models. +vllm-plugin-fl is a vLLM plugin built on the FlagOS unified multi-chip backend, to help flagscale support multi-chip on vllm framework. 
+## **FlagCX** +FlagCX is a scalable and adaptive cross-chip communication library. It serves as a platform where developers, researchers, and AI engineers can collaborate on various projects, contribute to the development of cutting-edge AI solutions, and share their work with the global community. + +## **FlagEval Evaluation Framework** + FlagEval is a comprehensive evaluation system and open platform for large models launched in 2023. It aims to establish scientific, fair, and open benchmarks, methodologies, and tools to help researchers assess model and training algorithm performance. It features: + - **Multi-dimensional Evaluation**: Supports 800+ model evaluations across NLP, CV, Audio, and Multimodal fields, covering 20+ downstream tasks including language understanding and image-text generation. + - **Industry-Grade Use Cases**: Has completed horizontal evaluations of mainstream large models, providing authoritative benchmarks for chip-model performance validation. + +# Contributing + +We warmly welcome global developers to join us: + +1. Submit Issues to report problems +2. Create Pull Requests to contribute code +3. Improve technical documentation +4. Expand hardware adaptation support +# License +The model weights are derived from Tencent-Hunyuan/HY-MT2-1.8B and are open‑sourced under the Apache License 2.0: https://www.apache.org/licenses/LICENSE-2.0.txt + + diff --git a/docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-1.8B-zhenwu-FlagOS.md b/docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-1.8B-zhenwu-FlagOS.md new file mode 100644 index 0000000..6dd9130 --- /dev/null +++ b/docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-1.8B-zhenwu-FlagOS.md @@ -0,0 +1,119 @@ +--- +base_model: +- "" +--- +# Introduction +Hy-MT2 is a multilingual translation model series open-sourced by Tencent Hunyuan. 
It includes three sizes — Hy-MT2-1.8B, Hy-MT2-7B, and Hy-MT2-30B-A3B — all supporting translation across 33 languages and 5 Chinese ethnic minority / dialect translation pairs. The 30B-A3B uses a MoE architecture (30B total parameters / 3B activated), while the 1.8B and 7B are dense models. Compared to the previous generation Hy-MT1.5, MT2 brings improvements in domain-specific translation, instruction following, and on-device deployment: + +The 7B and 30B-A3B achieve 96.9% and 98.1% of Gemini 2.5 Pro's performance respectively on the FLORES-200 general translation benchmark, surpassing open-source models such as DeepSeek-V4-Pro and Kimi K2.6; the 1.8B outperforms leading commercial translation APIs overall. +The 30B-A3B achieves a GEMBA score of 99.0% of Gemini 2.5 Pro's on the DomainMTBench benchmark across vertical domains including finance, politics, and education. +Supports translation instructions such as glossary/terminology control, style transformation, and structured output (HTML/JSON), with instruction-following capability exceeding open-source models of the same size. +The 1.8B offers a 1.25-bit quantized version based on the Sherry framework, requiring only ~440 MB of storage, enabling local inference on mobile chips from Apple, Qualcomm, MediaTek, and others. + +### Integrated Deployment +- Out-of-the-box inference scripts with pre-configured hardware and software parameters +- Released **FlagOS-Zhenwu** container image supporting deployment within minutes +### Consistency Validation +- Rigorously evaluated through benchmark testing: Performance and results from the FlagOS software stack are compared against native stacks on multiple public benchmarks. 
+ + +# Evaluation Results +## Benchmark Result +| Metrics(chrf) | HY-MT2-1.8B-Nvidia-Origin | HY-MT2-1.8B-Zhenwu-FlagOS | +|--------------|--------------------------------|--------------------------------------| +| flores_ca | 45.32 | 45.3192 | +| wmt16 | 57.2 | 57.1791 | + +# User Guide +Environment Setup + +| Item | Version | +|------------------|----------------------| +| Docker Version | Docker version 28.1.0, build 4d8c241 | +| Operating System | Ubuntu 24.04.2 LTS | + +## Operation Steps +This model requires 1 machine with 16 GPUs. Please follow this link to apply for 1 machine resource. link:https://help.aliyun.com/zh/pai/user-guide + +### Download FlagOS Image +```bash +docker pull harbor.baai.ac.cn/flagrelease-public/flagrelease-hy-mt2-1.8b-zhenwu-tree_none-gems_5.0.1rc0-vllm_0.13.1.dev0_g72506c983.d20260218-plugin_0.1.0_vllm0.13.0-cx_none-python_3.12.3-torch_2.9.0-pcp_hggc13.0-gpu_pp001-arc_amd64-driver_1.3.2-d7f5a2:202605191318 +``` + +### Download Open-source Model Weights +```bash +pip install modelscope +modelscope download --model FlagRelease/HY-MT2-1.8B-zhenwu-FlagOS --local_dir /data/HY-MT2-1.8B +``` +### Start the Server +```bash +vllm serve /data/HY-MT2-1.8B \ +--trust-remote-code \ +--dtype bfloat16 \ +--enforce-eager \ +--port 8000 \ +--host 0.0.0.0 \ +--served-model-name hy_mt2 \ +--gpu-memory-utilization 0.85 +``` + +## Service Invocation +### Invocation Script +```bash +curl http://localhost:8000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "hy_mt2", + "messages": [{"role": "user", "content": "你好"}] + }' +``` + + +### AnythingLLM Integration Guide + +#### 1. Download & Install + +- Visit the official site: https://anythingllm.com/ +- Choose the appropriate version for your OS (Windows/macOS/Linux) +- Follow the installation wizard to complete the setup + +#### 2. 
Configuration + +- Launch AnythingLLM +- Open settings (bottom left, fourth tab) +- Configure core LLM parameters +- Click "Save Settings" to apply changes + +#### 3. Model Interaction + +- After model loading is complete: +- Click **"New Conversation"** +- Enter your question (e.g., “Explain the basics of quantum computing”) +- Click the send button to get a response +# Technical Overview +**FlagOS** is a fully open-source system software stack designed to unify the "model–system–chip" layers and foster an open, collaborative ecosystem. It enables a “develop once, run anywhere” workflow across diverse AI accelerators, unlocking hardware performance, eliminating fragmentation among vendor-specific software stacks, and substantially lowering the cost of porting and maintaining AI workloads. With core technologies such as the **FlagScale**, together with vllm-plugin-fl, distributed training/inference framework, **FlagGems** universal operator library, **FlagCX** communication library, and **FlagTree** unified compiler, the **FlagRelease** platform leverages the **FlagOS** stack to automatically produce and release various combinations of \<chip + open-source model\>. This enables efficient and automated model migration across diverse chips, opening a new chapter for large model deployment and application. +## FlagGems +FlagGems is a high-performance, generic operator library implemented in [Triton](https://github.com/openai/triton) language. It is built on a collection of backend-neutral kernels that aims to accelerate LLM (Large-Language Models) training and inference across diverse hardware platforms. +## FlagTree +FlagTree is an open source, unified compiler for multiple AI chips project dedicated to developing a diverse ecosystem of AI chip compilers and related tooling platforms, thereby fostering and strengthening the upstream and downstream Triton ecosystem. 
Currently in its initial phase, the project aims to maintain compatibility with existing adaptation solutions while unifying the codebase to rapidly implement single-repository multi-backend support. For upstream model users, it provides unified compilation capabilities across multiple backends; for downstream chip manufacturers, it offers examples of Triton ecosystem integration. +## FlagScale and vllm-plugin-fl +Flagscale is a comprehensive toolkit designed to support the entire lifecycle of large models. It builds on the strengths of several prominent open-source projects, including [Megatron-LM](https://github.com/NVIDIA/Megatron-LM) and [vLLM](https://github.com/vllm-project/vllm), to provide a robust, end-to-end solution for managing and scaling large models. +vllm-plugin-fl is a vLLM plugin built on the FlagOS unified multi-chip backend, to help flagscale support multi-chip on vllm framework. +## **FlagCX** +FlagCX is a scalable and adaptive cross-chip communication library. It serves as a platform where developers, researchers, and AI engineers can collaborate on various projects, contribute to the development of cutting-edge AI solutions, and share their work with the global community. + +## **FlagEval Evaluation Framework** + FlagEval is a comprehensive evaluation system and open platform for large models launched in 2023. It aims to establish scientific, fair, and open benchmarks, methodologies, and tools to help researchers assess model and training algorithm performance. It features: + - **Multi-dimensional Evaluation**: Supports 800+ model evaluations across NLP, CV, Audio, and Multimodal fields, covering 20+ downstream tasks including language understanding and image-text generation. + - **Industry-Grade Use Cases**: Has completed horizontal evaluations of mainstream large models, providing authoritative benchmarks for chip-model performance validation. + +# Contributing + +We warmly welcome global developers to join us: + +1. 
Submit Issues to report problems +2. Create Pull Requests to contribute code +3. Improve technical documentation +4. Expand hardware adaptation support +# License +The model weights are derived from Tencent-Hunyuan/HY-MT2-1.8B and are open‑sourced under the Apache License 2.0: https://www.apache.org/licenses/LICENSE-2.0.txt diff --git a/docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-30B-A3B-ascend-FlagOS.md b/docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-30B-A3B-ascend-FlagOS.md new file mode 100644 index 0000000..a69e9c9 --- /dev/null +++ b/docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-30B-A3B-ascend-FlagOS.md @@ -0,0 +1,122 @@ +--- +base_model: +- "" +--- + +# Introduction +Hy-MT2 is a multilingual translation model series open-sourced by Tencent Hunyuan. It includes three sizes — Hy-MT2-1.8B, Hy-MT2-7B, and Hy-MT2-30B-A3B — all supporting translation across 33 languages and 5 Chinese ethnic minority / dialect translation pairs. The 30B-A3B uses a MoE architecture (30B total parameters / 3B activated), while the 1.8B and 7B are dense models. Compared to the previous generation Hy-MT1.5, MT2 brings improvements in domain-specific translation, instruction following, and on-device deployment: + +The 7B and 30B-A3B achieve 96.9% and 98.1% of Gemini 2.5 Pro's performance respectively on the FLORES-200 general translation benchmark, surpassing open-source models such as DeepSeek-V4-Pro and Kimi K2.6; the 1.8B outperforms leading commercial translation APIs overall. +The 30B-A3B achieves a GEMBA score of 99.0% of Gemini 2.5 Pro's on the DomainMTBench benchmark across vertical domains including finance, politics, and education. +Supports translation instructions such as glossary/terminology control, style transformation, and structured output (HTML/JSON), with instruction-following capability exceeding open-source models of the same size. 
+The 1.8B offers a 1.25-bit quantized version based on the Sherry framework, requiring only ~440 MB of storage, enabling local inference on mobile chips from Apple, Qualcomm, MediaTek, and others. + +### Integrated Deployment +- Out-of-the-box inference scripts with pre-configured hardware and software parameters +- Released **FlagOS-Ascend** container image supporting deployment within minutes +### Consistency Validation +- Rigorously evaluated through benchmark testing: Performance and results from the FlagOS software stack are compared against native stacks on multiple public. + + +# Evaluation Results +## Benchmark Result +| Metrics(chrf) | Hy-MT2-30B-A3B-Nvidia-Origin | Hy-MT2-30B-A3B-Ascend-FlagOS | +|--------------|------------------------------|------------------------------| +| flores_ca | 57.79 | 54.38 | +| wmt16 | 60.92 | 60.85 | + +# User Guide +Environment Setup + +| Item | Version | +|------------------|----------------------| +| Docker Version | 29.4.3 | +| Operating System | Ubuntu 24.04.4 LTS (Noble Numbat) | + +## Operation Steps + +### Download FlagOS Image +```bash +docker pull harbor.baai.ac.cn/flagrelease-public/flagrelease-hy-mt2-30b-a3b-ascend-tree_none-gems_5.0.2-vllm_0.18.0_empty-plugin_0.1.1_vllm0.13.0.g6c344a5e1.d20260514-cx_0.10.0-python_3.11.14-torch_npu_2.9.0.post1_gitee7ba04-pcp_cann8.5.1-driver_25.2.3:202605201419 +``` + +### Download Open-source Model Weights +```bash +pip install modelscope +modelscope download --model FlagRelease/Hy-MT2-30B-A3B-ascend-FlagOS --local_dir /data/HY-MT2-30B-A3B +``` + +### Start the Container +```bash +docker run -itd --name flagos -w /workspace --privileged --ipc=host --net=host --shm-size=100g -v /usr/local/Ascend/driver:/usr/local/Ascend/driver -v /usr/local/Ascend/firmware:/usr/local/Ascend/firmware -v /usr/local/sbin/npu-smi:/usr/local/sbin/npu-smi -v /usr/local/dcmi:/usr/local/dcmi -v /usr/local/sbin:/usr/local/sbin -v /usr/bin/hostname:/usr/bin/hostname -v 
/etc/ascend_install.info:/etc/ascend_install.info -v /var/log/npu/:/usr/slog -v /etc/hccn.conf:/etc/hccn.conf -v /etc/localtime:/etc/localtime -v /etc/hosts:/etc/hosts -v /data:/data -e VLLM_USE_MODELSCOPE=true harbor.baai.ac.cn/flagrelease-public/flagrelease-hy-mt2-30b-a3b-ascend-tree_none-gems_5.0.2-vllm_0.18.0_empty-plugin_0.1.1_vllm0.13.0.g6c344a5e1.d20260514-cx_0.10.0-python_3.11.14-torch_npu_2.9.0.post1_gitee7ba04-pcp_cann8.5.1-driver_25.2.3:202605201419 bash +docker exec -it flagos /bin/bash +``` +### Start the Server +```bash +export ASCEND_RT_VISIBLE_DEVICES=1,2 +vllm serve /data/HY-MT2-30B-A3B --served-model-name hy30a3-ascend-flagos --tensor-parallel-size 2 --enforce-eager --port 8000 +``` + +## Service Invocation +### Invocation Script +```bash +curl http://localhost:8000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "hy30a3-ascend-flagos", + "messages": [{"role": "user", "content": "你好"}] + }' +``` + + +### AnythingLLM Integration Guide + +#### 1. Download & Install + +- Visit the official site: https://anythingllm.com/ +- Choose the appropriate version for your OS (Windows/macOS/Linux) +- Follow the installation wizard to complete the setup + +#### 2. Configuration + +- Launch AnythingLLM +- Open settings (bottom left, fourth tab) +- Configure core LLM parameters +- Click "Save Settings" to apply changes + +#### 3. Model Interaction + +- After model loading is complete: +- Click **"New Conversation"** +- Enter your question (e.g., "Explain the basics of quantum computing") +- Click the send button to get a response +# Technical Overview +**FlagOS** is a fully open-source system software stack designed to unify the "model–system–chip" layers and foster an open, collaborative ecosystem. 
It enables a "develop once, run anywhere" workflow across diverse AI accelerators, unlocking hardware performance, eliminating fragmentation among vendor-specific software stacks, and substantially lowering the cost of porting and maintaining AI workloads. With core technologies such as the **FlagScale**, together with vllm-plugin-fl, distributed training/inference framework, **FlagGems** universal operator library, **FlagCX** communication library, and **FlagTree** unified compiler, the **FlagRelease** platform leverages the **FlagOS** stack to automatically produce and release various combinations of \<chip + open-source model\>. This enables efficient and automated model migration across diverse chips, opening a new chapter for large model deployment and application. +## FlagGems +FlagGems is a high-performance, generic operator library implemented in [Triton](https://github.com/openai/triton) language. It is built on a collection of backend-neutral kernels that aims to accelerate LLM (Large-Language Models) training and inference across diverse hardware platforms. +## FlagTree +FlagTree is an open source, unified compiler for multiple AI chips project dedicated to developing a diverse ecosystem of AI chip compilers and related tooling platforms, thereby fostering and strengthening the upstream and downstream Triton ecosystem. Currently in its initial phase, the project aims to maintain compatibility with existing adaptation solutions while unifying the codebase to rapidly implement single-repository multi-backend support. For upstream model users, it provides unified compilation capabilities across multiple backends; for downstream chip manufacturers, it offers examples of Triton ecosystem integration. +## FlagScale and vllm-plugin-fl +FlagScale is a comprehensive toolkit designed to support the entire lifecycle of large models.
It builds on the strengths of several prominent open-source projects, including [Megatron-LM](https://github.com/NVIDIA/Megatron-LM) and [vLLM](https://github.com/vllm-project/vllm), to provide a robust, end-to-end solution for managing and scaling large models. +vllm-plugin-fl is a vLLM plugin built on the FlagOS unified multi-chip backend, to help flagscale support multi-chip on vllm framework. +## **FlagCX** +FlagCX is a scalable and adaptive cross-chip communication library. It serves as a platform where developers, researchers, and AI engineers can collaborate on various projects, contribute to the development of cutting-edge AI solutions, and share their work with the global community. + +## **FlagEval Evaluation Framework** + FlagEval is a comprehensive evaluation system and open platform for large models launched in 2023. It aims to establish scientific, fair, and open benchmarks, methodologies, and tools to help researchers assess model and training algorithm performance. It features: + - **Multi-dimensional Evaluation**: Supports 800+ model evaluations across NLP, CV, Audio, and Multimodal fields, covering 20+ downstream tasks including language understanding and image-text generation. + - **Industry-Grade Use Cases**: Has completed horizontal evaluations of mainstream large models, providing authoritative benchmarks for chip-model performance validation. + +# Contributing + +We warmly welcome global developers to join us: + +1. Submit Issues to report problems +2. Create Pull Requests to contribute code +3. Improve technical documentation +4. 
Expand hardware adaptation support +# License +The model weights are derived from Tencent-Hunyuan/HY-MT2-30B-A3B and are open‑sourced under the Apache License 2.0: https://www.apache.org/licenses/LICENSE-2.0.txt + + + diff --git a/docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-30B-A3B-nvidia-FlagOS.md b/docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-30B-A3B-nvidia-FlagOS.md new file mode 100644 index 0000000..2a557fd --- /dev/null +++ b/docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-30B-A3B-nvidia-FlagOS.md @@ -0,0 +1,124 @@ +--- +base_model: +- "" +tasks: [] +--- +# Introduction +Hy-MT2 is a multilingual translation model series open-sourced by Tencent Hunyuan. It includes three sizes — Hy-MT2-1.8B, Hy-MT2-7B, and Hy-MT2-30B-A3B — all supporting translation across 33 languages and 5 Chinese ethnic minority / dialect translation pairs. The 30B-A3B uses a MoE architecture (30B total parameters / 3B activated), while the 1.8B and 7B are dense models. Compared to the previous generation Hy-MT1.5, MT2 brings improvements in domain-specific translation, instruction following, and on-device deployment: + +The 7B and 30B-A3B achieve 96.9% and 98.1% of Gemini 2.5 Pro's performance respectively on the FLORES-200 general translation benchmark, surpassing open-source models such as DeepSeek-V4-Pro and Kimi K2.6; the 1.8B outperforms leading commercial translation APIs overall. +The 30B-A3B achieves a GEMBA score of 99.0% of Gemini 2.5 Pro's on the DomainMTBench benchmark across vertical domains including finance, politics, and education. +Supports translation instructions such as glossary/terminology control, style transformation, and structured output (HTML/JSON), with instruction-following capability exceeding open-source models of the same size. +The 1.8B offers a 1.25-bit quantized version based on the Sherry framework, requiring only ~440 MB of storage, enabling local inference on mobile chips from Apple, Qualcomm, MediaTek, and others. 
+ +### Integrated Deployment +- Out-of-the-box inference scripts with pre-configured hardware and software parameters +- Released **FlagOS-Nvidia** container image supporting deployment within minutes +### Consistency Validation +- Rigorously evaluated through benchmark testing: Performance and results from the FlagOS software stack are compared against native stacks on multiple public benchmarks. + + +# Evaluation Results +## Benchmark Result +| Metrics(chrf) | HY-MT2-30B-A3B-Nvidia-Origin | HY-MT2-30B-A3B-FlagOS | +|--------------|------------------------------|------------------------------| +| flores_ca | 57.79 | 57.8 | +| wmt16 | 60.92 | 60.89 | + +# User Guide +Environment Setup + +| Item | Version | +|------------------|----------------------| +| Docker Version | 29.4.3 | +| Operating System | Ubuntu 24.04.4 LTS (Noble Numbat) | + +## Operation Steps + +### Download FlagOS Image +```bash +docker pull harbor.baai.ac.cn/flagrelease-public/flagrelease-hy-mt2-30b-a3b-nvidia-tree_0.5.0_3.5-gems_5.0.1rc0-vllm_0.20.2-plugin_0.0.0-cx_none-python_3.12.3-torch_2.11.0-pcp_cuda13.2-gpu_nvidia003-arc_amd64-driver_570.158.01:202605191822 +``` + +### Download Open-source Model Weights +```bash +pip install modelscope +modelscope download --model FlagRelease/HY-MT2-30B-A3B-nvidia-FlagOS --local_dir /data/HY-MT2-30B-A3B-FlagOS +``` + +### Start the Container +```bash +docker run --init --detach --net=host --uts=host --ipc=host --security-opt=seccomp=unconfined --privileged=true --ulimit stack=67108864 --ulimit memlock=-1 --ulimit nofile=1048576:1048576 --shm-size=32G -v /data:/data --gpus all --name flagos harbor.baai.ac.cn/flagrelease-public/flagrelease-hy-mt2-30b-a3b-nvidia-tree_0.5.0_3.5-gems_5.0.1rc0-vllm_0.20.2-plugin_0.0.0-cx_none-python_3.12.3-torch_2.11.0-pcp_cuda13.2-gpu_nvidia003-arc_amd64-driver_570.158.01:202605191822 sleep infinity +docker exec -it flagos /bin/bash +``` +### Start the Server +```bash +vllm serve /data/HY-MT2-30B-A3B-FlagOS \ +--host 0.0.0.0 --port 8000 \
+--tensor-parallel-size 1 \ +--served-model-name flagOS \ +--enforce-eager + +``` + +## Service Invocation +### Invocation Script +```bash +curl http://localhost:8000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "flagOS", + "messages": [{"role": "user", "content": "你好"}] + }' +``` + + +### AnythingLLM Integration Guide + +#### 1. Download & Install + +- Visit the official site: https://anythingllm.com/ +- Choose the appropriate version for your OS (Windows/macOS/Linux) +- Follow the installation wizard to complete the setup + +#### 2. Configuration + +- Launch AnythingLLM +- Open settings (bottom left, fourth tab) +- Configure core LLM parameters +- Click "Save Settings" to apply changes + +#### 3. Model Interaction + +- After model loading is complete: +- Click **"New Conversation"** +- Enter your question (e.g., "Explain the basics of quantum computing") +- Click the send button to get a response +# Technical Overview +**FlagOS** is a fully open-source system software stack designed to unify the "model–system–chip" layers and foster an open, collaborative ecosystem. It enables a "develop once, run anywhere" workflow across diverse AI accelerators, unlocking hardware performance, eliminating fragmentation among vendor-specific software stacks, and substantially lowering the cost of porting and maintaining AI workloads. With core technologies such as the **FlagScale**, together with vllm-plugin-fl, distributed training/inference framework, **FlagGems** universal operator library, **FlagCX** communication library, and **FlagTree** unified compiler, the **FlagRelease** platform leverages the **FlagOS** stack to automatically produce and release various combinations of \<chip + open-source model\>. This enables efficient and automated model migration across diverse chips, opening a new chapter for large model deployment and application.
+## FlagGems +FlagGems is a high-performance, generic operator library implemented in [Triton](https://github.com/openai/triton) language. It is built on a collection of backend-neutral kernels that aims to accelerate LLM (Large-Language Models) training and inference across diverse hardware platforms. +## FlagTree +FlagTree is an open source, unified compiler for multiple AI chips project dedicated to developing a diverse ecosystem of AI chip compilers and related tooling platforms, thereby fostering and strengthening the upstream and downstream Triton ecosystem. Currently in its initial phase, the project aims to maintain compatibility with existing adaptation solutions while unifying the codebase to rapidly implement single-repository multi-backend support. For upstream model users, it provides unified compilation capabilities across multiple backends; for downstream chip manufacturers, it offers examples of Triton ecosystem integration. +## FlagScale and vllm-plugin-fl +Flagscale is a comprehensive toolkit designed to support the entire lifecycle of large models. It builds on the strengths of several prominent open-source projects, including [Megatron-LM](https://github.com/NVIDIA/Megatron-LM) and [vLLM](https://github.com/vllm-project/vllm), to provide a robust, end-to-end solution for managing and scaling large models. +vllm-plugin-fl is a vLLM plugin built on the FlagOS unified multi-chip backend, to help flagscale support multi-chip on vllm framework. +## **FlagCX** +FlagCX is a scalable and adaptive cross-chip communication library. It serves as a platform where developers, researchers, and AI engineers can collaborate on various projects, contribute to the development of cutting-edge AI solutions, and share their work with the global community. + +## **FlagEval Evaluation Framework** + FlagEval is a comprehensive evaluation system and open platform for large models launched in 2023. 
It aims to establish scientific, fair, and open benchmarks, methodologies, and tools to help researchers assess model and training algorithm performance. It features: + - **Multi-dimensional Evaluation**: Supports 800+ model evaluations across NLP, CV, Audio, and Multimodal fields, covering 20+ downstream tasks including language understanding and image-text generation. + - **Industry-Grade Use Cases**: Has completed horizontal evaluations of mainstream large models, providing authoritative benchmarks for chip-model performance validation. + +# Contributing + +We warmly welcome global developers to join us: + +1. Submit Issues to report problems +2. Create Pull Requests to contribute code +3. Improve technical documentation +4. Expand hardware adaptation support +# License +The model weights are derived from Tencent-Hunyuan/HY-MT2-30B-A3B and are open‑sourced under the Apache License 2.0: https://www.apache.org/licenses/LICENSE-2.0.txt + diff --git a/docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-30B-A3B-zhenwu-FlagOS.md b/docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-30B-A3B-zhenwu-FlagOS.md new file mode 100644 index 0000000..f5a24d7 --- /dev/null +++ b/docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-30B-A3B-zhenwu-FlagOS.md @@ -0,0 +1,119 @@ +--- +base_model: +- "" +--- +# Introduction +Hy-MT2 is a multilingual translation model series open-sourced by Tencent Hunyuan. It includes three sizes — Hy-MT2-1.8B, Hy-MT2-7B, and Hy-MT2-30B-A3B — all supporting translation across 33 languages and 5 Chinese ethnic minority / dialect translation pairs. The 30B-A3B uses a MoE architecture (30B total parameters / 3B activated), while the 1.8B and 7B are dense models. 
Compared to the previous generation Hy-MT1.5, MT2 brings improvements in domain-specific translation, instruction following, and on-device deployment: + +The 7B and 30B-A3B achieve 96.9% and 98.1% of Gemini 2.5 Pro's performance respectively on the FLORES-200 general translation benchmark, surpassing open-source models such as DeepSeek-V4-Pro and Kimi K2.6; the 1.8B outperforms leading commercial translation APIs overall. +The 30B-A3B achieves a GEMBA score of 99.0% of Gemini 2.5 Pro's on the DomainMTBench benchmark across vertical domains including finance, politics, and education. +Supports translation instructions such as glossary/terminology control, style transformation, and structured output (HTML/JSON), with instruction-following capability exceeding open-source models of the same size. +The 1.8B offers a 1.25-bit quantized version based on the Sherry framework, requiring only ~440 MB of storage, enabling local inference on mobile chips from Apple, Qualcomm, MediaTek, and others. + +### Integrated Deployment +- Out-of-the-box inference scripts with pre-configured hardware and software parameters +- Released **FlagOS-Zhenwu** container image supporting deployment within minutes +### Consistency Validation +- Rigorously evaluated through benchmark testing: Performance and results from the FlagOS software stack are compared against native stacks on multiple public benchmarks. + + +# Evaluation Results +## Benchmark Result +| Metrics(chrf) | HY-MT2-30B-A3B-Nvidia-Origin | HY-MT2-30B-A3B-Zhenwu-FlagOS | +|--------------|------------------------------|------------------------------| +| flores_ca | 57.79 | 57.558 | +| wmt16 | 60.92 | 60.7098 | + +# User Guide +Environment Setup + +| Item | Version | +|------------------|----------------------| +| Docker Version | Docker version 28.1.0, build 4d8c241 | +| Operating System | Ubuntu 24.04.2 LTS | + +## Operation Steps +This model requires 1 machine with 16 GPUs. Please follow this link to apply for 1 machine resource.
link:https://help.aliyun.com/zh/pai/user-guide + +### Download FlagOS Image +```bash +docker pull harbor.baai.ac.cn/flagrelease-public/flagrelease-hy-mt2-30b-a3b-zhenwu-tree_none-gems_5.0.1rc0-vllm_0.13.1.dev0_g72506c983.d20260218-plugin_0.1.0_vllm0.13.0-cx_none-python_3.12.3-torch_2.9.0-pcp_hggc13.0-gpu_pp001-arc_amd64-driver_1.3.2-d7f5a2:202605191318 +``` + +### Download Open-source Model Weights +```bash +pip install modelscope +modelscope download --model FlagRelease/HY-MT2-30B-A3B-zhenwu-FlagOS --local_dir /data/HY-MT2-30B-A3B +``` +### Start the Server +```bash +vllm serve /data/HY-MT2-30B-A3B \ +--trust-remote-code \ +--dtype bfloat16 \ +--enforce-eager \ +--port 8000 \ +--host 0.0.0.0 \ +--served-model-name hy_mt2 \ +--gpu-memory-utilization 0.85 +``` + +## Service Invocation +### Invocation Script +```bash +curl http://localhost:8000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "hy_mt2", + "messages": [{"role": "user", "content": "你好"}] + }' +``` + + +### AnythingLLM Integration Guide + +#### 1. Download & Install + +- Visit the official site: https://anythingllm.com/ +- Choose the appropriate version for your OS (Windows/macOS/Linux) +- Follow the installation wizard to complete the setup + +#### 2. Configuration + +- Launch AnythingLLM +- Open settings (bottom left, fourth tab) +- Configure core LLM parameters +- Click "Save Settings" to apply changes + +#### 3. Model Interaction + +- After model loading is complete: +- Click **"New Conversation"** +- Enter your question (e.g., “Explain the basics of quantum computing”) +- Click the send button to get a response +# Technical Overview +**FlagOS** is a fully open-source system software stack designed to unify the "model–system–chip" layers and foster an open, collaborative ecosystem. 
It enables a “develop once, run anywhere” workflow across diverse AI accelerators, unlocking hardware performance, eliminating fragmentation among vendor-specific software stacks, and substantially lowering the cost of porting and maintaining AI workloads. With core technologies such as the **FlagScale**, together with vllm-plugin-fl, distributed training/inference framework, **FlagGems** universal operator library, **FlagCX** communication library, and **FlagTree** unified compiler, the **FlagRelease** platform leverages the **FlagOS** stack to automatically produce and release various combinations of \<chip + open-source model\>. This enables efficient and automated model migration across diverse chips, opening a new chapter for large model deployment and application. +## FlagGems +FlagGems is a high-performance, generic operator library implemented in [Triton](https://github.com/openai/triton) language. It is built on a collection of backend-neutral kernels that aims to accelerate LLM (Large-Language Models) training and inference across diverse hardware platforms. +## FlagTree +FlagTree is an open source, unified compiler for multiple AI chips project dedicated to developing a diverse ecosystem of AI chip compilers and related tooling platforms, thereby fostering and strengthening the upstream and downstream Triton ecosystem. Currently in its initial phase, the project aims to maintain compatibility with existing adaptation solutions while unifying the codebase to rapidly implement single-repository multi-backend support. For upstream model users, it provides unified compilation capabilities across multiple backends; for downstream chip manufacturers, it offers examples of Triton ecosystem integration. +## FlagScale and vllm-plugin-fl +FlagScale is a comprehensive toolkit designed to support the entire lifecycle of large models.
It builds on the strengths of several prominent open-source projects, including [Megatron-LM](https://github.com/NVIDIA/Megatron-LM) and [vLLM](https://github.com/vllm-project/vllm), to provide a robust, end-to-end solution for managing and scaling large models. +vllm-plugin-fl is a vLLM plugin built on the FlagOS unified multi-chip backend, to help flagscale support multi-chip on vllm framework. +## **FlagCX** +FlagCX is a scalable and adaptive cross-chip communication library. It serves as a platform where developers, researchers, and AI engineers can collaborate on various projects, contribute to the development of cutting-edge AI solutions, and share their work with the global community. + +## **FlagEval Evaluation Framework** + FlagEval is a comprehensive evaluation system and open platform for large models launched in 2023. It aims to establish scientific, fair, and open benchmarks, methodologies, and tools to help researchers assess model and training algorithm performance. It features: + - **Multi-dimensional Evaluation**: Supports 800+ model evaluations across NLP, CV, Audio, and Multimodal fields, covering 20+ downstream tasks including language understanding and image-text generation. + - **Industry-Grade Use Cases**: Has completed horizontal evaluations of mainstream large models, providing authoritative benchmarks for chip-model performance validation. + +# Contributing + +We warmly welcome global developers to join us: + +1. Submit Issues to report problems +2. Create Pull Requests to contribute code +3. Improve technical documentation +4.
Expand hardware adaptation support +# License +The model weights are derived from Tencent-Hunyuan/HY-MT2-30B-A3B and are open‑sourced under the Apache License 2.0: https://www.apache.org/licenses/LICENSE-2.0.txt diff --git a/docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-7B-ascend-FlagOS.md b/docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-7B-ascend-FlagOS.md new file mode 100644 index 0000000..6225a3a --- /dev/null +++ b/docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-7B-ascend-FlagOS.md @@ -0,0 +1,136 @@ +--- +base_model: +- "" +--- + +# Introduction +Hy-MT2 is a multilingual translation model series open-sourced by Tencent Hunyuan. It includes three sizes — Hy-MT2-1.8B, Hy-MT2-7B, and Hy-MT2-30B-A3B — all supporting translation across 33 languages and 5 Chinese ethnic minority / dialect translation pairs. The 30B-A3B uses a MoE architecture (30B total parameters / 3B activated), while the 1.8B and 7B are dense models. Compared to the previous generation Hy-MT1.5, MT2 brings improvements in domain-specific translation, instruction following, and on-device deployment: + +The 7B and 30B-A3B achieve 96.9% and 98.1% of Gemini 2.5 Pro's performance respectively on the FLORES-200 general translation benchmark, surpassing open-source models such as DeepSeek-V4-Pro and Kimi K2.6; the 1.8B outperforms leading commercial translation APIs overall. +The 30B-A3B achieves a GEMBA score of 99.0% of Gemini 2.5 Pro's on the DomainMTBench benchmark across vertical domains including finance, politics, and education. +Supports translation instructions such as glossary/terminology control, style transformation, and structured output (HTML/JSON), with instruction-following capability exceeding open-source models of the same size. +The 1.8B offers a 1.25-bit quantized version based on the Sherry framework, requiring only ~440 MB of storage, enabling local inference on mobile chips from Apple, Qualcomm, MediaTek, and others. 
+ +### Integrated Deployment +- Out-of-the-box inference scripts with pre-configured hardware and software parameters +- Released **FlagOS-Ascend** container image supporting deployment within minutes +### Consistency Validation +- Rigorously evaluated through benchmark testing: Performance and results from the FlagOS software stack are compared against native stacks on multiple public benchmarks. + + +# Evaluation Results +## Benchmark Result +| Metrics(chrf) | Hy-MT2-7B-Nvidia-Origin | Hy-MT2-7B-Ascend-FlagOS | +|--------------|-------------------------|-------------------------| +| flores_ca | 52.46 | 52.44 | +| wmt16 | 60.21 | 57.24 | + + +# User Guide +Environment Setup + +| Item | Version | +|------------------|----------------------| +| Docker Version | 29.4.3 | +| Operating System | Ubuntu 24.04.4 LTS (Noble Numbat) | + +## Operation Steps + +### Download FlagOS Image +```bash +docker pull harbor.baai.ac.cn/flagrelease-public/flagrelease-hy-mt2-7b-ascend-tree_none-gems_5.0.2-vllm_0.18.0_empty-plugin_0.1.1_vllm0.13.0.g6c344a5e1.d20260514-cx_0.10.0-python_3.11.14-torch_npu_2.9.0.post1_gitee7ba04-pcp_cann8.5.1-gpu_ascend001-arc_arm64-driver_25.2.3:202605201117 +``` + +### Download Open-source Model Weights +```bash +pip install modelscope +modelscope download --model FlagRelease/HY-MT2-7B-ascend-FlagOS --local_dir /data/HY-MT2-7B +``` + +### Start the Container +```bash +docker run -d --name flagos \ + --device /dev/davinci6 \ + --device /dev/davinci_manager \ + --device /dev/devmm_svm \ + --device /dev/hisi_hdc \ + -v /usr/local/dcmi:/usr/local/dcmi \ + -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \ + -v /usr/local/Ascend/driver/lib64:/usr/local/Ascend/driver/lib64 \ + -v /data:/data \ + -p 8000:8000 \ +
harbor.baai.ac.cn/flagrelease-public/flagrelease-hy-mt2-7b-ascend-tree_none-gems_5.0.2-vllm_0.18.0_empty-plugin_0.1.1_vllm0.13.0.g6c344a5e1.d20260514-cx_0.10.0-python_3.11.14-torch_npu_2.9.0.post1_gitee7ba04-pcp_cann8.5.1-gpu_ascend001-arc_arm64-driver_25.2.3:202605201117 + +docker exec -it flagos /bin/bash +``` +### Start the Server +```bash +export TORCH_DEVICE_BACKEND_AUTOLOAD=0 +export VLLM_FL_PREFER_ENABLED=False +export ASCEND_RT_VISIBLE_DEVICES=0 +vllm serve /data/HY-MT2-7B --dtype bfloat16 --enforce-eager --port 8000 --served-model-name hy-7b-ascend-flagos +``` + +## Service Invocation +### Invocation Script +```bash +curl http://localhost:8000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "hy-7b-ascend-flagos", + "messages": [{"role": "user", "content": "你好"}] + }' +``` + + +### AnythingLLM Integration Guide + +#### 1. Download & Install + +- Visit the official site: https://anythingllm.com/ +- Choose the appropriate version for your OS (Windows/macOS/Linux) +- Follow the installation wizard to complete the setup + +#### 2. Configuration + +- Launch AnythingLLM +- Open settings (bottom left, fourth tab) +- Configure core LLM parameters +- Click "Save Settings" to apply changes + +#### 3. Model Interaction + +- After model loading is complete: +- Click **"New Conversation"** +- Enter your question (e.g., "Explain the basics of quantum computing") +- Click the send button to get a response +# Technical Overview +**FlagOS** is a fully open-source system software stack designed to unify the "model–system–chip" layers and foster an open, collaborative ecosystem. It enables a "develop once, run anywhere" workflow across diverse AI accelerators, unlocking hardware performance, eliminating fragmentation among vendor-specific software stacks, and substantially lowering the cost of porting and maintaining AI workloads.
With core technologies such as the **FlagScale**, together with vllm-plugin-fl, distributed training/inference framework, **FlagGems** universal operator library, **FlagCX** communication library, and **FlagTree** unified compiler, the **FlagRelease** platform leverages the **FlagOS** stack to automatically produce and release various combinations of \<chip + open-source model\>. This enables efficient and automated model migration across diverse chips, opening a new chapter for large model deployment and application. +## FlagGems +FlagGems is a high-performance, generic operator library implemented in [Triton](https://github.com/openai/triton) language. It is built on a collection of backend-neutral kernels that aims to accelerate LLM (Large-Language Models) training and inference across diverse hardware platforms. +## FlagTree +FlagTree is an open source, unified compiler for multiple AI chips project dedicated to developing a diverse ecosystem of AI chip compilers and related tooling platforms, thereby fostering and strengthening the upstream and downstream Triton ecosystem. Currently in its initial phase, the project aims to maintain compatibility with existing adaptation solutions while unifying the codebase to rapidly implement single-repository multi-backend support. For upstream model users, it provides unified compilation capabilities across multiple backends; for downstream chip manufacturers, it offers examples of Triton ecosystem integration. +## FlagScale and vllm-plugin-fl +FlagScale is a comprehensive toolkit designed to support the entire lifecycle of large models. It builds on the strengths of several prominent open-source projects, including [Megatron-LM](https://github.com/NVIDIA/Megatron-LM) and [vLLM](https://github.com/vllm-project/vllm), to provide a robust, end-to-end solution for managing and scaling large models. +vllm-plugin-fl is a vLLM plugin built on the FlagOS unified multi-chip backend, to help flagscale support multi-chip on vllm framework.
+## **FlagCX** +FlagCX is a scalable and adaptive cross-chip communication library. It serves as a platform where developers, researchers, and AI engineers can collaborate on various projects, contribute to the development of cutting-edge AI solutions, and share their work with the global community. + +## **FlagEval Evaluation Framework** + FlagEval is a comprehensive evaluation system and open platform for large models launched in 2023. It aims to establish scientific, fair, and open benchmarks, methodologies, and tools to help researchers assess model and training algorithm performance. It features: + - **Multi-dimensional Evaluation**: Supports 800+ model evaluations across NLP, CV, Audio, and Multimodal fields, covering 20+ downstream tasks including language understanding and image-text generation. + - **Industry-Grade Use Cases**: Has completed horizontal evaluations of mainstream large models, providing authoritative benchmarks for chip-model performance validation. + +# Contributing + +We warmly welcome global developers to join us: + +1. Submit Issues to report problems +2. Create Pull Requests to contribute code +3. Improve technical documentation +4. Expand hardware adaptation support +# License +The model weights are derived from Tencent-Hunyuan/HY-MT2-7B and are open‑sourced under the Apache License 2.0: https://www.apache.org/licenses/LICENSE-2.0.txt + + + diff --git a/docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-7B-nvidia-FlagOS.md b/docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-7B-nvidia-FlagOS.md new file mode 100644 index 0000000..1e9ff5c --- /dev/null +++ b/docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-7B-nvidia-FlagOS.md @@ -0,0 +1,123 @@ +--- +base_model: +- "" +--- +# Introduction +Hy-MT2 is a multilingual translation model series open-sourced by Tencent Hunyuan. 
It includes three sizes — Hy-MT2-1.8B, Hy-MT2-7B, and Hy-MT2-30B-A3B — all supporting translation across 33 languages and 5 Chinese ethnic minority / dialect translation pairs. The 30B-A3B uses a MoE architecture (30B total parameters / 3B activated), while the 1.8B and 7B are dense models. Compared to the previous generation Hy-MT1.5, MT2 brings improvements in domain-specific translation, instruction following, and on-device deployment: + +The 7B and 30B-A3B achieve 96.9% and 98.1% of Gemini 2.5 Pro's performance respectively on the FLORES-200 general translation benchmark, surpassing open-source models such as DeepSeek-V4-Pro and Kimi K2.6; the 1.8B outperforms leading commercial translation APIs overall. +The 30B-A3B achieves a GEMBA score of 99.0% of Gemini 2.5 Pro's on the DomainMTBench benchmark across vertical domains including finance, politics, and education. +Supports translation instructions such as glossary/terminology control, style transformation, and structured output (HTML/JSON), with instruction-following capability exceeding open-source models of the same size. +The 1.8B offers a 1.25-bit quantized version based on the Sherry framework, requiring only ~440 MB of storage, enabling local inference on mobile chips from Apple, Qualcomm, MediaTek, and others. + +### Integrated Deployment +- Out-of-the-box inference scripts with pre-configured hardware and software parameters +- Released **FlagOS-Nvidia** container image supporting deployment within minutes +### Consistency Validation +- Rigorously evaluated through benchmark testing: Performance and results from the FlagOS software stack are compared against native stacks on multiple public benchmarks.
+ + +# Evaluation Results +## Benchmark Result +| Metrics(chrf) | HY-MT2-7B-Nvidia-Origin | HY-MT2-7B-Nvidia-FlagOS | +|--------------|-------------------------|-------------------------| +| flores_ca | 52.46 | 52.46 | +| wmt16 | 60.21 | 60.18 | + +# User Guide +Environment Setup + +| Item | Version | +|------------------|----------------------| +| Docker Version | 29.4.3 | +| Operating System | Ubuntu 24.04.4 LTS (Noble Numbat) | + +## Operation Steps + +### Download FlagOS Image +```bash +docker pull harbor.baai.ac.cn/flagrelease-public/flagrelease-hy-mt2-7b-nvidia-tree_0.5.0_3.5-gems_5.0.1rc0-vllm_0.20.2-plugin_0.0.0-cx_none-python_3.12.3-torch_2.11.0-pcp_cuda13.2-gpu_nvidia003-arc_amd64-driver_570.158.01:202605191822 +``` + +### Download Open-source Model Weights +```bash +pip install modelscope +modelscope download --model FlagRelease/HY-MT2-7B-nvidia-FlagOS --local_dir /data/HY-MT2-7B-FlagOS +``` + +### Start the Container +```bash +docker run --init --detach --net=host --uts=host --ipc=host --security-opt=seccomp=unconfined --privileged=true --ulimit stack=67108864 --ulimit memlock=-1 --ulimit nofile=1048576:1048576 --shm-size=32G -v /data:/data --gpus all --name flagos harbor.baai.ac.cn/flagrelease-public/flagrelease-hy-mt2-7b-nvidia-tree_0.5.0_3.5-gems_5.0.1rc0-vllm_0.20.2-plugin_0.0.0-cx_none-python_3.12.3-torch_2.11.0-pcp_cuda13.2-gpu_nvidia003-arc_amd64-driver_570.158.01:202605191822 sleep infinity +docker exec -it flagos /bin/bash +``` +### Start the Server +```bash +vllm serve /data/HY-MT2-7B-FlagOS \ +--host 0.0.0.0 --port 8000 \ +--tensor-parallel-size 1 \ +--served-model-name flagOS \ +--enforce-eager +``` + +## Service Invocation +### Invocation Script +```bash +curl http://localhost:8000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "flagOS", + "messages": [{"role": "user", "content": "你好"}] + }' +``` + + +### AnythingLLM Integration Guide + +#### 1. 
Download & Install + +- Visit the official site: https://anythingllm.com/ +- Choose the appropriate version for your OS (Windows/macOS/Linux) +- Follow the installation wizard to complete the setup + +#### 2. Configuration + +- Launch AnythingLLM +- Open settings (bottom left, fourth tab) +- Configure core LLM parameters +- Click "Save Settings" to apply changes + +#### 3. Model Interaction + +- After model loading is complete: +- Click **"New Conversation"** +- Enter your question (e.g., "Explain the basics of quantum computing") +- Click the send button to get a response +# Technical Overview +**FlagOS** is a fully open-source system software stack designed to unify the "model–system–chip" layers and foster an open, collaborative ecosystem. It enables a "develop once, run anywhere" workflow across diverse AI accelerators, unlocking hardware performance, eliminating fragmentation among vendor-specific software stacks, and substantially lowering the cost of porting and maintaining AI workloads. With core technologies such as the **FlagScale**, together with vllm-plugin-fl, distributed training/inference framework, **FlagGems** universal operator library, **FlagCX** communication library, and **FlagTree** unified compiler, the **FlagRelease** platform leverages the **FlagOS** stack to automatically produce and release various combinations of \<chip + open-source model\>. This enables efficient and automated model migration across diverse chips, opening a new chapter for large model deployment and application. +## FlagGems +FlagGems is a high-performance, generic operator library implemented in [Triton](https://github.com/openai/triton) language. It is built on a collection of backend-neutral kernels that aims to accelerate LLM (Large-Language Models) training and inference across diverse hardware platforms.
+## FlagTree +FlagTree is an open source, unified compiler for multiple AI chips project dedicated to developing a diverse ecosystem of AI chip compilers and related tooling platforms, thereby fostering and strengthening the upstream and downstream Triton ecosystem. Currently in its initial phase, the project aims to maintain compatibility with existing adaptation solutions while unifying the codebase to rapidly implement single-repository multi-backend support. For upstream model users, it provides unified compilation capabilities across multiple backends; for downstream chip manufacturers, it offers examples of Triton ecosystem integration. +## FlagScale and vllm-plugin-fl +Flagscale is a comprehensive toolkit designed to support the entire lifecycle of large models. It builds on the strengths of several prominent open-source projects, including [Megatron-LM](https://github.com/NVIDIA/Megatron-LM) and [vLLM](https://github.com/vllm-project/vllm), to provide a robust, end-to-end solution for managing and scaling large models. +vllm-plugin-fl is a vLLM plugin built on the FlagOS unified multi-chip backend, to help flagscale support multi-chip on vllm framework. +## **FlagCX** +FlagCX is a scalable and adaptive cross-chip communication library. It serves as a platform where developers, researchers, and AI engineers can collaborate on various projects, contribute to the development of cutting-edge AI solutions, and share their work with the global community. + +## **FlagEval Evaluation Framework** + FlagEval is a comprehensive evaluation system and open platform for large models launched in 2023. It aims to establish scientific, fair, and open benchmarks, methodologies, and tools to help researchers assess model and training algorithm performance. It features: + - **Multi-dimensional Evaluation**: Supports 800+ model evaluations across NLP, CV, Audio, and Multimodal fields, covering 20+ downstream tasks including language understanding and image-text generation. 
+ - **Industry-Grade Use Cases**: Has completed horizontal evaluations of mainstream large models, providing authoritative benchmarks for chip-model performance validation. + +# Contributing + +We warmly welcome global developers to join us: + +1. Submit Issues to report problems +2. Create Pull Requests to contribute code +3. Improve technical documentation +4. Expand hardware adaptation support +# License +The model weights are derived from Tencent-Hunyuan/HY-MT2-7B and are open‑sourced under the Apache License 2.0: https://www.apache.org/licenses/LICENSE-2.0.txt + + diff --git a/docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-7B-zhenwu-FlagOS.md b/docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-7B-zhenwu-FlagOS.md new file mode 100644 index 0000000..3b0918a --- /dev/null +++ b/docs/flagrelease_en/model_readmes/FlagRelease_HY-MT2-7B-zhenwu-FlagOS.md @@ -0,0 +1,119 @@ +--- +base_model: +- "" +--- +# Introduction +Hy-MT2 is a multilingual translation model series open-sourced by Tencent Hunyuan. It includes three sizes — Hy-MT2-1.8B, Hy-MT2-7B, and Hy-MT2-30B-A3B — all supporting translation across 33 languages and 5 Chinese ethnic minority / dialect translation pairs. The 30B-A3B uses a MoE architecture (30B total parameters / 3B activated), while the 1.8B and 7B are dense models. Compared to the previous generation Hy-MT1.5, MT2 brings improvements in domain-specific translation, instruction following, and on-device deployment: + +The 7B and 30B-A3B achieve 96.9% and 98.1% of Gemini 2.5 Pro's performance respectively on the FLORES-200 general translation benchmark, surpassing open-source models such as DeepSeek-V4-Pro and Kimi K2.6; the 1.8B outperforms leading commercial translation APIs overall. +The 30B-A3B achieves a GEMBA score of 99.0% of Gemini 2.5 Pro's on the DomainMTBench benchmark across vertical domains including finance, politics, and education. 
+Supports translation instructions such as glossary/terminology control, style transformation, and structured output (HTML/JSON), with instruction-following capability exceeding open-source models of the same size. +The 1.8B offers a 1.25-bit quantized version based on the Sherry framework, requiring only ~440 MB of storage, enabling local inference on mobile chips from Apple, Qualcomm, MediaTek, and others. + +### Integrated Deployment +- Out-of-the-box inference scripts with pre-configured hardware and software parameters +- Released **FlagOS-Zhenwu** container image supporting deployment within minutes +### Consistency Validation +- Rigorously evaluated through benchmark testing: Performance and results from the FlagOS software stack are compared against native stacks on multiple public benchmarks. + + +# Evaluation Results +## Benchmark Result +| Metrics(chrf) | HY-MT2-7B-Nvidia-Origin | HY-MT2-7B-Zhenwu-FlagOS | +|--------------|-------------------------|-------------------------| +| flores_ca | 52.46 | 52.4511 | +| wmt16 | 60.21 | 60.1873 | + +# User Guide +Environment Setup + +| Item | Version | +|------------------|----------------------| +| Docker Version | Docker version 28.1.0, build 4d8c241 | +| Operating System | Ubuntu 24.04.2 LTS | + +## Operation Steps +This model requires 1 machine with 16 GPUs. Please follow this link to apply for 1 machine resource.
link:https://help.aliyun.com/zh/pai/user-guide + +### Download FlagOS Image +```bash +docker pull harbor.baai.ac.cn/flagrelease-public/flagrelease-hy-mt2-7b-zhenwu-tree_none-gems_5.0.1rc0-vllm_0.13.1.dev0_g72506c983.d20260218-plugin_0.1.0_vllm0.13.0-cx_none-python_3.12.3-torch_2.9.0-pcp_hggc13.0-gpu_pp001-arc_amd64-driver_1.3.2-d7f5a2:202605191318 +``` + +### Download Open-source Model Weights +```bash +pip install modelscope +modelscope download --model FlagRelease/HY-MT2-7B-zhenwu-FlagOS --local_dir /data/HY-MT2-7B +``` +### Start the Server +```bash +vllm serve /data/HY-MT2-7B \ +--trust-remote-code \ +--dtype bfloat16 \ +--enforce-eager \ +--port 8000 \ +--host 0.0.0.0 \ +--served-model-name hy_mt2 \ +--gpu-memory-utilization 0.85 +``` + +## Service Invocation +### Invocation Script +```bash +curl http://localhost:8000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "hy_mt2", + "messages": [{"role": "user", "content": "你好"}] + }' +``` + + +### AnythingLLM Integration Guide + +#### 1. Download & Install + +- Visit the official site: https://anythingllm.com/ +- Choose the appropriate version for your OS (Windows/macOS/Linux) +- Follow the installation wizard to complete the setup + +#### 2. Configuration + +- Launch AnythingLLM +- Open settings (bottom left, fourth tab) +- Configure core LLM parameters +- Click "Save Settings" to apply changes + +#### 3. Model Interaction + +- After model loading is complete: +- Click **"New Conversation"** +- Enter your question (e.g., “Explain the basics of quantum computing”) +- Click the send button to get a response +# Technical Overview +**FlagOS** is a fully open-source system software stack designed to unify the "model–system–chip" layers and foster an open, collaborative ecosystem. 
It enables a “develop once, run anywhere” workflow across diverse AI accelerators, unlocking hardware performance, eliminating fragmentation among vendor-specific software stacks, and substantially lowering the cost of porting and maintaining AI workloads. With core technologies such as the **FlagScale**, together with vllm-plugin-fl, distributed training/inference framework, **FlagGems** universal operator library, **FlagCX** communication library, and **FlagTree** unified compiler, the **FlagRelease** platform leverages the **FlagOS** stack to automatically produce and release various combinations of \<chip + open-source model\>. This enables efficient and automated model migration across diverse chips, opening a new chapter for large model deployment and application. +## FlagGems +FlagGems is a high-performance, generic operator library implemented in [Triton](https://github.com/openai/triton) language. It is built on a collection of backend-neutral kernels that aims to accelerate LLM (Large-Language Models) training and inference across diverse hardware platforms. +## FlagTree +FlagTree is an open source, unified compiler for multiple AI chips project dedicated to developing a diverse ecosystem of AI chip compilers and related tooling platforms, thereby fostering and strengthening the upstream and downstream Triton ecosystem. Currently in its initial phase, the project aims to maintain compatibility with existing adaptation solutions while unifying the codebase to rapidly implement single-repository multi-backend support. For upstream model users, it provides unified compilation capabilities across multiple backends; for downstream chip manufacturers, it offers examples of Triton ecosystem integration. +## FlagScale and vllm-plugin-fl +FlagScale is a comprehensive toolkit designed to support the entire lifecycle of large models.
It builds on the strengths of several prominent open-source projects, including [Megatron-LM](https://github.com/NVIDIA/Megatron-LM) and [vLLM](https://github.com/vllm-project/vllm), to provide a robust, end-to-end solution for managing and scaling large models. +vllm-plugin-fl is a vLLM plugin built on the FlagOS unified multi-chip backend, to help flagscale support multi-chip on vllm framework. +## **FlagCX** +FlagCX is a scalable and adaptive cross-chip communication library. It serves as a platform where developers, researchers, and AI engineers can collaborate on various projects, contribute to the development of cutting-edge AI solutions, and share their work with the global community. + +## **FlagEval Evaluation Framework** + FlagEval is a comprehensive evaluation system and open platform for large models launched in 2023. It aims to establish scientific, fair, and open benchmarks, methodologies, and tools to help researchers assess model and training algorithm performance. It features: + - **Multi-dimensional Evaluation**: Supports 800+ model evaluations across NLP, CV, Audio, and Multimodal fields, covering 20+ downstream tasks including language understanding and image-text generation. + - **Industry-Grade Use Cases**: Has completed horizontal evaluations of mainstream large models, providing authoritative benchmarks for chip-model performance validation. + +# Contributing + +We warmly welcome global developers to join us: + +1. Submit Issues to report problems +2. Create Pull Requests to contribute code +3. Improve technical documentation +4.
Expand hardware adaptation support +# License +The model weights are derived from Tencent-Hunyuan/HY-MT2-7B and are open‑sourced under the Apache License 2.0: https://www.apache.org/licenses/LICENSE-2.0.txt diff --git a/docs/flagrelease_en/model_readmes/FlagRelease_Kimi-Linear-48B-A3B-Instruct-nvidia-FlagOS.md b/docs/flagrelease_en/model_readmes/FlagRelease_Kimi-Linear-48B-A3B-Instruct-nvidia-FlagOS.md new file mode 100644 index 0000000..bb4f9c1 --- /dev/null +++ b/docs/flagrelease_en/model_readmes/FlagRelease_Kimi-Linear-48B-A3B-Instruct-nvidia-FlagOS.md @@ -0,0 +1,127 @@ +# Introduction +Kimi-Linear-48B-A3B-Instruct is a high-efficiency large language model developed by MoonshotAI. Built with an innovative hybrid linear attention architecture and equipped with 48B total parameters, it is specially optimized for long-context comprehension, multi-turn dialogue and complex reasoning scenarios, supporting an ultra-long context window up to 1 million tokens. + +Adopting a 3:1 structural ratio of Kimi Delta Attention and global MLA, this model greatly cuts down KV cache occupancy and improves inference throughput while maintaining strong comprehensive capability. It achieves outstanding results on multiple authoritative benchmarks, is natively compatible with Transformers and vLLM frameworks, and can be quickly deployed for long document parsing, knowledge question answering and industrial intelligent conversation services. + + +### Integrated Deployment +- Out-of-the-box inference scripts with pre-configured hardware and software parameters +- Released **FlagOS-Nvidia** container image supporting deployment within minutes +### Consistency Validation +- Rigorously evaluated through benchmark testing: Performance and results from the FlagOS software stack are compared against native stacks on multiple public benchmarks.
+ + +# Evaluation Results +## Benchmark Result +| Metrics | Kimi-Linear-48B-A3B-Instruct-nvidia-FlagOS-Nvidia-Origin | Kimi-Linear-48B-A3B-Instruct-nvidia-FlagOS-Nvidia-FlagOS | +|---------------------|----------------------------------------------------------|--------------------------------------| +| aime | 0.4667 | 0.4667 | +| musr_generative | 0.5926 | 0.5635 | +| mmlu_pro | 0.515 | 0.5315 | +| gpqa_generative_cot | 0.4295 | 0.4295 | +| livebench_new | 0.5438 | 0.5178 | + +# User Guide +Environment Setup + +| Item | Version | +|------------------|----------------------| +| Docker Version | Docker version 24.0.0, build 98fdcd7 | +| Operating System | Ubuntu 22.04.4 LTS (Jammy Jellyfish) | + +## Operation Steps + +### Download FlagOS Image +```bash +docker pull harbor.baai.ac.cn/external-cooperation/kimi-linear-48b-a3b-instruct-nvidia-tree_0.5.0_3.5-gems_5.0.2-vllm_0.13.0-plugin_0.1-cx_none-python_3.12.3-torch_2.9.0_cu128-pcp_cuda12.8-gpu_nvidia003-arc_amd64-driver_570.158.01:2605110300 +``` + +### Download Open-source Model Weights +```bash +pip install modelscope +modelscope download --model FlagRelease/Kimi-Linear-48B-A3B-Instruct-nvidia-FlagOS --local_dir /data/Kimi-Linear-48B-A3B-Instruct-nvidia-FlagOS +``` + +### Start the Container +```bash +docker run -itd --name=xxx --gpus=all --network=host -v /data:/data harbor.baai.ac.cn/external-cooperation/kimi-linear-48b-a3b-instruct-nvidia-tree_0.5.0_3.5-gems_5.0.2-vllm_0.13.0-plugin_0.1-cx_none-python_3.12.3-torch_2.9.0_cu128-pcp_cuda12.8-gpu_nvidia003-arc_amd64-driver_570.158.01:2605110300 sleep infinity + +docker exec -it xxx bash +``` +### Start the Server +```bash +export VLLM_PLUGINS=fl +export TRITON_ALL_BLOCKS_PARALLEL=1 +nohup vllm serve \ +--model /data/Kimi-Linear-48B-A3B-Instruct-nvidia-FlagOS/ \ +--served-model-name kimi-linear \ +--host 0.0.0.0 \ +--port 6677 \ +--trust-remote-code \ +--tensor-parallel-size 2 \ +--enforce-eager \ +> kimi-flagos.log 2>&1 & + +tail -f kimi-flagos.log +``` + +## Service Invocation +###
Invocation Script +```bash +curl http://localhost:6677/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "kimi-linear", + "messages": [{"role": "user", "content": "你好"}] + }' +``` + + +### AnythingLLM Integration Guide + +#### 1. Download & Install + +- Visit the official site: https://anythingllm.com/ +- Choose the appropriate version for your OS (Windows/macOS/Linux) +- Follow the installation wizard to complete the setup + +#### 2. Configuration + +- Launch AnythingLLM +- Open settings (bottom left, fourth tab) +- Configure core LLM parameters +- Click "Save Settings" to apply changes + +#### 3. Model Interaction + +- After model loading is complete: +- Click **"New Conversation"** +- Enter your question (e.g., “Explain the basics of quantum computing”) +- Click the send button to get a response +# Technical Overview +**FlagOS** is a fully open-source system software stack designed to unify the "model–system–chip" layers and foster an open, collaborative ecosystem. It enables a “develop once, run anywhere” workflow across diverse AI accelerators, unlocking hardware performance, eliminating fragmentation among vendor-specific software stacks, and substantially lowering the cost of porting and maintaining AI workloads. With core technologies such as the **FlagScale**, together with vllm-plugin-fl, distributed training/inference framework, **FlagGems** universal operator library, **FlagCX** communication library, and **FlagTree** unified compiler, the **FlagRelease** platform leverages the **FlagOS** stack to automatically produce and release various combinations of \<chip + open-source model\>. This enables efficient and automated model migration across diverse chips, opening a new chapter for large model deployment and application. +## FlagGems +FlagGems is a high-performance, generic operator library implemented in [Triton](https://github.com/openai/triton) language.
It is built on a collection of backend-neutral kernels that aims to accelerate LLM (Large-Language Models) training and inference across diverse hardware platforms. +## FlagTree +FlagTree is an open source, unified compiler for multiple AI chips project dedicated to developing a diverse ecosystem of AI chip compilers and related tooling platforms, thereby fostering and strengthening the upstream and downstream Triton ecosystem. Currently in its initial phase, the project aims to maintain compatibility with existing adaptation solutions while unifying the codebase to rapidly implement single-repository multi-backend support. For upstream model users, it provides unified compilation capabilities across multiple backends; for downstream chip manufacturers, it offers examples of Triton ecosystem integration. +## FlagScale and vllm-plugin-fl +FlagScale is a comprehensive toolkit designed to support the entire lifecycle of large models. It builds on the strengths of several prominent open-source projects, including [Megatron-LM](https://github.com/NVIDIA/Megatron-LM) and [vLLM](https://github.com/vllm-project/vllm), to provide a robust, end-to-end solution for managing and scaling large models. +vllm-plugin-fl is a vLLM plugin built on the FlagOS unified multi-chip backend, to help flagscale support multi-chip on vllm framework. +## **FlagCX** +FlagCX is a scalable and adaptive cross-chip communication library. It serves as a platform where developers, researchers, and AI engineers can collaborate on various projects, contribute to the development of cutting-edge AI solutions, and share their work with the global community. + +## **FlagEval Evaluation Framework** + FlagEval is a comprehensive evaluation system and open platform for large models launched in 2023. It aims to establish scientific, fair, and open benchmarks, methodologies, and tools to help researchers assess model and training algorithm performance.
It features: + - **Multi-dimensional Evaluation**: Supports 800+ model evaluations across NLP, CV, Audio, and Multimodal fields, covering 20+ downstream tasks including language understanding and image-text generation. + - **Industry-Grade Use Cases**: Has completed horizontal evaluations of mainstream large models, providing authoritative benchmarks for chip-model performance validation. + +# Contributing + +We warmly welcome global developers to join us: + +1. Submit Issues to report problems +2. Create Pull Requests to contribute code +3. Improve technical documentation +4. Expand hardware adaptation support +# License +The model weights are derived from moonshotai/Kimi-Linear-48B-A3B-Instruct and are open‑sourced under the Apache License 2.0: https://www.apache.org/licenses/LICENSE-2.0.txt diff --git a/docs/flagrelease_en/model_readmes/FlagRelease_MiniMax-M2.7-hygon-FlagOS.md b/docs/flagrelease_en/model_readmes/FlagRelease_MiniMax-M2.7-hygon-FlagOS.md index 6f0cc7a..7ff60db 100644 --- a/docs/flagrelease_en/model_readmes/FlagRelease_MiniMax-M2.7-hygon-FlagOS.md +++ b/docs/flagrelease_en/model_readmes/FlagRelease_MiniMax-M2.7-hygon-FlagOS.md @@ -30,7 +30,7 @@ Environment Setup ### Download FlagOS Image ```bash -docker pull harbor.baai.ac.cn/flagrelease-public/flagrelease-hygon-minimax:202604201005 +docker pull harbor.baai.ac.cn/flagrelease-public/flagrelease-hygon-minimax:202604120035 ``` ### Download Open-source Model Weights @@ -55,18 +55,48 @@ docker run \ --cap-add=SYS_PTRACE \ --security-opt seccomp=unconfined \ -itd \ - harbor.baai.ac.cn/flagrelease-public/flagrelease-hygon-minimax:202604201005 + harbor.baai.ac.cn/flagrelease-public/flagrelease-hygon-minimax:202604120035 docker exec -it flagos /bin/bash ``` ### Start the Server ```bash -USE_FLAGGEMS=1 vllm serve /data/MiniMax-M2.7 --tensor-parallel-size 8 --served-model-name minimax-m2.7 --trust-remote-code +# You need to prepare two machines named node0 and node1, and run the following commands on
each respectively to start the services. +# in node0 (master node) +export GLOO_SOCKET_IFNAME=eno1 +export NCCL_SOCKET_IFNAME=eno1 +export GEMS_VENDOR=hygon + +USE_FLAGGEMS=1 vllm serve /data/MiniMax-M2.7 \ + --tensor-parallel-size 8 \ + --pipeline-parallel-size 2 \ + --served-model-name minimax-m2.7 \ + --nnodes 2 \ + --node-rank 0 \ + --port 8000 \ + --master-addr <node0_ip> \ + --trust-remote-code + +# in node1 +export GLOO_SOCKET_IFNAME=eno1 +export NCCL_SOCKET_IFNAME=eno1 +export GEMS_VENDOR=hygon +USE_FLAGGEMS=1 vllm serve /data/MiniMax-M2.7 \ + --tensor-parallel-size 8 \ + --pipeline-parallel-size 2 \ + --served-model-name minimax-m2.7 \ + --nnodes 2 \ + --node-rank 1 \ + --port 8000 \ + --master-addr <node0_ip> \ + --headless \ + --trust-remote-code ``` ## Service Invocation ### Invocation Script ```bash +# in master node (node0) curl http://localhost:8000/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ @@ -123,4 +153,4 @@ We warmly welcome global developers to join us: 3. Improve technical documentation 4. Expand hardware adaptation support # License -本模型的权重来源于MiniMaxAI/MiniMax-M2.7,以apache2.0协议开源: https://www.apache.org/licenses/LICENSE-2.0.txt。 +The weights of this model are derived from MiniMaxAI/MiniMax‑M2.7, open‑sourced under the Apache License 2.0.
License link: https://www.apache.org/licenses/LICENSE-2.0.txt diff --git a/docs/flagrelease_en/model_readmes/FlagRelease_Qwen3.6-35B-A3B-nomtp-kunlunxin-FlagOS.md b/docs/flagrelease_en/model_readmes/FlagRelease_Qwen3.6-35B-A3B-nomtp-kunlunxin-FlagOS.md index 94cade4..e54857a 100644 --- a/docs/flagrelease_en/model_readmes/FlagRelease_Qwen3.6-35B-A3B-nomtp-kunlunxin-FlagOS.md +++ b/docs/flagrelease_en/model_readmes/FlagRelease_Qwen3.6-35B-A3B-nomtp-kunlunxin-FlagOS.md @@ -50,6 +50,7 @@ docker run -itd \ --privileged \ --net=host \ --name flagos \ + -v /data:/data \ -w /workspace \ harbor.baai.ac.cn/flagrelease-public/qwen3.6-35b-a3b-nomtp-kunlunxin-gems_4.2.1rc0-vllm_0.13-plugin_0.1-cx_0.10.0-python_3.10.18-x86_64-driver_515.58:2604161518 bash docker exec -it flagos /bin/bash @@ -72,7 +73,7 @@ vllm serve /data/Qwen3.6-35B-A3B-nomtp/ \ --block-size 256 \ --enforce-eager \ --max-num-batched-tokens 16384 \ - --port 8000 + --port 8000 \ --served-model-name qwen36 ``` diff --git a/docs/flagrelease_en/model_readmes/FlagRelease_TeleChat3-36B-Thinking-mthreads-FlagOS.md b/docs/flagrelease_en/model_readmes/FlagRelease_TeleChat3-36B-Thinking-mthreads-FlagOS.md index 31a8054..dabc369 100644 --- a/docs/flagrelease_en/model_readmes/FlagRelease_TeleChat3-36B-Thinking-mthreads-FlagOS.md +++ b/docs/flagrelease_en/model_readmes/FlagRelease_TeleChat3-36B-Thinking-mthreads-FlagOS.md @@ -56,7 +56,7 @@ docker pull harbor.baai.ac.cn/external-cooperation/teleai_telechat3-36b-thinking ### Download the Model ```bash -modelscope download --model FlagRelease/TeleChat3-36B-Thinking-FlagOS --local_dir /data/TeleChat3-36B-Thinking +modelscope download --model FlagRelease/TeleChat3-36B-Thinking-mthreads-FlagOS --local_dir /data/TeleChat3-36B-Thinking ``` ### Start Inference Container