From 16af4487f4d14b66b791e7f156a30eadd86b6d78 Mon Sep 17 00:00:00 2001 From: Alberto Ricardo Rodriguez Date: Fri, 22 May 2026 13:17:08 +0200 Subject: [PATCH] feat: migrate frontend to C++ and integrate LLM inference pipeline - Replace Python frontend with C++ request_preprocessor - Add commands module (base, builder, commons, factory, move_to) - Add executive module with behavior tree nodes and tree_executor - Add features module with landmarks and retrievals support - Add inference module with OpenVINO GenAI integration - Add distilled-1b-robot-router model (LoRA, checkpoints, OpenVINO int4 export) - Bundle openvino.genai-2026.1.0.0 as dependency resource - Remove deprecated Python frontend, commands, and test modules Replaces Python frontend with full C++ pipeline including commands, executive (behavior tree), features (landmarks), and OpenVINO-based LLM inference. Includes distilled-1b-robot-router model assets. --- src/CMakeLists.txt | 55 + src/commands/base.h | 3 + .../__init__.py => commands/builder.h} | 0 src/commands/commons.h | 25 + .../__init__.py => commands/factory.h} | 0 .../__init__.py => commands/move_to.h} | 0 .../behave_tree/command_executor_tree.xml | 199 + .../inference/model_inference_config.xml | 11 + src/definitions.h | 88 + src/executive/CMakeLists.txt | 0 src/executive/builder.h | 0 src/executive/configurator.h | 46 + src/executive/executive.h | 67 + src/executive/factory.h | 0 ...ard_from_previous_command_execution_node.h | 37 + .../communicate_command_was_not_listed_node.h | 40 + .../nodes/communicate_return_home_node.h | 43 + .../nodes/communicate_waiting_status_node.h | 43 + .../execute_command_with_feedback_node.h | 57 + src/executive/nodes/get_next_command_node.h | 44 + .../nodes/validate_arrival_to_landmark_node.h | 44 + src/executive/nodes/validate_component_node.h | 44 + src/executive/nodes/validate_landmark_node.h | 44 + src/executive/tree_executor.h | 82 + src/features/builder.h | 24 + src/features/landmarks/landmark_info.h | 23 + 
src/features/landmarks/landmarks_container.h | 63 + src/features/manager.h | 25 + .../retrievals/retrieve_features_from_file.h | 51 + src/frontend/CMakeLists.txt | 4 + src/frontend/frontend/commands/executor.py | 40 - src/frontend/frontend/commands/pipeline.py | 68 - src/frontend/frontend/commands/repository.py | 32 - src/frontend/frontend/commands/states.py | 6 - src/frontend/frontend/commands/tracker.py | 106 - .../frontend/frontend/ollama_client_cli.py | 75 - src/frontend/frontend/frontend/readme.md | 8 - src/frontend/frontend/frontend/server.py | 368 - src/frontend/package.xml | 22 - src/frontend/readme.md | 29 - src/frontend/request_preprocessor.h | 80 + src/frontend/setup.cfg | 4 - src/frontend/setup.py | 33 - src/frontend/test/test_copyright.py | 25 - src/frontend/test/test_flake8.py | 25 - src/frontend/test/test_pep257.py | 23 - src/inference/CMakeLists.txt | 4 + src/inference/configure.h | 29 + src/inference/factory.h | 46 + src/inference/gen_ai_model.h | 63 + src/inference/model_loader.h | 31 + .../checkpoints/README.md | 59 + .../checkpoints/checkpoint-141/README.md | 210 + .../checkpoint-141/adapter_config.json | 50 + .../checkpoint-141/chat_template.jinja | 93 + .../checkpoints/checkpoint-141/rng_state.pth | Bin 0 -> 14645 bytes .../checkpoints/checkpoint-141/scheduler.pt | Bin 0 -> 1465 bytes .../checkpoint-141/special_tokens_map.json | 17 + .../checkpoint-141/tokenizer_config.json | 2066 ++ .../checkpoint-141/trainer_state.json | 156 + .../checkpoint-141/training_args.bin | Bin 0 -> 6545 bytes .../checkpoints/checkpoint-188/README.md | 210 + .../checkpoint-188/adapter_config.json | 50 + .../checkpoint-188/chat_template.jinja | 93 + .../checkpoints/checkpoint-188/rng_state.pth | Bin 0 -> 14645 bytes .../checkpoints/checkpoint-188/scheduler.pt | Bin 0 -> 1465 bytes .../checkpoint-188/special_tokens_map.json | 17 + .../checkpoint-188/tokenizer_config.json | 2066 ++ .../checkpoint-188/trainer_state.json | 192 + .../checkpoint-188/training_args.bin | Bin 
0 -> 6545 bytes .../openvino_int4/chat_template.jinja | 93 + .../exported/openvino_int4/config.json | 38 + .../openvino_int4/generation_config.json | 7 + .../openvino_int4/openvino_detokenizer.xml | 272 + .../openvino_int4/openvino_tokenizer.xml | 764 + .../openvino_int4/special_tokens_map.json | 23 + .../openvino_int4/tokenizer_config.json | 2066 ++ .../distilled-1b-robot-router/lora/README.md | 210 + .../lora/adapter_config.json | 50 + .../lora/chat_template.jinja | 93 + .../lora/special_tokens_map.json | 17 + .../lora/tokenizer_config.json | 2066 ++ .../merged/chat_template.jinja | 93 + .../merged/config.json | 37 + .../merged/special_tokens_map.json | 23 + .../merged/tokenizer_config.json | 2066 ++ .../openvino.genai-2026.1.0.0/.clang-format | 28 + .../openvino.genai-2026.1.0.0/.gitattributes | 69 + .../.github/CODEOWNERS | 62 + .../.github/CONTRIBUTING.md | 17 + .../.github/actions/build_app/action.yml | 23 + .../actions/install_openvino/action.yml | 18 + .../actions/install_python_deps/action.yml | 15 + .../actions/install_wheel/.node-version | 1 + .../actions/install_wheel/.prettierignore | 3 + .../actions/install_wheel/.prettierrc.json | 16 + .../.github/actions/install_wheel/action.yml | 22 + .../actions/install_wheel/package-lock.json | 589 + .../actions/install_wheel/package.json | 33 + .../install_wheel/src/install_packages.js | 119 + .../.github/agents/agentic-workflows.agent.md | 143 + .../.github/aw/actions-lock.json | 14 + .../.github/components.yml | 173 + .../.github/copilot-instructions.md | 57 + .../.github/dependabot.yml | 42 + .../.github/dependency_review.yml | 25 + .../.github/labeler.yml | 220 + .../.github/pull_request_template.md | 16 + .../scripts/generate_reference_llava.py | 58 + .../scripts/workflow_rerun/__init__.py | 0 .../scripts/workflow_rerun/argument_parser.py | 30 + .../scripts/workflow_rerun/constants.py | 20 + .../workflow_rerun/errors_to_look_for.json | 186 + .../scripts/workflow_rerun/log_analyzer.py | 123 + 
.../scripts/workflow_rerun/log_collector.py | 100 + .../scripts/workflow_rerun/requirements.txt | 3 + .../scripts/workflow_rerun/rerunner.py | 138 + .../scripts/workflow_rerun/tests/__init__.py | 0 .../logs_with_error/19_Samples _ Samples.txt | 101 + .../data/logs_with_error/29_Build _ Build.txt | 1 + .../data/logs_with_error/31_Smart_CI.txt | 1 + .../logs_with_error/Build _ Build/system.txt | 14 + .../Samples _ Samples/system.txt | 14 + .../data/logs_with_error/Smart_CI/system.txt | 9 + .../logs_wo_error/dir_should_be_empty.txt | 1 + .../workflow_rerun/tests/integration_test.py | 127 + .../workflow_rerun/tests/log_analyzer_test.py | 94 + .../tests/log_collector_test.py | 69 + .../.github/workflows/assign_issue.yml | 25 + .../.github/workflows/ci-doctor.lock.yml | 1233 + .../.github/workflows/ci-doctor.md | 218 + .../.github/workflows/cleanup_caches.yml | 35 + .../.github/workflows/copilot-setup-steps.yml | 26 + .../.github/workflows/coverity.yml | 187 + .../.github/workflows/deploy_gh_pages.yml | 61 + .../.github/workflows/labeler.yml | 21 + .../.github/workflows/lint.yml | 40 + .../.github/workflows/linux.yml | 1056 + .../.github/workflows/mac.yml | 784 + .../.github/workflows/manylinux_2_28.yml | 700 + .../.github/workflows/sdl.yml | 73 + .../.github/workflows/stale.yml | 28 + .../.github/workflows/windows.yml | 1037 + .../.github/workflows/workflow_rerunner.yml | 100 + .../openvino.genai-2026.1.0.0/.gitignore | 46 + .../openvino.genai-2026.1.0.0/.gitmodules | 3 + .../.pre-commit-config.yaml | 35 + .../openvino.genai-2026.1.0.0/CMakeLists.txt | 132 + .../openvino.genai-2026.1.0.0/Jenkinsfile | 22 + .../openvino.genai-2026.1.0.0/LICENSE | 201 + .../openvino.genai-2026.1.0.0/README.md | 103 + .../openvino.genai-2026.1.0.0/SECURITY.md | 12 + .../openvino.genai-2026.1.0.0/bandit.yml | 398 + .../cmake/features.cmake | 21 + .../templates/OpenVINOGenAIConfig.cmake.in | 10 + .../cmake/templates/version.cpp.in | 19 + .../cmake/templates/version.hpp.in | 34 + 
.../cmake/templates/vs_version.rc.in | 33 + .../cmake/version.cmake | 72 + .../cmake/vs_version.cmake | 18 + .../openvino.genai-2026.1.0.0/pyproject.toml | 74 + .../requirements-build.txt | 3 + .../samples/CMakeLists.txt | 51 + .../samples/c/text_generation/CMakeLists.txt | 32 + .../samples/c/text_generation/README.md | 87 + .../c/text_generation/benchmark_genai_c.c | 191 + .../samples/c/text_generation/chat_sample_c.c | 305 + .../c/text_generation/greedy_causal_lm_c.c | 56 + .../c/visual_language_chat/CMakeLists.txt | 46 + .../c/visual_language_chat/load_image.c | 178 + .../c/visual_language_chat/load_image.h | 27 + .../c/visual_language_chat/vlm_pipeline.c | 99 + .../whisper_speech_recognition/CMakeLists.txt | 26 + .../c/whisper_speech_recognition/README.md | 133 + .../whisper_speech_recognition.c | 135 + .../whisper_utils.c | 141 + .../whisper_utils.h | 75 + .../samples/cpp/README.md | 3 + .../samples/cpp/fetch_opencv.cmake | 86 + .../samples/cpp/image_generation/512x512.bmp | 3 + .../cpp/image_generation/CMakeLists.txt | 194 + .../samples/cpp/image_generation/README.md | 330 + .../samples/cpp/image_generation/baseline.bmp | 3 + .../image_generation/benchmark_image_gen.cpp | 328 + .../heterogeneous_stable_diffusion.cpp | 87 + .../cpp/image_generation/image2image.cpp | 38 + .../image2image_concurrency.cpp | 85 + .../cpp/image_generation/imageimage.bmp | 3 + .../samples/cpp/image_generation/imwrite.cpp | 153 + .../samples/cpp/image_generation/imwrite.hpp | 16 + .../cpp/image_generation/inpainting.bmp | 3 + .../cpp/image_generation/inpainting.cpp | 36 + .../cpp/image_generation/load_image.cpp | 45 + .../cpp/image_generation/load_image.hpp | 12 + .../samples/cpp/image_generation/lora.bmp | 3 + .../cpp/image_generation/lora_text2image.cpp | 56 + .../cpp/image_generation/progress_bar.hpp | 35 + .../stable_diffusion_export_import.cpp | 119 + .../cpp/image_generation/taylorseer.bmp | 3 + .../image_generation/taylorseer_baseline.bmp | 3 + .../taylorseer_text2image.cpp | 
101 + .../cpp/image_generation/text2image.cpp | 37 + .../text2image_concurrency.cpp | 81 + .../samples/cpp/rag/CMakeLists.txt | 36 + .../samples/cpp/rag/README.md | 66 + .../samples/cpp/rag/text_embeddings.cpp | 34 + .../samples/cpp/rag/text_rerank.cpp | 45 + .../cpp/speech_generation/CMakeLists.txt | 39 + .../samples/cpp/speech_generation/README.md | 60 + .../cpp/speech_generation/audio_utils.cpp | 63 + .../cpp/speech_generation/audio_utils.hpp | 36 + .../cpp/speech_generation/text2speech.cpp | 57 + .../cpp/text_generation/CMakeLists.txt | 59 + .../samples/cpp/text_generation/README.md | 255 + .../text_generation/beam_search_causal_lm.cpp | 35 + .../cpp/text_generation/benchmark_genai.cpp | 107 + .../cpp/text_generation/chat_sample.cpp | 46 + .../encrypted_model_causal_lm.cpp | 97 + .../cpp/text_generation/greedy_causal_lm.cpp | 29 + .../text_generation/lora_greedy_causal_lm.cpp | 39 + .../text_generation/multinomial_causal_lm.cpp | 40 + .../prompt_lookup_decoding_lm.cpp | 49 + .../text_generation/read_prompt_from_file.cpp | 19 + .../text_generation/read_prompt_from_file.h | 11 + .../speculative_decoding_lm.cpp | 85 + .../structured_output_generation.cpp | 107 + .../cpp/video_generation/CMakeLists.txt | 76 + .../samples/cpp/video_generation/README.md | 140 + .../cpp/video_generation/imwrite_video.cpp | 54 + .../cpp/video_generation/imwrite_video.hpp | 15 + .../taylorseer_text2video.cpp | 95 + .../cpp/video_generation/text2video.cpp | 50 + .../cpp/visual_language_chat/CMakeLists.txt | 88 + .../cpp/visual_language_chat/README.md | 127 + .../visual_language_chat/benchmark_vlm.cpp | 123 + .../encrypted_model_vlm.cpp | 123 + .../cpp/visual_language_chat/load_image.cpp | 59 + .../cpp/visual_language_chat/load_image.hpp | 13 + .../video_to_text_chat.cpp | 135 + .../visual_language_chat.cpp | 83 + .../visual_language_lora.cpp | 69 + .../whisper_speech_recognition/CMakeLists.txt | 39 + .../cpp/whisper_speech_recognition/README.md | 130 + .../audio_utils.cpp | 118 + 
.../audio_utils.hpp | 12 + .../whisper_speech_recognition.cpp | 70 + .../samples/deployment-requirements.txt | 7 + .../samples/export-requirements.txt | 17 + .../samples/generation.gif | 3 + .../samples/js/.gitignore | 1 + .../samples/js/package-lock.json | 713 + .../samples/js/package.json | 17 + .../samples/js/rag/README.md | 68 + .../samples/js/rag/text_embeddings.js | 30 + .../samples/js/rag/text_rerank.js | 42 + .../samples/js/text_generation/README.md | 232 + .../text_generation/beam_search_causal_lm.js | 36 + .../js/text_generation/benchmark_genai.js | 116 + .../samples/js/text_generation/chat_sample.js | 60 + .../compound_grammar_generation.js | 160 + .../js/text_generation/greedy_causal_lm.js | 35 + .../samples/js/text_generation/helper.js | 35 + .../text_generation/multinomial_causal_lm.js | 40 + .../js/text_generation/react_sample.js | 227 + .../structural_tags_generation.js | 129 + .../structured_output_generation.js | 138 + .../js/text_generation/tests/usage.test.js | 62 + .../js/whisper_speech_recognition/README.md | 147 + .../whisper_speech_recognition/wav_utils.js | 102 + .../whisper_speech_recognition.js | 85 + .../samples/python/image_generation/README.md | 298 + .../image_generation/benchmark_image_gen.py | 252 + .../heterogeneous_stable_diffusion.py | 80 + .../python/image_generation/image2image.py | 45 + .../python/image_generation/inpainting.py | 42 + .../image_generation/lora_text2image.py | 56 + .../stable_diffusion_export_import.py | 112 + .../image_generation/taylorseer_text2image.py | 88 + .../python/image_generation/text2image.py | 37 + .../samples/python/rag/README.md | 83 + .../samples/python/rag/text_embeddings.py | 27 + .../samples/python/rag/text_rerank.py | 31 + .../python/speech_generation/README.md | 79 + .../create_speaker_embedding.py | 42 + .../python/speech_generation/text2speech.py | 49 + .../samples/python/text_generation/README.md | 324 + .../text_generation/beam_search_causal_lm.py | 30 + 
.../python/text_generation/benchmark_genai.py | 85 + .../python/text_generation/chat_sample.py | 40 + .../compound_grammar_generation.py | 183 + .../encrypted_model_causal_lm.py | 76 + .../text_generation/greedy_causal_lm.py | 25 + .../python/text_generation/limit_checker.py | 242 + .../text_generation/lora_greedy_causal_lm.py | 29 + .../text_generation/multinomial_causal_lm.py | 207 + .../prompt_lookup_decoding_lm.py | 36 + .../python/text_generation/react_sample.py | 192 + .../speculative_decoding_lm.py | 72 + .../structural_tags_generation.py | 150 + .../structured_output_generation.py | 114 + .../samples/python/video_generation/README.md | 164 + .../video_generation/taylorseer_text2video.py | 81 + .../python/video_generation/text2video.py | 47 + .../python/video_generation/video_utils.py | 26 + .../python/visual_language_chat/README.md | 148 + .../visual_language_chat/benchmark_vlm.py | 140 + .../encrypted_model_vlm.py | 134 + .../milebench_eval_vlm.py | 497 + .../video_to_text_chat.py | 108 + .../visual_language_chat.py | 102 + .../visual_language_lora.py | 128 + .../whisper_speech_recognition/README.md | 160 + .../whisper_speech_recognition/recorder.py | 50 + .../whisper_speech_recognition.py | 63 + .../samples/requirements.txt | 3 + .../site/.editorconfig | 16 + .../openvino.genai-2026.1.0.0/site/.gitignore | 26 + .../site/.prettierignore | 12 + .../site/.prettierrc | 10 + .../openvino.genai-2026.1.0.0/site/README.md | 41 + .../site/docs/bindings/_category_.json | 8 + .../site/docs/bindings/node-js.md | 98 + .../site/docs/concepts/_category_.json | 8 + .../site/docs/concepts/beam-search.md | 7 + .../site/docs/concepts/how-it-works.md | 70 + .../site/docs/concepts/lora.md | 33 + .../optimization-techniques/_category_.json | 8 + .../continuous-batching.md | 7 + .../diffusion-caching.md | 74 + .../kvcache-eviction-algorithm.md | 152 + .../optimization-techniques/prefix-caching.md | 7 + .../sparse-attention-prefill.md | 69 + .../speculative-decoding.md | 7 + 
.../visual-token-pruning.md | 78 + .../site/docs/getting-started/_category_.json | 8 + .../docs/getting-started/installation.mdx | 57 + .../docs/getting-started/introduction.mdx | 69 + .../site/docs/guides/_category_.json | 5 + .../site/docs/guides/chat-scenario.mdx | 373 + .../site/docs/guides/debug-logging.mdx | 88 + .../site/docs/guides/lora-adapters.mdx | 105 + .../guides/model-preparation/_category_.json | 8 + .../model-preparation/_use_cases_note.mdx | 5 + .../model-preparation/convert-to-openvino.mdx | 73 + .../download-openvino-models.mdx | 53 + .../site/docs/guides/performance-metrics.mdx | 226 + .../site/docs/guides/streaming.mdx | 217 + .../site/docs/guides/structured-output.mdx | 130 + .../site/docs/guides/tokenization.mdx | 179 + .../site/docs/samples/_category_.json | 8 + .../_components/samples-list/index.tsx | 40 + .../site/docs/samples/index.mdx | 9 + .../docs/supported-models/_category_.json | 8 + .../_components/base-models-table/index.tsx | 38 + .../image-generation-models-table/index.tsx | 31 + .../image-generation-models-table/models.ts | 113 + .../_components/llm-models-table/index.tsx | 27 + .../_components/llm-models-table/models.ts | 886 + .../speech-generation-models-table/index.tsx | 27 + .../speech-generation-models-table/models.ts | 19 + .../text-embeddings-models-table/index.tsx | 18 + .../text-embeddings-models-table/models.ts | 68 + .../text-rerank-models-table/index.tsx | 23 + .../text-rerank-models-table/models.ts | 63 + .../video-generation-models-table/index.tsx | 29 + .../video-generation-models-table/models.ts | 17 + .../_components/vlm-models-table/index.tsx | 35 + .../_components/vlm-models-table/models.ts | 184 + .../whisper-models-table/index.tsx | 27 + .../whisper-models-table/models.ts | 37 + .../site/docs/supported-models/index.mdx | 139 + .../site/docs/use-cases/_category_.json | 8 + .../_basic_generation_configuration.mdx | 19 + .../_shared/_beam_search_generation.mdx | 17 + 
.../docs/use-cases/_shared/_chat_scenario.mdx | 3 + .../docs/use-cases/_shared/_convert_model.mdx | 8 + .../_generation_configuration_workflow.mdx | 8 + .../docs/use-cases/_shared/_streaming.mdx | 3 + .../_sections/_run_model/_image2image_cpp.mdx | 19 + .../_run_model/_image2image_python.mdx | 22 + .../_sections/_run_model/_inpainting_cpp.mdx | 20 + .../_run_model/_inpainting_python.mdx | 23 + .../_sections/_run_model/_text2image_cpp.mdx | 16 + .../_run_model/_text2image_python.mdx | 13 + .../_sections/_run_model/index.mdx | 103 + .../_sections/_usage_options/index.mdx | 82 + .../docs/use-cases/image-generation/index.mdx | 21 + .../_run_model/_code_example_cpp.mdx | 24 + .../_sections/_run_model/_code_example_js.mdx | 40 + .../_run_model/_code_example_python.mdx | 30 + .../_sections/_run_model/index.mdx | 47 + .../_sections/_usage_options/index.mdx | 120 + .../docs/use-cases/image-processing/index.mdx | 21 + .../_run_model/_code_example_cpp.mdx | 21 + .../_run_model/_code_example_python.mdx | 15 + .../_sections/_run_model/index.mdx | 96 + .../use-cases/speech-generation/index.mdx | 27 + .../_run_model/_code_example_cpp.mdx | 19 + .../_sections/_run_model/_code_example_js.mdx | 14 + .../_run_model/_code_example_python.mdx | 17 + .../_sections/_run_model/index.mdx | 52 + .../_sections/_usage_options/index.mdx | 414 + .../use-cases/speech-recognition/index.mdx | 28 + .../_run_model/_code_example_cpp.mdx | 21 + .../_sections/_run_model/_code_example_js.mdx | 18 + .../_run_model/_code_example_python.mdx | 16 + .../_sections/_run_model/index.mdx | 44 + .../_sections/_usage_options/index.mdx | 110 + .../docs/use-cases/text-embedding/index.mdx | 21 + .../_run_model/_code_example_cpp.mdx | 13 + .../_sections/_run_model/_code_example_js.mdx | 10 + .../_run_model/_code_example_python.mdx | 9 + .../_sections/_run_model/index.mdx | 47 + .../_usage_options/_generation_parameters.mdx | 138 + .../_usage_options/_lora_adapters.mdx | 6 + .../_usage_options/_speculative_decoding.mdx 
| 105 + .../_sections/_usage_options/index.mdx | 21 + .../docs/use-cases/text-generation/index.mdx | 21 + .../_run_model/_code_example_cpp.mdx | 20 + .../_sections/_run_model/_code_example_js.mdx | 15 + .../_run_model/_code_example_python.mdx | 14 + .../_sections/_run_model/index.mdx | 40 + .../site/docs/use-cases/text-rerank/index.mdx | 18 + .../_sections/_run_model/_text2video_cpp.mdx | 16 + .../_run_model/_text2video_python.mdx | 34 + .../_sections/_run_model/index.mdx | 36 + .../_sections/_usage_options/index.mdx | 83 + .../docs/use-cases/video-generation/index.mdx | 31 + .../site/docusaurus.config.ts | 162 + .../site/eslint.config.mjs | 34 + .../site/package-lock.json | 20358 ++++++++++++++++ .../site/package.json | 57 + .../site/sidebars.ts | 25 + .../site/src/components/Button/index.tsx | 64 + .../src/components/Button/styles.module.css | 19 + .../site/src/components/Carousel/index.tsx | 171 + .../src/components/Carousel/styles.module.css | 69 + .../src/components/LanguageTabs/index.tsx | 53 + .../site/src/components/OptimumCLI/index.tsx | 54 + .../site/src/css/breadcrumbs.css | 13 + .../site/src/css/custom.css | 76 + .../site/src/css/footer.css | 22 + .../site/src/css/menu.css | 16 + .../site/src/css/navbar.css | 45 + .../site/src/css/toc.css | 9 + .../site/src/css/typography.css | 17 + .../site/src/hooks/use-screen-size.ts | 35 + .../FeaturesSection/FeatureItem/index.tsx | 20 + .../FeatureItem/styles.module.css | 19 + .../pages/_sections/FeaturesSection/index.tsx | 44 + .../FeaturesSection/styles.module.css | 17 + .../HeroSection/PipelinesCarousel/index.tsx | 86 + .../PipelinesCarousel/styles.module.css | 27 + .../src/pages/_sections/HeroSection/index.tsx | 31 + .../_sections/HeroSection/styles.module.css | 59 + .../pages/_sections/InstallSection/index.tsx | 72 + .../InstallSection/styles.module.css | 60 + .../components/UseCaseCard/index.tsx | 87 + .../components/UseCaseCard/styles.module.css | 87 + .../components/image-generation.tsx | 40 + 
.../components/image-processing.tsx | 41 + .../components/speech-generation.tsx | 34 + .../components/speech-recognition.tsx | 38 + .../components/text-embedding.tsx | 40 + .../components/text-generation.tsx | 41 + .../components/text-rerank.tsx | 37 + .../components/video-generation.tsx | 36 + .../pages/_sections/UseCasesSection/index.tsx | 35 + .../UseCasesSection/styles.module.css | 26 + .../pages/_sections/section-styles.module.css | 15 + .../site/src/pages/index.module.css | 65 + .../site/src/pages/index.tsx | 17 + .../src/plugins/genai-samples-docs-plugin.ts | 132 + .../site/src/theme/MDXComponents.tsx | 19 + .../site/src/types/images.d.ts | 4 + .../site/static/.nojekyll | 0 .../site/static/img/background.webp | Bin 0 -> 57240 bytes .../site/static/img/beam_idx-drop.gif | 3 + .../site/static/img/beam_idx-fork.gif | 3 + .../site/static/img/chevron-right.svg | 3 + .../site/static/img/chevron-up.svg | 3 + .../site/static/img/favicon.png | 3 + .../site/static/img/image.svg | 3 + .../site/static/img/intel-logo.svg | 3 + .../static/img/kv-cache-areas-diagram.svg | 3 + .../site/static/img/linux-logo.svg | 3 + .../site/static/img/lora.png | 3 + .../site/static/img/ltx-pipeline.png | 3 + .../site/static/img/mac-os-logo.svg | 3 + .../site/static/img/magnifying-glass.svg | 3 + .../img/openvino-genai-logo-gradient.svg | 3 + .../static/img/openvino-genai-workflow.svg | 3 + .../site/static/img/openvino.svg | 3 + .../site/static/img/sound-on.svg | 3 + .../site/static/img/stateful.jpg | 3 + .../site/static/img/stateless.jpg | 3 + .../img/structured_output_work_example.png | 3 + .../site/static/img/text.svg | 3 + .../site/static/img/trishape.svg | 3 + .../site/static/img/windows-logo.svg | 3 + .../site/static/img/xattention.svg | 3 + .../site/tsconfig.json | 11 + .../src/CMakeLists.txt | 84 + .../openvino.genai-2026.1.0.0/src/README.md | 313 + .../src/bindings_utils.hpp | 35 + .../src/c/CMakeLists.txt | 44 + .../c/include/openvino/genai/c/chat_history.h | 180 + 
.../openvino/genai/c/generation_config.h | 344 + .../include/openvino/genai/c/json_container.h | 89 + .../c/include/openvino/genai/c/llm_pipeline.h | 200 + .../c/include/openvino/genai/c/perf_metrics.h | 148 + .../c/include/openvino/genai/c/visibility.h | 28 + .../c/include/openvino/genai/c/vlm_pipeline.h | 189 + .../genai/c/whisper_generation_config.h | 415 + .../openvino/genai/c/whisper_pipeline.h | 257 + .../src/c/src/chat_history.cpp | 271 + .../src/c/src/generation_config.cpp | 396 + .../src/c/src/json_container.cpp | 117 + .../src/c/src/llm_pipeline.cpp | 227 + .../src/c/src/perf_metrics.cpp | 179 + .../src/c/src/types_c.h | 148 + .../src/c/src/vlm_pipeline.cpp | 235 + .../src/c/src/whisper_generation_config.cpp | 588 + .../src/c/src/whisper_pipeline.cpp | 353 + .../src/cpp/CMakeLists.txt | 348 + .../include/openvino/genai/cache_eviction.hpp | 228 + .../include/openvino/genai/chat_history.hpp | 79 + .../include/openvino/genai/common_types.hpp | 30 + .../genai/continuous_batching_pipeline.hpp | 233 + .../openvino/genai/generation_config.hpp | 762 + .../openvino/genai/generation_handle.hpp | 121 + .../genai/image_generation/autoencoder_kl.hpp | 158 + .../image_generation/clip_text_model.hpp | 116 + .../clip_text_model_with_projection.hpp | 32 + .../flux_transformer_2d_model.hpp | 94 + .../image_generation/generation_config.hpp | 261 + .../image_generation/image2image_pipeline.hpp | 159 + .../image_generation_perf_metrics.hpp | 53 + .../image_generation/inpainting_pipeline.hpp | 184 + .../genai/image_generation/scheduler.hpp | 34 + .../sd3_transformer_2d_model.hpp | 101 + .../image_generation/t5_encoder_model.hpp | 92 + .../image_generation/text2image_pipeline.hpp | 272 + .../unet2d_condition_model.hpp | 124 + .../include/openvino/genai/json_container.hpp | 253 + .../include/openvino/genai/llm_pipeline.hpp | 408 + .../include/openvino/genai/lora_adapter.hpp | 208 + .../cpp/include/openvino/genai/parsers.hpp | 260 + .../include/openvino/genai/perf_metrics.hpp | 
185 + .../genai/rag/text_embedding_pipeline.hpp | 224 + .../genai/rag/text_rerank_pipeline.hpp | 117 + .../openvino/genai/scheduler_config.hpp | 106 + .../openvino/genai/sparse_attention.hpp | 122 + .../speculative_decoding/perf_metrics.hpp | 84 + .../speech_generation_config.hpp | 50 + .../speech_generation_perf_metrics.hpp | 21 + .../text2speech_pipeline.hpp | 92 + .../include/openvino/genai/streamer_base.hpp | 47 + .../openvino/genai/taylorseer_config.hpp | 37 + .../include/openvino/genai/text_streamer.hpp | 70 + .../cpp/include/openvino/genai/tokenizer.hpp | 339 + .../autoencoder_kl_ltx_video.hpp | 71 + .../video_generation/generation_config.hpp | 99 + .../ltx_video_transformer_3d_model.hpp | 70 + .../video_generation/text2video_pipeline.hpp | 185 + .../cpp/include/openvino/genai/visibility.hpp | 12 + .../genai/visual_language/perf_metrics.hpp | 34 + .../genai/visual_language/pipeline.hpp | 306 + .../genai/whisper_generation_config.hpp | 146 + .../openvino/genai/whisper_pipeline.hpp | 174 + .../src/cpp/src/chat_history.cpp | 127 + .../src/cpp/src/circular_buffer_queue.hpp | 103 + .../continuous_batching/attention_output.hpp | 13 + .../src/continuous_batching/block_manager.hpp | 1183 + .../continuous_batching/cache_eviction.cpp | 739 + .../continuous_batching/cache_eviction.hpp | 387 + .../src/continuous_batching/cache_manager.hpp | 313 + .../cache_state_dumper.hpp | 108 + .../cpp/src/continuous_batching/kvcrush.cpp | 176 + .../cpp/src/continuous_batching/kvcrush.hpp | 62 + .../src/continuous_batching/model_runner.hpp | 1395 ++ .../paged_attention_transformations.hpp | 35 + .../cpp/src/continuous_batching/pipeline.cpp | 356 + .../src/continuous_batching/pipeline_base.cpp | 675 + .../src/continuous_batching/pipeline_base.hpp | 196 + .../src/continuous_batching/pipeline_impl.cpp | 922 + .../src/continuous_batching/pipeline_impl.hpp | 161 + .../cpp/src/continuous_batching/scheduler.hpp | 636 + .../continuous_batching/sparse_attention.cpp | 36 + 
.../continuous_batching/sparse_attention.hpp | 62 + .../continuous_batching/threaded_streamer.hpp | 104 + .../src/cpp/src/continuous_batching/timer.hpp | 56 + .../src/cpp/src/debug_utils.hpp | 250 + .../src/diffusion_caching/taylorseer_lite.hpp | 315 + .../src/cpp/src/generation_config.cpp | 465 + .../src/cpp/src/generation_handle.cpp | 79 + .../src/cpp/src/generation_stream.hpp | 59 + .../cpp/src/gguf_utils/building_blocks.cpp | 1020 + .../cpp/src/gguf_utils/building_blocks.hpp | 59 + .../src/cpp/src/gguf_utils/gguf.cpp | 581 + .../src/cpp/src/gguf_utils/gguf.hpp | 44 + .../src/cpp/src/gguf_utils/gguf_modeling.cpp | 175 + .../src/cpp/src/gguf_utils/gguf_modeling.hpp | 10 + .../src/cpp/src/gguf_utils/gguf_quants.cpp | 269 + .../src/cpp/src/gguf_utils/gguf_tokenizer.cpp | 623 + .../src/cpp/src/gguf_utils/gguf_tokenizer.hpp | 62 + .../image_generation/diffusion_pipeline.hpp | 247 + .../image_generation/flux_fill_pipeline.hpp | 314 + .../src/image_generation/flux_pipeline.hpp | 703 + .../image_generation/generation_config.cpp | 100 + .../image_generation/image2image_pipeline.cpp | 212 + .../image_generation_perf_metrics.cpp | 180 + .../src/image_generation/image_processor.cpp | 167 + .../src/image_generation/image_processor.hpp | 50 + .../image_generation/inpainting_pipeline.cpp | 230 + .../models/autoencoder_kl.cpp | 371 + .../models/clip_text_model.cpp | 224 + .../models/flux_transformer_2d_model.cpp | 162 + .../models/sd3_transformer_2d_model.cpp | 144 + .../models/sd3transformer_2d_inference.hpp | 66 + .../sd3transformer_2d_inference_dynamic.hpp | 54 + ...sd3transformer_2d_inference_static_bs1.hpp | 162 + .../models/t5_encoder_model.cpp | 173 + .../models/unet2d_condition_model.cpp | 161 + .../models/unet_inference.hpp | 73 + .../models/unet_inference_dynamic.hpp | 70 + .../models/unet_inference_static_bs1.hpp | 189 + .../cpp/src/image_generation/numpy_utils.cpp | 155 + .../cpp/src/image_generation/numpy_utils.hpp | 57 + 
.../src/image_generation/schedulers/ddim.cpp | 229 + .../src/image_generation/schedulers/ddim.hpp | 61 + .../schedulers/euler_ancestral_discrete.cpp | 267 + .../schedulers/euler_ancestral_discrete.hpp | 61 + .../schedulers/euler_discrete.cpp | 323 + .../schedulers/euler_discrete.hpp | 65 + .../schedulers/flow_match_euler_discrete.cpp | 281 + .../schedulers/flow_match_euler_discrete.hpp | 68 + .../schedulers/ischeduler.hpp | 52 + .../src/image_generation/schedulers/lcm.cpp | 267 + .../src/image_generation/schedulers/lcm.hpp | 71 + .../src/image_generation/schedulers/pndm.cpp | 291 + .../src/image_generation/schedulers/pndm.hpp | 67 + .../image_generation/schedulers/scheduler.cpp | 54 + .../src/image_generation/schedulers/types.cpp | 149 + .../src/image_generation/schedulers/types.hpp | 75 + .../stable_diffusion_3_pipeline.hpp | 798 + .../stable_diffusion_pipeline.hpp | 528 + .../stable_diffusion_xl_pipeline.hpp | 489 + .../image_generation/text2image_pipeline.cpp | 240 + .../image_generation/threaded_callback.hpp | 85 + .../src/cpp/src/json_container.cpp | 451 + .../src/cpp/src/json_utils.hpp | 159 + .../src/cpp/src/llm/pipeline.cpp | 414 + .../src/cpp/src/llm/pipeline_base.hpp | 79 + .../pipeline_continuous_batching_adapter.hpp | 268 + .../src/cpp/src/llm/pipeline_stateful.cpp | 521 + .../src/cpp/src/llm/pipeline_stateful.hpp | 97 + .../src/cpp/src/llm/pipeline_static.cpp | 425 + .../src/cpp/src/llm/pipeline_static.hpp | 80 + .../src/cpp/src/lm_encoding.cpp | 384 + .../src/cpp/src/lm_encoding.hpp | 34 + .../src/cpp/src/logger.cpp | 164 + .../src/cpp/src/logger.hpp | 87 + .../src/cpp/src/lora/adapter.cpp | 1899 ++ .../src/cpp/src/lora/common.hpp | 35 + .../src/cpp/src/lora/helper.cpp | 62 + .../src/cpp/src/lora/helper.hpp | 35 + .../src/cpp/src/lora/names_mapping.cpp | 590 + .../src/cpp/src/lora/names_mapping.hpp | 46 + .../src/cpp/src/lora/safetensors.c | 2 + .../src/cpp/src/parsers.cpp | 335 + .../src/cpp/src/perf_metrics.cpp | 228 + 
.../continuous_batching_for_prompt_lookup.cpp | 100 + .../continuous_batching_for_prompt_lookup.hpp | 60 + .../src/prompt_lookup/prompt_lookup_impl.cpp | 210 + .../src/prompt_lookup/prompt_lookup_impl.hpp | 81 + .../src/rag/npu/text_embedding_pipeline.cpp | 57 + .../src/rag/npu/text_embedding_pipeline.hpp | 22 + .../cpp/src/rag/text_embedding_pipeline.cpp | 338 + .../src/cpp/src/rag/text_embedding_utils.cpp | 210 + .../src/cpp/src/rag/text_embedding_utils.hpp | 32 + .../src/cpp/src/rag/text_rerank_pipeline.cpp | 317 + .../src/cpp/src/sampling/logit_processor.hpp | 127 + .../cpp/src/sampling/logit_transformers.hpp | 323 + .../src/cpp/src/sampling/sampler.cpp | 1100 + .../src/cpp/src/sampling/sampler.hpp | 179 + .../structured_output_controller.cpp | 87 + .../structured_output_controller.hpp | 96 + .../structured_output/xgrammar_backend.cpp | 188 + .../structured_output/xgrammar_backend.hpp | 105 + .../src/cpp/src/sampling/threadpool.hpp | 73 + .../src/cpp/src/sequence_group.cpp | 102 + .../src/cpp/src/sequence_group.hpp | 943 + .../continuous_batching/eagle3_strategy.cpp | 167 + .../continuous_batching/eagle3_strategy.hpp | 48 + .../fast_draft_strategy.cpp | 316 + .../fast_draft_strategy.hpp | 203 + .../continuous_batching/pipeline_impl.cpp | 415 + .../continuous_batching/pipeline_impl.hpp | 126 + .../update_request_structs.hpp | 39 + .../eagle3_model_transforms.cpp | 304 + .../eagle3_model_transforms.hpp | 109 + .../src/speculative_decoding/perf_metrics.cpp | 110 + .../speculative_decoding_metrics.cpp | 171 + .../speculative_decoding_metrics.hpp | 48 + .../stateful/eagle3_strategy.cpp | 798 + .../stateful/eagle3_strategy.hpp | 239 + .../stateful/fast_draft_strategy.cpp | 720 + .../stateful/fast_draft_strategy.hpp | 113 + .../stateful/stateful_pipeline_base.cpp | 233 + .../stateful/stateful_pipeline_base.hpp | 135 + .../default_speaker_embedding.hpp | 124 + .../speech_generation_config.cpp | 40 + .../speech_generation_perf_metrics.cpp | 32 + 
.../speecht5_tts_decoder.cpp | 93 + .../speecht5_tts_decoder.hpp | 38 + .../speech_generation/speecht5_tts_model.cpp | 213 + .../speech_generation/speecht5_tts_model.hpp | 44 + .../text2speech_pipeline.cpp | 71 + .../text2speech_pipeline_impl.cpp | 19 + .../text2speech_pipeline_impl.hpp | 41 + .../src/cpp/src/synchronized_queue.hpp | 47 + .../src/cpp/src/text_streamer.cpp | 213 + .../src/tokenizer/add_second_input_pass.cpp | 347 + .../src/tokenizer/add_second_input_pass.hpp | 66 + .../tokenizer/chat_template_fallback_map.hpp | 19 + .../src/tokenizer/make_tokenizer_stateful.cpp | 313 + .../src/tokenizer/make_tokenizer_stateful.hpp | 96 + .../src/cpp/src/tokenizer/tokenizer.cpp | 188 + .../src/cpp/src/tokenizer/tokenizer_impl.cpp | 868 + .../src/cpp/src/tokenizer/tokenizer_impl.hpp | 89 + .../src/cpp/src/tokenizer/tokenizers_path.cpp | 86 + .../src/cpp/src/tokenizer/tokenizers_path.hpp | 61 + .../src/cpp/src/utils.cpp | 980 + .../src/cpp/src/utils.hpp | 342 + .../generation_config_utils.cpp | 53 + .../generation_config_utils.hpp | 16 + .../cpp/src/video_generation/ltx_pipeline.hpp | 745 + .../models/autoencoder_kl_ltx_video.cpp | 210 + .../models/ltx_video_transformer_3d_model.cpp | 153 + .../video_generation/text2video_pipeline.cpp | 71 + .../src/visual_language/cdpruner/cdpruner.cpp | 412 + .../src/visual_language/cdpruner/cdpruner.hpp | 120 + .../cdpruner/cdpruner_config.cpp | 47 + .../cdpruner/cdpruner_config.hpp | 70 + .../cdpruner/conditional_kernel.cpp | 512 + .../cdpruner/conditional_kernel.hpp | 112 + .../src/visual_language/cdpruner/fast_dpp.cpp | 605 + .../src/visual_language/cdpruner/fast_dpp.hpp | 167 + .../visual_language/cdpruner/fast_dpp_cl.cpp | 221 + .../visual_language/cdpruner/fast_dpp_cl.hpp | 153 + .../cdpruner/relevance_calculator.cpp | 239 + .../cdpruner/relevance_calculator.hpp | 50 + .../visual_language/chat_history_state.cpp | 323 + .../visual_language/chat_history_state.hpp | 141 + .../src/cpp/src/visual_language/clip.cpp | 514 + 
.../src/cpp/src/visual_language/clip.hpp | 76 + .../continuous_batching_adapter.hpp | 130 + .../src/visual_language/embedding_model.cpp | 99 + .../src/visual_language/embedding_model.hpp | 75 + .../src/visual_language/gemma3/classes.cpp | 193 + .../src/visual_language/gemma3/classes.hpp | 55 + .../src/visual_language/inputs_embedder.cpp | 519 + .../src/visual_language/inputs_embedder.hpp | 395 + .../visual_language/internvl_chat/classes.cpp | 290 + .../visual_language/internvl_chat/classes.hpp | 47 + .../cpp/src/visual_language/llava/classes.cpp | 140 + .../cpp/src/visual_language/llava/classes.hpp | 55 + .../visual_language/llava_next/classes.cpp | 407 + .../visual_language/llava_next/classes.hpp | 42 + .../llava_next_video/classes.cpp | 623 + .../llava_next_video/classes.hpp | 96 + .../src/visual_language/minicpm/classes.cpp | 802 + .../src/visual_language/minicpm/classes.hpp | 72 + .../src/visual_language/nanollava/classes.cpp | 249 + .../src/visual_language/nanollava/classes.hpp | 55 + .../cpp/src/visual_language/perf_metrics.cpp | 36 + .../visual_language/phi3_vision/classes.cpp | 1105 + .../visual_language/phi3_vision/classes.hpp | 85 + .../src/visual_language/phi4mm/classes.cpp | 883 + .../src/visual_language/phi4mm/classes.hpp | 80 + .../src/cpp/src/visual_language/pipeline.cpp | 873 + .../cpp/src/visual_language/pipeline_base.hpp | 132 + .../src/visual_language/processor_config.cpp | 53 + .../src/visual_language/processor_config.hpp | 70 + .../visual_language/qwen2_5_vl/classes.cpp | 180 + .../visual_language/qwen2_5_vl/classes.hpp | 45 + .../src/visual_language/qwen2vl/classes.cpp | 1578 ++ .../src/visual_language/qwen2vl/classes.hpp | 202 + .../src/visual_language/qwen3_vl/classes.cpp | 581 + .../src/visual_language/qwen3_vl/classes.hpp | 99 + .../video_processor_config.hpp | 42 + .../src/visual_language/vision_encoder.cpp | 122 + .../src/visual_language/vision_encoder.hpp | 172 + .../src/visual_language/vision_registry.cpp | 236 + 
.../src/visual_language/vision_registry.hpp | 74 + .../vision_token_pruning_processor.cpp | 912 + .../vision_token_pruning_processor.hpp | 247 + .../vl_sdpa_transformations.cpp | 33 + .../vl_sdpa_transformations.hpp | 26 + .../src/visual_language/vlm_chat_context.cpp | 214 + .../src/visual_language/vlm_chat_context.hpp | 84 + .../cpp/src/visual_language/vlm_config.cpp | 79 + .../cpp/src/visual_language/vlm_config.hpp | 117 + .../src/cpp/src/whisper/alignment_heads.cpp | 45 + .../src/cpp/src/whisper/alignment_heads.hpp | 15 + .../src/cpp/src/whisper/config.cpp | 32 + .../src/cpp/src/whisper/config.hpp | 22 + .../src/cpp/src/whisper/context_tokens.cpp | 89 + .../src/cpp/src/whisper/context_tokens.hpp | 25 + .../src/cpp/src/whisper/feature_extractor.cpp | 497 + .../src/cpp/src/whisper/feature_extractor.hpp | 69 + .../src/cpp/src/whisper/generation_config.cpp | 104 + .../src/cpp/src/whisper/logit_processor.cpp | 117 + .../src/cpp/src/whisper/logit_processor.hpp | 22 + .../src/cpp/src/whisper/models.hpp | 17 + .../src/cpp/src/whisper/models/decoder.cpp | 89 + .../src/cpp/src/whisper/models/decoder.hpp | 43 + .../src/whisper/models/statefull_decoder.cpp | 124 + .../src/whisper/models/statefull_decoder.hpp | 37 + .../src/cpp/src/whisper/perf_metrics.cpp | 64 + .../src/cpp/src/whisper/pipeline.cpp | 253 + .../src/cpp/src/whisper/pipeline_base.hpp | 37 + .../src/cpp/src/whisper/pipeline_static.cpp | 1303 + .../src/cpp/src/whisper/pipeline_static.hpp | 33 + .../src/cpp/src/whisper/timestamps.cpp | 90 + .../src/cpp/src/whisper/timestamps.hpp | 27 + ...ed_dot_product_attention_decomposition.cpp | 220 + ...ed_dot_product_attention_decomposition.hpp | 16 + .../src/cpp/src/whisper/whisper.cpp | 410 + .../src/cpp/src/whisper/whisper.hpp | 46 + .../src/cpp/src/whisper/whisper_utils.cpp | 62 + .../src/cpp/src/whisper/whisper_utils.hpp | 24 + .../cpp/src/whisper/word_level_timestamps.cpp | 742 + .../cpp/src/whisper/word_level_timestamps.hpp | 37 + .../src/docs/BUILD.md | 286 + 
.../src/js/.gitignore | 8 + .../src/js/.npmignore | 18 + .../src/js/.prettierrc | 11 + .../openvino.genai-2026.1.0.0/src/js/BUILD.md | 166 + .../src/js/CMakeLists.txt | 98 + .../src/js/README.md | 52 + .../src/js/eslint.config.cjs | 103 + .../src/js/include/addon.hpp | 34 + .../src/js/include/base/perf_metrics.hpp | 261 + .../src/js/include/chat_history.hpp | 28 + .../src/js/include/helper.hpp | 287 + .../llm_pipeline/finish_chat_worker.hpp | 19 + .../js/include/llm_pipeline/init_worker.hpp | 30 + .../llm_pipeline/llm_pipeline_wrapper.hpp | 28 + .../llm_pipeline/start_chat_worker.hpp | 20 + .../src/js/include/parser.hpp | 103 + .../src/js/include/perf_metrics.hpp | 19 + .../embed_documents_worker.hpp | 23 + .../embed_query_worker.hpp | 22 + .../text_embedding_pipeline/init_worker.hpp | 27 + .../pipeline_wrapper.hpp | 17 + .../text_rerank_pipeline/init_worker.hpp | 31 + .../text_rerank_pipeline/pipeline_wrapper.hpp | 21 + .../text_rerank_pipeline/rerank_worker.hpp | 28 + .../src/js/include/tokenizer.hpp | 26 + .../vlm_pipeline/finish_chat_worker.hpp | 22 + .../js/include/vlm_pipeline/init_worker.hpp | 33 + .../js/include/vlm_pipeline/perf_metrics.hpp | 21 + .../vlm_pipeline/start_chat_worker.hpp | 23 + .../vlm_pipeline/vlm_pipeline_wrapper.hpp | 32 + .../include/whisper_pipeline/init_worker.hpp | 30 + .../include/whisper_pipeline/perf_metrics.hpp | 23 + .../whisper_pipeline/pipeline_wrapper.hpp | 25 + .../src/js/lib/addon.ts | 260 + .../src/js/lib/chatHistory.ts | 170 + .../src/js/lib/decodedResults.ts | 105 + .../src/js/lib/index.ts | 93 + .../src/js/lib/parsers.ts | 45 + .../src/js/lib/perfMetrics.ts | 147 + .../src/js/lib/pipelines/llmPipeline.ts | 234 + .../js/lib/pipelines/textEmbeddingPipeline.ts | 68 + .../js/lib/pipelines/textRerankPipeline.ts | 74 + .../src/js/lib/pipelines/vlmPipeline.ts | 243 + .../src/js/lib/pipelines/whisperPipeline.ts | 190 + .../src/js/lib/tokenizer.ts | 228 + .../src/js/lib/utils.ts | 490 + .../src/js/package-lock.json | 2385 ++ 
.../src/js/package.json | 59 + .../src/js/scripts/download-runtime.cjs | 23 + .../src/js/src/addon.cpp | 106 + .../src/js/src/chat_history.cpp | 168 + .../src/js/src/helper.cpp | 1539 ++ .../src/llm_pipeline/finish_chat_worker.cpp | 16 + .../src/js/src/llm_pipeline/init_worker.cpp | 29 + .../src/llm_pipeline/llm_pipeline_wrapper.cpp | 291 + .../js/src/llm_pipeline/start_chat_worker.cpp | 19 + .../src/js/src/parser.cpp | 264 + .../src/js/src/perf_metrics.cpp | 27 + .../embed_documents_worker.cpp | 23 + .../embed_query_worker.cpp | 23 + .../text_embedding_pipeline/init_worker.cpp | 33 + .../pipeline_wrapper.cpp | 74 + .../src/text_rerank_pipeline/init_worker.cpp | 37 + .../text_rerank_pipeline/pipeline_wrapper.cpp | 75 + .../text_rerank_pipeline/rerank_worker.cpp | 32 + .../src/js/src/tokenizer.cpp | 311 + .../src/vlm_pipeline/finish_chat_worker.cpp | 16 + .../src/js/src/vlm_pipeline/init_worker.cpp | 32 + .../src/js/src/vlm_pipeline/perf_metrics.cpp | 48 + .../js/src/vlm_pipeline/start_chat_worker.cpp | 19 + .../src/vlm_pipeline/vlm_pipeline_wrapper.cpp | 313 + .../js/src/whisper_pipeline/init_worker.cpp | 37 + .../js/src/whisper_pipeline/perf_metrics.cpp | 61 + .../src/whisper_pipeline/pipeline_wrapper.cpp | 258 + .../src/js/tests/bindings.test.js | 44 + .../src/js/tests/chatHistory.test.js | 198 + .../src/js/tests/generationConfig.test.js | 218 + .../src/js/tests/llmPipeline.test.js | 394 + .../src/js/tests/parsers.test.js | 389 + .../src/js/tests/setup.py | 82 + .../src/js/tests/structuredOutput.test.js | 402 + .../js/tests/textEmbeddingsPipeline.test.js | 63 + .../src/js/tests/textRerankPipeline.test.js | 82 + .../src/js/tests/tokenizer.test.js | 395 + .../src/js/tests/utils.js | 75 + .../src/js/tests/vlmPipeline.test.js | 225 + .../src/js/tests/whisperPipeline.test.js | 218 + .../src/js/thirdparty/node-lib.def | 147 + .../src/js/thirdparty/win_delay_load_hook.cc | 52 + .../src/js/tsconfig.json | 27 + .../src/python/CMakeLists.txt | 213 + 
.../src/python/clean_version.cmake | 21 + .../src/python/compare_pyi.cmake | 32 + .../src/python/openvino_genai/__init__.py | 136 + .../src/python/openvino_genai/__init__.pyi | 88 + .../openvino_genai/py_openvino_genai.pyi | 4850 ++++ .../src/python/py_chat_history.cpp | 98 + .../py_continuous_batching_pipeline.cpp | 648 + .../src/python/py_generation_config.cpp | 465 + .../src/python/py_image_generation_models.cpp | 557 + .../python/py_image_generation_pipelines.cpp | 762 + .../src/python/py_llm_pipeline.cpp | 272 + .../src/python/py_lora_adapter.cpp | 107 + .../src/python/py_openvino_genai.cpp | 143 + .../src/python/py_parsers.cpp | 217 + .../src/python/py_perf_metrics.cpp | 264 + .../src/python/py_rag.cpp | 267 + .../src/python/py_speech_generation.cpp | 198 + .../src/python/py_streamers.cpp | 166 + .../src/python/py_tokenizer.cpp | 326 + .../src/python/py_utils.cpp | 584 + .../src/python/py_utils.hpp | 84 + .../src/python/py_video_generation_models.cpp | 230 + .../python/py_video_generation_pipelines.cpp | 101 + .../src/python/py_vlm_pipeline.cpp | 391 + .../src/python/py_whisper_pipeline.cpp | 387 + .../src/python/remove_abi_specific_info.cmake | 23 + .../tests/cpp/CMakeLists.txt | 50 + .../tests/cpp/block_allocator.cpp | 301 + .../tests/cpp/block_hash_store.cpp | 54 + .../tests/cpp/block_manager.cpp | 113 + .../tests/cpp/cache_eviction.cpp | 1798 ++ .../tests/cpp/cache_manager.cpp | 108 + ...ation_poc_ref_coefficients_per_block_0.txt | 30 + ...ation_poc_ref_coefficients_per_block_1.txt | 33 + ...ation_poc_ref_coefficients_per_block_2.txt | 12 + ...ation_poc_ref_coefficients_per_block_3.txt | 33 + .../tests/cpp/diffusion_caching.cpp | 512 + .../tests/cpp/helper.cpp | 30 + .../tests/cpp/helper.hpp | 8 + .../tests/cpp/kvcrush.cpp | 360 + .../tests/cpp/logger.cpp | 117 + .../tests/cpp/logit_filtering.cpp | 345 + .../tests/cpp/parser.cpp | 218 + .../tests/cpp/sampler.cpp | 303 + .../tests/cpp/scheduler.cpp | 1113 + .../tests/cpp/sparse_attention.cpp | 103 + 
.../tests/cpp/speculative_decoding.cpp | 428 + .../tests/cpp/test_add_second_input_pass.cpp | 414 + .../tests/cpp/test_cdpruner_dpp.cpp | 565 + .../tests/cpp/test_json_container.cpp | 502 + .../tests/cpp/utils.cpp | 21 + .../tests/python_tests/README.md | 70 + .../tests/python_tests/conftest.py | 121 + .../tests/python_tests/data/__init__.py | 2 + .../tests/python_tests/data/long_prompts.txt | 15 + .../tests/python_tests/data/models.py | 48 + .../tests/python_tests/data/short_prompts.txt | 16 + .../tests/python_tests/data/test_dataset.py | 22 + .../python_tests/data/tokenizer_configs.py | 1184 + .../generate_openai_word_level_timestamps.py | 33 + ..._dummy_10_openai_whisper_tiny_results.json | 1 + .../tests/python_tests/models/lightweight | 3 + .../tests/python_tests/models/nightly | 51 + .../tests/python_tests/models/real_models | 130 + .../tests/python_tests/pytest.ini | 29 + .../tests/python_tests/requirements.txt | 36 + .../tests/python_tests/samples/conftest.py | 479 + .../samples/test_beam_search_causal_lm.py | 107 + .../samples/test_benchmark_genai.py | 76 + .../samples/test_benchmark_image_gen.py | 33 + .../samples/test_benchmark_vlm.py | 31 + .../python_tests/samples/test_chat_sample.py | 71 + .../samples/test_compound_grammar_sample.py | 27 + .../samples/test_continuous_batching_tools.py | 34 + .../samples/test_encrypted_model_causal_lm.py | 29 + .../samples/test_encrypted_model_vlm.py | 40 + .../samples/test_greedy_causal_lm.py | 77 + .../test_heterogeneous_stable_diffusion.py | 25 + .../python_tests/samples/test_image2image.py | 46 + .../python_tests/samples/test_inpainting.py | 43 + .../tests/python_tests/samples/test_lora.py | 143 + .../samples/test_lora_text2image.py | 26 + .../samples/test_multinomial_causal_lm.py | 45 + .../samples/test_prompt_lookup_decoding_lm.py | 58 + .../python_tests/samples/test_rag_sample.py | 78 + .../python_tests/samples/test_react_sample.py | 29 + .../samples/test_scheduler_config.py | 38 + 
.../samples/test_speculative_decoding_lm.py | 79 + .../samples/test_structural_tag_generation.py | 28 + .../samples/test_structured_output_sample.py | 118 + .../python_tests/samples/test_taylorseer.py | 161 + .../python_tests/samples/test_text2image.py | 50 + .../python_tests/samples/test_text2speech.py | 71 + .../python_tests/samples/test_text2video.py | 31 + .../samples/test_tools_llm_benchmark.py | 493 + .../tests/python_tests/samples/test_utils.py | 32 + .../samples/test_video_to_text_chat.py | 33 + .../samples/test_visual_language_chat.py | 92 + .../test_whisper_speech_recognition.py | 61 + .../python_tests/test_continuous_batching.py | 737 + .../python_tests/test_generation_config.py | 162 + .../tests/python_tests/test_gguf_lora.py | 379 + .../tests/python_tests/test_gguf_reader.py | 224 + .../python_tests/test_image_generation.py | 215 + .../test_image_generation_multi_call.py | 80 + .../kv_cache_eviction_utils.py | 10 + .../test_kv_cache_eviction_1.py | 265 + .../test_kv_cache_eviction_2.py | 110 + .../tests/python_tests/test_llm_pipeline.py | 878 + .../python_tests/test_llm_pipeline_static.py | 448 + .../tests/python_tests/test_parsers.py | 676 + .../tests/python_tests/test_rag.py | 881 + .../tests/python_tests/test_sampling.py | 467 + .../test_stateful_speculative_decoding.py | 310 + .../python_tests/test_structured_output.py | 275 + .../tests/python_tests/test_text_streamer.py | 123 + .../tests/python_tests/test_tokenizer.py | 801 + .../python_tests/test_video_generation.py | 333 + .../python_tests/test_vllm_parsers_wrapper.py | 80 + .../tests/python_tests/test_vlm_pipeline.py | 2459 ++ .../python_tests/test_whisper_pipeline.py | 866 + .../test_whisper_pipeline_static.py | 266 + .../tests/python_tests/utils/__init__.py | 2 + .../python_tests/utils/atomic_download.py | 62 + .../tests/python_tests/utils/comparation.py | 52 + .../tests/python_tests/utils/constants.py | 69 + .../python_tests/utils/generation_config.py | 125 + 
.../tests/python_tests/utils/hugging_face.py | 334 + .../tests/python_tests/utils/longbench.py | 254 + .../tests/python_tests/utils/network.py | 50 + .../python_tests/utils/ov_genai_pipelines.py | 362 + .../utils/qwen3_reranker_utils.py | 10 + .../tests/python_tests/utils/tokenizers.py | 48 + .../third-party-programs.txt | 98 + .../thirdparty/CMakeLists.txt | 17 + .../tools/__init__.py | 0 .../tools/cacheviz/__init__.py | 2 + .../tools/cacheviz/cacheviz.py | 315 + .../tools/cacheviz/requirements.txt | 2 + .../tools/continuous_batching/CMakeLists.txt | 6 + .../accuracy/CMakeLists.txt | 43 + .../accuracy/continuous_batching_accuracy.cpp | 170 + ...ntinuous_batching_speculative_decoding.cpp | 170 + .../benchmark/CMakeLists.txt | 40 + .../continuous_batching_benchmark.cpp | 551 + .../tools/llm_bench/README.md | 352 + .../tools/llm_bench/benchmark.py | 550 + .../llm_bench/llm_bench_utils/config_class.py | 235 + .../llm_bench_utils/gen_output_data.py | 83 + .../llm_bench/llm_bench_utils/get_use_case.py | 181 + .../llm_bench_utils/hook_beam_search.py | 87 + .../llm_bench/llm_bench_utils/hook_common.py | 35 + .../llm_bench/llm_bench_utils/hook_forward.py | 290 + .../llm_bench_utils/hook_forward_whisper.py | 140 + .../llm_bench_utils/hook_greedy_search.py | 406 + .../llm_hook_beam_search/__init__.py | 0 .../hook_beam_search_v40.py | 454 + .../hook_beam_search_v51.py | 384 + .../hook_beam_search_v52.py | 422 + .../hook_beam_search_v55.py | 429 + .../hook_beam_search_v57.py | 423 + .../llm_hook_sample/__init__.py | 0 .../llm_hook_sample/hook_sample.py | 229 + .../llm_hook_sample/hook_sample_v43.py | 233 + .../llm_hook_sample/hook_sample_v45.py | 225 + .../llm_hook_sample/hook_sample_v51.py | 244 + .../llm_hook_sample/hook_sample_v52.py | 237 + .../llm_hook_sample/hook_sample_v55.py | 251 + .../llm_hook_sample/hook_sample_v57.py | 239 + .../llm_bench_utils/memory_monitor.py | 501 + .../llm_bench_utils/metrics_print.py | 279 + .../llm_bench/llm_bench_utils/model_utils.py | 342 
+ .../llm_bench/llm_bench_utils/output_csv.py | 213 + .../llm_bench/llm_bench_utils/output_file.py | 159 + .../llm_bench/llm_bench_utils/output_json.py | 147 + .../llm_bench_utils/ov_model_classes.py | 480 + .../llm_bench/llm_bench_utils/ov_utils.py | 1320 + .../llm_bench_utils/parse_json_data.py | 79 + .../llm_bench/llm_bench_utils/prompt_utils.py | 182 + .../llm_bench/llm_bench_utils/pt_utils.py | 364 + .../llm_bench/prompts/llama-2-7b-chat_l.jsonl | 1 + .../llm_bench/prompts/llava-1.5-7b.jsonl | 1 + .../llm_bench/prompts/scheduler_config.json | 26 + .../prompts/stable-diffusion-i2i.jsonl | 2 + .../prompts/stable-diffusion-inpainting.jsonl | 2 + .../llm_bench/prompts/stable-diffusion.jsonl | 2 + .../llm_bench/prompts/texts_for_rerank.jsonl | 2 + .../prompts/video_generation_ltx_video.jsonl | 2 + .../tools/llm_bench/requirements.txt | 43 + .../tools/llm_bench/setup.cfg | 25 + .../tools/llm_bench/task/image_generation.py | 271 + .../tools/llm_bench/task/pipeline_utils.py | 267 + .../task/speech_to_text_generation.py | 194 + .../task/super_resolution_generation.py | 129 + .../tools/llm_bench/task/text_embeddings.py | 221 + .../tools/llm_bench/task/text_generation.py | 637 + .../tools/llm_bench/task/text_reranker.py | 436 + .../task/text_to_speech_generation.py | 243 + .../tools/llm_bench/task/video_generation.py | 431 + .../task/visual_language_generation.py | 348 + .../tools/who_what_benchmark/README.md | 237 + .../who_what_benchmark/examples/gptq_eval.py | 29 + .../examples/huggingface_eval.py | 30 + .../examples/openvino_batched_eval.py | 128 + .../examples/openvino_eval.py | 31 + .../tools/who_what_benchmark/requirements.txt | 29 + .../tools/who_what_benchmark/setup.cfg | 28 + .../tools/who_what_benchmark/setup.py | 63 + .../who_what_benchmark/tests/conftest.py | 168 + .../who_what_benchmark/tests/ov_utils.py | 200 + .../tests/test_cli_cdpruner.py | 86 + .../tests/test_cli_embeddings.py | 239 + .../tests/test_cli_image.py | 271 + .../tests/test_cli_reranking.py 
| 128 + .../who_what_benchmark/tests/test_cli_text.py | 274 + .../tests/test_cli_videos.py | 145 + .../who_what_benchmark/tests/test_cli_vlm.py | 247 + .../whowhatbench/__init__.py | 25 + .../whowhatbench/embeddings_evaluator.py | 205 + .../whowhatbench/im2im_evaluator.py | 131 + .../whowhatbench/inpaint_evaluator.py | 152 + .../whowhatbench/model_loaders.py | 768 + .../prompts/text_long_prompts.yaml | 1117 + .../whowhatbench/prompts/text_prompts.yaml | 58 + .../prompts/text_to_video_prompts.json | 72 + .../whowhatbench/registry.py | 49 + .../whowhatbench/reranking_evaluator.py | 197 + .../whowhatbench/text2image_evaluator.py | 178 + .../whowhatbench/text2video_evaluator.py | 194 + .../whowhatbench/text_evaluator.py | 232 + .../who_what_benchmark/whowhatbench/utils.py | 306 + .../whowhatbench/visualtext_evaluator.py | 201 + .../whowhatbench/whowhat_metrics.py | 398 + .../who_what_benchmark/whowhatbench/wwb.py | 1009 + .../openvino/openvino.genai-2026.1.0.0.tar.gz | Bin 0 -> 2055338 bytes src/resources/xml/pugixml-1.15.tar.gz | Bin 0 -> 395045 bytes src/robot_state_expert/CMakeLists.txt | 0 src/robot_state_expert/builder.h | 64 + .../compact_state_representation.h | 123 + src/robot_state_expert/reporter.h | 69 + .../base_interface_type_erasure.h | 29 + .../lifecycles/lifecycle_node_strategy.h | 30 + .../lifecycles/mock_node_reporter.h | 19 + .../tf_availability/mock_tf_reporter.h | 24 + .../tf_availability/tf_strategy.h | 37 + .../topics/mock_topic_reporter.h | 23 + .../topics/topic_freshness_strategy.h | 35 + src/settings/inference/model_inference.h | 69 + .../inference/model_inference_settings.h | 20 + src/utils/concepts/validators.h | 35 + src/utils/parsers/xml_parser.h | 162 + src/utils/validation/validator.h | 19 + 1179 files changed, 215345 insertions(+), 864 deletions(-) create mode 100644 src/CMakeLists.txt create mode 100644 src/commands/base.h rename src/{frontend/frontend/__init__.py => commands/builder.h} (100%) create mode 100644 src/commands/commons.h 
rename src/{frontend/frontend/commands/__init__.py => commands/factory.h} (100%) rename src/{frontend/frontend/frontend/__init__.py => commands/move_to.h} (100%) create mode 100644 src/config/executive/behave_tree/command_executor_tree.xml create mode 100644 src/config/inference/model_inference_config.xml create mode 100644 src/definitions.h create mode 100644 src/executive/CMakeLists.txt create mode 100644 src/executive/builder.h create mode 100644 src/executive/configurator.h create mode 100644 src/executive/executive.h create mode 100644 src/executive/factory.h create mode 100644 src/executive/nodes/clear_board_from_previous_command_execution_node.h create mode 100644 src/executive/nodes/communicate_command_was_not_listed_node.h create mode 100644 src/executive/nodes/communicate_return_home_node.h create mode 100644 src/executive/nodes/communicate_waiting_status_node.h create mode 100644 src/executive/nodes/execute_command_with_feedback_node.h create mode 100644 src/executive/nodes/get_next_command_node.h create mode 100644 src/executive/nodes/validate_arrival_to_landmark_node.h create mode 100644 src/executive/nodes/validate_component_node.h create mode 100644 src/executive/nodes/validate_landmark_node.h create mode 100644 src/executive/tree_executor.h create mode 100644 src/features/builder.h create mode 100644 src/features/landmarks/landmark_info.h create mode 100644 src/features/landmarks/landmarks_container.h create mode 100644 src/features/manager.h create mode 100644 src/features/retrievals/retrieve_features_from_file.h create mode 100644 src/frontend/CMakeLists.txt delete mode 100644 src/frontend/frontend/commands/executor.py delete mode 100644 src/frontend/frontend/commands/pipeline.py delete mode 100644 src/frontend/frontend/commands/repository.py delete mode 100644 src/frontend/frontend/commands/states.py delete mode 100644 src/frontend/frontend/commands/tracker.py delete mode 100644 src/frontend/frontend/frontend/ollama_client_cli.py delete mode 
100644 src/frontend/frontend/frontend/readme.md delete mode 100644 src/frontend/frontend/frontend/server.py delete mode 100644 src/frontend/package.xml delete mode 100644 src/frontend/readme.md create mode 100644 src/frontend/request_preprocessor.h delete mode 100644 src/frontend/setup.cfg delete mode 100644 src/frontend/setup.py delete mode 100644 src/frontend/test/test_copyright.py delete mode 100644 src/frontend/test/test_flake8.py delete mode 100644 src/frontend/test/test_pep257.py create mode 100644 src/inference/CMakeLists.txt create mode 100644 src/inference/configure.h create mode 100644 src/inference/factory.h create mode 100644 src/inference/gen_ai_model.h create mode 100644 src/inference/model_loader.h create mode 100644 src/models/llm/distilled-1b-robot-router/checkpoints/README.md create mode 100644 src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-141/README.md create mode 100644 src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-141/adapter_config.json create mode 100644 src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-141/chat_template.jinja create mode 100644 src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-141/rng_state.pth create mode 100644 src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-141/scheduler.pt create mode 100644 src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-141/special_tokens_map.json create mode 100644 src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-141/tokenizer_config.json create mode 100644 src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-141/trainer_state.json create mode 100644 src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-141/training_args.bin create mode 100644 src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-188/README.md create mode 100644 src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-188/adapter_config.json create mode 100644 
src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-188/chat_template.jinja create mode 100644 src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-188/rng_state.pth create mode 100644 src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-188/scheduler.pt create mode 100644 src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-188/special_tokens_map.json create mode 100644 src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-188/tokenizer_config.json create mode 100644 src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-188/trainer_state.json create mode 100644 src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-188/training_args.bin create mode 100644 src/models/llm/distilled-1b-robot-router/exported/openvino_int4/chat_template.jinja create mode 100644 src/models/llm/distilled-1b-robot-router/exported/openvino_int4/config.json create mode 100644 src/models/llm/distilled-1b-robot-router/exported/openvino_int4/generation_config.json create mode 100644 src/models/llm/distilled-1b-robot-router/exported/openvino_int4/openvino_detokenizer.xml create mode 100644 src/models/llm/distilled-1b-robot-router/exported/openvino_int4/openvino_tokenizer.xml create mode 100644 src/models/llm/distilled-1b-robot-router/exported/openvino_int4/special_tokens_map.json create mode 100644 src/models/llm/distilled-1b-robot-router/exported/openvino_int4/tokenizer_config.json create mode 100644 src/models/llm/distilled-1b-robot-router/lora/README.md create mode 100644 src/models/llm/distilled-1b-robot-router/lora/adapter_config.json create mode 100644 src/models/llm/distilled-1b-robot-router/lora/chat_template.jinja create mode 100644 src/models/llm/distilled-1b-robot-router/lora/special_tokens_map.json create mode 100644 src/models/llm/distilled-1b-robot-router/lora/tokenizer_config.json create mode 100644 src/models/llm/distilled-1b-robot-router/merged/chat_template.jinja create mode 100644 
src/models/llm/distilled-1b-robot-router/merged/config.json create mode 100644 src/models/llm/distilled-1b-robot-router/merged/special_tokens_map.json create mode 100644 src/models/llm/distilled-1b-robot-router/merged/tokenizer_config.json create mode 100644 src/resources/openvino.genai-2026.1.0.0/.clang-format create mode 100644 src/resources/openvino.genai-2026.1.0.0/.gitattributes create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/CODEOWNERS create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/CONTRIBUTING.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/actions/build_app/action.yml create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/actions/install_openvino/action.yml create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/actions/install_python_deps/action.yml create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/actions/install_wheel/.node-version create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/actions/install_wheel/.prettierignore create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/actions/install_wheel/.prettierrc.json create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/actions/install_wheel/action.yml create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/actions/install_wheel/package-lock.json create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/actions/install_wheel/package.json create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/actions/install_wheel/src/install_packages.js create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/agents/agentic-workflows.agent.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/aw/actions-lock.json create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/components.yml create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/copilot-instructions.md create mode 100644 
src/resources/openvino.genai-2026.1.0.0/.github/dependabot.yml create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/dependency_review.yml create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/labeler.yml create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/pull_request_template.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/scripts/generate_reference_llava.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/__init__.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/argument_parser.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/constants.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/errors_to_look_for.json create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/log_analyzer.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/log_collector.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/requirements.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/rerunner.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/__init__.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/data/logs_with_error/19_Samples _ Samples.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/data/logs_with_error/29_Build _ Build.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/data/logs_with_error/31_Smart_CI.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/data/logs_with_error/Build _ Build/system.txt create mode 100644 
src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/data/logs_with_error/Samples _ Samples/system.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/data/logs_with_error/Smart_CI/system.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/data/logs_wo_error/dir_should_be_empty.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/integration_test.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/log_analyzer_test.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/log_collector_test.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/workflows/assign_issue.yml create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/workflows/ci-doctor.lock.yml create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/workflows/ci-doctor.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/workflows/cleanup_caches.yml create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/workflows/copilot-setup-steps.yml create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/workflows/coverity.yml create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/workflows/deploy_gh_pages.yml create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/workflows/labeler.yml create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/workflows/lint.yml create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/workflows/linux.yml create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/workflows/mac.yml create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/workflows/manylinux_2_28.yml create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/workflows/sdl.yml create mode 100644 
src/resources/openvino.genai-2026.1.0.0/.github/workflows/stale.yml create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/workflows/windows.yml create mode 100644 src/resources/openvino.genai-2026.1.0.0/.github/workflows/workflow_rerunner.yml create mode 100644 src/resources/openvino.genai-2026.1.0.0/.gitignore create mode 100644 src/resources/openvino.genai-2026.1.0.0/.gitmodules create mode 100644 src/resources/openvino.genai-2026.1.0.0/.pre-commit-config.yaml create mode 100644 src/resources/openvino.genai-2026.1.0.0/CMakeLists.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/Jenkinsfile create mode 100644 src/resources/openvino.genai-2026.1.0.0/LICENSE create mode 100644 src/resources/openvino.genai-2026.1.0.0/README.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/SECURITY.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/bandit.yml create mode 100644 src/resources/openvino.genai-2026.1.0.0/cmake/features.cmake create mode 100644 src/resources/openvino.genai-2026.1.0.0/cmake/templates/OpenVINOGenAIConfig.cmake.in create mode 100644 src/resources/openvino.genai-2026.1.0.0/cmake/templates/version.cpp.in create mode 100644 src/resources/openvino.genai-2026.1.0.0/cmake/templates/version.hpp.in create mode 100644 src/resources/openvino.genai-2026.1.0.0/cmake/templates/vs_version.rc.in create mode 100644 src/resources/openvino.genai-2026.1.0.0/cmake/version.cmake create mode 100644 src/resources/openvino.genai-2026.1.0.0/cmake/vs_version.cmake create mode 100644 src/resources/openvino.genai-2026.1.0.0/pyproject.toml create mode 100644 src/resources/openvino.genai-2026.1.0.0/requirements-build.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/CMakeLists.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/c/text_generation/CMakeLists.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/c/text_generation/README.md create mode 100644 
src/resources/openvino.genai-2026.1.0.0/samples/c/text_generation/benchmark_genai_c.c create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/c/text_generation/chat_sample_c.c create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/c/text_generation/greedy_causal_lm_c.c create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/c/visual_language_chat/CMakeLists.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/c/visual_language_chat/load_image.c create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/c/visual_language_chat/load_image.h create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/c/visual_language_chat/vlm_pipeline.c create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/c/whisper_speech_recognition/CMakeLists.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/c/whisper_speech_recognition/README.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/c/whisper_speech_recognition/whisper_speech_recognition.c create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/c/whisper_speech_recognition/whisper_utils.c create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/c/whisper_speech_recognition/whisper_utils.h create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/README.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/fetch_opencv.cmake create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/512x512.bmp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/CMakeLists.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/README.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/baseline.bmp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/benchmark_image_gen.cpp create mode 100644 
src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/heterogeneous_stable_diffusion.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/image2image.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/image2image_concurrency.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/imageimage.bmp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/imwrite.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/imwrite.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/inpainting.bmp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/inpainting.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/load_image.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/load_image.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/lora.bmp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/lora_text2image.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/progress_bar.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/stable_diffusion_export_import.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/taylorseer.bmp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/taylorseer_baseline.bmp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/taylorseer_text2image.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/text2image.cpp create mode 100644 
src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/text2image_concurrency.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/rag/CMakeLists.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/rag/README.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/rag/text_embeddings.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/rag/text_rerank.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/speech_generation/CMakeLists.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/speech_generation/README.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/speech_generation/audio_utils.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/speech_generation/audio_utils.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/speech_generation/text2speech.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/CMakeLists.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/README.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/beam_search_causal_lm.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/benchmark_genai.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/chat_sample.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/encrypted_model_causal_lm.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/greedy_causal_lm.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/lora_greedy_causal_lm.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/multinomial_causal_lm.cpp create mode 100644 
src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/prompt_lookup_decoding_lm.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/read_prompt_from_file.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/read_prompt_from_file.h create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/speculative_decoding_lm.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/structured_output_generation.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/video_generation/CMakeLists.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/video_generation/README.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/video_generation/imwrite_video.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/video_generation/imwrite_video.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/video_generation/taylorseer_text2video.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/video_generation/text2video.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/CMakeLists.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/README.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/benchmark_vlm.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/encrypted_model_vlm.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/load_image.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/load_image.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/video_to_text_chat.cpp create mode 100644 
src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/visual_language_chat.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/visual_language_lora.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/whisper_speech_recognition/CMakeLists.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/whisper_speech_recognition/README.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/whisper_speech_recognition/audio_utils.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/whisper_speech_recognition/audio_utils.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/cpp/whisper_speech_recognition/whisper_speech_recognition.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/deployment-requirements.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/export-requirements.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/generation.gif create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/js/.gitignore create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/js/package-lock.json create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/js/package.json create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/js/rag/README.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/js/rag/text_embeddings.js create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/js/rag/text_rerank.js create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/js/text_generation/README.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/js/text_generation/beam_search_causal_lm.js create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/js/text_generation/benchmark_genai.js create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/js/text_generation/chat_sample.js create 
mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/js/text_generation/compound_grammar_generation.js create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/js/text_generation/greedy_causal_lm.js create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/js/text_generation/helper.js create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/js/text_generation/multinomial_causal_lm.js create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/js/text_generation/react_sample.js create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/js/text_generation/structural_tags_generation.js create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/js/text_generation/structured_output_generation.js create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/js/text_generation/tests/usage.test.js create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/js/whisper_speech_recognition/README.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/js/whisper_speech_recognition/wav_utils.js create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/js/whisper_speech_recognition/whisper_speech_recognition.js create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/image_generation/README.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/image_generation/benchmark_image_gen.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/image_generation/heterogeneous_stable_diffusion.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/image_generation/image2image.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/image_generation/inpainting.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/image_generation/lora_text2image.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/image_generation/stable_diffusion_export_import.py 
create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/image_generation/taylorseer_text2image.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/image_generation/text2image.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/rag/README.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/rag/text_embeddings.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/rag/text_rerank.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/speech_generation/README.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/speech_generation/create_speaker_embedding.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/speech_generation/text2speech.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/README.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/beam_search_causal_lm.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/benchmark_genai.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/chat_sample.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/compound_grammar_generation.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/encrypted_model_causal_lm.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/greedy_causal_lm.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/limit_checker.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/lora_greedy_causal_lm.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/multinomial_causal_lm.py create mode 100644 
src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/prompt_lookup_decoding_lm.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/react_sample.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/speculative_decoding_lm.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/structural_tags_generation.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/structured_output_generation.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/video_generation/README.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/video_generation/taylorseer_text2video.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/video_generation/text2video.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/video_generation/video_utils.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/visual_language_chat/README.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/visual_language_chat/benchmark_vlm.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/visual_language_chat/encrypted_model_vlm.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/visual_language_chat/milebench_eval_vlm.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/visual_language_chat/video_to_text_chat.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/visual_language_chat/visual_language_chat.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/visual_language_chat/visual_language_lora.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/whisper_speech_recognition/README.md create mode 100644 
src/resources/openvino.genai-2026.1.0.0/samples/python/whisper_speech_recognition/recorder.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/python/whisper_speech_recognition/whisper_speech_recognition.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/samples/requirements.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/.editorconfig create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/.gitignore create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/.prettierignore create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/.prettierrc create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/README.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/bindings/_category_.json create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/bindings/node-js.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/_category_.json create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/beam-search.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/how-it-works.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/lora.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/optimization-techniques/_category_.json create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/optimization-techniques/continuous-batching.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/optimization-techniques/diffusion-caching.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/optimization-techniques/kvcache-eviction-algorithm.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/optimization-techniques/prefix-caching.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/optimization-techniques/sparse-attention-prefill.md create 
mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/optimization-techniques/speculative-decoding.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/optimization-techniques/visual-token-pruning.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/getting-started/_category_.json create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/getting-started/installation.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/getting-started/introduction.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/guides/_category_.json create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/guides/chat-scenario.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/guides/debug-logging.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/guides/lora-adapters.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/guides/model-preparation/_category_.json create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/guides/model-preparation/_use_cases_note.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/guides/model-preparation/convert-to-openvino.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/guides/model-preparation/download-openvino-models.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/guides/performance-metrics.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/guides/streaming.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/guides/structured-output.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/guides/tokenization.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/samples/_category_.json create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/samples/_components/samples-list/index.tsx create mode 100644 
src/resources/openvino.genai-2026.1.0.0/site/docs/samples/index.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_category_.json create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/base-models-table/index.tsx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/image-generation-models-table/index.tsx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/image-generation-models-table/models.ts create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/llm-models-table/index.tsx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/llm-models-table/models.ts create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/speech-generation-models-table/index.tsx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/speech-generation-models-table/models.ts create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/text-embeddings-models-table/index.tsx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/text-embeddings-models-table/models.ts create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/text-rerank-models-table/index.tsx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/text-rerank-models-table/models.ts create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/video-generation-models-table/index.tsx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/video-generation-models-table/models.ts create mode 100644 
src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/vlm-models-table/index.tsx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/vlm-models-table/models.ts create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/whisper-models-table/index.tsx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/whisper-models-table/models.ts create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/index.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/_category_.json create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/_shared/_basic_generation_configuration.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/_shared/_beam_search_generation.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/_shared/_chat_scenario.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/_shared/_convert_model.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/_shared/_generation_configuration_workflow.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/_shared/_streaming.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/_sections/_run_model/_image2image_cpp.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/_sections/_run_model/_image2image_python.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/_sections/_run_model/_inpainting_cpp.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/_sections/_run_model/_inpainting_python.mdx create mode 100644 
src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/_sections/_run_model/_text2image_cpp.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/_sections/_run_model/_text2image_python.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/_sections/_run_model/index.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/_sections/_usage_options/index.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/index.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-processing/_sections/_run_model/_code_example_cpp.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-processing/_sections/_run_model/_code_example_js.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-processing/_sections/_run_model/_code_example_python.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-processing/_sections/_run_model/index.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-processing/_sections/_usage_options/index.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-processing/index.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/speech-generation/_sections/_run_model/_code_example_cpp.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/speech-generation/_sections/_run_model/_code_example_python.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/speech-generation/_sections/_run_model/index.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/speech-generation/index.mdx create mode 100644 
src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/speech-recognition/_sections/_run_model/_code_example_cpp.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/speech-recognition/_sections/_run_model/_code_example_js.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/speech-recognition/_sections/_run_model/_code_example_python.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/speech-recognition/_sections/_run_model/index.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/speech-recognition/_sections/_usage_options/index.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/speech-recognition/index.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/text-embedding/_sections/_run_model/_code_example_cpp.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/text-embedding/_sections/_run_model/_code_example_js.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/text-embedding/_sections/_run_model/_code_example_python.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/text-embedding/_sections/_run_model/index.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/text-embedding/_sections/_usage_options/index.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/text-embedding/index.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/text-generation/_sections/_run_model/_code_example_cpp.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/text-generation/_sections/_run_model/_code_example_js.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/text-generation/_sections/_run_model/_code_example_python.mdx create mode 100644 
src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/text-generation/_sections/_run_model/index.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/text-generation/_sections/_usage_options/_generation_parameters.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/text-generation/_sections/_usage_options/_lora_adapters.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/text-generation/_sections/_usage_options/_speculative_decoding.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/text-generation/_sections/_usage_options/index.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/text-generation/index.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/text-rerank/_sections/_run_model/_code_example_cpp.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/text-rerank/_sections/_run_model/_code_example_js.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/text-rerank/_sections/_run_model/_code_example_python.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/text-rerank/_sections/_run_model/index.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/text-rerank/index.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/video-generation/_sections/_run_model/_text2video_cpp.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/video-generation/_sections/_run_model/_text2video_python.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/video-generation/_sections/_run_model/index.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/video-generation/_sections/_usage_options/index.mdx create mode 100644 
src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/video-generation/index.mdx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/docusaurus.config.ts create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/eslint.config.mjs create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/package-lock.json create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/package.json create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/sidebars.ts create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/components/Button/index.tsx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/components/Button/styles.module.css create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/components/Carousel/index.tsx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/components/Carousel/styles.module.css create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/components/LanguageTabs/index.tsx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/components/OptimumCLI/index.tsx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/css/breadcrumbs.css create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/css/custom.css create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/css/footer.css create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/css/menu.css create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/css/navbar.css create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/css/toc.css create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/css/typography.css create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/hooks/use-screen-size.ts create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/pages/_sections/FeaturesSection/FeatureItem/index.tsx create mode 100644 
src/resources/openvino.genai-2026.1.0.0/site/src/pages/_sections/FeaturesSection/FeatureItem/styles.module.css create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/pages/_sections/FeaturesSection/index.tsx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/pages/_sections/FeaturesSection/styles.module.css create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/pages/_sections/HeroSection/PipelinesCarousel/index.tsx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/pages/_sections/HeroSection/PipelinesCarousel/styles.module.css create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/pages/_sections/HeroSection/index.tsx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/pages/_sections/HeroSection/styles.module.css create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/pages/_sections/InstallSection/index.tsx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/pages/_sections/InstallSection/styles.module.css create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/pages/_sections/UseCasesSection/components/UseCaseCard/index.tsx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/pages/_sections/UseCasesSection/components/UseCaseCard/styles.module.css create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/pages/_sections/UseCasesSection/components/image-generation.tsx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/pages/_sections/UseCasesSection/components/image-processing.tsx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/pages/_sections/UseCasesSection/components/speech-generation.tsx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/pages/_sections/UseCasesSection/components/speech-recognition.tsx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/pages/_sections/UseCasesSection/components/text-embedding.tsx create mode 100644 
src/resources/openvino.genai-2026.1.0.0/site/src/pages/_sections/UseCasesSection/components/text-generation.tsx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/pages/_sections/UseCasesSection/components/text-rerank.tsx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/pages/_sections/UseCasesSection/components/video-generation.tsx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/pages/_sections/UseCasesSection/index.tsx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/pages/_sections/UseCasesSection/styles.module.css create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/pages/_sections/section-styles.module.css create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/pages/index.module.css create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/pages/index.tsx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/plugins/genai-samples-docs-plugin.ts create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/theme/MDXComponents.tsx create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/src/types/images.d.ts create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/static/.nojekyll create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/static/img/background.webp create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/static/img/beam_idx-drop.gif create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/static/img/beam_idx-fork.gif create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/static/img/chevron-right.svg create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/static/img/chevron-up.svg create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/static/img/favicon.png create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/static/img/image.svg create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/static/img/intel-logo.svg create mode 100644 
src/resources/openvino.genai-2026.1.0.0/site/static/img/kv-cache-areas-diagram.svg create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/static/img/linux-logo.svg create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/static/img/lora.png create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/static/img/ltx-pipeline.png create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/static/img/mac-os-logo.svg create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/static/img/magnifying-glass.svg create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/static/img/openvino-genai-logo-gradient.svg create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/static/img/openvino-genai-workflow.svg create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/static/img/openvino.svg create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/static/img/sound-on.svg create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/static/img/stateful.jpg create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/static/img/stateless.jpg create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/static/img/structured_output_work_example.png create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/static/img/text.svg create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/static/img/trishape.svg create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/static/img/windows-logo.svg create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/static/img/xattention.svg create mode 100644 src/resources/openvino.genai-2026.1.0.0/site/tsconfig.json create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/CMakeLists.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/README.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/bindings_utils.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/c/CMakeLists.txt create mode 100644 
src/resources/openvino.genai-2026.1.0.0/src/c/include/openvino/genai/c/chat_history.h create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/c/include/openvino/genai/c/generation_config.h create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/c/include/openvino/genai/c/json_container.h create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/c/include/openvino/genai/c/llm_pipeline.h create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/c/include/openvino/genai/c/perf_metrics.h create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/c/include/openvino/genai/c/visibility.h create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/c/include/openvino/genai/c/vlm_pipeline.h create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/c/include/openvino/genai/c/whisper_generation_config.h create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/c/include/openvino/genai/c/whisper_pipeline.h create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/c/src/chat_history.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/c/src/generation_config.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/c/src/json_container.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/c/src/llm_pipeline.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/c/src/perf_metrics.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/c/src/types_c.h create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/c/src/vlm_pipeline.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/c/src/whisper_generation_config.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/c/src/whisper_pipeline.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/CMakeLists.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/cache_eviction.hpp create mode 100644 
src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/chat_history.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/common_types.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/generation_config.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/generation_handle.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/image_generation/autoencoder_kl.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/image_generation/clip_text_model.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/image_generation/clip_text_model_with_projection.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/image_generation/flux_transformer_2d_model.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/image_generation/generation_config.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/image_generation/image2image_pipeline.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/image_generation/image_generation_perf_metrics.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/image_generation/inpainting_pipeline.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/image_generation/scheduler.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/image_generation/sd3_transformer_2d_model.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/image_generation/t5_encoder_model.hpp create 
mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/image_generation/text2image_pipeline.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/image_generation/unet2d_condition_model.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/json_container.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/llm_pipeline.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/lora_adapter.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/parsers.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/perf_metrics.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/rag/text_embedding_pipeline.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/rag/text_rerank_pipeline.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/scheduler_config.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/sparse_attention.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/speculative_decoding/perf_metrics.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/speech_generation/speech_generation_config.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/speech_generation/speech_generation_perf_metrics.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/speech_generation/text2speech_pipeline.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/streamer_base.hpp create mode 100644 
src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/taylorseer_config.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/text_streamer.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/tokenizer.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/video_generation/autoencoder_kl_ltx_video.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/video_generation/generation_config.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/video_generation/ltx_video_transformer_3d_model.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/video_generation/text2video_pipeline.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/visibility.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/visual_language/perf_metrics.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/visual_language/pipeline.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/whisper_generation_config.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/include/openvino/genai/whisper_pipeline.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/chat_history.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/circular_buffer_queue.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/continuous_batching/attention_output.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/continuous_batching/block_manager.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/continuous_batching/cache_eviction.cpp create mode 100644 
src/resources/openvino.genai-2026.1.0.0/src/cpp/src/continuous_batching/cache_eviction.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/continuous_batching/cache_manager.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/continuous_batching/cache_state_dumper.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/continuous_batching/kvcrush.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/continuous_batching/kvcrush.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/continuous_batching/model_runner.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/continuous_batching/paged_attention_transformations.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/continuous_batching/pipeline.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/continuous_batching/pipeline_base.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/continuous_batching/pipeline_base.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/continuous_batching/pipeline_impl.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/continuous_batching/pipeline_impl.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/continuous_batching/scheduler.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/continuous_batching/sparse_attention.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/continuous_batching/sparse_attention.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/continuous_batching/threaded_streamer.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/continuous_batching/timer.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/debug_utils.hpp create mode 100644 
src/resources/openvino.genai-2026.1.0.0/src/cpp/src/diffusion_caching/taylorseer_lite.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/generation_config.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/generation_handle.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/generation_stream.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/gguf_utils/building_blocks.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/gguf_utils/building_blocks.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/gguf_utils/gguf.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/gguf_utils/gguf.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/gguf_utils/gguf_modeling.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/gguf_utils/gguf_modeling.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/gguf_utils/gguf_quants.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/gguf_utils/gguf_tokenizer.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/gguf_utils/gguf_tokenizer.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/diffusion_pipeline.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/flux_fill_pipeline.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/flux_pipeline.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/generation_config.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/image2image_pipeline.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/image_generation_perf_metrics.cpp create mode 100644 
src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/image_processor.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/image_processor.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/inpainting_pipeline.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/models/autoencoder_kl.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/models/clip_text_model.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/models/flux_transformer_2d_model.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/models/sd3_transformer_2d_model.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/models/sd3transformer_2d_inference.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/models/sd3transformer_2d_inference_dynamic.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/models/sd3transformer_2d_inference_static_bs1.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/models/t5_encoder_model.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/models/unet2d_condition_model.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/models/unet_inference.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/models/unet_inference_dynamic.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/models/unet_inference_static_bs1.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/numpy_utils.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/numpy_utils.hpp 
create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/schedulers/ddim.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/schedulers/ddim.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/schedulers/euler_ancestral_discrete.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/schedulers/euler_ancestral_discrete.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/schedulers/euler_discrete.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/schedulers/euler_discrete.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/schedulers/flow_match_euler_discrete.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/schedulers/flow_match_euler_discrete.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/schedulers/ischeduler.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/schedulers/lcm.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/schedulers/lcm.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/schedulers/pndm.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/schedulers/pndm.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/schedulers/scheduler.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/schedulers/types.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/schedulers/types.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp create mode 100644 
src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/stable_diffusion_pipeline.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/stable_diffusion_xl_pipeline.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/text2image_pipeline.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/image_generation/threaded_callback.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/json_container.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/json_utils.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/llm/pipeline.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/llm/pipeline_base.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/llm/pipeline_continuous_batching_adapter.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/llm/pipeline_stateful.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/llm/pipeline_stateful.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/llm/pipeline_static.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/llm/pipeline_static.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/lm_encoding.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/lm_encoding.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/logger.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/logger.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/lora/adapter.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/lora/common.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/lora/helper.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/lora/helper.hpp create mode 
100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/lora/names_mapping.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/lora/names_mapping.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/lora/safetensors.c create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/parsers.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/perf_metrics.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/prompt_lookup/continuous_batching_for_prompt_lookup.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/prompt_lookup/continuous_batching_for_prompt_lookup.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/prompt_lookup/prompt_lookup_impl.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/prompt_lookup/prompt_lookup_impl.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/rag/npu/text_embedding_pipeline.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/rag/npu/text_embedding_pipeline.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/rag/text_embedding_pipeline.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/rag/text_embedding_utils.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/rag/text_embedding_utils.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/rag/text_rerank_pipeline.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/sampling/logit_processor.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/sampling/logit_transformers.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/sampling/sampler.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/sampling/sampler.hpp create mode 100644 
src/resources/openvino.genai-2026.1.0.0/src/cpp/src/sampling/structured_output/structured_output_controller.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/sampling/structured_output/structured_output_controller.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/sampling/structured_output/xgrammar_backend.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/sampling/structured_output/xgrammar_backend.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/sampling/threadpool.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/sequence_group.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/sequence_group.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/speculative_decoding/continuous_batching/eagle3_strategy.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/speculative_decoding/continuous_batching/eagle3_strategy.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/speculative_decoding/continuous_batching/fast_draft_strategy.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/speculative_decoding/continuous_batching/fast_draft_strategy.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/speculative_decoding/continuous_batching/pipeline_impl.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/speculative_decoding/continuous_batching/pipeline_impl.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/speculative_decoding/continuous_batching/update_request_structs.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/speculative_decoding/eagle3_model_transforms.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/speculative_decoding/eagle3_model_transforms.hpp create mode 100644 
src/resources/openvino.genai-2026.1.0.0/src/cpp/src/speculative_decoding/perf_metrics.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/speculative_decoding/speculative_decoding_metrics.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/speculative_decoding/speculative_decoding_metrics.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/speculative_decoding/stateful/eagle3_strategy.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/speculative_decoding/stateful/eagle3_strategy.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/speculative_decoding/stateful/fast_draft_strategy.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/speculative_decoding/stateful/fast_draft_strategy.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/speculative_decoding/stateful/stateful_pipeline_base.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/speculative_decoding/stateful/stateful_pipeline_base.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/speech_generation/default_speaker_embedding.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/speech_generation/speech_generation_config.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/speech_generation/speech_generation_perf_metrics.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/speech_generation/speecht5_tts_decoder.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/speech_generation/speecht5_tts_decoder.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/speech_generation/speecht5_tts_model.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/speech_generation/speecht5_tts_model.hpp create mode 100644 
src/resources/openvino.genai-2026.1.0.0/src/cpp/src/speech_generation/text2speech_pipeline.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/speech_generation/text2speech_pipeline_impl.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/speech_generation/text2speech_pipeline_impl.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/synchronized_queue.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/text_streamer.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/tokenizer/add_second_input_pass.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/tokenizer/add_second_input_pass.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/tokenizer/chat_template_fallback_map.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/tokenizer/make_tokenizer_stateful.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/tokenizer/make_tokenizer_stateful.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/tokenizer/tokenizer.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/tokenizer/tokenizer_impl.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/tokenizer/tokenizer_impl.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/tokenizer/tokenizers_path.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/tokenizer/tokenizers_path.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/utils.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/utils.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/video_generation/generation_config_utils.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/video_generation/generation_config_utils.hpp create mode 100644 
src/resources/openvino.genai-2026.1.0.0/src/cpp/src/video_generation/ltx_pipeline.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/video_generation/models/autoencoder_kl_ltx_video.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/video_generation/models/ltx_video_transformer_3d_model.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/video_generation/text2video_pipeline.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/cdpruner/cdpruner.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/cdpruner/cdpruner.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/cdpruner/cdpruner_config.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/cdpruner/cdpruner_config.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/cdpruner/conditional_kernel.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/cdpruner/conditional_kernel.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/cdpruner/fast_dpp.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/cdpruner/fast_dpp.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/cdpruner/fast_dpp_cl.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/cdpruner/fast_dpp_cl.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/cdpruner/relevance_calculator.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/cdpruner/relevance_calculator.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/chat_history_state.cpp create mode 100644 
src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/chat_history_state.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/clip.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/clip.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/continuous_batching_adapter.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/embedding_model.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/embedding_model.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/gemma3/classes.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/gemma3/classes.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/inputs_embedder.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/inputs_embedder.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/internvl_chat/classes.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/internvl_chat/classes.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/llava/classes.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/llava/classes.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/llava_next/classes.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/llava_next/classes.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/llava_next_video/classes.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/llava_next_video/classes.hpp create mode 100644 
src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/minicpm/classes.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/minicpm/classes.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/nanollava/classes.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/nanollava/classes.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/perf_metrics.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/phi3_vision/classes.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/phi3_vision/classes.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/phi4mm/classes.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/phi4mm/classes.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/pipeline.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/pipeline_base.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/processor_config.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/processor_config.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/qwen2_5_vl/classes.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/qwen2_5_vl/classes.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/qwen2vl/classes.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/qwen2vl/classes.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/qwen3_vl/classes.cpp create mode 100644 
src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/qwen3_vl/classes.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/video_processor_config.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/vision_encoder.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/vision_encoder.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/vision_registry.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/vision_registry.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/vision_token_pruning_processor.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/vision_token_pruning_processor.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/vl_sdpa_transformations.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/vl_sdpa_transformations.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/vlm_chat_context.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/vlm_chat_context.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/vlm_config.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/visual_language/vlm_config.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/whisper/alignment_heads.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/whisper/alignment_heads.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/whisper/config.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/whisper/config.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/whisper/context_tokens.cpp 
create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/whisper/context_tokens.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/whisper/feature_extractor.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/whisper/feature_extractor.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/whisper/generation_config.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/whisper/logit_processor.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/whisper/logit_processor.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/whisper/models.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/whisper/models/decoder.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/whisper/models/decoder.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/whisper/models/statefull_decoder.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/whisper/models/statefull_decoder.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/whisper/perf_metrics.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/whisper/pipeline.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/whisper/pipeline_base.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/whisper/pipeline_static.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/whisper/pipeline_static.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/whisper/timestamps.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/whisper/timestamps.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/whisper/transformations/scaled_dot_product_attention_decomposition.cpp create mode 100644 
src/resources/openvino.genai-2026.1.0.0/src/cpp/src/whisper/transformations/scaled_dot_product_attention_decomposition.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/whisper/whisper.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/whisper/whisper.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/whisper/whisper_utils.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/whisper/whisper_utils.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/whisper/word_level_timestamps.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/cpp/src/whisper/word_level_timestamps.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/docs/BUILD.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/.gitignore create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/.npmignore create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/.prettierrc create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/BUILD.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/CMakeLists.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/README.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/eslint.config.cjs create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/include/addon.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/include/base/perf_metrics.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/include/chat_history.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/include/helper.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/include/llm_pipeline/finish_chat_worker.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/include/llm_pipeline/init_worker.hpp create mode 100644 
src/resources/openvino.genai-2026.1.0.0/src/js/include/llm_pipeline/llm_pipeline_wrapper.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/include/llm_pipeline/start_chat_worker.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/include/parser.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/include/perf_metrics.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/include/text_embedding_pipeline/embed_documents_worker.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/include/text_embedding_pipeline/embed_query_worker.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/include/text_embedding_pipeline/init_worker.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/include/text_embedding_pipeline/pipeline_wrapper.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/include/text_rerank_pipeline/init_worker.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/include/text_rerank_pipeline/pipeline_wrapper.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/include/text_rerank_pipeline/rerank_worker.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/include/tokenizer.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/include/vlm_pipeline/finish_chat_worker.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/include/vlm_pipeline/init_worker.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/include/vlm_pipeline/perf_metrics.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/include/vlm_pipeline/start_chat_worker.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/include/vlm_pipeline/vlm_pipeline_wrapper.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/include/whisper_pipeline/init_worker.hpp create mode 100644 
src/resources/openvino.genai-2026.1.0.0/src/js/include/whisper_pipeline/perf_metrics.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/include/whisper_pipeline/pipeline_wrapper.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/lib/addon.ts create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/lib/chatHistory.ts create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/lib/decodedResults.ts create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/lib/index.ts create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/lib/parsers.ts create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/lib/perfMetrics.ts create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/lib/pipelines/llmPipeline.ts create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/lib/pipelines/textEmbeddingPipeline.ts create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/lib/pipelines/textRerankPipeline.ts create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/lib/pipelines/vlmPipeline.ts create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/lib/pipelines/whisperPipeline.ts create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/lib/tokenizer.ts create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/lib/utils.ts create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/package-lock.json create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/package.json create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/scripts/download-runtime.cjs create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/src/addon.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/src/chat_history.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/src/helper.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/src/llm_pipeline/finish_chat_worker.cpp create mode 100644 
src/resources/openvino.genai-2026.1.0.0/src/js/src/llm_pipeline/init_worker.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/src/llm_pipeline/llm_pipeline_wrapper.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/src/llm_pipeline/start_chat_worker.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/src/parser.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/src/perf_metrics.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/src/text_embedding_pipeline/embed_documents_worker.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/src/text_embedding_pipeline/embed_query_worker.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/src/text_embedding_pipeline/init_worker.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/src/text_embedding_pipeline/pipeline_wrapper.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/src/text_rerank_pipeline/init_worker.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/src/text_rerank_pipeline/pipeline_wrapper.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/src/text_rerank_pipeline/rerank_worker.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/src/tokenizer.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/src/vlm_pipeline/finish_chat_worker.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/src/vlm_pipeline/init_worker.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/src/vlm_pipeline/perf_metrics.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/src/vlm_pipeline/start_chat_worker.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/src/vlm_pipeline/vlm_pipeline_wrapper.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/src/whisper_pipeline/init_worker.cpp create mode 100644 
src/resources/openvino.genai-2026.1.0.0/src/js/src/whisper_pipeline/perf_metrics.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/src/whisper_pipeline/pipeline_wrapper.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/tests/bindings.test.js create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/tests/chatHistory.test.js create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/tests/generationConfig.test.js create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/tests/llmPipeline.test.js create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/tests/parsers.test.js create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/tests/setup.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/tests/structuredOutput.test.js create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/tests/textEmbeddingsPipeline.test.js create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/tests/textRerankPipeline.test.js create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/tests/tokenizer.test.js create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/tests/utils.js create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/tests/vlmPipeline.test.js create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/tests/whisperPipeline.test.js create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/thirdparty/node-lib.def create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/thirdparty/win_delay_load_hook.cc create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/js/tsconfig.json create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/python/CMakeLists.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/python/clean_version.cmake create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/python/compare_pyi.cmake create mode 100644 
src/resources/openvino.genai-2026.1.0.0/src/python/openvino_genai/__init__.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/python/openvino_genai/__init__.pyi create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/python/openvino_genai/py_openvino_genai.pyi create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/python/py_chat_history.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/python/py_continuous_batching_pipeline.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/python/py_generation_config.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/python/py_image_generation_models.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/python/py_image_generation_pipelines.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/python/py_llm_pipeline.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/python/py_lora_adapter.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/python/py_openvino_genai.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/python/py_parsers.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/python/py_perf_metrics.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/python/py_rag.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/python/py_speech_generation.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/python/py_streamers.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/python/py_tokenizer.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/python/py_utils.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/python/py_utils.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/python/py_video_generation_models.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/python/py_video_generation_pipelines.cpp create mode 100644 
src/resources/openvino.genai-2026.1.0.0/src/python/py_vlm_pipeline.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/python/py_whisper_pipeline.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/src/python/remove_abi_specific_info.cmake create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/cpp/CMakeLists.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/cpp/block_allocator.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/cpp/block_hash_store.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/cpp/block_manager.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/cpp/cache_eviction.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/cpp/cache_manager.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/cpp/data/cache_rotation_poc_ref_coefficients_per_block_0.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/cpp/data/cache_rotation_poc_ref_coefficients_per_block_1.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/cpp/data/cache_rotation_poc_ref_coefficients_per_block_2.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/cpp/data/cache_rotation_poc_ref_coefficients_per_block_3.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/cpp/diffusion_caching.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/cpp/helper.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/cpp/helper.hpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/cpp/kvcrush.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/cpp/logger.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/cpp/logit_filtering.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/cpp/parser.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/cpp/sampler.cpp create mode 100644 
src/resources/openvino.genai-2026.1.0.0/tests/cpp/scheduler.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/cpp/sparse_attention.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/cpp/speculative_decoding.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/cpp/test_add_second_input_pass.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/cpp/test_cdpruner_dpp.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/cpp/test_json_container.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/cpp/utils.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/README.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/conftest.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/data/__init__.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/data/long_prompts.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/data/models.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/data/short_prompts.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/data/test_dataset.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/data/tokenizer_configs.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/data/whisper/generate_openai_word_level_timestamps.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/data/whisper/librispeech_asr_dummy_10_openai_whisper_tiny_results.json create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/models/lightweight create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/models/nightly create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/models/real_models create mode 100644 
src/resources/openvino.genai-2026.1.0.0/tests/python_tests/pytest.ini create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/requirements.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/samples/conftest.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/samples/test_beam_search_causal_lm.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/samples/test_benchmark_genai.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/samples/test_benchmark_image_gen.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/samples/test_benchmark_vlm.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/samples/test_chat_sample.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/samples/test_compound_grammar_sample.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/samples/test_continuous_batching_tools.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/samples/test_encrypted_model_causal_lm.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/samples/test_encrypted_model_vlm.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/samples/test_greedy_causal_lm.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/samples/test_heterogeneous_stable_diffusion.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/samples/test_image2image.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/samples/test_inpainting.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/samples/test_lora.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/samples/test_lora_text2image.py create mode 100644 
src/resources/openvino.genai-2026.1.0.0/tests/python_tests/samples/test_multinomial_causal_lm.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/samples/test_prompt_lookup_decoding_lm.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/samples/test_rag_sample.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/samples/test_react_sample.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/samples/test_scheduler_config.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/samples/test_speculative_decoding_lm.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/samples/test_structural_tag_generation.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/samples/test_structured_output_sample.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/samples/test_taylorseer.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/samples/test_text2image.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/samples/test_text2speech.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/samples/test_text2video.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/samples/test_tools_llm_benchmark.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/samples/test_utils.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/samples/test_video_to_text_chat.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/samples/test_visual_language_chat.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/samples/test_whisper_speech_recognition.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/test_continuous_batching.py create mode 
100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/test_generation_config.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/test_gguf_lora.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/test_gguf_reader.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/test_image_generation.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/test_image_generation_multi_call.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/test_kv_cache_eviction/kv_cache_eviction_utils.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/test_kv_cache_eviction/test_kv_cache_eviction_1.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/test_kv_cache_eviction/test_kv_cache_eviction_2.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/test_llm_pipeline.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/test_llm_pipeline_static.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/test_parsers.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/test_rag.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/test_sampling.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/test_stateful_speculative_decoding.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/test_structured_output.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/test_text_streamer.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/test_tokenizer.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/test_video_generation.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/test_vllm_parsers_wrapper.py 
create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/test_vlm_pipeline.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/test_whisper_pipeline.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/test_whisper_pipeline_static.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/utils/__init__.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/utils/atomic_download.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/utils/comparation.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/utils/constants.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/utils/generation_config.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/utils/hugging_face.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/utils/longbench.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/utils/network.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/utils/ov_genai_pipelines.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/utils/qwen3_reranker_utils.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tests/python_tests/utils/tokenizers.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/third-party-programs.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/thirdparty/CMakeLists.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/__init__.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/cacheviz/__init__.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/cacheviz/cacheviz.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/cacheviz/requirements.txt create mode 100644 
src/resources/openvino.genai-2026.1.0.0/tools/continuous_batching/CMakeLists.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/continuous_batching/accuracy/CMakeLists.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/continuous_batching/accuracy/continuous_batching_accuracy.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/continuous_batching/accuracy/continuous_batching_speculative_decoding.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/continuous_batching/benchmark/CMakeLists.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/continuous_batching/benchmark/continuous_batching_benchmark.cpp create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/README.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/benchmark.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/llm_bench_utils/config_class.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/llm_bench_utils/gen_output_data.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/llm_bench_utils/get_use_case.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/llm_bench_utils/hook_beam_search.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/llm_bench_utils/hook_common.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/llm_bench_utils/hook_forward.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/llm_bench_utils/hook_forward_whisper.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/llm_bench_utils/hook_greedy_search.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/llm_bench_utils/llm_hook_beam_search/__init__.py create mode 100644 
src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/llm_bench_utils/llm_hook_beam_search/hook_beam_search_v40.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/llm_bench_utils/llm_hook_beam_search/hook_beam_search_v51.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/llm_bench_utils/llm_hook_beam_search/hook_beam_search_v52.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/llm_bench_utils/llm_hook_beam_search/hook_beam_search_v55.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/llm_bench_utils/llm_hook_beam_search/hook_beam_search_v57.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/llm_bench_utils/llm_hook_sample/__init__.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/llm_bench_utils/llm_hook_sample/hook_sample.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/llm_bench_utils/llm_hook_sample/hook_sample_v43.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/llm_bench_utils/llm_hook_sample/hook_sample_v45.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/llm_bench_utils/llm_hook_sample/hook_sample_v51.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/llm_bench_utils/llm_hook_sample/hook_sample_v52.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/llm_bench_utils/llm_hook_sample/hook_sample_v55.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/llm_bench_utils/llm_hook_sample/hook_sample_v57.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/llm_bench_utils/memory_monitor.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/llm_bench_utils/metrics_print.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/llm_bench_utils/model_utils.py create mode 100644 
src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/llm_bench_utils/output_csv.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/llm_bench_utils/output_file.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/llm_bench_utils/output_json.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/llm_bench_utils/ov_model_classes.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/llm_bench_utils/ov_utils.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/llm_bench_utils/parse_json_data.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/llm_bench_utils/prompt_utils.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/llm_bench_utils/pt_utils.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/prompts/llama-2-7b-chat_l.jsonl create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/prompts/llava-1.5-7b.jsonl create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/prompts/scheduler_config.json create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/prompts/stable-diffusion-i2i.jsonl create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/prompts/stable-diffusion-inpainting.jsonl create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/prompts/stable-diffusion.jsonl create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/prompts/texts_for_rerank.jsonl create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/prompts/video_generation_ltx_video.jsonl create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/requirements.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/setup.cfg create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/task/image_generation.py create mode 100644 
src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/task/pipeline_utils.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/task/speech_to_text_generation.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/task/super_resolution_generation.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/task/text_embeddings.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/task/text_generation.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/task/text_reranker.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/task/text_to_speech_generation.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/task/video_generation.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/llm_bench/task/visual_language_generation.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/who_what_benchmark/README.md create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/who_what_benchmark/examples/gptq_eval.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/who_what_benchmark/examples/huggingface_eval.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/who_what_benchmark/examples/openvino_batched_eval.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/who_what_benchmark/examples/openvino_eval.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/who_what_benchmark/requirements.txt create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/who_what_benchmark/setup.cfg create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/who_what_benchmark/setup.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/who_what_benchmark/tests/conftest.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/who_what_benchmark/tests/ov_utils.py create mode 100644 
src/resources/openvino.genai-2026.1.0.0/tools/who_what_benchmark/tests/test_cli_cdpruner.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/who_what_benchmark/tests/test_cli_embeddings.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/who_what_benchmark/tests/test_cli_image.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/who_what_benchmark/tests/test_cli_reranking.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/who_what_benchmark/tests/test_cli_text.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/who_what_benchmark/tests/test_cli_videos.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/who_what_benchmark/tests/test_cli_vlm.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/who_what_benchmark/whowhatbench/__init__.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/who_what_benchmark/whowhatbench/embeddings_evaluator.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/who_what_benchmark/whowhatbench/im2im_evaluator.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/who_what_benchmark/whowhatbench/inpaint_evaluator.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/who_what_benchmark/whowhatbench/model_loaders.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/who_what_benchmark/whowhatbench/prompts/text_long_prompts.yaml create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/who_what_benchmark/whowhatbench/prompts/text_prompts.yaml create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/who_what_benchmark/whowhatbench/prompts/text_to_video_prompts.json create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/who_what_benchmark/whowhatbench/registry.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/who_what_benchmark/whowhatbench/reranking_evaluator.py create mode 100644 
src/resources/openvino.genai-2026.1.0.0/tools/who_what_benchmark/whowhatbench/text2image_evaluator.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/who_what_benchmark/whowhatbench/text2video_evaluator.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/who_what_benchmark/whowhatbench/text_evaluator.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/who_what_benchmark/whowhatbench/utils.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/who_what_benchmark/whowhatbench/visualtext_evaluator.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/who_what_benchmark/whowhatbench/whowhat_metrics.py create mode 100644 src/resources/openvino.genai-2026.1.0.0/tools/who_what_benchmark/whowhatbench/wwb.py create mode 100644 src/resources/openvino/openvino.genai-2026.1.0.0.tar.gz create mode 100644 src/resources/xml/pugixml-1.15.tar.gz create mode 100644 src/robot_state_expert/CMakeLists.txt create mode 100644 src/robot_state_expert/builder.h create mode 100644 src/robot_state_expert/compact_state_representation.h create mode 100644 src/robot_state_expert/reporter.h create mode 100644 src/robot_state_expert/state_reporters/base_interface_type_erasure.h create mode 100644 src/robot_state_expert/state_reporters/lifecycles/lifecycle_node_strategy.h create mode 100644 src/robot_state_expert/state_reporters/lifecycles/mock_node_reporter.h create mode 100644 src/robot_state_expert/state_reporters/tf_availability/mock_tf_reporter.h create mode 100644 src/robot_state_expert/state_reporters/tf_availability/tf_strategy.h create mode 100644 src/robot_state_expert/state_reporters/topics/mock_topic_reporter.h create mode 100644 src/robot_state_expert/state_reporters/topics/topic_freshness_strategy.h create mode 100644 src/settings/inference/model_inference.h create mode 100644 src/settings/inference/model_inference_settings.h create mode 100644 src/utils/concepts/validators.h create mode 100644 
src/utils/parsers/xml_parser.h
 create mode 100644 src/utils/validation/validator.h

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
new file mode 100644
index 0000000..97e534f
--- /dev/null
+++ b/src/CMakeLists.txt
@@ -0,0 +1,68 @@
+cmake_minimum_required(VERSION 3.23)
+project(agent CXX)
+
+set(CMAKE_CXX_STANDARD 23)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+
+# Bundled OpenVINO GenAI source tree; this file only uses its public headers.
+set(OPENVINO_GENAI_SRC
+  "${CMAKE_SOURCE_DIR}/resources/openvino.genai-2026.1.0.0"
+)
+set(OPENVINO_GENAI_INCLUDE "${OPENVINO_GENAI_SRC}/src/cpp/include")
+
+# Directory holding the prebuilt openvino, openvino_genai and openvino_tokenizers
+# packages. The default is the location this build was originally written for;
+# override it with -DBUTLER_PYTHON_SITE_PACKAGES=<dir> on any other machine.
+set(BUTLER_PYTHON_SITE_PACKAGES
+  "$ENV{HOME}/virtual_enviroments/kamerdyner/lib/python3.12/site-packages"
+  CACHE PATH "site-packages directory providing the OpenVINO shared libraries"
+)
+
+set(OPENVINO_GENAI_LIB
+  "${BUTLER_PYTHON_SITE_PACKAGES}/openvino_genai/libopenvino_genai.so.2610"
+)
+set(OPENVINO_TOKENIZERS_LIB
+  "${BUTLER_PYTHON_SITE_PACKAGES}/openvino_tokenizers/lib/libopenvino_tokenizers.so"
+)
+
+find_package(OpenVINO REQUIRED PATHS /usr/lib/cmake/openvino2026.1.0)
+find_package(tinyxml2 REQUIRED)
+
+# Prebuilt GenAI runtime, paired with the headers from the bundled source tree.
+add_library(openvino_genai SHARED IMPORTED)
+set_target_properties(openvino_genai PROPERTIES
+  IMPORTED_LOCATION "${OPENVINO_GENAI_LIB}"
+  INTERFACE_INCLUDE_DIRECTORIES "${OPENVINO_GENAI_INCLUDE}"
+)
+target_link_libraries(openvino_genai INTERFACE openvino::runtime)
+
+# The project root is the base include directory for every subdirectory.
+# This allows includes such as: #include "inference/foo.h", #include "frontend/bar.h"
+include_directories(${CMAKE_SOURCE_DIR})
+
+add_subdirectory(inference)
+add_subdirectory(frontend)
+
+add_library(openvino_tokenizers SHARED IMPORTED)
+set_target_properties(openvino_tokenizers PROPERTIES
+  IMPORTED_LOCATION "${OPENVINO_TOKENIZERS_LIB}"
+)
+
+# The config is installed under a different file name; the compile definition
+# below must keep pointing at that installed name.
+install(FILES config/inference/model_inference_config.xml
+  DESTINATION agent/config/inference
+  RENAME model_inferences_setting.xml)
+
+add_executable(robot_router main.cpp)
+target_compile_definitions(robot_router PRIVATE
+  MODEL_INFERENCE_CONFIG_PATH="${CMAKE_INSTALL_PREFIX}/agent/config/inference/model_inferences_setting.xml"
+)
+target_link_libraries(robot_router PRIVATE frontend_lib openvino_tokenizers)
+# Lets the binary find the OpenVINO shared libraries when run from the build tree.
+set_target_properties(robot_router PROPERTIES
+  BUILD_RPATH "${BUTLER_PYTHON_SITE_PACKAGES}/openvino_genai;\
+${BUTLER_PYTHON_SITE_PACKAGES}/openvino_tokenizers/lib;\
+${BUTLER_PYTHON_SITE_PACKAGES}/openvino/libs"
+)
diff --git a/src/commands/base.h b/src/commands/base.h
new file mode 100644
index 0000000..162d18d
--- /dev/null
+++ b/src/commands/base.h
@@ -0,0 +1,9 @@
+#ifndef BUTLER_COMMANDS_BASE_H
+#define BUTLER_COMMANDS_BASE_H
+
+namespace butler::commands {
+// Command type accepted by the executive; currently an empty placeholder.
+struct ExecutiveCommandType{};
+} // namespace butler::commands
+
+#endif // BUTLER_COMMANDS_BASE_H
diff --git a/src/frontend/frontend/__init__.py b/src/commands/builder.h
similarity index 100%
rename from src/frontend/frontend/__init__.py
rename to src/commands/builder.h
diff --git a/src/commands/commons.h b/src/commands/commons.h
new file mode 100644
index 0000000..a0db45e
--- /dev/null
+++ b/src/commands/commons.h
@@ -0,0 +1,25 @@
+#ifndef BUTLER_COMMANDS_HIGH_LEVEL_COMMANDS_H
+#define BUTLER_COMMANDS_HIGH_LEVEL_COMMANDS_H
+
+#include <cstdint>
+
+
+namespace butler::commands {
+enum class Priority: std::uint8_t
+{
+    kLow=0,
+    kLevelOne=1
+};
+
+enum class Type: std::uint8_t
+{
+    kStop,
+    kMoveTo
+};
+
+
+
+} // namespace butler::commands
+
+
+#endif // BUTLER_COMMANDS_HIGH_LEVEL_COMMANDS_H
diff --git a/src/frontend/frontend/commands/__init__.py b/src/commands/factory.h
similarity index 100%
rename from src/frontend/frontend/commands/__init__.py
rename to src/commands/factory.h
diff --git a/src/frontend/frontend/frontend/__init__.py b/src/commands/move_to.h
similarity index 100%
rename from src/frontend/frontend/frontend/__init__.py
rename to src/commands/move_to.h
diff --git a/src/config/executive/behave_tree/command_executor_tree.xml b/src/config/executive/behave_tree/command_executor_tree.xml
new file mode 100644
index 0000000..6530de7
--- /dev/null
+++ b/src/config/executive/behave_tree/command_executor_tree.xml
@@ -0,0 +1,199 @@
+
+
+
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/config/inference/model_inference_config.xml b/src/config/inference/model_inference_config.xml new file mode 100644 index 0000000..46f2aa8 --- /dev/null +++ b/src/config/inference/model_inference_config.xml @@ -0,0 +1,11 @@ + + + /home/operador/Documents/tmp/agent/models/llm/distilled-1b-robot-router/exported/openvino_int4 + CPU + /home/operador/Documents/brain-ws/docker/models/gpu/Modelfile + + 150 + false + true + + diff --git a/src/definitions.h b/src/definitions.h new file mode 100644 index 0000000..47fd1e5 --- /dev/null +++ b/src/definitions.h @@ -0,0 +1,88 @@ +#ifndef BUTLER_DEFINITIONS_H +#define BUTLER_DEFINITIONS_H + +#include +#include +#include +#include +#include +#include + + +namespace butler::features{ +enum class FeatureType : std::uint8_t {kLandmarks}; +} + +template<> +struct std::hash { + std::size_t operator()(butler::features::FeatureType e) const noexcept { + return std::hash>{}( + static_cast>(e)); + } +}; + +namespace butler::robot_state { + +static constepxr std::uint8_t kMaxCommandAllowedPerExecution{2u}; + +enum class GeneralState : std::uint8_t { Unknown, kFullyOperational }; + +enum class ComponentState : std::uint8_t { Unknown, Operational, Failure, _Count }; + +enum class ComponentId : std::uint8_t { + amcl_localization_pose, + amcl_particle_cloud, + amcl_transform, + map_server, + map_occupancy_grid, + tf_broadcast, + tf_static_broadcast, + nav2_planner_server, + nav2_controller_server, + nav2_behavior_server, + nav2_bt_navigator, + nav2_costmap_global, + nav2_costmap_local, + _Count +}; + +struct EnumHash { + template + std::size_t operator()(E e) const noexcept { + return std::hash>{}( + static_cast>(e)); + } +}; + +} + +namespace butler::robot_state::components { + +template struct 
LifecycleNodeStrategy; +template struct TopicFreshnessStrategy; +template struct TfStrategy; + +// ── AMCL ───────────────────────────────────────────────────────────────────── +template using AmclLocalizationPose = TopicFreshnessStrategy; +template using AmclParticleCloud = TopicFreshnessStrategy; +template using AmclTransform = TfStrategy; + +// ── Map ────────────────────────────────────────────────────────────────────── +template using MapServer = LifecycleNodeStrategy; +template using MapOccupancyGrid = TopicFreshnessStrategy; + +// ── TF ─────────────────────────────────────────────────────────────────────── +template using TfBroadcast = TfStrategy; +template using TfStaticBroadcast = TfStrategy; + +// ── Nav2 ───────────────────────────────────────────────────────────────────── +template using Nav2PlannerServer = LifecycleNodeStrategy; +template using Nav2ControllerServer = LifecycleNodeStrategy; +template using Nav2BehaviorServer = LifecycleNodeStrategy; +template using Nav2BtNavigator = LifecycleNodeStrategy; +template using Nav2CostmapGlobal = LifecycleNodeStrategy; +template using Nav2CostmapLocal = LifecycleNodeStrategy; + +} // namespace butler::robot_state::components + +#endif // BUTLER_DEFINITIONS_H diff --git a/src/executive/CMakeLists.txt b/src/executive/CMakeLists.txt new file mode 100644 index 0000000..e69de29 diff --git a/src/executive/builder.h b/src/executive/builder.h new file mode 100644 index 0000000..e69de29 diff --git a/src/executive/configurator.h b/src/executive/configurator.h new file mode 100644 index 0000000..e9fd611 --- /dev/null +++ b/src/executive/configurator.h @@ -0,0 +1,46 @@ +#ifndef BUTLER_EXECUTIVE_CONFIG_H +#define BUTLER_EXECUTIVE_CONFIG_H + +#include "../commands/high_level_commands.h" +#include "../landmarks/landmark_info.h" +#include "../robot_state/state.h" + +#include +#include +#include + +namespace butler::executive { + + + +struct CommandTreeExecutorConfig { + CommandTreeExecutorConfig() = delete; + 
CommandTreeExecutorConfig(
+        ValidateLandmarkFn validate_landmark,
+        ValidateArrivalFn validate_arrival,
+        ValidateComponentFn validate_component,
+        ExecuteCommandFn execute_command,
+        CommunicateReturnHomeFn communicate_return_home,
+        CommunicateWaitingFn communicate_waiting,
+        CommunicateNotListedFn communicate_not_listed)
+        : validate_landmark(std::move(validate_landmark))
+        , validate_arrival(std::move(validate_arrival))
+        , validate_component(std::move(validate_component))
+        , execute_command(std::move(execute_command))
+        , communicate_return_home(std::move(communicate_return_home))
+        , communicate_waiting(std::move(communicate_waiting))
+        , communicate_not_listed(std::move(communicate_not_listed))
+    {}
+
+    const ValidateLandmarkFn validate_landmark;
+    const ValidateArrivalFn validate_arrival;
+    const ValidateComponentFn validate_component;
+    const ExecuteCommandFn execute_command;
+    const CommunicateReturnHomeFn communicate_return_home;
+    const CommunicateWaitingFn communicate_waiting;
+    const CommunicateNotListedFn communicate_not_listed;
+};
+
+} // namespace butler::executive
+
+#endif // BUTLER_EXECUTIVE_CONFIG_H
diff --git a/src/executive/executive.h b/src/executive/executive.h
new file mode 100644
index 0000000..857cf3d
--- /dev/null
+++ b/src/executive/executive.h
@@ -0,0 +1,95 @@
+#ifndef BUTLER_EXECUTIVE_EXECUTIVE_H
+#define BUTLER_EXECUTIVE_EXECUTIVE_H
+
+#include "commands/base.h"
+
+#include <atomic>
+#include <condition_variable>
+#include <mutex>
+#include <queue>
+#include <thread>
+
+namespace butler::executive
+{
+  // Commands waiting for the worker thread. commands::ExecutiveCommandType has
+  // no priority or ordering yet, so they are served in arrival order.
+  // NOTE(review): the draft also referenced kMaxCommandAllowedPerExecution as a
+  // capacity; no bound is enforced here - confirm the intended policy.
+  using PriorityQueueType = std::queue<commands::ExecutiveCommandType>;
+
+  // Owns a worker thread that takes queued commands one at a time.
+  // ExecuteCommand() and stop() synchronise with the worker through mutex_;
+  // the destructor stops the worker and joins it.
+  template <typename CommandRepositoryType>
+  class Executive{
+  public:
+    Executive() = delete;
+
+    // Stores a reference to `command_repository` (the caller keeps it alive)
+    // and starts the worker thread.
+    explicit Executive(const CommandRepositoryType & command_repository)
+    : command_repository_{command_repository}
+    {
+      is_working = true;  // set before the thread starts so its loop is entered
+      worker_ = std::jthread{[this] { worker(); }};
+    }
+
+    ~Executive()
+    {
+      stop();
+    }
+
+    // Queues a copy of `command_to_execute` and wakes the worker.
+    void ExecuteCommand(commands::ExecutiveCommandType & command_to_execute)
+    {
+      {
+        std::lock_guard<std::mutex> guard{mutex_};
+        commands_to_execute_.push(command_to_execute);
+      }
+      conditional_variable_.notify_one();
+    }
+
+    // Asks the worker to leave its loop; commands still queued are not taken.
+    void stop()
+    {
+      {
+        // The flag changes under the mutex so the worker cannot miss the wake-up.
+        std::lock_guard<std::mutex> guard{mutex_};
+        is_working = false;
+      }
+      conditional_variable_.notify_all();
+    }
+  private:
+
+    // Worker loop: sleeps until a command is queued or stop() is called.
+    void worker()
+    {
+      std::unique_lock<std::mutex> lock{mutex_};
+      while (is_working)
+      {
+        conditional_variable_.wait(lock, [this] {
+          return !is_working || !commands_to_execute_.empty();
+        });
+        if (!is_working)
+        {
+          break;
+        }
+        //1.- get the command
+        const auto command_to_execute = commands_to_execute_.front();
+        commands_to_execute_.pop();
+        // TODO: run the command; the draft stopped after dequeuing it.
+        static_cast<void>(command_to_execute);
+      }
+    }
+
+    const CommandRepositoryType & command_repository_;
+    std::mutex mutex_;
+    std::condition_variable conditional_variable_;
+    PriorityQueueType commands_to_execute_{};
+    std::atomic_bool is_working{false};
+    // Declared last: destroyed first, so the join happens while the members
+    // the worker uses are still alive.
+    std::jthread worker_;
+  };
+}
+#endif // BUTLER_EXECUTIVE_EXECUTIVE_H
diff --git a/src/executive/factory.h b/src/executive/factory.h
new file mode 100644
index 0000000..e69de29
diff --git a/src/executive/nodes/clear_board_from_previous_command_execution_node.h b/src/executive/nodes/clear_board_from_previous_command_execution_node.h
new file mode 100644
index 0000000..8db059e
--- /dev/null
+++ b/src/executive/nodes/clear_board_from_previous_command_execution_node.h
@@ -0,0 +1,37 @@
+#ifndef BUTLER_EXECUTIVE_NODES_CLEAR_BOARD_FROM_PREVIOUS_COMMAND_EXECUTION_NODE_H
+#define BUTLER_EXECUTIVE_NODES_CLEAR_BOARD_FROM_PREVIOUS_COMMAND_EXECUTION_NODE_H
+
+#include "../configurator.h"
+
+#include
+
+#include
+
+namespace butler::executive {
+
+class ClearBoardFromPreviousCommandExecutionNode final : public BT::SyncActionNode {
+public:
+    ClearBoardFromPreviousCommandExecutionNode(const std::string& name, const BT::NodeConfig& config)
+        : BT::SyncActionNode(name, config) {}
+
+    static BT::PortsList providedPorts() {
+        return {
+            BT::InputPort("command"),
+            BT::BidirectionalPort("feedback"),
+            BT::BidirectionalPort("status"),
+            BT::BidirectionalPort("error"),
+        };
+    }
+
+private:
+    BT::NodeStatus tick() override {
+        setOutput("feedback", std::string{});
+        setOutput("status", std::string{});
+        setOutput("error", std::string{});
+        return BT::NodeStatus::SUCCESS;
+    }
+};
+
+} // namespace butler::executive
+
+#endif //
BUTLER_EXECUTIVE_NODES_CLEAR_BOARD_FROM_PREVIOUS_COMMAND_EXECUTION_NODE_H
diff --git a/src/executive/nodes/communicate_command_was_not_listed_node.h b/src/executive/nodes/communicate_command_was_not_listed_node.h
new file mode 100644
index 0000000..edb52a8
--- /dev/null
+++ b/src/executive/nodes/communicate_command_was_not_listed_node.h
@@ -0,0 +1,42 @@
+#ifndef BUTLER_EXECUTIVE_NODES_COMMUNICATE_COMMAND_WAS_NOT_LISTED_NODE_H
+#define BUTLER_EXECUTIVE_NODES_COMMUNICATE_COMMAND_WAS_NOT_LISTED_NODE_H
+
+#include "../configurator.h"
+
+#include
+
+#include
+#include
+
+namespace butler::executive {
+
+// Synchronous action that passes the `command` input to the injected callback.
+// Returns FAILURE when the port cannot be read, SUCCESS otherwise.
+class CommunicateCommandWasNotListedNode final : public BT::SyncActionNode {
+public:
+    CommunicateCommandWasNotListedNode(const std::string& name,
+                                       const BT::NodeConfig& config,
+                                       const CommunicateNotListedFn fn)
+        : BT::SyncActionNode(name, config), communicate_fn_(fn) {}
+
+    static BT::PortsList providedPorts() {
+        return {
+            BT::InputPort("command"),
+        };
+    }
+
+private:
+    BT::NodeStatus tick() override {
+        const auto command = getInput("command");
+        if (!command)
+            return BT::NodeStatus::FAILURE;
+        communicate_fn_(*command);
+        return BT::NodeStatus::SUCCESS;
+    }
+
+    const CommunicateNotListedFn communicate_fn_;
+};
+
+} // namespace butler::executive
+
+#endif // BUTLER_EXECUTIVE_NODES_COMMUNICATE_COMMAND_WAS_NOT_LISTED_NODE_H
diff --git a/src/executive/nodes/communicate_return_home_node.h b/src/executive/nodes/communicate_return_home_node.h
new file mode 100644
index 0000000..deac9e9
--- /dev/null
+++ b/src/executive/nodes/communicate_return_home_node.h
@@ -0,0 +1,45 @@
+#ifndef BUTLER_EXECUTIVE_NODES_COMMUNICATE_RETURN_HOME_NODE_H
+#define BUTLER_EXECUTIVE_NODES_COMMUNICATE_RETURN_HOME_NODE_H
+
+#include "../configurator.h"
+
+#include
+
+#include
+#include
+#include
+
+namespace butler::executive {
+
+// Synchronous action that passes the `command` and `landmarks` inputs to the
+// injected callback. Returns FAILURE when either port cannot be read.
+class CommunicateReturnHomeNode final : public BT::SyncActionNode {
+public:
+    CommunicateReturnHomeNode(const std::string& name,
+                              const BT::NodeConfig& config,
+                              const CommunicateReturnHomeFn fn)
+        : BT::SyncActionNode(name, config), communicate_fn_(fn) {}
+
+    static BT::PortsList providedPorts() {
+        return {
+            BT::InputPort("command"),
+            BT::InputPort>("landmarks"),
+        };
+    }
+
+private:
+    BT::NodeStatus tick() override {
+        const auto command = getInput("command");
+        const auto landmarks = getInput>("landmarks");
+        if (!command || !landmarks)
+            return BT::NodeStatus::FAILURE;
+        communicate_fn_(*command, *landmarks);
+        return BT::NodeStatus::SUCCESS;
+    }
+
+    const CommunicateReturnHomeFn communicate_fn_;
+};
+
+} // namespace butler::executive
+
+#endif // BUTLER_EXECUTIVE_NODES_COMMUNICATE_RETURN_HOME_NODE_H
diff --git a/src/executive/nodes/communicate_waiting_status_node.h b/src/executive/nodes/communicate_waiting_status_node.h
new file mode 100644
index 0000000..c0e251e
--- /dev/null
+++ b/src/executive/nodes/communicate_waiting_status_node.h
@@ -0,0 +1,45 @@
+#ifndef BUTLER_EXECUTIVE_NODES_COMMUNICATE_WAITING_STATUS_NODE_H
+#define BUTLER_EXECUTIVE_NODES_COMMUNICATE_WAITING_STATUS_NODE_H
+
+#include "../configurator.h"
+
+#include
+
+#include
+#include
+#include
+
+namespace butler::executive {
+
+// Synchronous action that passes the `command` and `landmarks` inputs to the
+// injected callback. Returns FAILURE when either port cannot be read.
+class CommunicateWaitingStatusNode final : public BT::SyncActionNode {
+public:
+    CommunicateWaitingStatusNode(const std::string& name,
+                                 const BT::NodeConfig& config,
+                                 const CommunicateWaitingFn fn)
+        : BT::SyncActionNode(name, config), communicate_fn_(fn) {}
+
+    static BT::PortsList providedPorts() {
+        return {
+            BT::InputPort("command"),
+            BT::InputPort>("landmarks"),
+        };
+    }
+
+private:
+    BT::NodeStatus tick() override {
+        const auto command = getInput("command");
+        const auto landmarks = getInput>("landmarks");
+        if (!command || !landmarks)
+            return BT::NodeStatus::FAILURE;
+        communicate_fn_(*command, *landmarks);
+        return BT::NodeStatus::SUCCESS;
+    }
+
+    const CommunicateWaitingFn communicate_fn_;
+};
+
+} // namespace butler::executive
+
+#endif // BUTLER_EXECUTIVE_NODES_COMMUNICATE_WAITING_STATUS_NODE_H
diff --git
a/src/executive/nodes/execute_command_with_feedback_node.h b/src/executive/nodes/execute_command_with_feedback_node.h
new file mode 100644
index 0000000..8ff3c22
--- /dev/null
+++ b/src/executive/nodes/execute_command_with_feedback_node.h
@@ -0,0 +1,57 @@
+#ifndef BUTLER_EXECUTIVE_NODES_EXECUTE_COMMAND_WITH_FEEDBACK_NODE_H
+#define BUTLER_EXECUTIVE_NODES_EXECUTE_COMMAND_WITH_FEEDBACK_NODE_H
+
+#include "../configurator.h"
+
+#include
+
+#include
+#include
+
+namespace butler::executive {
+
+class ExecuteCommandWithFeedbackNode final : public BT::StatefulActionNode {
+public:
+    ExecuteCommandWithFeedbackNode(const std::string& name,
+                                   const BT::NodeConfig& config,
+                                   const ExecuteCommandFn fn)
+        : BT::StatefulActionNode(name, config), execute_fn_(fn) {}
+
+    static BT::PortsList providedPorts() {
+        return {
+            BT::InputPort("command"),
+            BT::InputPort("phase"),
+            BT::OutputPort("feedback"),
+            BT::OutputPort("status"),
+            BT::OutputPort("error"),
+        };
+    }
+
+private:
+    BT::NodeStatus onStart() override {
+        feedback_ = {};
+        return onRunning();
+    }
+
+    BT::NodeStatus onRunning() override {
+        const auto command = getInput("command");
+        const auto phase = getInput("phase");
+        if (!command || !phase)
+            return BT::NodeStatus::FAILURE;
+
+        const BT::NodeStatus result = execute_fn_(*command, *phase, feedback_);
+        setOutput("feedback", feedback_.feedback);
+        setOutput("status", feedback_.status);
+        setOutput("error", feedback_.error);
+        return result;
+    }
+
+    void onHalted() override { feedback_ = {}; }
+
+    const ExecuteCommandFn execute_fn_;
+    CommandWithFeedback feedback_;
+};
+
+} // namespace butler::executive
+
+#endif // BUTLER_EXECUTIVE_NODES_EXECUTE_COMMAND_WITH_FEEDBACK_NODE_H
diff --git a/src/executive/nodes/get_next_command_node.h b/src/executive/nodes/get_next_command_node.h
new file mode 100644
index 0000000..a971945
--- /dev/null
+++ b/src/executive/nodes/get_next_command_node.h
@@ -0,0 +1,51 @@
+#ifndef BUTLER_EXECUTIVE_NODES_GET_NEXT_COMMAND_NODE_H
+#define BUTLER_EXECUTIVE_NODES_GET_NEXT_COMMAND_NODE_H
+
+#include "../configurator.h"
+
+#include <behaviortree_cpp/action_node.h>
+
+#include <deque>
+#include <string>
+
+namespace butler::executive {
+
+// Pops the front of the `commands` queue and publishes it on `command`,
+// with its `type` member on `command_type`. Returns FAILURE when the queue
+// is unset or empty.
+// NOTE(review): port value types are reconstructed from tick() - confirm.
+class GetNextCommandNode final : public BT::SyncActionNode {
+public:
+    GetNextCommandNode(const std::string& name, const BT::NodeConfig& config)
+        : BT::SyncActionNode(name, config) {}
+
+    static BT::PortsList providedPorts() {
+        return {
+            BT::BidirectionalPort<std::deque<Command>>("commands"),
+            BT::OutputPort<Command>("command"),
+            // Declared from the member itself so the port type cannot drift.
+            BT::OutputPort<decltype(Command::type)>("command_type"),
+        };
+    }
+
+private:
+    BT::NodeStatus tick() override {
+        // Non-const copy of the queue: the front element is removed below.
+        auto command_list = getInput<std::deque<Command>>("commands");
+        if (!command_list || command_list->empty())
+            return BT::NodeStatus::FAILURE;
+
+        const Command next = command_list->front();
+        command_list->pop_front();
+
+        // Write the shortened queue back so the next tick sees the remainder.
+        setOutput("commands", *command_list);
+        setOutput("command", next);
+        setOutput("command_type", next.type);
+        return BT::NodeStatus::SUCCESS;
+    }
+};
+
+} // namespace butler::executive
+
+#endif // BUTLER_EXECUTIVE_NODES_GET_NEXT_COMMAND_NODE_H
diff --git a/src/executive/nodes/validate_arrival_to_landmark_node.h b/src/executive/nodes/validate_arrival_to_landmark_node.h
new file mode 100644
index 0000000..85ff8ef
--- /dev/null
+++ b/src/executive/nodes/validate_arrival_to_landmark_node.h
@@ -0,0 +1,44 @@
+#ifndef BUTLER_EXECUTIVE_NODES_VALIDATE_ARRIVAL_TO_LANDMARK_NODE_H
+#define BUTLER_EXECUTIVE_NODES_VALIDATE_ARRIVAL_TO_LANDMARK_NODE_H
+
+#include "../configurator.h"
+
+#include
+
+#include
+#include
+#include
+
+namespace butler::executive {
+
+class ValidateArrivalToLandmarkNode final : public BT::ConditionNode {
+public:
+    ValidateArrivalToLandmarkNode(const std::string& name,
+                                  const BT::NodeConfig& config,
+                                  const ValidateArrivalFn fn)
+        : BT::ConditionNode(name, config), validate_fn_(fn) {}
+
+    static BT::PortsList providedPorts() {
+        return {
+            BT::InputPort("command"),
+            BT::InputPort>("landmarks"),
+        };
+    }
+
+private:
+    BT::NodeStatus tick() override {
+        auto command = getInput("command");
+        auto landmarks =
getInput>("landmarks");
+        if (!command || !landmarks)
+            return BT::NodeStatus::FAILURE;
+        return validate_fn_(*command, *landmarks)
+            ? BT::NodeStatus::SUCCESS
+            : BT::NodeStatus::FAILURE;
+    }
+
+    const ValidateArrivalFn validate_fn_;
+};
+
+} // namespace butler::executive
+
+#endif // BUTLER_EXECUTIVE_NODES_VALIDATE_ARRIVAL_TO_LANDMARK_NODE_H
diff --git a/src/executive/nodes/validate_component_node.h b/src/executive/nodes/validate_component_node.h
new file mode 100644
index 0000000..2599463
--- /dev/null
+++ b/src/executive/nodes/validate_component_node.h
@@ -0,0 +1,46 @@
+#ifndef BUTLER_EXECUTIVE_NODES_VALIDATE_COMPONENT_NODE_H
+#define BUTLER_EXECUTIVE_NODES_VALIDATE_COMPONENT_NODE_H
+
+#include "../configurator.h"
+
+#include
+
+#include
+#include
+#include
+
+namespace butler::executive {
+
+// Condition that feeds the `command` and `components` inputs to the injected
+// predicate: SUCCESS when it returns true, FAILURE otherwise or on a missing port.
+class ValidateComponentNode final : public BT::ConditionNode {
+public:
+    ValidateComponentNode(const std::string& name,
+                          const BT::NodeConfig& config,
+                          const ValidateComponentFn fn)
+        : BT::ConditionNode(name, config), validate_fn_(fn) {}
+
+    static BT::PortsList providedPorts() {
+        return {
+            BT::InputPort("command"),
+            BT::InputPort>("components"),
+        };
+    }
+
+private:
+    BT::NodeStatus tick() override {
+        auto command = getInput("command");
+        auto components = getInput>("components");
+        if (!command || !components)
+            return BT::NodeStatus::FAILURE;
+        return validate_fn_(*command, *components)
+            ? BT::NodeStatus::SUCCESS
+            : BT::NodeStatus::FAILURE;
+    }
+
+    const ValidateComponentFn validate_fn_;
+};
+
+} // namespace butler::executive
+
+#endif // BUTLER_EXECUTIVE_NODES_VALIDATE_COMPONENT_NODE_H
diff --git a/src/executive/nodes/validate_landmark_node.h b/src/executive/nodes/validate_landmark_node.h
new file mode 100644
index 0000000..ac9ba78
--- /dev/null
+++ b/src/executive/nodes/validate_landmark_node.h
@@ -0,0 +1,46 @@
+#ifndef BUTLER_EXECUTIVE_NODES_VALIDATE_LANDMARK_NODE_H
+#define BUTLER_EXECUTIVE_NODES_VALIDATE_LANDMARK_NODE_H
+
+#include "../configurator.h"
+
+#include
+
+#include
+#include
+#include
+
+namespace butler::executive {
+
+// Condition that feeds the `command` and `landmarks` inputs to the injected
+// predicate: SUCCESS when it returns true, FAILURE otherwise or on a missing port.
+class ValidateLandmarkNode final : public BT::ConditionNode {
+public:
+    ValidateLandmarkNode(const std::string& name,
+                         const BT::NodeConfig& config,
+                         const ValidateLandmarkFn fn)
+        : BT::ConditionNode(name, config), validate_fn_(fn) {}
+
+    static BT::PortsList providedPorts() {
+        return {
+            BT::InputPort("command"),
+            BT::InputPort>("landmarks"),
+        };
+    }
+
+private:
+    BT::NodeStatus tick() override {
+        auto command = getInput("command");
+        auto landmarks = getInput>("landmarks");
+        if (!command || !landmarks)
+            return BT::NodeStatus::FAILURE;
+        return validate_fn_(*command, *landmarks)
+            ?
BT::NodeStatus::SUCCESS + : BT::NodeStatus::FAILURE; + } + + const ValidateLandmarkFn validate_fn_; +}; + +} // namespace butler::executive + +#endif // BUTLER_EXECUTIVE_NODES_VALIDATE_LANDMARK_NODE_H diff --git a/src/executive/tree_executor.h b/src/executive/tree_executor.h new file mode 100644 index 0000000..35c9c52 --- /dev/null +++ b/src/executive/tree_executor.h @@ -0,0 +1,82 @@ +#ifndef BUTLER_EXECUTIVE_COMMAND_TREE_EXECUTOR_H +#define BUTLER_EXECUTIVE_COMMAND_TREE_EXECUTOR_H + +#include "config.h" +#include "nodes/nodes.h" + +#include + +#include +#include +#include +#include + +namespace butler::executive { + + +// ─── Executor ───────────────────────────────────────────────────────────────── + +class CommandTreeExecutor { +public: + CommandTreeExecutor(const CommandTreeExecutorConfig config) { + registerNodes(config); + factory_.registerBehaviorTreeFromFile(xml_path); + blackboard_ = BT::Blackboard::create(); + tree_ = factory_.createTree("AgentCommandExecutorTree", blackboard_); + } + + // Returns the blackboard so that robot_state::Reporter can write {components} to it. + [[nodiscard]] BT::Blackboard::Ptr blackboard() const { return blackboard_; } + + void setCommands(std::deque commands) { + blackboard_->set("commands", std::move(commands)); + } + + void setLandmarks(std::vector landmarks) { + blackboard_->set("landmarks", std::move(landmarks)); + } + + // Tick the tree once. + // SUCCESS — command completed + // RUNNING — command in progress + // FAILURE — command failed or queue is empty + [[nodiscard]] BT::NodeStatus tick() { + return tree_.tickOnce(); + } + + // Halts all running nodes and resets SequenceWithMemory state. + // Must be called by the orchestrator after each command cycle ends. 
+ void reset() { + tree_.haltTree(); + } + +private: + void registerNodes(const CommandTreeExecutorConfig& cfg) { + factory_.registerNodeType( + "GetNextCommand"); + factory_.registerNodeType( + "ClearBoardFromPreviousCommandExecution"); + factory_.registerNodeType( + "ExecuteCommandWithFeedback", cfg.execute_command); + factory_.registerNodeType( + "ValidateLandmark", cfg.validate_landmark); + factory_.registerNodeType( + "ValidateArrivalToLandmark", cfg.validate_arrival); + factory_.registerNodeType( + "ValidateComponent", cfg.validate_component); + factory_.registerNodeType( + "CommunicateReturnHome", cfg.communicate_return_home); + factory_.registerNodeType( + "CommunicateWaitingStatus", cfg.communicate_waiting); + factory_.registerNodeType( + "CommunicateCommandWasNotListed", cfg.communicate_not_listed); + } + + + BT::Blackboard::Ptr blackboard_; + BT::Tree tree_; +}; + +} // namespace butler::executive + +#endif // BUTLER_EXECUTIVE_COMMAND_TREE_EXECUTOR_H diff --git a/src/features/builder.h b/src/features/builder.h new file mode 100644 index 0000000..23abba9 --- /dev/null +++ b/src/features/builder.h @@ -0,0 +1,24 @@ +#ifndef BUTLER_FEATURES_BUILDER_H +#define BUTLER_FEATURES_BUILDER_H + +#include "/home/operador/Documents/tmp/agent/features/manager.h" +#include "/home/operador/Documents/tmp/agent/features/retrievals/retrieve_features_from_file.h" +#include "/home/operador/Documents/tmp/agent/features/landmarks/landmarks_container.h" +#include + +namespace butler::features { + +struct FeatureManagerBuilder { + explicit FeatureManagerBuilder(std::string landmarks_file) noexcept + : landmarks_file_{std::move(landmarks_file)} {} + + Manager> build() const { + return Manager{retrievals::RetrieveFeaturesFromFile{landmarks_file_}}; + } + + const std::string landmarks_file_; +}; + +} // namespace butler::features + +#endif // BUTLER_FEATURES_BUILDER_H diff --git a/src/features/landmarks/landmark_info.h b/src/features/landmarks/landmark_info.h new file mode 100644 
// ── src/features/landmarks/landmark_info.h ───────────────────────────────────
#ifndef BUTLER_LANDMARKS_LANDMARK_INFO_H
#define BUTLER_LANDMARKS_LANDMARK_INFO_H

// Relative to this header; the previous absolute /home/operador/... path only
// resolved on the author's machine.
#include "../../definitions.h"

#include <string>
#include <tuple>
#include <utility>

namespace butler::features::landmarks {

/// Immutable record describing one landmark: its identifier and (x, y) position.
struct LandmarkInfo {
    LandmarkInfo() = delete;

    explicit LandmarkInfo(std::string id, std::tuple<double, double> position) noexcept
        : id_{std::move(id)}, position_{std::move(position)} {}

    static constexpr butler::features::FeatureType kFeatureKind{butler::features::FeatureType::kLandmarks};
    const std::string id_;
    const std::tuple<double, double> position_;  // (x, y) as parsed by LandmarksContainer::from_lines
};

} // namespace butler::features::landmarks

#endif // BUTLER_LANDMARKS_LANDMARK_INFO_H

// ── src/features/landmarks/landmarks_container.h ─────────────────────────────
#ifndef BUTLER_LANDMARKS_LANDMARKS_CONTAINER_H
#define BUTLER_LANDMARKS_LANDMARKS_CONTAINER_H

#include "landmark_info.h"

#include <cstddef>
#include <exception>
#include <expected>
#include <optional>
#include <string>
#include <string_view>
#include <tuple>
#include <unordered_map>
#include <utility>
#include <vector>

namespace butler::features::landmarks {

/// Read-only lookup table from landmark id to LandmarkInfo.
struct LandmarksContainer {
    using ReturnType = LandmarkInfo;
    using FeatureIdType = std::string;

    static constexpr butler::features::FeatureType kFeatureKind{butler::features::FeatureType::kLandmarks};

    explicit LandmarksContainer(std::unordered_map<FeatureIdType, LandmarkInfo> landmarks) noexcept
        : landmarks_{std::move(landmarks)} {}

    /// Returns the landmark registered under `id`, or an error message when
    /// no such landmark exists.
    std::expected<ReturnType, std::string> get(const FeatureIdType& id) const {
        const auto it = landmarks_.find(id);
        if (it == landmarks_.cend())
            return std::unexpected("Landmark '" + id + "' not found");
        return it->second;
    }

    /// Parses lines with format: <name>, <x>, <y>
    ///
    /// Lines that do not match are skipped: fewer than two commas, an empty
    /// name, or a coordinate that is not entirely a number. When a name occurs
    /// more than once the first occurrence wins (emplace does not overwrite).
    static LandmarksContainer from_lines(const std::vector<std::string>& lines) {
        std::unordered_map<FeatureIdType, LandmarkInfo> landmarks;
        for (const auto& line : lines) {
            const std::string_view view{line};

            const auto first_comma = view.find(',');
            if (first_comma == std::string_view::npos) continue;
            const auto second_comma = view.find(',', first_comma + 1);
            if (second_comma == std::string_view::npos) continue;

            const std::string name = trim(view.substr(0, first_comma));
            // An empty id could never be looked up meaningfully.
            if (name.empty()) continue;

            const auto x = parse_coordinate(
                view.substr(first_comma + 1, second_comma - first_comma - 1));
            const auto y = parse_coordinate(view.substr(second_comma + 1));
            if (!x || !y) continue;

            landmarks.emplace(name, LandmarkInfo{name, std::make_tuple(*x, *y)});
        }
        return LandmarksContainer{std::move(landmarks)};
    }

private:
    /// Returns `s` without leading/trailing spaces, tabs, CR and LF.
    static std::string trim(std::string_view s) {
        const auto start = s.find_first_not_of(" \t\r\n");
        if (start == std::string_view::npos) return {};
        const auto end = s.find_last_not_of(" \t\r\n");
        return std::string{s.substr(start, end - start + 1)};
    }

    /// Converts one coordinate field to double. Returns nullopt unless the
    /// whole trimmed field is consumed by the conversion: std::stod on its own
    /// stops at the first unparsable character, so "1.5abc" or "2, 3" would
    /// otherwise be accepted as 1.5 and 2.
    static std::optional<double> parse_coordinate(std::string_view field) {
        const std::string text = trim(field);
        if (text.empty()) return std::nullopt;
        try {
            std::size_t consumed = 0;
            const double value = std::stod(text, &consumed);
            if (consumed != text.size()) return std::nullopt;
            return value;
        } catch (const std::exception&) {
            // std::invalid_argument / std::out_of_range: treat as a bad line.
            return std::nullopt;
        }
    }

    const std::unordered_map<FeatureIdType, LandmarkInfo> landmarks_;
};

} // namespace butler::features::landmarks

#endif // BUTLER_LANDMARKS_LANDMARKS_CONTAINER_H

// ── src/features/manager.h ───────────────────────────────────────────────────
#ifndef BUTLER_FEATURES_MANAGER_H
#define BUTLER_FEATURES_MANAGER_H

#include "../definitions.h"

#include <string>
#include <type_traits>
#include <utility>

namespace butler::features {

/// Thin facade that forwards feature lookups to a retrieval strategy.
/// `RetrievalStrategy` must expose a `FeatureReturnType` alias and a const
/// `get(FeatureType, id)` member.
template <typename RetrievalStrategy>
struct Manager {
    using FeatureReturnType = typename RetrievalStrategy::FeatureReturnType;

    // noexcept only when storing the strategy really cannot throw. A strategy
    // with const members (such as RetrieveFeaturesFromFile) is copied rather
    // than moved, and an unconditional noexcept would turn an allocation
    // failure during that copy into std::terminate.
    explicit Manager(RetrievalStrategy strategy)
        noexcept(std::is_nothrow_move_constructible_v<RetrievalStrategy>)
        : strategy_{std::move(strategy)} {}

    /// Looks up feature `id` of kind `type` through the strategy.
    FeatureReturnType get_feature(FeatureType type, const std::string& id) const {
        return strategy_.get(type, id);
    }

    const RetrievalStrategy strategy_;
};

} // namespace butler::features

#endif // BUTLER_FEATURES_MANAGER_H
// ── src/features/retrievals/retrieve_features_from_file.h ────────────────────
#ifndef BUTLER_FEATURES_RETRIEVALS_RETRIEVE_FROM_FILE_H
#define BUTLER_FEATURES_RETRIEVALS_RETRIEVE_FROM_FILE_H

// Relative to this header; the previous absolute /home/operador/... path only
// resolved on the author's machine.
#include "../../definitions.h"

#include <expected>
#include <fstream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

namespace butler::features::retrievals {

/// Retrieval strategy that loads one feature family from a text file at
/// construction time and answers lookups from memory afterwards.
///
/// `FeatureFamilyContainerType` must provide `ReturnType`, `FeatureIdType`,
/// `kFeatureKind`, a static `from_lines(lines)` factory and a const `get(id)`.
template <typename FeatureFamilyContainerType>
struct RetrieveFeaturesFromFile {
    using FeatureReturnType =
        std::expected<typename FeatureFamilyContainerType::ReturnType, std::string>;
    using FeatureIdType = typename FeatureFamilyContainerType::FeatureIdType;

    /// Reads `input_file` immediately.
    /// Throws std::runtime_error when the file cannot be opened.
    explicit RetrieveFeaturesFromFile(const std::string& input_file)
        : features_map_{LoadFeaturesFromFile(input_file)} {}

    /// Returns feature `id` of kind `type`, or an error message when the kind
    /// is not registered or the container does not know the id.
    FeatureReturnType get(const butler::features::FeatureType type, const FeatureIdType& id) const {
        const auto it = features_map_.find(type);
        if (it == features_map_.cend())
            return std::unexpected("Feature type not registered");
        return it->second.get(id);
    }

private:
    using FeatureMap =
        std::unordered_map<butler::features::FeatureType, FeatureFamilyContainerType>;

    /// Reads every non-empty line of `file_name` and hands them to the
    /// container's from_lines() parser.
    static FeatureMap LoadFeaturesFromFile(const std::string& file_name) {
        std::ifstream file{file_name};
        if (!file.is_open())
            throw std::runtime_error{"Cannot open feature file: " + file_name};

        std::vector<std::string> lines;
        // std::getline clears `line` before reading, so reusing it after the
        // move below is well defined.
        for (std::string line; std::getline(file, line);) {
            if (!line.empty())
                lines.push_back(std::move(line));
        }

        // emplace instead of a braced initializer list: initializer-list
        // elements are const and would force an extra copy of the container.
        FeatureMap features;
        features.emplace(FeatureFamilyContainerType::kFeatureKind,
                         FeatureFamilyContainerType::from_lines(lines));
        return features;
    }

    const FeatureMap features_map_;
};

} // namespace butler::features::retrievals

#endif // BUTLER_FEATURES_RETRIEVALS_RETRIEVE_FROM_FILE_H
a/src/frontend/frontend/commands/executor.py b/src/frontend/frontend/commands/executor.py deleted file mode 100644 index 0552f0a..0000000 --- a/src/frontend/frontend/commands/executor.py +++ /dev/null @@ -1,40 +0,0 @@ -from __future__ import annotations - -from enum import Enum -from typing import Any, Callable, Optional - -class CommandExecutor: - """ - Public interface: - - ExecuteHighLevelCommand(command_id, action, *args, **kwargs) -> CommandState - - This class uses CommandExecutionTracker to ensure: - - No duplicate command_id can be enqueued. - - Only ONE command can be in EXECUTING at a time. - - When state becomes EXECUTED, it is removed from tracking. - """ - - def __init__(self, tracker=None) -> None: - self._tracker = tracker - - @property - def tracker(self): - return self._tracker - - def Execute(self, command_id: str, command_state): - """ - Executes a command transition. For now, this is a minimal API that - simply returns the provided state. - - In a real robot, this method would trigger the actual execution engine - (e.g., call into a controller) and return EXECUTED/EXECUTION_ERROR. - """ - if not command_id or not command_id.strip(): - raise ValueError("command_id must be a non-empty string") - - if not isinstance(command_state, CommandState): - raise ValueError("command_state must be a CommandState enum") - - # Placeholder behavior: - # - Server decides lifecycle; executor returns state as the "result". 
- return command_state diff --git a/src/frontend/frontend/commands/pipeline.py b/src/frontend/frontend/commands/pipeline.py deleted file mode 100644 index 6975f09..0000000 --- a/src/frontend/frontend/commands/pipeline.py +++ /dev/null @@ -1,68 +0,0 @@ -from __future__ import annotations - -from typing import Any, Dict, Tuple - -from frontend.commands.repository import COMMAND_NAME_TO_ID, CommandID - - -class CommandPipelineError(RuntimeError): - """High-level pipeline error for command processing.""" - - -def execute_robot_command( - cmd_obj: Dict[str, Any], - *, - tracker, - executor, -) -> Tuple[CommandState, str]: - """ - Pure pipeline: resolve name -> id, enforce tracker policy, call executor. - - - LLM provides ONLY command name (no IDs). - - Uses repository as the single source of truth for command ID resolution. - - Tracker enforces: no duplicates + only one EXECUTING at a time. - - When state becomes EXECUTED, tracker removes the command automatically. - - Returns: (CommandState, user_message) - """ - - cmd = cmd_obj.get("command") - if not isinstance(cmd, dict): - raise CommandPipelineError("Invalid robot_command: missing 'command' object") - - command_name = str(cmd.get("name", "")).strip().upper() - if not command_name: - raise CommandPipelineError("Invalid robot_command: missing command name") - - command_id_enum: CommandID | None = COMMAND_NAME_TO_ID.get(command_name) - if command_id_enum is None: - raise CommandPipelineError(f"Unknown or unsupported command name: {command_name}") - - command_id = command_id_enum.value # e.g. 
"CMD_MOVE" - - user_message = str(cmd_obj.get("message", f"Ok, I am executing this {command_name}")).strip() - - try: - tracker.enqueue(command_id, CommandState.EXECUTING) - - exec_result = executor.Execute(command_id, CommandState.EXECUTING) - - final_state = ( - CommandState.EXECUTION_ERROR - if exec_result == CommandState.EXECUTION_ERROR - else CommandState.EXECUTED - ) - - tracker.update(command_id, final_state) - return final_state, user_message - - except CommandTrackingError as e: - return CommandState.EXECUTION_ERROR, f"Execution blocked: {e}" - - except Exception as e: - # best-effort mark error - try: - tracker.update(command_id, CommandState.EXECUTION_ERROR) - except Exception: - pass - return CommandState.EXECUTION_ERROR, f"Execution error: {e}" diff --git a/src/frontend/frontend/commands/repository.py b/src/frontend/frontend/commands/repository.py deleted file mode 100644 index 1632de9..0000000 --- a/src/frontend/frontend/commands/repository.py +++ /dev/null @@ -1,32 +0,0 @@ -from enum import Enum -from typing import Dict - - -class CommandID(Enum): - """ - Canonical IDs for all executable high-level commands. - These IDs are used by the CommandExecutor and tracker. - """ - - MOVE = "CMD_MOVE" - TURN = "CMD_TURN" - STOP = "CMD_STOP" - SPEAK = "CMD_SPEAK" - SCAN = "CMD_SCAN" - PICKUP = "CMD_PICKUP" - DROP = "CMD_DROP" - STATUS = "CMD_STATUS" - - -# Map: LLM command name (string) -> CommandID -# This is the single source of truth for command resolution. 
-COMMAND_NAME_TO_ID: Dict[str, CommandID] = { - "MOVE": CommandID.MOVE, - "TURN": CommandID.TURN, - "STOP": CommandID.STOP, - "SPEAK": CommandID.SPEAK, - "SCAN": CommandID.SCAN, - "PICKUP": CommandID.PICKUP, - "DROP": CommandID.DROP, - "STATUS": CommandID.STATUS, -} diff --git a/src/frontend/frontend/commands/states.py b/src/frontend/frontend/commands/states.py deleted file mode 100644 index 532f2ed..0000000 --- a/src/frontend/frontend/commands/states.py +++ /dev/null @@ -1,6 +0,0 @@ -from enum import Enum - -class CommandState(Enum): - EXECUTING = "EXECUTING" - EXECUTED = "EXECUTED" - EXECUTION_ERROR = "EXECUTION_ERROR" \ No newline at end of file diff --git a/src/frontend/frontend/commands/tracker.py b/src/frontend/frontend/commands/tracker.py deleted file mode 100644 index 6fd58f9..0000000 --- a/src/frontend/frontend/commands/tracker.py +++ /dev/null @@ -1,106 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from threading import RLock -from typing import Dict, List, Tuple - -from frontend.commands.states import CommandState - -class CommandTrackingError(RuntimeError): - pass - - -@dataclass(frozen=True) -class TrackingEntry: - command_id: str - state: CommandState - - -class CommandExecutionTracker: - """ - Tracks commands by (COMMAND_ID, EXECUTION_STATE). - - Rules: - - No duplicate command_id can be enqueued. - - Only ONE command may be in EXECUTING at a time. - - When a command transitions to EXECUTED, it is removed from tracking. - """ - - def __init__(self) -> None: - self._lock = RLock() - self._entries: Dict[str, CommandState] = {} - - def enqueue(self, command_id: str, state: CommandState) -> None: - """ - Enqueue a command into tracking. - - - Rejects duplicate command_id. - - Rejects enqueueing EXECUTING if another command is already EXECUTING. 
- """ - if not command_id or not command_id.strip(): - raise CommandTrackingError("command_id must be a non-empty string") - - with self._lock: - if command_id in self._entries: - raise CommandTrackingError(f"command_id '{command_id}' is already tracked") - - if state == CommandState.EXECUTING and self._has_executing_locked(): - existing = self._current_executing_locked() - raise CommandTrackingError( - f"Cannot start '{command_id}' because '{existing}' is already EXECUTING" - ) - - self._entries[command_id] = state - - # If enqueued directly as EXECUTED, remove immediately (rare, but consistent) - if state == CommandState.EXECUTED: - self._entries.pop(command_id, None) - - def update(self, command_id: str, new_state: CommandState) -> None: - """ - Update the state of a tracked command. - - - If new_state is EXECUTING and another command is EXECUTING, reject. - - If new_state becomes EXECUTED, remove from tracking automatically. - """ - with self._lock: - if command_id not in self._entries: - raise CommandTrackingError(f"command_id '{command_id}' is not tracked") - - if new_state == CommandState.EXECUTING: - # Allow if it is the same command already executing; otherwise prevent concurrency - current_executing = self._current_executing_locked() - if current_executing is not None and current_executing != command_id: - raise CommandTrackingError( - f"Cannot set '{command_id}' to EXECUTING because '{current_executing}' is EXECUTING" - ) - - if new_state == CommandState.EXECUTED: - # Remove when executed - self._entries.pop(command_id, None) - return - - self._entries[command_id] = new_state - - def get(self, command_id: str) -> CommandState: - """Return the current state for a tracked command.""" - with self._lock: - if command_id not in self._entries: - raise CommandTrackingError(f"command_id '{command_id}' is not tracked") - return self._entries[command_id] - - def snapshot(self) -> List[Tuple[str, CommandState]]: - """ - Returns a snapshot list of (COMMAND_ID, 
EXECUTION_STATE). - """ - with self._lock: - return list(self._entries.items()) - - def _has_executing_locked(self) -> bool: - return any(state == CommandState.EXECUTING for state in self._entries.values()) - - def _current_executing_locked(self) -> str | None: - for cid, state in self._entries.items(): - if state == CommandState.EXECUTING: - return cid - return None diff --git a/src/frontend/frontend/frontend/ollama_client_cli.py b/src/frontend/frontend/frontend/ollama_client_cli.py deleted file mode 100644 index 570f38a..0000000 --- a/src/frontend/frontend/frontend/ollama_client_cli.py +++ /dev/null @@ -1,75 +0,0 @@ -import sys -import rclpy -from rclpy.action import ActionClient -from rclpy.node import Node - -from high_level_reasoning_interface.action import OllamaChatInteraction - - -class FrontendCli(Node): - def __init__(self): - super().__init__('frontend_cli') - self.cli = ActionClient(self, OllamaChatInteraction, '/ollama/chat') - while not self.cli.wait_for_server(timeout_sec=1.0): - self.get_logger().info('Esperando action /ollama/chat ...') - - def call(self, prompt: str, model: str = "") -> int: - goal = OllamaChatInteraction.Goal() - goal.prompt = prompt - goal.model = model - - goal_future = self.cli.send_goal_async( - goal, - feedback_callback=self._feedback_callback, - ) - rclpy.spin_until_future_complete(self, goal_future) - - goal_handle = goal_future.result() - if goal_handle is None or not goal_handle.accepted: - print("Action goal rejected") - return 1 - - result_future = goal_handle.get_result_async() - rclpy.spin_until_future_complete(self, result_future) - wrapped_result = result_future.result() - if wrapped_result is None or wrapped_result.result is None: - print("Action result failed") - return 1 - - res = wrapped_result.result - if not res.success: - print(f"[ERROR] {res.error}") - return 1 - - print(f"\033[32m{res.response}\033[0m") - if res.command_string != "none": - print(f"[COMMAND] {res.command_string}") - print(f"[EXECUTION] 
{res.execution_status}") - return 0 - - def _feedback_callback(self, feedback_msg) -> None: - status = feedback_msg.feedback.current_status.strip() - if status: - print(f"[STATUS] {status}") - - -def main(): - rclpy.init() - node = FrontendCli() - - model = sys.argv[1] if len(sys.argv) > 1 else "" - - print('Interactive Ollama CLI. Type "exit" or "quit" to exit.') - try: - while True: - prompt = input("Prompt> ").strip() - if not prompt: - continue - if prompt.lower() in ("exit", "quit"): - break - node.call(prompt, model) - except (KeyboardInterrupt, EOFError): - print() - - node.destroy_node() - rclpy.shutdown() diff --git a/src/frontend/frontend/frontend/readme.md b/src/frontend/frontend/frontend/readme.md deleted file mode 100644 index 1f993ca..0000000 --- a/src/frontend/frontend/frontend/readme.md +++ /dev/null @@ -1,8 +0,0 @@ -#Execute Server: -```bash -ros2 run frontend frontend_server --ros-args -p ollama_url:=http://localhost:11434 -p default_model:=robot-router:latest -``` -# Client -``` -ros2 run frontend frontend_cli -``` diff --git a/src/frontend/frontend/frontend/server.py b/src/frontend/frontend/frontend/server.py deleted file mode 100644 index e23b25f..0000000 --- a/src/frontend/frontend/frontend/server.py +++ /dev/null @@ -1,368 +0,0 @@ -import json -import re -import time -from typing import Any, Dict, List, Tuple - -import rclpy -from rclpy.action import ActionClient, ActionServer -from rclpy.callback_groups import ReentrantCallbackGroup -from rclpy.executors import MultiThreadedExecutor -from rclpy.node import Node -import requests - -from high_level_reasoning_interface.action import ExecuteCommand -from high_level_reasoning_interface.action import OllamaChatInteraction as OllamaChatAction - - -class FrontendServer(Node): - """ - FrontendServer: ROS 2 action server that calls Ollama and optionally routes - command execution through the executive action server. 
- - Design goals (per your request): - - The client never receives raw JSON from Ollama. - - The LLM returns plain text in a strict two-line protocol: - CLIENT_MESSAGE: ... - COMMAND_STRING: - - The client gets execution feedback via the frontend action feedback channel. - """ - - def __init__(self): - super().__init__("frontend_server") - - # Parameters - self.declare_parameter("ollama_url", "http://localhost:11434") - self.declare_parameter("default_model", "robot-router:latest") - self.declare_parameter("timeout_sec", 120.0) - - self._callback_group = ReentrantCallbackGroup() - - self._execute_command_client = ActionClient( - self, - ExecuteCommand, - "execute_command", - callback_group=self._callback_group, - ) - - self._chat_action_server = ActionServer( - self, - OllamaChatAction, - "/ollama/chat", - self.handle_chat, - callback_group=self._callback_group, - ) - - self.get_logger().info("frontend_server ready. Action: /ollama/chat") - self.get_logger().info('Forwarding commands to action server: "execute_command"') - - # ========================= - # Public ROS2 entrypoint - # ========================= - def handle_chat(self, goal_handle): - """ - Pipeline: - 1) Read config + normalize request - 2) Call Ollama - 3) Parse strict router output (two-line protocol) - 4) If needed, send the parsed command to execute_command - 5) Return the final text + execution state to the action client - """ - result = OllamaChatAction.Result() - - try: - cfg = self.__read_config() - prompt, model = self.__normalize_request(goal_handle.request, cfg["default_model"]) - - self.__ensure_prompt(prompt) - - self.__publish_feedback(goal_handle, "querying_ollama") - payload = self.__build_ollama_payload(model, prompt) - self.get_logger().info(f'Ollama request -> model="{payload.get("model")}" prompt="{prompt}"') - - data = self.__call_ollama(cfg["ollama_url"], payload, cfg["timeout_sec"]) - llm_text = self.__extract_llm_text(data) - - # Parse the router output (plain text protocol) - 
client_msg, command_string, landmarks_to_visit = self.__parse_router_output(llm_text) - - execution_status = "no_command" - error = "" - success = True - - if command_string != "none": - self.__publish_feedback(goal_handle, f"command_detected:{command_string}") - execute_result = self.__execute_command( - goal_handle, - command_string, - landmarks_to_visit, - ) - execution_status = execute_result.status - error = execute_result.error_description or "" - success = execute_result.status == "completed" - else: - self.__publish_feedback(goal_handle, "no_command_detected") - - if success: - goal_handle.succeed() - else: - goal_handle.abort() - - return self.__fill_result( - result=result, - success=success, - error=error, - text=client_msg, - command_string=command_string, - execution_status=execution_status, - ) - - except requests.exceptions.Timeout: - goal_handle.abort() - return self.__fill_result( - result=result, - success=False, - error="Timeout calling Ollama", - text="", - command_string="none", - execution_status="failed", - ) - except requests.exceptions.ConnectionError: - goal_handle.abort() - return self.__fill_result( - result=result, - success=False, - error="HighLevel Ollama Docker is not online", - text="", - command_string="none", - execution_status="failed", - ) - except Exception as e: - goal_handle.abort() - return self.__fill_result( - result=result, - success=False, - error=str(e), - text="", - command_string="none", - execution_status="failed", - ) - - # ========================= - # Private helpers (config + request) - # ========================= - def __read_config(self) -> Dict[str, Any]: - return { - "ollama_url": self.get_parameter("ollama_url").value, - "default_model": self.get_parameter("default_model").value, - "timeout_sec": float(self.get_parameter("timeout_sec").value), - } - - def __normalize_request(self, request, default_model: str) -> Tuple[str, str]: - prompt = (request.prompt or "").strip() - req_model = (request.model or 
"").strip() - model = req_model if req_model else default_model - return prompt, model - - def __ensure_prompt(self, prompt: str) -> None: - if not prompt: - raise ValueError("Empty prompt") - - def __publish_feedback(self, goal_handle, status: str) -> None: - feedback = OllamaChatAction.Feedback() - feedback.current_status = status - goal_handle.publish_feedback(feedback) - - def __wait_for_future(self, future, timeout_sec: float, wait_label: str): - deadline = time.monotonic() + timeout_sec - while rclpy.ok() and not future.done(): - if time.monotonic() >= deadline: - raise TimeoutError(f"Timeout waiting for {wait_label}") - time.sleep(0.05) - return future.result() - - # ========================= - # Ollama call - # ========================= - def __build_ollama_payload(self, model: str, prompt: str) -> Dict[str, Any]: - # NOTE: - # - Use /api/generate (simple completion) - # - stream False for single JSON response - return { - "model": model, - "prompt": prompt, - "stream": False, - } - - def __call_ollama(self, ollama_url: str, payload: Dict[str, Any], timeout_sec: float) -> Dict[str, Any]: - r = requests.post( - f"{ollama_url}/api/generate", - json=payload, - timeout=timeout_sec, - ) - - if r.status_code != 200: - raise RuntimeError(f"HTTP {r.status_code}: {r.text[:200]}") - - return r.json() - - def __extract_llm_text(self, data: Dict[str, Any]) -> str: - # Ollama /api/generate returns: {"response": "...", ...} - return (data.get("response") or "").strip() - - def __execute_command( - self, - goal_handle, - command_string: str, - landmarks_to_visit: List[str], - ): - self.__publish_feedback(goal_handle, f"waiting_for_execute_command:{command_string}") - - if not self._execute_command_client.wait_for_server(timeout_sec=5.0): - raise RuntimeError("execute_command action server not available") - - exec_goal = ExecuteCommand.Goal() - exec_goal.command = command_string - exec_goal.landmarks_to_visit = landmarks_to_visit - - send_goal_future = 
self._execute_command_client.send_goal_async( - exec_goal, - feedback_callback=lambda feedback_msg: self.__handle_execute_feedback( - goal_handle, - feedback_msg, - ), - ) - goal_response = self.__wait_for_future( - send_goal_future, - timeout_sec=5.0, - wait_label="execute_command goal response", - ) - - if goal_response is None or not goal_response.accepted: - raise RuntimeError(f"execute_command rejected command '{command_string}'") - - self.__publish_feedback(goal_handle, f"executing:{command_string}") - - result_future = goal_response.get_result_async() - wrapped_result = self.__wait_for_future( - result_future, - timeout_sec=300.0, - wait_label=f"execute_command result for '{command_string}'", - ) - - if wrapped_result is None or wrapped_result.result is None: - raise RuntimeError(f"execute_command returned no result for '{command_string}'") - - return wrapped_result.result - - def __handle_execute_feedback(self, goal_handle, feedback_msg) -> None: - current_status = feedback_msg.feedback.current_status.strip() - if current_status: - self.__publish_feedback(goal_handle, f"executor:{current_status}") - - # ========================= - # Router output parsing (NO JSON) - # ========================= - def __parse_router_output(self, text: str) -> Tuple[str, str, List[str]]: - """ - Preferred format (two lines, plain text, no markdown): - - CLIENT_MESSAGE: - COMMAND_STRING: - - Legacy compatibility: - - Accepts the older JSON router payload used by the Ollama Modelfile. - - Returns: (client_message, command_string, landmarks_to_visit) - - Robustness: - - If the LLM fails to follow the protocol, we return the full text as client_message, - "none" as command_string, and no landmarks. - - If there is extra text, we still try to extract the two fields. - """ - raw = (text or "").strip() - if not raw: - return "", "none", [] - - client_msg = "" - command_string = "none" - landmarks_to_visit: List[str] = [] - - # We accept any ordering, but these labels must appear. 
- # Use regex to tolerate minor spacing differences. - m1 = re.search(r"^\s*CLIENT_MESSAGE\s*:\s*(.+)\s*$", raw, flags=re.MULTILINE) - m2 = re.search(r"^\s*COMMAND_STRING\s*:\s*(.+)\s*$", raw, flags=re.MULTILINE) - - if m1: - client_msg = m1.group(1).strip() - if m2: - command_string = m2.group(1).strip() - - # Fallback to the legacy JSON router payload if the plain-text protocol is absent. - if not client_msg and not m2: - legacy_message, legacy_command, legacy_landmarks = self.__parse_router_json_output(raw) - if legacy_message or legacy_command != "none": - return legacy_message or raw, legacy_command, legacy_landmarks - return raw, "none", [] - - # Normalize command string - command_string = command_string.strip() if command_string else "none" - - return client_msg or raw, command_string or "none", landmarks_to_visit - - def __parse_router_json_output(self, raw: str) -> Tuple[str, str, List[str]]: - try: - payload = json.loads(raw) - except json.JSONDecodeError: - return "", "none", [] - - if not isinstance(payload, dict): - return "", "none", [] - - client_msg = str(payload.get("message") or "").strip() - command = payload.get("command") or {} - if not isinstance(command, dict): - return client_msg, "none", [] - - command_name = str(command.get("name") or "").strip() or "none" - parameters = command.get("parameters") or {} - landmarks = parameters.get("landmarks_to_visit") if isinstance(parameters, dict) else [] - - if not isinstance(landmarks, list): - landmarks = [] - - normalized_landmarks = [ - str(landmark).strip() - for landmark in landmarks - if str(landmark).strip() - ] - return client_msg, command_name, normalized_landmarks - - def __fill_result( - self, - result, - success: bool, - error: str, - text: str, - command_string: str, - execution_status: str, - ): - result.success = bool(success) - result.error = error or "" - result.response = text or "" - result.command_string = command_string or "none" - result.execution_status = execution_status or "" 
- return result - - -def main(): - rclpy.init() - node = FrontendServer() - executor = MultiThreadedExecutor() - executor.add_node(node) - executor.spin() - node.destroy_node() - rclpy.shutdown() - - -if __name__ == "__main__": - main() diff --git a/src/frontend/package.xml b/src/frontend/package.xml deleted file mode 100644 index e91459e..0000000 --- a/src/frontend/package.xml +++ /dev/null @@ -1,22 +0,0 @@ - - - - frontend - 0.0.0 - TODO: Package description - operador - TODO: License declaration - - rclpy - high_level_reasoning_interface - python3-requests - - ament_copyright - ament_flake8 - ament_pep257 - python3-pytest - - - ament_python - - diff --git a/src/frontend/readme.md b/src/frontend/readme.md deleted file mode 100644 index 49f4bcf..0000000 --- a/src/frontend/readme.md +++ /dev/null @@ -1,29 +0,0 @@ -## Execute Ros2 Nodes for Ollama frontend and user interaction -# Ollama docker -```bash -cd /home/operador/Documents/kamerdyner-dev && -sudo docker compose -f ci-scripts/dockerfiles/ollama/docker-compose.yaml up -d --build -``` -# Setup ROS2 Enviroment -```bash -source /opt/ros/jazzy/setup.bash -cd ~/Documents/kamerdyner-dev/ros_workspace -source install/setup.bash -``` -# Ollama FrontEnd - Server -```bash -source /opt/ros/jazzy/setup.bash && -cd /home/operador/Documents/kamerdyner-dev/ros_workspace && -source install/setup.bash && -ros2 run frontend frontend_server --ros-args \ - -p ollama_url:=http://localhost:11434 \ - -p default_model:=robot-router:latest -``` - -# Ollama FrontEnd - Client -```bash -source /opt/ros/jazzy/setup.bash && -cd /home/operador/Documents/kamerdyner-dev/ros_workspace && -source install/setup.bash && -ros2 run frontend frontend_cli -``` diff --git a/src/frontend/request_preprocessor.h b/src/frontend/request_preprocessor.h new file mode 100644 index 0000000..5e38583 --- /dev/null +++ b/src/frontend/request_preprocessor.h @@ -0,0 +1,80 @@ +#ifndef FRONTEND_REQUEST_PREPROCESSOR_H +#define FRONTEND_REQUEST_PREPROCESSOR_H + 
+#include +#include +#include +#include +#include +#include + +#include + +#include "inference/gen_ai_model.h" +#include "settings/inference/model_inference.h" + +namespace butler::frontend { + +class RequestProcessor { +public: + RequestProcessor()=delete; + explicit RequestProcessor(std::unique_ptr inference, + const buttler::settings::inference::Manager& settings_manager) + : m_inference(std::move(inference)) + , m_system_prompt(load_system_prompt(settings_manager.modelfile_path())) { + std::cerr << "[RequestPreprocessor] System prompt loaded (" + << m_system_prompt.size() << " chars)\n"; + } + + std::string process(std::string_view user_text) { + const std::string payload = build_payload(user_text); + std::cerr << "[RequestPreprocessor] Payload: " << payload << "\n"; + + ov::genai::ChatHistory history({ + {{"role", "system"}, {"content", m_system_prompt}}, + {{"role", "user"}, {"content", payload}} + }); + + return m_inference->infer(history); + } + +private: + static std::string build_payload(std::string_view user_text) { + return std::string( + R"({"version":"1.0","request_type":"from_user","user_message":")") + + std::string(user_text) + "\"}"; + } + + static std::string load_system_prompt(std::string_view modelfile_path) { + std::ifstream file{std::string(modelfile_path)}; + if (!file) + throw std::runtime_error(std::string("Cannot open Modelfile: ") + std::string(modelfile_path)); + + const std::string content{std::istreambuf_iterator(file), {}}; + + const std::string marker_open = "SYSTEM \"\"\""; + const std::string marker_close = "\"\"\""; + + const auto start = content.find(marker_open); + if (start == std::string::npos) + throw std::runtime_error("SYSTEM prompt not found in Modelfile"); + + const auto prompt_start = start + marker_open.size(); + const auto end = content.find(marker_close, prompt_start); + if (end == std::string::npos) + throw std::runtime_error("Closing triple-quotes not found in Modelfile"); + + auto prompt = 
content.substr(prompt_start, end - prompt_start); + if (!prompt.empty() && prompt.front() == '\n') prompt.erase(0, 1); + if (!prompt.empty() && prompt.back() == '\n') prompt.pop_back(); + return prompt; + } + + + std::unique_ptr m_inference; + const std::string m_system_prompt; +}; + +} // namespace butler::frontend + +#endif // FRONTEND_REQUEST_PREPROCESSOR_H diff --git a/src/frontend/setup.cfg b/src/frontend/setup.cfg deleted file mode 100644 index de2e583..0000000 --- a/src/frontend/setup.cfg +++ /dev/null @@ -1,4 +0,0 @@ -[develop] -script_dir=$base/lib/frontend -[install] -install_scripts=$base/lib/frontend diff --git a/src/frontend/setup.py b/src/frontend/setup.py deleted file mode 100644 index 855bf78..0000000 --- a/src/frontend/setup.py +++ /dev/null @@ -1,33 +0,0 @@ -from setuptools import find_packages, setup - -package_name = 'frontend' - -setup( - name=package_name, - version='0.0.0', - packages=find_packages(exclude=['test']), - data_files=[ - ('share/ament_index/resource_index/packages', - ['resource/' + package_name]), - ('share/' + package_name, ['package.xml']), - ], - install_requires=['setuptools'], - zip_safe=True, - maintainer='operador', - maintainer_email='aricardorodriguez@hotmail.com', - description='TODO: Package description', - license='TODO: License declaration', - extras_require={ - 'test': [ - 'pytest', - ], - }, - entry_points={ - 'console_scripts': [ - 'frontend_server = frontend.frontend.server:main', - 'frontend_cli = frontend.frontend.ollama_client_cli:main', - ], - }, - - -) diff --git a/src/frontend/test/test_copyright.py b/src/frontend/test/test_copyright.py deleted file mode 100644 index 97a3919..0000000 --- a/src/frontend/test/test_copyright.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright 2015 Open Source Robotics Foundation, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from ament_copyright.main import main -import pytest - - -# Remove the `skip` decorator once the source file(s) have a copyright header -@pytest.mark.skip(reason='No copyright header has been placed in the generated source file.') -@pytest.mark.copyright -@pytest.mark.linter -def test_copyright(): - rc = main(argv=['.', 'test']) - assert rc == 0, 'Found errors' diff --git a/src/frontend/test/test_flake8.py b/src/frontend/test/test_flake8.py deleted file mode 100644 index 27ee107..0000000 --- a/src/frontend/test/test_flake8.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright 2017 Open Source Robotics Foundation, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from ament_flake8.main import main_with_errors -import pytest - - -@pytest.mark.flake8 -@pytest.mark.linter -def test_flake8(): - rc, errors = main_with_errors(argv=[]) - assert rc == 0, \ - 'Found %d code style errors / warnings:\n' % len(errors) + \ - '\n'.join(errors) diff --git a/src/frontend/test/test_pep257.py b/src/frontend/test/test_pep257.py deleted file mode 100644 index b234a38..0000000 --- a/src/frontend/test/test_pep257.py +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright 2015 Open Source Robotics Foundation, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from ament_pep257.main import main -import pytest - - -@pytest.mark.linter -@pytest.mark.pep257 -def test_pep257(): - rc = main(argv=['.', 'test']) - assert rc == 0, 'Found code style errors / warnings' diff --git a/src/inference/CMakeLists.txt b/src/inference/CMakeLists.txt new file mode 100644 index 0000000..98ff5bf --- /dev/null +++ b/src/inference/CMakeLists.txt @@ -0,0 +1,4 @@ +add_library(agent_lib INTERFACE) + +target_include_directories(agent_lib INTERFACE ${CMAKE_SOURCE_DIR}) +target_link_libraries(agent_lib INTERFACE openvino_genai tinyxml2::tinyxml2) diff --git a/src/inference/configure.h b/src/inference/configure.h new file mode 100644 index 0000000..260058c --- /dev/null +++ b/src/inference/configure.h @@ -0,0 +1,29 @@ +#ifndef AGENT_INFERENCE_BUILDER_H +#define AGENT_INFERENCE_BUILDER_H + +#include +#include +#include +#include +#include +#include + +#include "factory.h" +#include "utils/parsers/xml_parser.h" + +namespace butler::agent::inference::configure { + + + inline void model(GenAiModelInference& model, + const buttler::settings::inference::Manager& settings) { + ov::genai::GenerationConfig config{}; + config.max_new_tokens = settings.max_new_tokens(); + config.do_sample = settings.do_sample(); + config.apply_chat_template = settings.apply_chat_template(); + + model.set_generation_config(std::move(config)); + } + +} // namespace butler::agent::inference + +#endif // AGENT_INFERENCE_BUILDER_H diff --git a/src/inference/factory.h b/src/inference/factory.h new file mode 100644 index 0000000..0229287 --- /dev/null +++ b/src/inference/factory.h @@ -0,0 +1,46 @@ +#ifndef AGENT_INFERENCE_FACTORY_H +#define AGENT_INFERENCE_FACTORY_H + +#include +#include +#include +#include +#include + +#include + +#include "gen_ai_model.h" +#include "settings/inference/model_inference.h" + +namespace butler::agent::inference { + + + +class ModelInferenceFactory { +public: + [[nodiscard]] static std::unique_ptr create( + const buttler::settings::inference::Manager& 
settings) { + return std::make_unique( + create_pipeline(settings.model_path(), settings.device())); + } + + [[nodiscard]] static std::unique_ptr create_pipeline( + const std::filesystem::path& model_path, + const std::string& device) { + std::cerr << "[ModelInferenceFactory] Loading model: " << model_path + << " on device: " << device << "\n"; + + const auto t0 = std::chrono::steady_clock::now(); + auto pipeline = std::make_unique(model_path, device); + const auto elapsed = std::chrono::duration_cast( + std::chrono::steady_clock::now() - t0).count(); + + std::cerr << "[ModelInferenceFactory] Model loaded in " << elapsed << " ms\n"; + return pipeline; + } + +}; + +} // namespace butler::agent::inference + +#endif // AGENT_INFERENCE_FACTORY_H diff --git a/src/inference/gen_ai_model.h b/src/inference/gen_ai_model.h new file mode 100644 index 0000000..e9915e2 --- /dev/null +++ b/src/inference/gen_ai_model.h @@ -0,0 +1,63 @@ +#ifndef AGENT_GEN_AI_MODEL_INFERENCE_H +#define AGENT_GEN_AI_MODEL_INFERENCE_H + +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace butler::agent::inference { + +class GenAiModelInference { +public: + explicit GenAiModelInference(std::unique_ptr pipeline) + : pipeline_(std::move(pipeline)) + , config_() { + if (!pipeline_) { + throw std::invalid_argument("GenAiModelInference requires a valid LLMPipeline instance"); + } + } + + void set_generation_config(ov::genai::GenerationConfig config) { + config_ = std::move(config); + } + + std::string infer(const ov::genai::ChatHistory& history) { + std::cerr << "[GenAiModelInference] Sending chat history to model\n"; + + auto result = pipeline_->generate(history, config_); + log_metrics(result); + return result.texts.front(); + } + + std::string infer(std::string_view user_message) { + std::cerr << "[GenAiModelInference] Input: " << user_message << "\n"; + + auto result = pipeline_->generate(std::string(user_message), config_); + log_metrics(result); + return 
result.texts.front(); + } + +private: + void log_metrics(const ov::genai::DecodedResults& result) { + const auto& m = result.perf_metrics; + std::cerr << "[GenAiModelInference] Output: " << result.texts.front() << "\n" + << "[GenAiModelInference] Tokens in: " << m.num_input_tokens + << " out: " << m.num_generated_tokens << "\n" + << "[GenAiModelInference] TTFT: " << m.ttft.mean << " ms" + << " TPOT: " << m.tpot.mean << " ms/tok" + << " Throughput: " << m.throughput.mean << " tok/s\n"; + } + + std::unique_ptr pipeline_; + ov::genai::GenerationConfig config_; +}; + +} // namespace butler::agent::inference + +#endif // AGENT_GEN_AI_MODEL_INFERENCE_H diff --git a/src/inference/model_loader.h b/src/inference/model_loader.h new file mode 100644 index 0000000..0ca234b --- /dev/null +++ b/src/inference/model_loader.h @@ -0,0 +1,31 @@ +#ifndef AGENT_INFERENCE_MODEL_LOADER_H +#define AGENT_INFERENCE_MODEL_LOADER_H + +#include +#include +#include + +namespace butler::agent { + +class ModelLoader { +public: + ModelLoader(std::filesystem::path model_path, std::string device = "CPU") + : model_path_(std::move(model_path)) + , device_(std::move(device)) {} + + [[nodiscard]] const std::filesystem::path& model_path() const noexcept { + return model_path_; + } + + [[nodiscard]] const std::string& device() const noexcept { + return device_; + } + +private: + std::filesystem::path model_path_; + std::string device_; +}; + +} // namespace butler::agent + +#endif // AGENT_INFERENCE_MODEL_LOADER_H diff --git a/src/models/llm/distilled-1b-robot-router/checkpoints/README.md b/src/models/llm/distilled-1b-robot-router/checkpoints/README.md new file mode 100644 index 0000000..70c5d62 --- /dev/null +++ b/src/models/llm/distilled-1b-robot-router/checkpoints/README.md @@ -0,0 +1,59 @@ +--- +base_model: unsloth/llama-3.2-1b-instruct-unsloth-bnb-4bit +library_name: transformers +model_name: checkpoints +tags: +- generated_from_trainer +- trl +- sft +- unsloth +licence: license +--- + +# Model Card 
for checkpoints + +This model is a fine-tuned version of [unsloth/llama-3.2-1b-instruct-unsloth-bnb-4bit](https://huggingface.co/unsloth/llama-3.2-1b-instruct-unsloth-bnb-4bit). +It has been trained using [TRL](https://github.com/huggingface/trl). + +## Quick start + +```python +from transformers import pipeline + +question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?" +generator = pipeline("text-generation", model="None", device="cuda") +output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0] +print(output["generated_text"]) +``` + +## Training procedure + + + + +This model was trained with SFT. + +### Framework versions + +- TRL: 0.24.0 +- Transformers: 4.57.6 +- Pytorch: 2.10.0 +- Datasets: 4.3.0 +- Tokenizers: 0.22.2 + +## Citations + + + +Cite TRL as: + +```bibtex +@misc{vonwerra2022trl, + title = {{TRL: Transformer Reinforcement Learning}}, + author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec}, + year = 2020, + journal = {GitHub repository}, + publisher = {GitHub}, + howpublished = {\url{https://github.com/huggingface/trl}} +} +``` \ No newline at end of file diff --git a/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-141/README.md b/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-141/README.md new file mode 100644 index 0000000..0aaaa43 --- /dev/null +++ b/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-141/README.md @@ -0,0 +1,210 @@ +--- +base_model: unsloth/llama-3.2-1b-instruct-unsloth-bnb-4bit +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:unsloth/llama-3.2-1b-instruct-unsloth-bnb-4bit +- lora +- sft +- transformers +- trl +- unsloth +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model 
Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. 
(2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-141/adapter_config.json b/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-141/adapter_config.json new file mode 100644 index 0000000..a41be0c --- /dev/null +++ b/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-141/adapter_config.json @@ -0,0 +1,50 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "LlamaForCausalLM", + "parent_library": "transformers.models.llama.modeling_llama", + "unsloth_fixed": true + }, + "base_model_name_or_path": "unsloth/llama-3.2-1b-instruct-unsloth-bnb-4bit", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": 
false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "q_proj", + "v_proj", + "o_proj", + "up_proj", + "down_proj", + "gate_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-141/chat_template.jinja b/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-141/chat_template.jinja new file mode 100644 index 0000000..1bad6a0 --- /dev/null +++ b/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-141/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. 
#} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' 
}} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-141/rng_state.pth b/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-141/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..221e971d9819636977434eae61bbf40e712e8300 GIT binary patch literal 14645 zcmbW82|SkB`~Pq82+^iZ5n541LP`ix2U$|2?ECIPqLPHksH>pFdZ^Xog`|NLM7-|v~1c|X^6-{;)tzRu^m&be=LwwIO>1UWh3 zKR$znfkIq#h);ZiUqaB7K)(b(z1WCITPsDu^PA6_J}F{B%R?5L$0jF)#zc#y?4`9N zBCyv864BwB_=^J3pS#9hA__29{s1ne)ApAEHNS1@0c@BccR(QW4`ZiGMEm+ZDFxU4 z#=E-&((-nMiA1z$tk$34s*Be8C=jKOg~<|8mdfSZ;F1_!&IP2Mc~>uxx#MI~h}T0a 
zb<%1&OM{GYp##Y@VVwv0!&X>KbVuzCBXaZi50i*IcT`-5R@uK_uLLCCSknTqU-9b$ zfvkAE*qe+Vy1Tzbl#wO#oj^=(Jc=h5n&bzQ@aO6ZWUivNjYPEbSo%)_xtU!&nslyG zb(4r@y%^9dkhc35b&2j4szI6}8r;d2HM!Dol?k(NK==wBEWFPGlh@N~-06A3L6it3Kd?}KMw&dN* z63DtAU0}?c7S^4D6Xh;xodu|N%=%3rg+0HIl8D|ee!5j4#xfVClh)?#&}YAETki^F zgq{5evUb4hC{kw8D3*u@jFaz%mU`A{`vI#Sw7&(^>@e2x?Bj68d$W`u1#%$mU6Mp}b(4Q9xD;gcUkb#3X+;vzWnr-r zQS`ax7s1~@MD7mY#_8AV0q5*DTmW2JaTx<_oNmAXX*nfz0(t*9W(HhfW%XWg&HIvH z2RPgIGiWP5Hp8Bb`Q24RBHH%B?Hc%3dt_XOnA_ZinOQ6Peh;`v?vf&KW%V6OX6oz} z$ceLcuB5wi*>a+jI%5vGSeX$A*S&kQP#}AKCi#OXG|O-(`oEk*$(Q|n`Ul7Y&tr=T zb4&pR)|^VmAzldbdk+}nuC-nu=OufVNkl)M=m!^%98_y65&29GKOvA?MgDT+a8u!6 ziRh*Mv0QEGEg{G0O-7xp8=RM;m~V=%#QhK2sy9qA0-h@+F6?h4b475#9CEjF^!Kr zI*tvG|8ffz6>_w(0bu&U9#TWSzhXF191%WHA~JMRegVmIqee#}Ds|Yx8>%%t7l`|B zm*-1F@78M`f@W#fi$4iu;)rfI>v)6&QsvQjWlM6qwQw%!uY9PVL=@O?1qN{5;~bHQ zmc|&p6Nt^d;|pM#HT+=0JoX-SBxOG-MSu!^tH7K8L;Dg`AysKNUx{eqtzntqYVoc? zeVo(c0}j8_ib8&K^Z0Rr^z2e}k%*36m97BS=#xXhrC2#*C`ni{4brbPZU*YtZPLT% z07H%PEdb5iUAq9kZS8YVAodGC89|Ubeek?+LpdyZ5r=i}2qf`>wZ25uK6Mso_4hH_ zUJ_BN_v$ACIas9*6DCz+gI$KNiv9!OzWn7Dfn2<=5>D=pxd>4|RkZP>K>i4QU`(!t z^;0$zF4b$adj}&jJ~v*pN)x_4~?_X#>YYiH#br0W-dEva~#;#M=e60&#kB zQ$r#;;bI0dEwD})O^hmMkC%vKCcME~>^^7}rp*$gB)s&N=^x7hul1au;joq%4T-2| zLusZ!YVS+ikOL;Aiy&?j?NQiaAI|my9ygDG@~*D&3nYf?E!q77`R=NwokUcuW&r{k zf433}@9Gz4$^l`>oB8t0`gX4X!d*l@i$a>FIGWSpGV2MclrnVFO8GCQ+gv>rO z41}|s)kA_hSw9GY>#zN}Kp?gQ%;u8o_kEQ?2dM_t0x1s2bC8Ifr(f78kn778Crd;t zw=9AHC3{St3U~e7X)}Buwi^6(_41G&SBHB;9ep?NE!@C^Gx`vjYReqZ&C5r&i^x*X zMKFT-_w=x&N9r>;$IMbDhV;$dtVd*{rrD7oLn&S8%J0l+IHTU;V-p}@M;)viPA9FB zNZXqq%t__eqn5C`brr7>NS%MFETL)=Q!>@`)J)>?#*+7&y@rw~?nlNE^diTYEYmF7jc0668|#nU{Cr zaN&-8P{{i;suraD)DwZ+J0)MZn17dxxJud3aPwgP-WiR_Ck|8w578ZbbsHEh0 z_jE@xW8^0_GP(LNEP9{XN5U*iOGSqe^XDluU{RjrOT6ydn~@1w#-Nn%HY8nw-lo5s z_XFVB@A9~6tsJXy4se0BNdTJf zyyuocUWB-sk?nGqj6p%SS|Qag&1$eA$;XRfLl~PN4eqnZWS1Ai25Z$QADG0fuC zF!X%Y_-+)rP`5NtB3d>v2A4do6Avx|rW9xX1X#4>AykUyr;l8ro_ni26i8ITIa%UT zuMsa1U7t4|%Gc%@hnJH01zQ$Ed2V+79m@8Pxtipm%=Iwh@FH~_$=yg`F0JaFzCeFp 
zrrpPjn3Z>>0j@lc!U*qvT?WaZ^8H|_dWug(2f;<^m%yZ^#e^x7v)&fdiPpgd5u_-y zI+)xJZ5u-7_2j_iUg!8iv=R@!f^{vYY6NbuE;+x2jd7LD%xDsF^~OAjXocri+&GY;bM;MglYyLL{GSW+PD3tv}gJIQ}RA&x} zVKhYPyg)498-c**PX7V7BQGjeKyer_NeKeu6!0rnzS~~*g@7O4H*5#6rwv#I3T=xq zj*q+gt`mr>ju#ZT^=|j?fXnr*`5l4mD@st8h_!XdzOwN5#6hNC8F=4+(4@2 zM!_E0eEOt%I6wb*32j8JS}X1itVOy1Tt{PwVhI+av#_NkULuwP08hz13|$B zmki%QBf8oi!ybMd5TZ%1VtMob$r+_><43Q|PmR<^baIt^Svw zuDX`XN<_cN^S!Cw2C9n|`wW&T5$m4_ErsP4H26Q0xgAqOo^pv?l zpqy^T!J_$bOBfdLyzUIxFP(B!&?3i9IL(%b9(J98ZTlRBtl^}s{Uz+KMqj@R;^@3T z$b-z9wZsT+yZiPDXgT2X$8CVD8>J}5h3kTTg>oBlDw)K{SI7~?=^+C7bYXxmSx`R5 zgS-mw1@TO<&VM72ceT5gkjJ$T2M~uO>I-={V_X*WpmXHA9(Z*7RAng<{an|GJMP%| z*RkrW+dgOnT>WH?6)SagO(kIIna*Q?ni5}-@uv4)?xZgKP85-L5WA5cvw+E@wzCz& zc(X0$wm|C7_!`2q*@{k(|6FzjR`6cmeyCg_=hl6|#B3#81^-B!o)5^WYJ-U$?)wfN zLW{RAzAre&_DK-;nV%(=_P{6E=w(r{$zmBTkG^0yx*3?78k&ej0o<+U6A%*@q!+a; zG9k<_F3vAm!6z;#ASOCKA#Pbff`VA~>uCznfnSe```QbcVz~fl78n!|uMiZCUlhdh z9#YV`zgU4kOGo-8ghd;O`>hizx{8&u#Qj~x1GMa=U|gBvf&c0;Fd;b>8V>U40}Yo= z6A#W_pCwjy6%Wy}m*${#L0u-H2jkfmR<1%DS(XberunRiRXETr z_h7n)mh=k|h0~fqdVsal19lVo+xpXW33em<&^^Wr+-SY?c)~iMWi8t;V$BmHS+h57 zr3F+%GY8XR1KP`WEM_Hx;HGxdE~CM0lXlPwIaV#F~ z8fht$Obrfk_M(++yBE!Ip*^gIHLz9%9hS{{XgzCV`)AOtK5Ul{ZT4ZAwAo}VT%*f_ z=FwJLwuP3^c4aTt=`d2CHVshMXAQD+7i+fZPq&Ktu|l>*j~07b&~91{{cUDhEKO<_ zw8}`O&9StMR=ZP(P~24bx}T=A61F3lZZ(`W6Kc zHe|&dRx=}o?xUMn`d|yT&W2SerLgwJFlXhWRji2C>pK{@jPX)m9Srd+?@!Atma}!? 
ztjv@Zur+-k0JS6Z-Bp#`hfQRiG?%5*oz@Dpf>p6b*6!;?vxj&`={mDQg+%`a^7B&I z_CBm^e9&xG6>ZKsY|P{XAZQH`+cpqlCM#xpSS>Adj1?!?!VUJoQeV3=6~YK}A2=GO zvPgAwN-Ap#p5x5&SP?68XB`?KzdCi6N4K%}r`WxcvHwRGvOWYF?j6WYvn zd)c$j{;X!jtRYFPQ9FJJYnsaTz)co|BAbmsn61+t*g;yhOih#4(k!Q8Fp@@TI9R5{ z+J@6Yx^@)Jp&27QRZ~0EY z*UV>Y**;cGH%73gk#wKR;ADu%p8h(nw8&~W#IzDZ)Hs1{kCzJ?L`##DLPkXDErKCs z&g~mFWMUHSwu_EeOI8`CLtCZ<`_U?zsmu0i1P`P8tcQd}PY7WdCd0J`vD&2x{aFib zqSY%kAsaT1i1KDFp{ahf2kxnt=8l-C5KlLHv3#}(0#;5tX<;8-nzg78T^|miYoOaA z1LM`hpd(h2z^Z-eRxP&1iDr#vb*n(RX&`mUFlSaYKE=mWMw1p!1d&$-uys>eE5z|Y zByDw}xwOtEg6){w-%^IALA>|cu^vU*VC%{{t#mx;-f{M{HrZAWTJ(%?q6e0{(R_8b zp$~<6w>y#^0?BmEVcTf88mn|?ZM5AU(z}t>P3>bXOAkaYqxF;6fslX!v`S1j(2N0= zs;s`R?_$WZ%`ig96jnZs9h4tv%UZ4J`T&*##WRC$n89|Ko3jlt&%LZhkM6OR8%DcX z%{&>JNwY#(lNMCIwU8;rqG@hctePE~OS{Kwd$WyDKlafEA4u&i1&CLfl{2gLW?8h8 z)kf2UY%OhL`Ge^?)(mB@I+5*w5VcQSLO1!b9tXCcRT(>hOx8d}uB4UGv~!SpJS|UT znG+|`Ug)|PWUz@=dDFGD+h;ayTWJ zX$IZTiuGkCEbdF&$I+^VFpJeJ&qjupEKsBe-MszTZkY64sqvFE2h;VmWhD%>f~G0L zLXrk6!B*M|s>_Be=0LnNSUv3;7Rk2Cv&x96+I~>s8X^WwVmmZh{qV7b?S(UT(Kcne zW)xJS%;BtX31Pd2upSwEy3f(TPQ)@GskYEuLlt>erauCTULk~f2knxBRI3!!mcm62 z%F(?OVWPIO-UNCeDI(m6?RH6aUmXG?>tcJB$;RljUFIuk1JrVsKbqxZL6=dOYH8{d zN~;#FpzSLmeN~JXz*@Wy7T`>GmgPMZ`mde~VOtM{Z=z0fl1|X`miNUN#qzj|k0JWld zQILa_V;|ZXx0Dt;1Pumx!#cO!1eTLBC{XKRJ~t*yfrRSVt}s|%_Rzg_-(Z?6OIu?> z0NuW<1XlQMhSB54C&tf|iC~hUg7++%ZK6Xr1;e71uc1xLVRdGep~K*EYiWaO>N0hf zHY#zR<>&xb!D=0&pzk)8?Y5YKzh{aA&UhfMVgir4?=E)&dc=aMpHSE<7P~P zB~XgdXH4)kg4^HBvgO2dt)h-8 zbXi0*q4=+%9p-FpI4wz_B`aZj)2zta<)fG+Iu&xYkRIymLwB-@0jzQ{h^Ac*bh*<} z**Ar57(@$LDecr7#X8l;dBDsx(hbvAn)Wpj%dw{EsSE9<()~V>tT>RCgfE#j93)%J zN}`j!OjgruBf3}C-vSc1MxJh-?4scuDx)?ZlAt#f%1w=n(O?Hw$@V9zt(->>IK$dr zO*dLtvpVnTG)D*0Fqby2V(Vc`(v!T3wb^Ja96LUYwtEM#LXj6+Z#jWxX)7*KwB5W zJ_J-wbG&FNZHYGZq@5EZMzIcGxSiYtx`u6ol2W6?cF9kUiDzj#bPps|?g~ipoZ+;s zkD{Cg?5VaJFAa4E|29}8(&i@jg`IS*t`+27HrqA<3S#ac*2T7sR;AnZ5^w|U84wEs z_lEoKrh9@XxPq^ml~0B%DUuiu1WQ({s*6Qa! 
zilxc2^|NWFN?&%!oaU=QvNW^m02PZd22%R8$BM0ky-yX(?h`RbX{_!jJBWFc3+$P9 zLM1PyO>FznS+FO`Q&uOih;AR@B~3f*k|?a(be9v|y9h*GpkqL@*#=A5VV1BDFQNG} z{b^6^5J>s0tjl)hJhn5+CDjv_`evA*s%7xMJld%vg8fB-5I+ERa0#$ z(QVLOg@F!KvdvQB;i2%1JHl10k{}-G50AI@f*m}U@=t3{>QfM_ z^9N&nu|}w><6-3lv8Ic?0ADi2W9+3e#bdJr0%9T~g8~x5-~nF2DK0Q5E-26@44>o2 zX}gNYCx|DwiYI!&Z_I^&Wr2QTtx&jX=;+X|-5Q9sLsjhuCWv*QSNnmP;z`))%EM%dQM^nZTc{~A$bSVBT%5RM^p);$&cGX(tk=P?Wv z%)S`|{#C;ieOs&P|LXzZTNL~R!L4KX6}cq{)=wr(!mqUf*7J~0Vm7@)-tRn=jP!}J zIF5X}IJgJdetpbbWiB269@; zxz)d*KRNJ-De?!qh;>N!fHVAWWU@75EYXgYiRy>kv9gw5$J6{Azn>(Hf+F;%2Yr+v zRcm(7LS{Ufoq`-}J|rG_UMHv)DWj`+8%bu_@cHacmFDrRDSE@_Z$RY?V?6%FOSKHS zsitf(a*&lJpVwYX_!|!RwCtXQ@)Jr=|IFvr=JF?_LaS-yNpB2}QQS~f74j-Qk9-n{$86M9|Pg{6<&A|9g zc>V{Wl*eQ4gZw#QW(vq>FIRIt{PMJh>txm2JB!e-lo$|zl(+ry z19Fb>`vXX?iD4Izjhd^>kTHsl!N?-{%RDchoHZSc_G2kuhHUx0gzIOwLX#8P-7GSjZukGY{Xl7qM4v%Z4#XLa9KP=(?obuNL(2f}q?}ChSeAR`#YFIxH zX{sKz9r<(6B~N74s~&meW?%a ztsgddAeWvBe~gUx81w_u{dgYNW9huZzG&Y&wv&&;WYfKJv+nyn^Bb_=@prX& zza6I{dA<#oz4!wCZ^q{F`2Kly7DZd{Cl`MIp<4#?^9H^@=Y#&DL$9YGtKIMUAxE|x z<$A6(NPdR4|6PAw?(hA~<4~y9H5%=r=^=ZOmWQYE>s%mRTqjLz53a&OY{>6g$jQ?_ zE<}2;!+hRU6G{)Gt-2@f8Zy;XW+igy{^3WzlJGjv>TAm5Y_}?HANo62Df9C#=_>C+ zyW8$#Hu8q!aITBga@F@}pHSS!=l|TFVjiEo{INW~vm>|0;PEn}a-JWDmW&&V_Wo(N zt&tOE#b_drFAwHhT~R&pea48R-8q`Vzmdh=Xr=9?NMv zmY{#i``>t;Pxp@JaT%1?Qh@%kp2K;5+bkHv=d*Fi+0W=#+tsRwbbc4V1vy60+=zVk zL~90eXh-{OeLPI`DeclG_q$K7Xtzeji;`AH`N z*)M%CpO1jU9>>t0c`A(OQCOLz3hghFYg|8bcg8|Piw9t&y2e>6`A3$+#h))f4nME(>5?1+4uHc zZRBUs2EL9RHMg}y`^Y|j9*-wel;)#7Y=9j)=$sw0qIhZ#(!zXeG4ftvl{~VeGLzSpAG<;<(5^LW<^J@mioc_M^R^am7rFQO z6SRGNo@_>Tn5&c^=eXGL>pFkwj%jysG?-@9fkMmFMBw4f- z)3w`?*7I%$A&U!l&PImK&&x)B%yV0fbWD18ADMdG+YOl$FyJ(@Vnz@5e;FLI0qvpT$D&1soqVQtA}_W#@%uLk8|{L2S$Cg%$Rz)}t;kp;GY7M}OV6!Xi-wJ(0>?LLvPV6=a(akE5jUYgDG`)s&gI@+&I z68N~^FaI(S?Xr?FJnuI>z4<-bi-PR99_uPM+(Y}?r{pil6El-mAumZ4B_p4$StmoX|i}59+$CP`576wL^>2X#6XL$n|(X)HKYA@(U!MJ?>ADM zj^b{v<6TzuE6~4WwZePk_qK+y$n+DNqL8^o7W}?5O3xlgyTm@(0@>j+CKKrypv?2) 
z<(2BuXltYe^XvCdxx>d3(*S>60w3|G`MgeEiJZiBQ~8@WkMBM8<@`F`{R()T54?{a zjOV{ip7|Tnr}8*O<~qJ>{2)a%gKxk0?YI$^HBq=}KDoy5ak_^eEaV-fg^v?Dl@` zf;9Z2osa*B<;iZeZ)hD!KoWIlJ|Al}Uhz0o@1cBrn~HV0uI;137T|foi>@-{&9Fgz zkt<5{xNa8jH9U>>F|Rmdq=lt3@4xWvXr9L{2ZwNduM5xT^Q7$fjQcw(S6#vLUyYbi ziM&&8&htU>&75gy@2hjyK$>d%*&q!+>)%0ceC=`#InUtu8>HF`Z&l=ye$JZ6`qj&L z{W>=5*)M3%duPviveIG(+D4-zJCWU`+P9IT?k^sL{3x%Hg{*zHo3B%k(mh|HZDw(l z>v)s)vSnzS8jL-T{PCyW3CJ-!d!Uaq9XqiPx3D z7ybGC4IBLRdDWcleu(E??DJBBd}n;r4|(Ir``yS)Fp`F-H`S`RaHS&2M zP`8E0=ewI<>uly-<2F1mAnpU#tx!0f=Z$UB`6=i>uCHQ+ytYJH3VC*I>LTRHDf%8r z;{o|R{=%YzDQKTMWybSQOUszY_vxHWuG@as*O%gPtCnB69`3&IJcV|^#e&1gwQGC0 zjuWIea{au}(7uKK@Pu}rXJZ__dHxNU64s0U&liG&kUyT$4MQ5;yZ8kuWmfzQspho( zPo(>dS3e==WHoR-*PD|bw8h;Ub|8P8vzh0Ot67%{+9qK^2FMRbzVQBk4rIsBmVT#x z2|4-GIv)S_0O^TnxA=r=AZ;AkA!K{(Hl8Pkb#izf-Ljohh5j9S=l3HA7HJkCr}t>^ zxc<3x7uQL0&pmJS-?lZ`gj8DcS_4_Q^6h8jZ(FbO`EB`W$pN&lOgq5ac@7oxeiYXi z>_dMjTf^sX&Oy}=Xpbx0z}xRUvTGLFhieWbBAs3w;rHQL;CLTxsffIvk-t}nxeh~9 zTcgoFFYmJu>0`C)CG!9GgKrL3kM3Q6SQ)MfKmV^chgN@ob7){|W&Xc@bNKZQrp~_x zy5{R+JY2u8+{J|tR0e_{fBtzqg9L+b#`D)(Vcq}PfD9Gzis!b5EbIeU!=Hb?qK2UV z%@y_d%hULvxG;EwyaN7SEhgG0EYQ|!^8aW6`bNeE2Kv*DrWzU>m>Qet8%;HxW-xWS zfvM3{6BB)7eM6&Z`cn-}^l_BRM{~Dn!3gok*DhMp1ysZ4Z8g1XldQQTJ!Y@tpDODE+E)KL7(`6cD&?Y XnEutqb;Q|TB#V|D{DqQ7DKa zxCaF<9`!GH5&aQfMDQTq_9O@*c<|^@!7W=P-XBX6E*%Zjf8kWPbz%7Qb% z?+vg$rr6uH5l6+*9olTHgPTe{SeY`*anL?1+xnObe%M2lVjJ!g zJ|NB%%aR9E2XIPGJ(lXQv~ck0{59wF$A`gY=D-tu!>)~dcB;cNg$Y}muys5xwyae8 z_P5c6_d-mr*3#MJz>YGT;w{%3A`b6j&nK?aY+MYDNSW5+wj@R0e*f|)CIsbLE}925 zRn+IMhx!;IO8U+aJG|K_{k!hmiC&ckuI#g~D?(hZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": "<|eot_id|>", + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-141/tokenizer_config.json 
b/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-141/tokenizer_config.json new file mode 100644 index 0000000..69b6ed0 --- /dev/null +++ b/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-141/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": 
"<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": 
"<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": 
"<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": 
"<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizerFast", + "unk_token": null +} diff --git a/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-141/trainer_state.json 
b/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-141/trainer_state.json new file mode 100644 index 0000000..a3e9b4b --- /dev/null +++ b/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-141/trainer_state.json @@ -0,0 +1,156 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 141, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.21621621621621623, + "grad_norm": 2.652937412261963, + "learning_rate": 1.8e-05, + "loss": 3.1648, + "step": 10 + }, + { + "epoch": 0.43243243243243246, + "grad_norm": 1.7348365783691406, + "learning_rate": 1.898876404494382e-05, + "loss": 2.8579, + "step": 20 + }, + { + "epoch": 0.6486486486486487, + "grad_norm": 1.5895073413848877, + "learning_rate": 1.7865168539325843e-05, + "loss": 2.4276, + "step": 30 + }, + { + "epoch": 0.8648648648648649, + "grad_norm": 1.8073664903640747, + "learning_rate": 1.6741573033707868e-05, + "loss": 2.0007, + "step": 40 + }, + { + "epoch": 1.0, + "eval_loss": 1.4169334173202515, + "eval_runtime": 6.9404, + "eval_samples_per_second": 11.959, + "eval_steps_per_second": 3.026, + "step": 47 + }, + { + "epoch": 1.0648648648648649, + "grad_norm": 2.063204526901245, + "learning_rate": 1.561797752808989e-05, + "loss": 1.5398, + "step": 50 + }, + { + "epoch": 1.281081081081081, + "grad_norm": 2.603747606277466, + "learning_rate": 1.4494382022471912e-05, + "loss": 1.0425, + "step": 60 + }, + { + "epoch": 1.4972972972972973, + "grad_norm": 1.8809202909469604, + "learning_rate": 1.3370786516853933e-05, + "loss": 0.5659, + "step": 70 + }, + { + "epoch": 1.7135135135135136, + "grad_norm": 0.9672934412956238, + "learning_rate": 1.2247191011235957e-05, + "loss": 0.2295, + "step": 80 + }, + { + "epoch": 1.9297297297297298, + "grad_norm": 0.29672369360923767, + "learning_rate": 1.1123595505617979e-05, + "loss": 0.088, + 
"step": 90 + }, + { + "epoch": 2.0, + "eval_loss": 0.06064866483211517, + "eval_runtime": 6.7219, + "eval_samples_per_second": 12.348, + "eval_steps_per_second": 3.124, + "step": 94 + }, + { + "epoch": 2.1297297297297297, + "grad_norm": 0.11157097667455673, + "learning_rate": 1e-05, + "loss": 0.0609, + "step": 100 + }, + { + "epoch": 2.345945945945946, + "grad_norm": 0.07702265679836273, + "learning_rate": 8.876404494382023e-06, + "loss": 0.0561, + "step": 110 + }, + { + "epoch": 2.562162162162162, + "grad_norm": 0.0633946880698204, + "learning_rate": 7.752808988764046e-06, + "loss": 0.0544, + "step": 120 + }, + { + "epoch": 2.7783783783783784, + "grad_norm": 0.05974782258272171, + "learning_rate": 6.629213483146067e-06, + "loss": 0.0535, + "step": 130 + }, + { + "epoch": 2.9945945945945946, + "grad_norm": 0.05799481272697449, + "learning_rate": 5.50561797752809e-06, + "loss": 0.053, + "step": 140 + }, + { + "epoch": 3.0, + "eval_loss": 0.05265544354915619, + "eval_runtime": 6.6822, + "eval_samples_per_second": 12.421, + "eval_steps_per_second": 3.143, + "step": 141 + } + ], + "logging_steps": 10, + "max_steps": 188, + "num_input_tokens_seen": 0, + "num_train_epochs": 4, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6704502927261696.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-141/training_args.bin b/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-141/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8d76bf208cf745ccc85c8950c5b91c9f09205fea GIT binary patch literal 6545 zcmcIpX_OpQ749UEgl6Buj)(|hnaq~VBrF*TFawO~43mH*7|ZIa*WJZbSJka$CLIz1 
zVPIMjcM(+FcLg^@+!a>@6kJhU05=dt&*2%81_!YNWq{7Kp)1sZUc%@OqMnRf{X<|BVL`#gzFwk2I>Z&&gRb)9q z)O&7Vr*-8gac`L=_RPe1m$-FzyOp?s-&+qHU6zGuncgFAsK2c@M=W10S~rY(<_&55>n9Y)G=?Sz&$ zjeRkJ-)WuwUyS?EL(|PGh=tVqW#DBFpiu*NA&BM zasZ|ro=vHS>8>&kEE$J&yzgFrP)W3?!aFY9Vp=x*lX~I|*g=Ic9gHnlge+a){UPIv z#hCIQ-m@BX=y*YvBte9;sCt+*OxuF%GLb9B|0UM;lobgG+1_;6*!rjcdB@yiwZ9wt zV80pVR%?KgvLW&Cv5{-HKQeaPAy>#=mSfe;vP!IeI$~_($$R;^Z~mJBI&y5}sej!6 z(2DC%6?w7-2Xei@r=!N!pVzqY(Y_l_7e(sUHj`~&`fxQJJvQ<;D1gT}0RQ-DJ+rI0 zHl|}>k-hMOS7{p^d*Q$F7zbj|i_91&by5$WO&X!1=RFgOHs^Dyp`;CEMfEf^VM8~d7mkhGbu&M=biUJCo;Y3pT@ROq?3MJLNF?8&~OQ^Y7N19(>ODvahRI(5$2oAnH;Ri+hY zfYXd^82LXkKyQ@#aqrn-)IVSj4E78T_4EyxIoD86uBa%~3hGrg9jDqRy|dIeN&Ut_ z9lBTCC{Dz)G@xlAKb2lH2s^ImnD1M@>G~D3;(DM2TCLZ_)->Ziqmnhx2m(xP)5uT_ z>ks8=qbzXOgibNWfJdxHF;C5rYg>J=vtA1eGsbkTI{+Y&D z8K(52rm-J{@y~h~XEaA=Hx1;aLadZS8^%u&L~{NyW#pWVoA#BEyxdJhCzE-Fⅇ` zmHr_*Z%zbMLVO?9Tkv-{KFMG`W-P`IoBHW|@l(z6DopgY8#4#RIX)+&BD+2MVy#J1862REC2^4#z)pxie~LB! zxFiSen9V!R1kn`o7hMA7IG=R9hE)I-#Bjy3;cZNr*rpua|9IUePL_|z4)2i~Jz~tCY z0BBl?$-ddDrD@ZcNWdL)=;hjW)kp!9k?F>q3OMr2l0<=91&u3HgKEkR=@r^%mDmZ% z+Dc_%RQ_ewT^tD}3a!Yh$7E;jlXtdMIOOakwNjAKHMxSxOK{|Eo<&uCKzuc2?eqZE znnEhAA;~YOT@rGesc}3O%FK=m172<5Jhm5`Nnp-IR!EnZLz49xC<8hDDyNqn%nYR2JFRuhgtsMX`$bZ-9>cYEq*sH5Bx(EGTp&U8VEA zo_4hy6InNi5+HzHwUw^X2|5ZcS13}il_;vEl47x?qFM18Fa%2nCVYoF(sk%MS(Xjc z^*ZVS_X=VUb}GBryQ)bb)iq}t32<+M`aRjQG;GHXB-0`bW#o0pseRAGbf}Qni7dIVl-QqlOer!PLyVhd8%sJjl5FVRdf*2 zjXLDQG_G;D)Y3A&t|{~fenpe9OR{>shB78*TmwVBp*_}0)o)w?DhNv*R_CYXcBXz) z#=k0Rn4Dti&9n5DNqVdJk@ragWE#3j(-IGakvk32;?gRhx0NIZ^SI^BSdG}^!_du5 z2_YVi&>3*fqFXfLTOriW!5m<3*T!D>t(isGJ4#|v*SEdY0ZM!{YRKqdlq>t04>f0N0Dr*Xc&87_ew@)Y(mNWZ4{3grJnrx!kU1!ZKCDqBor=>y zb_?huI;tZJPPI`@(nqCGkd*teR(40zs?c!K$2Trtrt`ZrJ9bbHT~E;`bOIB(FqRCq zDuUZItIvPQ`HyZd8AsqGxeEk8fCbMiNkY*+|kh`gW*vXMhSgR zn^r>AtwL248$OzgJ2jlVjYg zSvSiueL+{Yx@v$glN{$?)Y{uMz%xPbiDp&-E1=;o={L+tF^-$4Z3%FqJcqt49_9i1 zilq8{P8K^~)e)SJO2rTbHhpb;k;J+RIAwFJe_bfy%pEE~0p;viPv4LUA*vib;cjv8 
znL9wmO1u(Do68eq{x?Mcx0YG*_}0e4;;c>6>Ga#$-r#!9uI(1%J5r65D-L~E2m~S> zNsEzE^u4((yRsf8TKvRCzBYpj-IHf{)UcUM`o2a7ta(hAdmIPGXGs6F1 zb7>#=VuS8$%3jI$8?+FkW;nO4yzbZ$qMdAQC3RFFI~m_caPy6uK+bqp;}CRzCEDdHBme9 zy@BbQxCEl#LoPR7AcF*TNFFhNsC69okK={w=ueaMxGwwh(#j0I6zwuSp#d=Md39tq zpn!pt70G|rVT9J3F9&r`Q%n7+n4Z+V!m-#pe~Zo;p=tB`9Ca?r#J`kuMa?%TigKv` z6;Myht0u;mEq~(nn{L>S>plKV?Di(}n8F!dH<_zonmQsM+`)-+bi;7#LNmYEe3pOv zS&ciNdia_}I1Ybyuf}nO{%$q6e`fNsKz}~pc#dx9d2X5vt!4#AX6SKlX8E&w1r95$ z>r$ZHNK_D^UmV@A=DF>gn+M%N;wjc)_N>Q_V0iIo_c|O}81B-c<%7ZhMT5a2@BP57 z)r@X)}zfx-U1)qQ<^ zYlqeh4fm}ZTri$7U8J=j*)62g$9#v- zelCH*=Bt!ps#`>-15RkK$F~P}5wS2dCdQX8JuO>#?2?|A3pj*E5t XKMRFkuG{cvrr1-aFU2aJujl>;!?;C9 literal 0 HcmV?d00001 diff --git a/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-188/README.md b/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-188/README.md new file mode 100644 index 0000000..0aaaa43 --- /dev/null +++ b/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-188/README.md @@ -0,0 +1,210 @@ +--- +base_model: unsloth/llama-3.2-1b-instruct-unsloth-bnb-4bit +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:unsloth/llama-3.2-1b-instruct-unsloth-bnb-4bit +- lora +- sft +- transformers +- trl +- unsloth +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use 
[optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-188/adapter_config.json b/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-188/adapter_config.json new file mode 100644 index 0000000..a41be0c --- /dev/null +++ b/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-188/adapter_config.json @@ -0,0 +1,50 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "LlamaForCausalLM", + "parent_library": "transformers.models.llama.modeling_llama", + "unsloth_fixed": true + }, + "base_model_name_or_path": "unsloth/llama-3.2-1b-instruct-unsloth-bnb-4bit", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + 
"megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "q_proj", + "v_proj", + "o_proj", + "up_proj", + "down_proj", + "gate_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-188/chat_template.jinja b/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-188/chat_template.jinja new file mode 100644 index 0000000..1bad6a0 --- /dev/null +++ b/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-188/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. 
#} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' 
}} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-188/rng_state.pth b/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-188/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1a15f9227c1af3cb50d7bbf3b44912fad6ed3856 GIT binary patch literal 14645 zcmbW82Ut}{xA!-~K|ruK6a=h*iXhSy0Rce=kRl4A^xipu2r421B8R3lk)nuH5k*vr zfMrIb#*)~3>@`titg(J;jqlC%&HLqhzI#s|@;htoJ+o)8`L8vzH|S(1Eh7kWa>9Rn z1`7R!*r;IdxOm_Az$pQ~@xD4S;Sn~Lih{>?pVhsR#e$~0EHsZviVum75=+@hYf405 zuNEYtLp5<11fn}{iRFYE!VA1}90Ao34mj3uHyeV&wp>we?i z9Rg{2H{3`fS~y1YPjJ;mX?_xj(x-wXi6~R~(k*aF3@+sWQcu6H7s#BkGRegAp`{vW 
zHJPbSMmy8~WSX$no%~@VEFzOf?hYk#^Y#vvh&;AcT!U6wzhA2aBwSzJ0QCY6@hIqL#HpwEbw>F9NxdRXmDxu2gZA zh-SX*cTga0_b*H)lQ&Uy(iGm{MmDX^k%p^`pLHF&ca!ebA&^1C#@Uc)t9vUXqLqe2 zvjif3aNmpcXKv;aQTH+Z&2XYpaoRb+w8|}Ljmcm5NFX7`MaghNK;=7utlv6vr9^ao zxmFe&W*x^;0V|iZwL`Fqa>FE|Q$C?L!R0IaXpca2;C3OP%@Hl!pTwq1@Zg9>0npX!n z({>xQ6&I6kM@IkdqAn3_`RIBT{Hxs4FG0+$@50Qi5&gIeTqI|45x6pY4^ry-^^L-QE-ZKolBfI27Gq&!XhZZlC%IvcTi$BElS! zL4h?V({PCA1ARXLM!RXQ6UaHquB8&u&&T_~#UlpPnn*<6lfsS*n{=MI}UmY$#cC%TOum8-^3fLHoOps+w)8F zB%=50G!8(sRI5dw1u|iHH=K1WTmq@`XxtzRa;vpq4(U7SKp%-Hpy4tM;GFweA`vZ# zHh3=(>wCxM!!)b=!i2f+KH@;ieo+bs6+ExNoBu=WB2*z2X;&YKXu{2*8Q^O1szH66 z)#L*XyVHt7etYBCF@f~#P;{1vj$VeqGR!xsR3 z_3}*sjaywi0M9q~+Ak2h1)mKdNS)qzUYNcd7QOI8lkW&5;k=cuMASZYCTR8d(ORAo zQHs~9Cj!}Dr3MowRbq`@hOLbH1K_so)h2;lxUU>W?vB0yQ9oI<;e;Nus8j8<$&BJH}pWy!Su|a=;rMM`$>-C0bn~Dq3Hf zA&}bp($-|3QRza6+XOolcId}52LX?pheLT+Q~wnb!{w&zUV;2@MZ;DiDpoZI0gby` z359pn%hP27nWp_o6|%$2V?E51YtsVq&h#|YqX9RKn*{Qwp2HOAdgCCdSSJdWK=o=l zqrOuh2U^zoOGG|W2mTaDcD}nUQ7nI|Oe~MJLd)rakre{jymV6lxm0#yJPGQU94`@# z`)Cc>qB(usRAS>4Fk2#uy_}8XeKGm)YZ#E#uBl|spH@KR`L@PEe zga9SE8&8G1e&M(gz7Jgm{xx<>n(6u)1{>uNFw1U#TpisuB}2)#T(1;{4=1jP}@&%P*nbZjUZpGBsM8QSjr+ z5}~4L6e-4#4;wv)kVvn^P+#(;87x4nv!D2rpn)TdNb^IJl_dUkZUU(@xRgK~0*)>t zOJv?b^_;uZ_8D~Z$DA|G0$G(kN>L(8{-}f^O6~arOOW21yw`x2y*J(ftXgvg&YNI* z%!90{@mG+DMr^Atgxa0t3x%<2M%-6{%$+l0u|zaO=?g9t2flcI6v(@)-D4!8t1ix% zsop8CKweqlJ&VcKcQZgD=0!E2y?M^VL&%kJkD(u5r$>+@(?5&)5Y@@=W)Yw`#F zK^hdhCXXsy!c`G2)SI8$Lek4#@<||rX|@e40C7-B$+2!@ z2Qq!cXH_z(`VcI7pW8>kEK5s81rxIu$rcQZ4YO8(D2Gf|CgP61FbkiD-6#QnyutG? z0ITIe_u#PgYh7Zoa+DLaUbp0eKrTMM;z)wDCPHy4S-q!IAg4w+gJw(LTzn3V9xdy+ zDUg@JE~aFw+(koB(9KpzwTm+wtVzIeiTxsW8)&u(3uYU+oVDAnKO6*^zj3qf62+XBrz2jHt@9VVt zcoEa`u2jI~7m*m@oo~t@8I*q<2vtw*n*aW=WGjwvV7I=hCoJmY7Bw0e_$Z28WZcxATbOE zE1eUF#Rmfr_#ES(a69s{VmTCteiM}-FpmDeVdcB!X;%RF=|jU-0DIbiRiMD87~}Z3 ztM^)gxM+Jqfm`Q#{|>lZ-kIGI$eyBjHHm2J+O>@WxwC7@XcFGN(nlit5y}muN=_u~ zkjA{(=ae8~FhtfL0?}cF5+9-@23gMN|A>$}$+2D%QR9hepTPgwqYZZ8&PLv#)C=t? 
zY2a$xZ;Kk69U6TVcKSCW%*oS&2B_Gs%7-BXw_V*X1uFN3EdaT*Il+WnTG1aAoPSaO zJv5@L>@e(M=aNeVQo55sqd-rYD+J2% zMl39vpEiYJ0nhDDhyBt?2L(-X^tj_JiRfY1aoD!cR>&Mi+S*^i?rPMvyC9Cv`vcs` z%$bV~;I_MO9fy|vzI@sO$h=;PVqCB`@HZ&8;U|+ww0wmeQ8W$~$mjF@e8~Lr+3w_Z z*g+7_c&og(0(oD%V=;ML`>-FePozGOchkpaLJ!)9f9Qcnx6f4;64C9tM%;17%)5qF zU(M!0BjCzsE38;4qiQMvOHOwl1=N)IfQ$=2c)F3guse}N+FtBRdQAN%k=o8y2;+^m z=vxA*KkcIr&t}UzLH={tWmv%v`t(8N3fVXB114lC;VSq?s&O75yQ&Q)dYI39cnB@t zy6C>(5YsDO+-qK@SlS(*WTTcw#w3YlG~IiH;oz!gVxn&(7Ws3xj<z5dxcJzm{_zT8*>9&QL?1XsnP`|7YdH0OIf}jnpXd#uD(oQ35DX}xl zU(Oo5Xfv&$d!6Z4OSZ>{X0i%eO?R{M(awpiLWAv~>sdZ+(`RLD&luWh#d2sdOS5Oi z6WDg+=-#Yi!P0@W!;h84hpLUGJ6V4UX)(=XO{~J6X1WE@ z)wHBfuqcey1kin~o$j+8-`B>Eu8p@H(TnafobO8OoyHN?0WE9UUJ+}a5W$+gXe-U9 z5}GlP7VFW2Y}+DMG5~IBC+#vA$O^Rrm&>tgIc>M4bQh~}HFIR?G<|}UHEm+)GwC)~ z)!TG5En;1wMeG1OFra?~T}M-uSs~rrA41nmQ>|!=HQdhzg}!VrtzvCU%o1p+3u~mM zOfoen*vXStvaOyp+nM&T8rHyC6|`9v>!J0mjqROIH+!=k-n7}9Wzc4$F>sA8cbZFE zZP+GSLfZ#etjg_!%L3FeJ%o%WFO+)oqZZaz|GIoP&^-PcNZA+`7SnA;A zvsumbWV(kIvb2HbY^^n`P)cU)i(t;mMJrhmt=F~Ja~|!fwkinXSKgPFnJ;5&!&sRK z%V(>5K>%up>$<5ZxecAbI%y6|quZ?%Xa%ccjjY|rlV%O}ik$4k3KSCj=F87bW?Or) zvT=d4SXGo6>##PJ_lKZ0Ky2GUh#9Py?P9gGz#&E)Zv!{j1517FiWCSV%zeNpn93rR zQOPN+DQLD6%VkBZ%#C%ZgZ%2$ST5bdwu0~);7<0^oPIPL&S;|Bm93%o2G+w4&;qu7 z1Z|&B%ZIT1!NXXQCChWCWsCLLhQTSzjFZe)ur@o|Xu}Rpo}oFJ?nnYHuQ8&{Y^SFk z>+H*Fmd_lV$Qrfc2D7HAY!}>QF(|Ux0EF3UY|r-7vZbmTw3cQ%4uz34O2ff2CDt~K z7SJ^#X*NwC?xCEVz?xYd+X)%vM_brxx?wbJAMeUqL2w%q=w`Mn(F&%SwX>2DCWF{P zU)ITX>(Na#kEXKKY`q#?$GTZP+p|J;IoknumBFfm2C_~^7-l;ww4vP~x`A#caCXf+ zwubFt#dJeBYZ^iKC=X16i0taC?Lvzzhe1p$Aw-Si+15C@zyY*0Q7L$Mgw8@3QpTL# zv4bZh(r(+RIMpQOq1v=%N{}zDq8XFfZuOv{bdS~G(5UgjEZu0B<^Wc^B)%_ep-r@U zg$889hT)N3tR*DHm-fIt9i%zKCn&_x4W2BI6+*zuX(uh{HJN5E>_yjwLFgLjmWY5j zwNU7YmBh1ZAG%qSt#+iDqgdTaP;M$nT{6^(6^%>wHj&Yw1rtEzRsL-4RMrY{+!sMx zooNoOvkqt5=Jd6Yp{Wq>-L|Ypkv7=4uue;D54wA-9j#5W(Sa5{!yW0qWv(<&jjiuR zq2BF`pa(!QU9;I1+O5he-B=rKw}bRw3ZbGf#mbyh8m#o?}4!vfqWZiMSm#e*-*CjLelo=FB=4EZGdQ0 z#l*luvThk`w55A2;jU7l&x5RLILolLHKFSkDAKCC(RJg?H 
zbT2E`l^MUNH*FtFs}{g4R4NbLWOGxA25+^(_r<(#t^m}&e%cQ2GP|c zp%P^bV+D%|+cB8+$k@?64tlmCmJUg^iRS1l%d;}w;ZXDnAk^DvmmH*8rJ%Y5F0x;a z?w$Y>wV54^r~4Aa!wlF?=OnjP!7#Eewri!$*>FCr#d}}@&TwOyUPGY&>Nya$bx`;=*yyvpGkeeDQ_x1+v_TDJ?u6~p zW%(L?X{tX2ch4dNc2G5N0nLRmZ-X8n06XQP?W}3t;^9OOM3p)ri1mmj!>HCnt!Q2t zXfNf^i+08?p~d!r13})f&TTb<<)jP>)H;~Y4M|cUp*prB6xNqrbT{2IkmktJ))){# zw+}0U6@H6;)VOg8aWiDXnPiCIHIrr;Y16_WShVugwP-o4&a5(IC|qt0ZBR*Bs>V`B zCd{=M<DFOvpTT%NwjD~%ekTY>j~YE-K3}9rQxoGr$c@l>nJ(R6A_sEZh)J** zN^vgBH=v#REJK-XO-hzBv$g62wJ|->6|@Zr5% zV>DqcEt%jkLJlO;(i?`<$@1*kTG}*VIcrq1vjXYwW(Nlwg|R#%)~Ld^SV7L!vTQ|` zstn3(q4^0jXxcattMQ;Uw0H^R%vv~OA1pj2!L(-D3aHWP<2?-E_BXODIWb+MsBHpW z7SRkS{;O$+8Cw%ZOX6wC3fSH>E3$U^NG6F&fm|)12YP$c?X037t6T)4X_o_CZg&{u zlT6nSp!uwncIu2|ooZv3rsLq2VI2Z!urpDP|pgpT(dlOVw%%%IBU~R9a z8_ca(otH7q)`m39p^YoqI@pr*B&}p^*6It!j0>ghUjD2=l2BCDeE%(c|QVcS*2nplIt_QkTi zz7Sjq>|=68uzGEyJM9x7KR1nmq+1)n_C<$U=}vP6;eno6i3L=XESf7%_j`}0tqWiu z0xGB3p0t#EsnKRTG+;uu&;zhDTm!P+HUQ+c0VKUH z(1%v7WKCmP`FK{{-&6s-osjWmeFm}O@B~&r$_93?l`(^0y@kcjca=3{LbuaGS|?Al zm+H{1v{@D;a&Q_1Wgn}b2OHBpeZ~)+63yxZRagyeAI55DP8kNOYg;mbcKKMcR$sr7 zELE1Rn?*B}d$R*(G*20lrI}UxE1QqjlhUO zF@j~WlBVfG{^dBr3NZ|pt1Td@PAF6*Qez=Ecj>S$SmSbGqcTi~N+wQ^AL|#LC=~@e z{*7TEvjQmQY}87j|)P*$g80-N3z1p^3Jy9lzZu2+(V z_E=URLt7Tj3g&mbccK#86COWsCarN-iirb>HPeI2EG@(h`rJ)7K?amUBzLhU+S6w~ zZPSA4v5)1=4_-NdmBFSM7SrZ&x(+F3^8;b3%5mXj+k8dv=+M=NR?LEBsS$=)!M2z> z(k;+kg`PH4vW-&WVIlC0JKROA94{W>2amUQf-O9k@=v^Yq`NeH^N(37R`HLIj`a`G z@m?Aq8WD$&*nUexBLckR1Eb=iW4#lmDu`7zzdfJF!I{3nf#Ol$nkZN%#Rty+=2H-> z@dsmFv3iJ#!=XX(Vhv|I0ls92N83qdh{t64`$tDa1p3E^!UMd5V{AZRY+!(OC_cxJ z)p8M!ix-b~5l?W3-iyfV_P_fr|37~XDv)J%0GIn?EFyx~ADSsl*Jp6_GADG1R1niY4 zo{avo_SFNBc67`kq<7)n3Qpx->d0xylO`db4wF1brpq*ML;iGUOCvIF`5txTDZjEY z$g-X3O2|K6NUcE*$b4piJpJlR2Qp87xfgQ#;UV3~L-w;MvLR^WY-F!K@w&+HLDqbH zdWU)~L)&g&S2eO{Pl6?~;G;|m@<@}`ZRCi0#{lG&-QHJ_@^|BAA=8T7`M4(>Vmz*H zbEQf9K^EhL#~&F8Z_MViO6rsyr7zkZd|4e|I_Pn9xc zVNKa0YJBl*ePvgS=d{yBB_qM5gvui$G3IVhYG-uU2tA{Q9(p>tyA-I}6dTl;H1=l(+f$ 
zBXYLkhkZ!T38CkajT$RWk0oG}@Q_G2j@hHUx0gzIOgLX#uf-7>x1 zkUGJdYuIc1?XlhqI7LRMDMn6EtJuKn=?D9AL(2gD+=ZuVWc-@7(qF+B3 zX`&Xn6?r@Gq6f0-b&ou9Qgk4n=h4z(JpQ_?)$-84>B0W}NLiPxhsclXE+B+w^@kgf zRu7xpkxNd7Jx0d45BLe`b}X0cv2^YsAGGfs-Ok5hRCuo(?fyX?Gmytk`}6w`*DK`x zUkHBB>xkc>IzA3jOcd8`h^9Ty_s2w^&rh0NH_xA=yB_g=+7@_m9scQgZawxp?ye^9 zx8r04&$nT+7ha;>_p1DSI($W8Q)J1+@;rrk5JeJdP zC_(>}56^j?8+*m^xD3c`$w&Vfk6}E&t>=&C^Vzug%op^l?r2p+I=zqEgd8nsY(PGH zqB$Koq@#Tna<2HtcgR2HE>c6PZ;GjQ;*5KiVR% z{CI=w>9^IMPG~p2(BR{6Jar)z?UNVEdEB=)7x8%rBRxExJ9>ZO z?2|T-&xijZ_oHafI2p?GD6~vch4xp;Rj!{o+v8rKZEBsy``1cx=Qh9k#YjpV4zli|}~8V9V?4#KmofXy=^U z#N(nnDtH3giAmnPj@m!l$m3ppKhg;O2e$qcjC5~K=KXzI)|Sk zvd8lttfHKI`z7=Hw0N`o9QxCfDQ`cf#QHb1OKy+mc9{1Gu8Y^zn&)Om@il_D<&CNC!BkvVd$s;=|Gk9J3xhvQl?OM}T?oYd-_&eG+ZfWv%5xZYJ zLEGE=$wp*{nQ{qowzDGQko0;J{FBfM@ui44Ah_Q`W4 zd|t+cPvp9~<%idkXiv4a zU5E@CR_K5{^RC|mq`X%CTjZhPgU=$B{Z*=vN3<{U`z=~oH4^QFB&iih-Ps0d$e+%v zKZCrgYwL@2^q$&@ywKjn@82kNlr!38-M#K16aDVCB7fBgKZ#V|5i<#S`20K<T z=fTP9Jgy&+=2srCeR(pPc-|jV%q}6nhjZL;LFIq_4>1GZI%KFG>|9A)l>Y$NQa? zbGHxLPd;1ob}O~=dA-U$E-glXY^k3b^6}m0JbsSqsz0Iq>fyG0q(>l8MM{s|{}fq$ z@5K=00+Hz$6)f%Nbn#Pj0S z>-SB*!^acd0DoNqAMxjTyiQ(@n8}1K12<~ z^WP=Sc#ib0JVueZk3+hU=ECMEB>9}l=V{{7M=fYC_+iu_q>pK!}9n%4RUzK{T-DnFXQ>Ihfl9W z-YGZZ`JniA_B6Ej)VZl6O|*Qik@{bB?;tn4alVS2t9R@zQuU>m3UYBDCkq@}O zzI^_M4*d4KYQ}ax#PcrndL===H$38tyngt@PUOg6Ja~V_#$}OcC$v*OKCOw3eBS%j zZQ}9y;l{T*n{n5$4bStB{m6AI6d3cou}M5P1^vf#l`WB17Y~v`o>`N!5P4#Xt~=7O zUmlOYuyB7e+9yw%^8C}(H01GpIy-~ww$HV7rFh)3 zcbUh! 
zq}%k@zaVF4HgG-Hn~@&0#og<-A%C8|k>`zzX_qqEMxlXv$d8A=^8RlJu%l>8zgN46 zob-7ukAJ(r^aQk9yhGHH)(-3dvOQ)C&yz#i**uSK+DxfJ|2Ca-dy)N%G>VYMJ?cEJ ze=gaWz#N{ip9BMVl%`+|JF`3j%kmR}a{L;LcyeY~B=5HasZab5l% z^oOw3eEw$dSNVwc*n;)E{r1B0Z*MTQ|7)Pv z_%a69?=2U#eERMF;K!eT9?t+l@4NB*^;UTD|7<||3V6k{n}Zkhf~(=rKVMN@(EaX; zI{f8nTwrV{yg^#)6Gyq)#Lp?oRV}q&shI%H3M!E)5O{VEhHP$mR z(AS%4U|?i8&B$n~zVTEXWwVBOnI?=7f1K>3aIk;<%eC*{sPlyj5B~M}&wt1E_x9hv zLgxwgSKBpeu;W--w)`o z*XV!m;Cof0lK%U1N&Wi{zP$tgd+YBt`0uUz8~jIW_$9=@&&Kz98t~s2hHq8>*7{$n z{d@E8CmP23-DjZT_ZR=aOVG(qR(8_A+KiH({8wwfK7sXL9LWU)TPWxgKhTbs{PV`& UY+Og2>_oC?$-#ek{?Id52w|CI$7JYaH?upjC!Ei=A~P&$nkDvK^6 zzc(QEm=SN+#sU{dhpgFHNB5L^yfWda&<1=L`@~|N&to0FQaHHr^Vi#FCxaishO{%X zi(y|_WaRb+JZ^qyugP1MFUZL1xnoE0RrBNNJ?dN3p@L$@6UXh#(AFnH@>@O37_qG* zDhAYcE`GpS86efIa!q>^T*fLY;TUT$t z`6N-WT1#h>Lp#cBiuYY_h=uilcs_NVX5(6DM9Q=lw>pTv`~LMwObWtUE}BPss;Dnq z5BG_M8SOhm;)rHL|99QFlS&y3+&tpnR-`zrfP16@}<1QW0RuY*zsv_HS<-}(FVZy`<5F2a%6^@%4VN*`e%-@HZb zW@GzYRy3oo)ykDxrCP04jJi?MwX&vZ^|Dqi>1BP-Fe-Z8sMWybm*?zKK~4u&%Z!2H z;N+^g&B7m{T0F;5yc)q=2%DFwJi|w_9;H!(ln>Fo2-7U2H_D{OVKc;(l@ESqD2QQ{ zKz*khA%d47f?0;5aCO*QA$T^f>i;", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": "<|eot_id|>", + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-188/tokenizer_config.json b/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-188/tokenizer_config.json new file mode 100644 index 0000000..69b6ed0 --- /dev/null +++ b/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-188/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": 
"<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": 
"<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": 
"<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": 
"<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": 
"<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizerFast", + "unk_token": null +} diff --git a/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-188/trainer_state.json b/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-188/trainer_state.json new file mode 100644 index 0000000..c18bca2 --- /dev/null +++ b/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-188/trainer_state.json @@ -0,0 +1,192 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.0, + "eval_steps": 500, + "global_step": 188, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + 
"log_history": [ + { + "epoch": 0.21621621621621623, + "grad_norm": 2.652937412261963, + "learning_rate": 1.8e-05, + "loss": 3.1648, + "step": 10 + }, + { + "epoch": 0.43243243243243246, + "grad_norm": 1.7348365783691406, + "learning_rate": 1.898876404494382e-05, + "loss": 2.8579, + "step": 20 + }, + { + "epoch": 0.6486486486486487, + "grad_norm": 1.5895073413848877, + "learning_rate": 1.7865168539325843e-05, + "loss": 2.4276, + "step": 30 + }, + { + "epoch": 0.8648648648648649, + "grad_norm": 1.8073664903640747, + "learning_rate": 1.6741573033707868e-05, + "loss": 2.0007, + "step": 40 + }, + { + "epoch": 1.0, + "eval_loss": 1.4169334173202515, + "eval_runtime": 6.9404, + "eval_samples_per_second": 11.959, + "eval_steps_per_second": 3.026, + "step": 47 + }, + { + "epoch": 1.0648648648648649, + "grad_norm": 2.063204526901245, + "learning_rate": 1.561797752808989e-05, + "loss": 1.5398, + "step": 50 + }, + { + "epoch": 1.281081081081081, + "grad_norm": 2.603747606277466, + "learning_rate": 1.4494382022471912e-05, + "loss": 1.0425, + "step": 60 + }, + { + "epoch": 1.4972972972972973, + "grad_norm": 1.8809202909469604, + "learning_rate": 1.3370786516853933e-05, + "loss": 0.5659, + "step": 70 + }, + { + "epoch": 1.7135135135135136, + "grad_norm": 0.9672934412956238, + "learning_rate": 1.2247191011235957e-05, + "loss": 0.2295, + "step": 80 + }, + { + "epoch": 1.9297297297297298, + "grad_norm": 0.29672369360923767, + "learning_rate": 1.1123595505617979e-05, + "loss": 0.088, + "step": 90 + }, + { + "epoch": 2.0, + "eval_loss": 0.06064866483211517, + "eval_runtime": 6.7219, + "eval_samples_per_second": 12.348, + "eval_steps_per_second": 3.124, + "step": 94 + }, + { + "epoch": 2.1297297297297297, + "grad_norm": 0.11157097667455673, + "learning_rate": 1e-05, + "loss": 0.0609, + "step": 100 + }, + { + "epoch": 2.345945945945946, + "grad_norm": 0.07702265679836273, + "learning_rate": 8.876404494382023e-06, + "loss": 0.0561, + "step": 110 + }, + { + "epoch": 2.562162162162162, + 
"grad_norm": 0.0633946880698204, + "learning_rate": 7.752808988764046e-06, + "loss": 0.0544, + "step": 120 + }, + { + "epoch": 2.7783783783783784, + "grad_norm": 0.05974782258272171, + "learning_rate": 6.629213483146067e-06, + "loss": 0.0535, + "step": 130 + }, + { + "epoch": 2.9945945945945946, + "grad_norm": 0.05799481272697449, + "learning_rate": 5.50561797752809e-06, + "loss": 0.053, + "step": 140 + }, + { + "epoch": 3.0, + "eval_loss": 0.05265544354915619, + "eval_runtime": 6.6822, + "eval_samples_per_second": 12.421, + "eval_steps_per_second": 3.143, + "step": 141 + }, + { + "epoch": 3.1945945945945944, + "grad_norm": 0.054419711232185364, + "learning_rate": 4.382022471910113e-06, + "loss": 0.0526, + "step": 150 + }, + { + "epoch": 3.410810810810811, + "grad_norm": 0.05400639399886131, + "learning_rate": 3.258426966292135e-06, + "loss": 0.0524, + "step": 160 + }, + { + "epoch": 3.627027027027027, + "grad_norm": 0.05625651776790619, + "learning_rate": 2.1348314606741574e-06, + "loss": 0.0522, + "step": 170 + }, + { + "epoch": 3.8432432432432435, + "grad_norm": 0.05460618808865547, + "learning_rate": 1.01123595505618e-06, + "loss": 0.0521, + "step": 180 + }, + { + "epoch": 4.0, + "eval_loss": 0.05196535214781761, + "eval_runtime": 6.6806, + "eval_samples_per_second": 12.424, + "eval_steps_per_second": 3.143, + "step": 188 + } + ], + "logging_steps": 10, + "max_steps": 188, + "num_input_tokens_seen": 0, + "num_train_epochs": 4, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 8939337236348928.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-188/training_args.bin b/src/models/llm/distilled-1b-robot-router/checkpoints/checkpoint-188/training_args.bin 
new file mode 100644 index 0000000000000000000000000000000000000000..8d76bf208cf745ccc85c8950c5b91c9f09205fea GIT binary patch literal 6545 zcmcIpX_OpQ749UEgl6Buj)(|hnaq~VBrF*TFawO~43mH*7|ZIa*WJZbSJka$CLIz1 zVPIMjcM(+FcLg^@+!a>@6kJhU05=dt&*2%81_!YNWq{7Kp)1sZUc%@OqMnRf{X<|BVL`#gzFwk2I>Z&&gRb)9q z)O&7Vr*-8gac`L=_RPe1m$-FzyOp?s-&+qHU6zGuncgFAsK2c@M=W10S~rY(<_&55>n9Y)G=?Sz&$ zjeRkJ-)WuwUyS?EL(|PGh=tVqW#DBFpiu*NA&BM zasZ|ro=vHS>8>&kEE$J&yzgFrP)W3?!aFY9Vp=x*lX~I|*g=Ic9gHnlge+a){UPIv z#hCIQ-m@BX=y*YvBte9;sCt+*OxuF%GLb9B|0UM;lobgG+1_;6*!rjcdB@yiwZ9wt zV80pVR%?KgvLW&Cv5{-HKQeaPAy>#=mSfe;vP!IeI$~_($$R;^Z~mJBI&y5}sej!6 z(2DC%6?w7-2Xei@r=!N!pVzqY(Y_l_7e(sUHj`~&`fxQJJvQ<;D1gT}0RQ-DJ+rI0 zHl|}>k-hMOS7{p^d*Q$F7zbj|i_91&by5$WO&X!1=RFgOHs^Dyp`;CEMfEf^VM8~d7mkhGbu&M=biUJCo;Y3pT@ROq?3MJLNF?8&~OQ^Y7N19(>ODvahRI(5$2oAnH;Ri+hY zfYXd^82LXkKyQ@#aqrn-)IVSj4E78T_4EyxIoD86uBa%~3hGrg9jDqRy|dIeN&Ut_ z9lBTCC{Dz)G@xlAKb2lH2s^ImnD1M@>G~D3;(DM2TCLZ_)->Ziqmnhx2m(xP)5uT_ z>ks8=qbzXOgibNWfJdxHF;C5rYg>J=vtA1eGsbkTI{+Y&D z8K(52rm-J{@y~h~XEaA=Hx1;aLadZS8^%u&L~{NyW#pWVoA#BEyxdJhCzE-Fⅇ` zmHr_*Z%zbMLVO?9Tkv-{KFMG`W-P`IoBHW|@l(z6DopgY8#4#RIX)+&BD+2MVy#J1862REC2^4#z)pxie~LB! 
zxFiSen9V!R1kn`o7hMA7IG=R9hE)I-#Bjy3;cZNr*rpua|9IUePL_|z4)2i~Jz~tCY z0BBl?$-ddDrD@ZcNWdL)=;hjW)kp!9k?F>q3OMr2l0<=91&u3HgKEkR=@r^%mDmZ% z+Dc_%RQ_ewT^tD}3a!Yh$7E;jlXtdMIOOakwNjAKHMxSxOK{|Eo<&uCKzuc2?eqZE znnEhAA;~YOT@rGesc}3O%FK=m172<5Jhm5`Nnp-IR!EnZLz49xC<8hDDyNqn%nYR2JFRuhgtsMX`$bZ-9>cYEq*sH5Bx(EGTp&U8VEA zo_4hy6InNi5+HzHwUw^X2|5ZcS13}il_;vEl47x?qFM18Fa%2nCVYoF(sk%MS(Xjc z^*ZVS_X=VUb}GBryQ)bb)iq}t32<+M`aRjQG;GHXB-0`bW#o0pseRAGbf}Qni7dIVl-QqlOer!PLyVhd8%sJjl5FVRdf*2 zjXLDQG_G;D)Y3A&t|{~fenpe9OR{>shB78*TmwVBp*_}0)o)w?DhNv*R_CYXcBXz) z#=k0Rn4Dti&9n5DNqVdJk@ragWE#3j(-IGakvk32;?gRhx0NIZ^SI^BSdG}^!_du5 z2_YVi&>3*fqFXfLTOriW!5m<3*T!D>t(isGJ4#|v*SEdY0ZM!{YRKqdlq>t04>f0N0Dr*Xc&87_ew@)Y(mNWZ4{3grJnrx!kU1!ZKCDqBor=>y zb_?huI;tZJPPI`@(nqCGkd*teR(40zs?c!K$2Trtrt`ZrJ9bbHT~E;`bOIB(FqRCq zDuUZItIvPQ`HyZd8AsqGxeEk8fCbMiNkY*+|kh`gW*vXMhSgR zn^r>AtwL248$OzgJ2jlVjYg zSvSiueL+{Yx@v$glN{$?)Y{uMz%xPbiDp&-E1=;o={L+tF^-$4Z3%FqJcqt49_9i1 zilq8{P8K^~)e)SJO2rTbHhpb;k;J+RIAwFJe_bfy%pEE~0p;viPv4LUA*vib;cjv8 znL9wmO1u(Do68eq{x?Mcx0YG*_}0e4;;c>6>Ga#$-r#!9uI(1%J5r65D-L~E2m~S> zNsEzE^u4((yRsf8TKvRCzBYpj-IHf{)UcUM`o2a7ta(hAdmIPGXGs6F1 zb7>#=VuS8$%3jI$8?+FkW;nO4yzbZ$qMdAQC3RFFI~m_caPy6uK+bqp;}CRzCEDdHBme9 zy@BbQxCEl#LoPR7AcF*TNFFhNsC69okK={w=ueaMxGwwh(#j0I6zwuSp#d=Md39tq zpn!pt70G|rVT9J3F9&r`Q%n7+n4Z+V!m-#pe~Zo;p=tB`9Ca?r#J`kuMa?%TigKv` z6;Myht0u;mEq~(nn{L>S>plKV?Di(}n8F!dH<_zonmQsM+`)-+bi;7#LNmYEe3pOv zS&ciNdia_}I1Ybyuf}nO{%$q6e`fNsKz}~pc#dx9d2X5vt!4#AX6SKlX8E&w1r95$ z>r$ZHNK_D^UmV@A=DF>gn+M%N;wjc)_N>Q_V0iIo_c|O}81B-c<%7ZhMT5a2@BP57 z)r@X)}zfx-U1)qQ<^ zYlqeh4fm}ZTri$7U8J=j*)62g$9#v- zelCH*=Bt!ps#`>-15RkK$F~P}5wS2dCdQX8JuO>#?2?|A3pj*E5t XKMRFkuG{cvrr1-aFU2aJujl>;!?;C9 literal 0 HcmV?d00001 diff --git a/src/models/llm/distilled-1b-robot-router/exported/openvino_int4/chat_template.jinja b/src/models/llm/distilled-1b-robot-router/exported/openvino_int4/chat_template.jinja new file mode 100644 index 0000000..1bad6a0 --- /dev/null +++ 
b/src/models/llm/distilled-1b-robot-router/exported/openvino_int4/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' 
}} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if 
message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/src/models/llm/distilled-1b-robot-router/exported/openvino_int4/config.json b/src/models/llm/distilled-1b-robot-router/exported/openvino_int4/config.json new file mode 100644 index 0000000..ad4b683 --- /dev/null +++ b/src/models/llm/distilled-1b-robot-router/exported/openvino_int4/config.json @@ -0,0 +1,38 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "dtype": "bfloat16", + "eos_token_id": 128009, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 128004, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "transformers_version": "4.57.6", + "unsloth_fixed": true, + "unsloth_version": "2026.4.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/src/models/llm/distilled-1b-robot-router/exported/openvino_int4/generation_config.json b/src/models/llm/distilled-1b-robot-router/exported/openvino_int4/generation_config.json new file mode 100644 index 0000000..3ccb85e --- /dev/null +++ b/src/models/llm/distilled-1b-robot-router/exported/openvino_int4/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 128000, + "eos_token_id": 128009, + "pad_token_id": 
128004, + "transformers_version": "4.57.6" +} diff --git a/src/models/llm/distilled-1b-robot-router/exported/openvino_int4/openvino_detokenizer.xml b/src/models/llm/distilled-1b-robot-router/exported/openvino_int4/openvino_detokenizer.xml new file mode 100644 index 0000000..694c491 --- /dev/null +++ b/src/models/llm/distilled-1b-robot-router/exported/openvino_int4/openvino_detokenizer.xml @@ -0,0 +1,272 @@ + + + + + + + + -1 + -1 + + + + + + + + -1 + -1 + + + + + -1 + -1 + + + + + + + + 128256 + + + + + + + + 128256 + + + + + + + + 838768 + + + + + + + + 256 + + + + + + + + -1 + -1 + + + 128256 + + + 128256 + + + 838768 + + + 256 + + + + + -1 + + + -1 + + + -1 + + + -1 + + + -1 + + + + + + + -1 + + + -1 + + + -1 + + + -1 + + + + + -1 + + + -1 + + + + + + + + -1 + + + -1 + + + -1 + + + + + -1 + + + -1 + + + -1 + + + + + + + + 51 + + + + + + + + 2 + + + + + + + + -1 + + + -1 + + + -1 + + + 51 + + + 2 + + + + + -1 + + + -1 + + + -1 + + + + + + + -1 + + + -1 + + + -1 + + + + + -1 + + + + + + + -1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/models/llm/distilled-1b-robot-router/exported/openvino_int4/openvino_tokenizer.xml b/src/models/llm/distilled-1b-robot-router/exported/openvino_int4/openvino_tokenizer.xml new file mode 100644 index 0000000..3b3c41c --- /dev/null +++ b/src/models/llm/distilled-1b-robot-router/exported/openvino_int4/openvino_tokenizer.xml @@ -0,0 +1,764 @@ + + + + + + + + -1 + + + + + + + + + + + + + + + + + + + + 1 + + + + + + + + + + + + + -1 + + + + + -1 + + + -1 + + + -1 + + + + + + + + -1 + + + + + 1 + + + + + + + + + + + + + + + + + + + + 1 + + + + + + + + + + + + + + + + + + + + + + + + -1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -1 + + + + + + + + 8738 + + + + + + + -1 + + + -1 + + + -1 + + + -1 + + + -1 + + + 8738 + + + + + -1 + + + -1 + + + -1 + + + -1 + + + -1 + + + -1 + + + + + + + + 115 + + + + + + + + -1 + + + 
-1 + + + -1 + + + -1 + + + -1 + + + -1 + + + 115 + + + + + -1 + + + -1 + + + -1 + + + -1 + + + -1 + + + -1 + + + + + + + + 128256 + + + + + + + + 128256 + + + + + + + + 838768 + + + + + + + + 280147 + + + + + + + + 280147 + + + + + + + + 993515 + + + + + + + + 280147 + + + + + + + + 280147 + + + + + + + + 1036718 + + + + + + + + 256 + + + + + + + + 256 + + + + + + + + 7457 + + + + + + + + 256 + + + + + + + + -1 + + + -1 + + + -1 + + + -1 + + + -1 + + + 128256 + + + 128256 + + + 838768 + + + 280147 + + + 280147 + + + 993515 + + + 280147 + + + 280147 + + + 1036718 + + + 256 + + + 256 + + + 7457 + + + 256 + + + + + -1 + + + -1 + + + -1 + + + + + + + + + + + + + + 5 + + + + + + + + 13 + + + + + + + + -1 + + + -1 + + + -1 + + + + 5 + + + 13 + + + + + -1 + + + -1 + + + -1 + + + + + + + + 2 + + + + + + + + + 1 + + + -1 + + + -1 + + + -1 + + + 2 + + + + + -1 + + + -1 + + + -1 + + + -1 + + + -1 + + + -1 + + + + + + + + -1 + + + -1 + + + + + -1 + + + + + + + + + + + + + + -1 + + + + + + + + + + + + + + + + + + -1 + + + -1 + + + -1 + + + + + + + -1 + -1 + + + -1 + -1 + + + + + + + + -1 + -1 + + + + + -1 + -1 + + + + + + + + -1 + -1 + + + + + -1 + -1 + + + + + + + + -1 + -1 + + + + + -1 + -1 + + + + + + + -1 + -1 + + + + + + + -1 + -1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/models/llm/distilled-1b-robot-router/exported/openvino_int4/special_tokens_map.json b/src/models/llm/distilled-1b-robot-router/exported/openvino_int4/special_tokens_map.json new file mode 100644 index 0000000..3c1d049 --- /dev/null +++ b/src/models/llm/distilled-1b-robot-router/exported/openvino_int4/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": 
"<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/src/models/llm/distilled-1b-robot-router/exported/openvino_int4/tokenizer_config.json b/src/models/llm/distilled-1b-robot-router/exported/openvino_int4/tokenizer_config.json new file mode 100644 index 0000000..eccf822 --- /dev/null +++ b/src/models/llm/distilled-1b-robot-router/exported/openvino_int4/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": 
"<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": 
"<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": 
"<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": 
"<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": 
{}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "left", + "tokenizer_class": "PreTrainedTokenizerFast", + "unk_token": null +} diff --git a/src/models/llm/distilled-1b-robot-router/lora/README.md b/src/models/llm/distilled-1b-robot-router/lora/README.md new file mode 100644 index 0000000..0aaaa43 --- /dev/null +++ b/src/models/llm/distilled-1b-robot-router/lora/README.md @@ -0,0 +1,210 @@ +--- +base_model: unsloth/llama-3.2-1b-instruct-unsloth-bnb-4bit +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:unsloth/llama-3.2-1b-instruct-unsloth-bnb-4bit +- lora +- sft +- transformers +- trl +- unsloth +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. 
+ +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/src/models/llm/distilled-1b-robot-router/lora/adapter_config.json 
b/src/models/llm/distilled-1b-robot-router/lora/adapter_config.json new file mode 100644 index 0000000..a41be0c --- /dev/null +++ b/src/models/llm/distilled-1b-robot-router/lora/adapter_config.json @@ -0,0 +1,50 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "LlamaForCausalLM", + "parent_library": "transformers.models.llama.modeling_llama", + "unsloth_fixed": true + }, + "base_model_name_or_path": "unsloth/llama-3.2-1b-instruct-unsloth-bnb-4bit", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "q_proj", + "v_proj", + "o_proj", + "up_proj", + "down_proj", + "gate_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/src/models/llm/distilled-1b-robot-router/lora/chat_template.jinja b/src/models/llm/distilled-1b-robot-router/lora/chat_template.jinja new file mode 100644 index 0000000..1bad6a0 --- /dev/null +++ b/src/models/llm/distilled-1b-robot-router/lora/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now 
is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' 
}} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if 
message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/src/models/llm/distilled-1b-robot-router/lora/special_tokens_map.json b/src/models/llm/distilled-1b-robot-router/lora/special_tokens_map.json new file mode 100644 index 0000000..cc07dca --- /dev/null +++ b/src/models/llm/distilled-1b-robot-router/lora/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": "<|eot_id|>", + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/src/models/llm/distilled-1b-robot-router/lora/tokenizer_config.json b/src/models/llm/distilled-1b-robot-router/lora/tokenizer_config.json new file mode 100644 index 0000000..eccf822 --- /dev/null +++ b/src/models/llm/distilled-1b-robot-router/lora/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": 
"<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, 
+ "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": 
"<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": 
"<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": 
"<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "left", + "tokenizer_class": "PreTrainedTokenizerFast", + "unk_token": null +} diff --git a/src/models/llm/distilled-1b-robot-router/merged/chat_template.jinja b/src/models/llm/distilled-1b-robot-router/merged/chat_template.jinja new file mode 100644 index 0000000..1bad6a0 --- /dev/null +++ b/src/models/llm/distilled-1b-robot-router/merged/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. 
#} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' 
}} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/src/models/llm/distilled-1b-robot-router/merged/config.json b/src/models/llm/distilled-1b-robot-router/merged/config.json new file mode 100644 index 0000000..37a7d8a --- /dev/null +++ b/src/models/llm/distilled-1b-robot-router/merged/config.json @@ -0,0 +1,37 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "torch_dtype": "bfloat16", + "eos_token_id": 128009, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + 
"num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 128004, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "unsloth_version": "2026.4.4", + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/src/models/llm/distilled-1b-robot-router/merged/special_tokens_map.json b/src/models/llm/distilled-1b-robot-router/merged/special_tokens_map.json new file mode 100644 index 0000000..3c1d049 --- /dev/null +++ b/src/models/llm/distilled-1b-robot-router/merged/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/src/models/llm/distilled-1b-robot-router/merged/tokenizer_config.json b/src/models/llm/distilled-1b-robot-router/merged/tokenizer_config.json new file mode 100644 index 0000000..eccf822 --- /dev/null +++ b/src/models/llm/distilled-1b-robot-router/merged/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": 
"<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": 
"<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": 
"<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "left", + "tokenizer_class": "PreTrainedTokenizerFast", + "unk_token": null +} diff --git a/src/resources/openvino.genai-2026.1.0.0/.clang-format b/src/resources/openvino.genai-2026.1.0.0/.clang-format new file mode 100644 index 0000000..ebe747b --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.clang-format @@ -0,0 +1,28 @@ +BasedOnStyle: Google +IndentWidth: 4 +UseTab: Never +ColumnLimit: 120 + +Language: Cpp +Standard: Cpp11 + +AccessModifierOffset: -4 +AlignConsecutiveMacros: true +AllowAllArgumentsOnNextLine: false +AllowAllConstructorInitializersOnNextLine: false +AllowAllParametersOfDeclarationOnNextLine: false +AllowShortFunctionsOnASingleLine: Empty +AllowShortIfStatementsOnASingleLine: Never +AllowShortLambdasOnASingleLine: Empty +AllowShortLoopsOnASingleLine: false +AlwaysBreakBeforeMultilineStrings: false +BinPackArguments: false +BinPackParameters: false +CommentPragmas: '^#' 
+DerivePointerAlignment: false +FixNamespaceComments: true +IndentCaseLabels: false +IndentPPDirectives: AfterHash +ForEachMacros: + - foreach + - FOREACH_CHILD diff --git a/src/resources/openvino.genai-2026.1.0.0/.gitattributes b/src/resources/openvino.genai-2026.1.0.0/.gitattributes new file mode 100644 index 0000000..92abf5e --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.gitattributes @@ -0,0 +1,69 @@ +############################################################################### +# Set default behavior to automatically normalize line endings. +############################################################################### +* text=auto +############################################################################### +# Set default behavior for command prompt diff. +# +# This is needed for earlier builds of msysgit that do not have it on by +# default for csharp files. +# Note: This is only used by command line +############################################################################### +#*.cs diff=csharp +*.py text eol=lf +############################################################################### +# Set the merge driver for project and solution files +# +# Merging from the command prompt will add diff markers to the files if there +# are conflicts (Merging from VS is not affected by the settings below, in VS +# the diff markers are never inserted). Diff markers may cause the following +# file extensions to fail to load in VS. An alternative would be to treat +# these files as binary and thus will always conflict and require user +# intervention with every merge. 
To do so, just uncomment the entries below +############################################################################### +#*.sln merge=binary +#*.csproj merge=binary +#*.vbproj merge=binary +#*.vcxproj merge=binary +#*.vcproj merge=binary +#*.dbproj merge=binary +#*.fsproj merge=binary +#*.lsproj merge=binary +#*.wixproj merge=binary +#*.modelproj merge=binary +#*.sqlproj merge=binary +#*.wwaproj merge=binary +############################################################################### +# behavior for image files +# +# image files are treated as binary by default. +############################################################################### +#*.jpg binary +#*.png binary +#*.gif binary +############################################################################### +# diff behavior for common document formats +# +# Convert binary document formats to text before diffing them. This feature +# is only available from the command line. Turn it on by uncommenting the +# entries below. 
+############################################################################### +#*.doc diff=astextplain +#*.DOC diff=astextplain +#*.docx diff=astextplain +#*.DOCX diff=astextplain +#*.dot diff=astextplain +#*.DOT diff=astextplain +#*.pdf diff=astextplain +#*.PDF diff=astextplain +#*.rtf diff=astextplain +#*.RTF diff=astextplain +*.PNG filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text +*.jpg filter=lfs diff=lfs merge=lfs -text +*.gif filter=lfs diff=lfs merge=lfs -text +*.vsdx filter=lfs diff=lfs merge=lfs -text +*.bmp filter=lfs diff=lfs merge=lfs -text +*.svg filter=lfs diff=lfs merge=lfs -text + +.github/workflows/*.lock.yml linguist-generated=true merge=ours diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/CODEOWNERS b/src/resources/openvino.genai-2026.1.0.0/.github/CODEOWNERS new file mode 100644 index 0000000..7d267f2 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/CODEOWNERS @@ -0,0 +1,62 @@ +# This file defines code owners for the OpenVINO GenAI repository + +# Default owners +* @Wovchena @as-suvorov + +# CI +/.github/ @akashchi + +# JavaScript +/src/js/ @Retribution98 @yatarkan +/samples/js/ @Retribution98 @yatarkan + +# samples +/samples/cpp/image_generation/ @likholat +/samples/cpp/video_generation/ @likholat @sgonorov +/samples/cpp/speech_generation/ @rkazants +/samples/cpp/whisper_speech_recognition/ @as-suvorov +/samples/cpp/rag/ @as-suvorov +/samples/cpp/visual_language_chat/ @yatarkan + +/samples/python/image_generation/ @likholat +/samples/python/video_generation/ @likholat @sgonorov +/samples/python/speech_generation/ @rkazants +/samples/python/whisper_speech_recognition/ @as-suvorov +/samples/python/rag/ @as-suvorov +/samples/python/visual_language_chat/ @yatarkan + +# Documentation +/site/ @yatarkan + +# C++ +/src/cpp/include/openvino/genai/image_generation/ @likholat +/src/cpp/include/openvino/genai/video_generation/ @likholat @sgonorov +/src/cpp/include/openvino/genai/rag/ 
@as-suvorov +/src/cpp/include/openvino/genai/speculative_decoding/ @sbalandi +/src/cpp/include/openvino/genai/speech_generation/ @rkazants @as-suvorov +/src/cpp/include/openvino/genai/visual_language/ @yatarkan + +/src/cpp/src/continuous_batching/ @popovaan +/src/cpp/src/gguf_utils/ @TianmengChen +/src/cpp/src/image_generation/ @likholat +/src/cpp/src/video_generation/ @likholat @sgonorov +/src/cpp/src/llm/ @pavel-esir +/src/cpp/src/lora/ @likholat +/src/cpp/src/prompt_lookup/ @sgonorov +/src/cpp/src/rag/ @as-suvorov +/src/cpp/src/sampling/ @apaniukov @pavel-esir +/src/cpp/src/speculative_decoding/ @sbalandi +/src/cpp/src/speech_generation/ @rkazants @as-suvorov +/src/cpp/src/tokenizer/ @apaniukov @pavel-esir +/src/cpp/src/visual_language/ @yatarkan +/src/cpp/src/whisper/ @as-suvorov + +# Tests +/tests/ @sgonorov +/tests/python_tests/test_image_generation_multi_call.py @likholat + +# Python bindings +/src/python/py_image_generation_pipelines.cpp @likholat + +# GenAI Tools +/tools/ @sbalandi diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/CONTRIBUTING.md b/src/resources/openvino.genai-2026.1.0.0/.github/CONTRIBUTING.md new file mode 100644 index 0000000..f235f6a --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/CONTRIBUTING.md @@ -0,0 +1,17 @@ +# Contributing +1. See [pull_request_template.md](/.github/pull_request_template.md) for pull request (PR) requirements. +2. See [BUILD.md](/src/docs/BUILD.md) for instructions on how to build `OpenVINO™ GenAI`. +3. Code style is determined by the file the change is made in. If ambiguous, look into the neighboring files of the same type. In case of contradiction, pick any of the options but stay consistent in your choice. +4. Don't push branches directly to the upstream repository. Once a branch is pushed to upstream, non-admins lose push access to it, preventing you from updating your changes. Instead, push to your fork and open PRs from there. +5. 
Your PR will be tested after one of the developers approves the test run. +6. Branching policy is aligned with [OpenVINO's policy](https://github.com/openvinotoolkit/openvino/blob/71ee9cc42ec63b3affb2801dbbc4a77e6d8003f6/CONTRIBUTING_PR.md#branching-policy). +7. Contributions made with the use of AI must comply with [OpenVINO's AI Usage Policy](https://github.com/openvinotoolkit/openvino/blob/c4f4325c57977c684184e758449d1f8825ebbfd7/AI_USAGE_POLICY.md). + +# New feature contribution +For a PR with new features to be accepted, every item in the following list MUST be completed. Otherwise, the PR will be rejected. +1. Proof of Concept (PoC) pipeline including model preparation step using `optimum-intel` and `GenAI` inference implementation. +2. Pass architectural review with + 1. API proposal for `optimum-intel` and `GenAI` + 2. Working PoC + 3. Command line arguments for model conversion with `optimum-cli export openvino` + 4. `GenAI` sample diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/actions/build_app/action.yml b/src/resources/openvino.genai-2026.1.0.0/.github/actions/build_app/action.yml new file mode 100644 index 0000000..8656eb2 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/actions/build_app/action.yml @@ -0,0 +1,23 @@ +name: 'Build App' +inputs: + ov_dir: + description: 'Directory where OpenVINO is installed' + default: './ov' + required: false + build_dir: + description: 'Directory where the app is built' + default: './build' + required: false + build_target: + description: 'Target to build' + default: '' + required: false +runs: + using: "composite" + steps: + - name: Build app + shell: bash + run: | + source ${{ inputs.ov_dir }}/setupvars.sh + cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ${{ inputs.build_dir }} + cmake --build ${{ inputs.build_dir }} --config Release ${{ inputs.build_target && format('--target {0}', inputs.build_target) || '' }} -j diff --git 
a/src/resources/openvino.genai-2026.1.0.0/.github/actions/install_openvino/action.yml b/src/resources/openvino.genai-2026.1.0.0/.github/actions/install_openvino/action.yml new file mode 100644 index 0000000..79d64ea --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/actions/install_openvino/action.yml @@ -0,0 +1,18 @@ +name: 'Install OpenVINO' +inputs: + ov_link: + description: 'URL to download OpenVINO' + required: true + ov_dir: + description: 'Directory to install OpenVINO' + default: './ov' + required: false +runs: + using: "composite" + steps: + - name: 'Install OpenVINO' + shell: bash + run: | + mkdir ${{ inputs.ov_dir }} + curl ${{ inputs.ov_link }} | tar --directory ${{ inputs.ov_dir }} --strip-components 1 -xz + sudo ${{ inputs.ov_dir }}/install_dependencies/install_openvino_dependencies.sh diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/actions/install_python_deps/action.yml b/src/resources/openvino.genai-2026.1.0.0/.github/actions/install_python_deps/action.yml new file mode 100644 index 0000000..3b42f5f --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/actions/install_python_deps/action.yml @@ -0,0 +1,15 @@ +name: 'Install Python Dependencies' +inputs: + ov_dir: + description: 'Directory where OpenVINO is installed' + default: './ov' + required: false +runs: + using: "composite" + steps: + - name: Install Python dependencies + shell: bash + run: | + source ${{ inputs.ov_dir }}/setupvars.sh + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install -r ./samples/requirements.txt diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/actions/install_wheel/.node-version b/src/resources/openvino.genai-2026.1.0.0/.github/actions/install_wheel/.node-version new file mode 100644 index 0000000..1cc433a --- /dev/null +++ 
b/src/resources/openvino.genai-2026.1.0.0/.github/actions/install_wheel/.node-version @@ -0,0 +1 @@ +20.6.0 diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/actions/install_wheel/.prettierignore b/src/resources/openvino.genai-2026.1.0.0/.github/actions/install_wheel/.prettierignore new file mode 100644 index 0000000..2d0c064 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/actions/install_wheel/.prettierignore @@ -0,0 +1,3 @@ +dist/ +node_modules/ +coverage/ diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/actions/install_wheel/.prettierrc.json b/src/resources/openvino.genai-2026.1.0.0/.github/actions/install_wheel/.prettierrc.json new file mode 100644 index 0000000..d068818 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/actions/install_wheel/.prettierrc.json @@ -0,0 +1,16 @@ +{ + "printWidth": 80, + "tabWidth": 2, + "useTabs": false, + "semi": true, + "singleQuote": true, + "quoteProps": "as-needed", + "jsxSingleQuote": false, + "trailingComma": "none", + "bracketSpacing": true, + "bracketSameLine": true, + "arrowParens": "avoid", + "proseWrap": "always", + "htmlWhitespaceSensitivity": "css", + "endOfLine": "lf" +} diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/actions/install_wheel/action.yml b/src/resources/openvino.genai-2026.1.0.0/.github/actions/install_wheel/action.yml new file mode 100644 index 0000000..c29bfce --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/actions/install_wheel/action.yml @@ -0,0 +1,22 @@ +name: 'Install Python Packages with Local Wheels and Extras' +description: + 'Installs specified Python packages with support for local wheels and optional + extras.' 
+author: 'OpenVINO Developers' +inputs: + packages: + description: + "Semicolon-separated list of packages to install, e.g., + 'openvino;openvino_tokenizers[extra1,extra2]'" + required: true + requirements_files: + description: + "Semicolon-separated list of requirements.txt to install, e.g., + 'requirements.txt;requirements-dev.txt'" + required: false + local_wheel_dir: + description: 'Path to the directory containing local wheel files' + required: true +runs: + using: 'node20' + main: 'dist/index.js' diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/actions/install_wheel/package-lock.json b/src/resources/openvino.genai-2026.1.0.0/.github/actions/install_wheel/package-lock.json new file mode 100644 index 0000000..9603454 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/actions/install_wheel/package-lock.json @@ -0,0 +1,589 @@ +{ + "name": "install-wheel-action", + "version": "0.0.1", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "install-wheel-action", + "version": "0.0.1", + "license": "Apache-2.0", + "dependencies": { + "@actions/core": "^1.11.1", + "glob": "^11.1.0" + }, + "devDependencies": { + "@vercel/ncc": "^0.38.3", + "prettier": "^3.5.0" + }, + "engines": { + "node": ">=20" + } + }, + "node_modules/@actions/core": { + "version": "1.11.1", + "resolved": "https://registry.npmjs.org/@actions/core/-/core-1.11.1.tgz", + "integrity": "sha512-hXJCSrkwfA46Vd9Z3q4cpEpHB1rL5NG04+/rbqW9d3+CSvtB1tYe8UTpAlixa1vj0m/ULglfEK2UKxMGxCxv5A==", + "license": "MIT", + "dependencies": { + "@actions/exec": "^1.1.1", + "@actions/http-client": "^2.0.1" + } + }, + "node_modules/@actions/exec": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@actions/exec/-/exec-1.1.1.tgz", + "integrity": "sha512-+sCcHHbVdk93a0XT19ECtO/gIXoxvdsgQLzb2fE2/5sIZmWQuluYyjPQtrtTHdU1YzTZ7bAPN4sITq2xi1679w==", + "license": "MIT", + "dependencies": { + "@actions/io": "^1.0.1" + } + }, + "node_modules/@actions/http-client": { + 
"version": "2.2.3", + "resolved": "https://registry.npmjs.org/@actions/http-client/-/http-client-2.2.3.tgz", + "integrity": "sha512-mx8hyJi/hjFvbPokCg4uRd4ZX78t+YyRPtnKWwIl+RzNaVuFpQHfmlGVfsKEJN8LwTCvL+DfVgAM04XaHkm6bA==", + "license": "MIT", + "dependencies": { + "tunnel": "^0.0.6", + "undici": "^5.25.4" + } + }, + "node_modules/@actions/io": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/@actions/io/-/io-1.1.3.tgz", + "integrity": "sha512-wi9JjgKLYS7U/z8PPbco+PvTb/nRWjeoFlJ1Qer83k/3C5PHQi28hiVdeE2kHXmIL99mQFawx8qt/JPjZilJ8Q==", + "license": "MIT" + }, + "node_modules/@fastify/busboy": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/@fastify/busboy/-/busboy-2.1.1.tgz", + "integrity": "sha512-vBZP4NlzfOlerQTnba4aqZoMhE/a9HY7HRqoOPaETQcSQuWEIyZMHGfVu6w9wGtGK5fED5qRs2DteVCjOH60sA==", + "license": "MIT", + "engines": { + "node": ">=14" + } + }, + "node_modules/@isaacs/cliui": { + "version": "8.0.2", + "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz", + "integrity": "sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==", + "license": "ISC", + "dependencies": { + "string-width": "^5.1.2", + "string-width-cjs": "npm:string-width@^4.2.0", + "strip-ansi": "^7.0.1", + "strip-ansi-cjs": "npm:strip-ansi@^6.0.1", + "wrap-ansi": "^8.1.0", + "wrap-ansi-cjs": "npm:wrap-ansi@^7.0.0" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/@vercel/ncc": { + "version": "0.38.3", + "resolved": "https://registry.npmjs.org/@vercel/ncc/-/ncc-0.38.3.tgz", + "integrity": "sha512-rnK6hJBS6mwc+Bkab+PGPs9OiS0i/3kdTO+CkI8V0/VrW3vmz7O2Pxjw/owOlmo6PKEIxRSeZKv/kuL9itnpYA==", + "dev": true, + "license": "MIT", + "bin": { + "ncc": "dist/ncc/cli.js" + } + }, + "node_modules/ansi-regex": { + "version": "6.2.2", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.2.2.tgz", + "integrity": 
"sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg==", + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/ansi-regex?sponsor=1" + } + }, + "node_modules/ansi-styles": { + "version": "6.2.3", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.3.tgz", + "integrity": "sha512-4Dj6M28JB+oAH8kFkTLUo+a2jwOFkuqb3yucU0CANcRRUbxS0cP0nZYCGjcc3BNXwRIsUVmDGgzawme7zvJHvg==", + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/balanced-match": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-4.0.3.tgz", + "integrity": "sha512-1pHv8LX9CpKut1Zp4EXey7Z8OfH11ONNH6Dhi2WDUt31VVZFXZzKwXcysBgqSumFCmR+0dqjMK5v5JiFHzi0+g==", + "license": "MIT", + "engines": { + "node": "20 || >=22" + } + }, + "node_modules/brace-expansion": { + "version": "5.0.2", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.2.tgz", + "integrity": "sha512-Pdk8c9poy+YhOgVWw1JNN22/HcivgKWwpxKq04M/jTmHyCZn12WPJebZxdjSa5TmBqISrUSgNYU3eRORljfCCw==", + "license": "MIT", + "dependencies": { + "balanced-match": "^4.0.2" + }, + "engines": { + "node": "20 || >=22" + } + }, + "node_modules/color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "license": "MIT", + "dependencies": { + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" + } + }, + "node_modules/color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "license": "MIT" + }, + "node_modules/cross-spawn": { + 
"version": "7.0.6", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", + "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", + "license": "MIT", + "dependencies": { + "path-key": "^3.1.0", + "shebang-command": "^2.0.0", + "which": "^2.0.1" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/eastasianwidth": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/eastasianwidth/-/eastasianwidth-0.2.0.tgz", + "integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==", + "license": "MIT" + }, + "node_modules/emoji-regex": { + "version": "9.2.2", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz", + "integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==", + "license": "MIT" + }, + "node_modules/foreground-child": { + "version": "3.3.1", + "resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-3.3.1.tgz", + "integrity": "sha512-gIXjKqtFuWEgzFRJA9WCQeSJLZDjgJUOMCMzxtvFq/37KojM1BFGufqsCy0r4qSQmYLsZYMeyRqzIWOMup03sw==", + "license": "ISC", + "dependencies": { + "cross-spawn": "^7.0.6", + "signal-exit": "^4.0.1" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/glob": { + "version": "11.1.0", + "resolved": "https://registry.npmjs.org/glob/-/glob-11.1.0.tgz", + "integrity": "sha512-vuNwKSaKiqm7g0THUBu2x7ckSs3XJLXE+2ssL7/MfTGPLLcrJQ/4Uq1CjPTtO5cCIiRxqvN6Twy1qOwhL0Xjcw==", + "license": "BlueOak-1.0.0", + "dependencies": { + "foreground-child": "^3.3.1", + "jackspeak": "^4.1.1", + "minimatch": "^10.1.1", + "minipass": "^7.1.2", + "package-json-from-dist": "^1.0.0", + "path-scurry": "^2.0.0" + }, + "bin": { + "glob": "dist/esm/bin.mjs" + }, + "engines": { + "node": "20 || >=22" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" 
+ } + }, + "node_modules/is-fullwidth-code-point": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", + "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/isexe": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", + "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", + "license": "ISC" + }, + "node_modules/jackspeak": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/jackspeak/-/jackspeak-4.1.1.tgz", + "integrity": "sha512-zptv57P3GpL+O0I7VdMJNBZCu+BPHVQUk55Ft8/QCJjTVxrnJHuVuX/0Bl2A6/+2oyR/ZMEuFKwmzqqZ/U5nPQ==", + "license": "BlueOak-1.0.0", + "dependencies": { + "@isaacs/cliui": "^8.0.2" + }, + "engines": { + "node": "20 || >=22" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/lru-cache": { + "version": "11.0.2", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-11.0.2.tgz", + "integrity": "sha512-123qHRfJBmo2jXDbo/a5YOQrJoHF/GNQTLzQ5+IdK5pWpceK17yRc6ozlWd25FxvGKQbIUs91fDFkXmDHTKcyA==", + "license": "ISC", + "engines": { + "node": "20 || >=22" + } + }, + "node_modules/minimatch": { + "version": "10.2.4", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.4.tgz", + "integrity": "sha512-oRjTw/97aTBN0RHbYCdtF1MQfvusSIBQM0IZEgzl6426+8jSC0nF1a/GmnVLpfB9yyr6g6FTqWqiZVbxrtaCIg==", + "license": "BlueOak-1.0.0", + "dependencies": { + "brace-expansion": "^5.0.2" + }, + "engines": { + "node": "18 || 20 || >=22" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/minipass": { + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.2.tgz", + "integrity": 
"sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw==", + "license": "ISC", + "engines": { + "node": ">=16 || 14 >=14.17" + } + }, + "node_modules/package-json-from-dist": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/package-json-from-dist/-/package-json-from-dist-1.0.1.tgz", + "integrity": "sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==", + "license": "BlueOak-1.0.0" + }, + "node_modules/path-key": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", + "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/path-scurry": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-2.0.0.tgz", + "integrity": "sha512-ypGJsmGtdXUOeM5u93TyeIEfEhM6s+ljAhrk5vAvSx8uyY/02OvrZnA0YNGUrPXfpJMgI1ODd3nwz8Npx4O4cg==", + "license": "BlueOak-1.0.0", + "dependencies": { + "lru-cache": "^11.0.0", + "minipass": "^7.1.2" + }, + "engines": { + "node": "20 || >=22" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/prettier": { + "version": "3.5.0", + "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.5.0.tgz", + "integrity": "sha512-quyMrVt6svPS7CjQ9gKb3GLEX/rl3BCL2oa/QkNcXv4YNVBC9olt3s+H7ukto06q7B1Qz46PbrKLO34PR6vXcA==", + "dev": true, + "license": "MIT", + "bin": { + "prettier": "bin/prettier.cjs" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/prettier/prettier?sponsor=1" + } + }, + "node_modules/shebang-command": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", + "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", + "license": "MIT", + "dependencies": { + 
"shebang-regex": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/shebang-regex": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", + "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/signal-exit": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz", + "integrity": "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==", + "license": "ISC", + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/string-width": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-5.1.2.tgz", + "integrity": "sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA==", + "license": "MIT", + "dependencies": { + "eastasianwidth": "^0.2.0", + "emoji-regex": "^9.2.2", + "strip-ansi": "^7.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/string-width-cjs": { + "name": "string-width", + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "license": "MIT", + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/string-width-cjs/node_modules/ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "license": 
"MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/string-width-cjs/node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "license": "MIT" + }, + "node_modules/string-width-cjs/node_modules/strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-ansi": { + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.2.tgz", + "integrity": "sha512-gmBGslpoQJtgnMAvOVqGZpEz9dyoKTCzy2nfz/n8aIFhN/jCE/rCmcxabB6jOOHV+0WNnylOxaxBQPSvcWklhA==", + "license": "MIT", + "dependencies": { + "ansi-regex": "^6.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/strip-ansi?sponsor=1" + } + }, + "node_modules/strip-ansi-cjs": { + "name": "strip-ansi", + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-ansi-cjs/node_modules/ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/tunnel": { + "version": "0.0.6", + "resolved": 
"https://registry.npmjs.org/tunnel/-/tunnel-0.0.6.tgz", + "integrity": "sha512-1h/Lnq9yajKY2PEbBadPXj3VxsDDu844OnaAo52UVmIzIvwwtBPIuNvkjuzBlTWpfJyUbG3ez0KSBibQkj4ojg==", + "license": "MIT", + "engines": { + "node": ">=0.6.11 <=0.7.0 || >=0.7.3" + } + }, + "node_modules/undici": { + "version": "5.29.0", + "resolved": "https://registry.npmjs.org/undici/-/undici-5.29.0.tgz", + "integrity": "sha512-raqeBD6NQK4SkWhQzeYKd1KmIG6dllBOTt55Rmkt4HtI9mwdWtJljnrXjAFUBLTSN67HWrOIZ3EPF4kjUw80Bg==", + "license": "MIT", + "dependencies": { + "@fastify/busboy": "^2.0.0" + }, + "engines": { + "node": ">=14.0" + } + }, + "node_modules/which": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", + "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", + "license": "ISC", + "dependencies": { + "isexe": "^2.0.0" + }, + "bin": { + "node-which": "bin/node-which" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/wrap-ansi": { + "version": "8.1.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-8.1.0.tgz", + "integrity": "sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ==", + "license": "MIT", + "dependencies": { + "ansi-styles": "^6.1.0", + "string-width": "^5.0.1", + "strip-ansi": "^7.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, + "node_modules/wrap-ansi-cjs": { + "name": "wrap-ansi", + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", + "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", + "license": "MIT", + "dependencies": { + "ansi-styles": "^4.0.0", + "string-width": "^4.1.0", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, + 
"node_modules/wrap-ansi-cjs/node_modules/ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/wrap-ansi-cjs/node_modules/ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "license": "MIT", + "dependencies": { + "color-convert": "^2.0.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/wrap-ansi-cjs/node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "license": "MIT" + }, + "node_modules/wrap-ansi-cjs/node_modules/string-width": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "license": "MIT", + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/wrap-ansi-cjs/node_modules/strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + } + } +} diff --git 
a/src/resources/openvino.genai-2026.1.0.0/.github/actions/install_wheel/package.json b/src/resources/openvino.genai-2026.1.0.0/.github/actions/install_wheel/package.json new file mode 100644 index 0000000..13eca98 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/actions/install_wheel/package.json @@ -0,0 +1,33 @@ +{ + "name": "install-wheel-action", + "description": "Action to install local python wheels together with their dependencies", + "version": "0.0.1", + "author": "OpenVINO Developers", + "private": true, + "keywords": [ + "GitHub", + "Actions", + "JavaScript" + ], + "engines": { + "node": ">=20" + }, + "main": "dist/index.js", + "scripts": { + "bundle": "npm run format:write && npm run package", + "format:write": "npx prettier --write .", + "format:check": "npx prettier --check .", + "package": "npx ncc build src/install_packages.js -o dist", + "package:watch": "npm run package -- --watch", + "all": "npm run format:write && npm run package" + }, + "dependencies": { + "@actions/core": "^1.11.1", + "glob": "^11.1.0" + }, + "devDependencies": { + "@vercel/ncc": "^0.38.3", + "prettier": "^3.5.0" + }, + "license": "Apache-2.0" +} diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/actions/install_wheel/src/install_packages.js b/src/resources/openvino.genai-2026.1.0.0/.github/actions/install_wheel/src/install_packages.js new file mode 100644 index 0000000..9fafaae --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/actions/install_wheel/src/install_packages.js @@ -0,0 +1,119 @@ +const core = require('@actions/core'); +const glob = require('glob'); +const path = require('path'); +const { exec } = require('child_process'); +const util = require('util'); + +const execAsync = util.promisify(exec); + +async function getPythonVersion() { + const { stdout } = await execAsync('python --version'); + const versionMatch = stdout.match(/Python (\d+)\.(\d+)\.(\d+)/); + if (versionMatch) { + return { + major: versionMatch[1], + 
minor: versionMatch[2], + patch: versionMatch[3] + }; + } else { + throw new Error('Unable to detect Python version'); + } +} + +async function installPackages(packages, localWheelDir, requirementsFiles) { + core.debug(`Packages to install: ${packages}`); + core.debug(`Local wheel directory: ${localWheelDir}`); + core.debug(`Requirements files: ${requirementsFiles}`); + + const pythonVersion = await getPythonVersion(); + core.debug(`Detected Python version: ${JSON.stringify(pythonVersion)}`); + + // Resolve local wheels + const localWheels = {}; + if (localWheelDir) { + const wheels = glob.sync(path.posix.join(localWheelDir, '*.whl')); + core.debug(`Found wheels: ${wheels}`); + for (const whl of wheels) { + const packageName = path.basename(whl).split('-')[0]; + const wheelPythonVersion = path.basename(whl).match(/cp(\d{2,3})/); + if ( + !wheelPythonVersion || + wheelPythonVersion[1] === `${pythonVersion.major}${pythonVersion.minor}` + ) { + localWheels[packageName] = whl; + } + } + } + core.debug(`Resolved local wheels: ${JSON.stringify(localWheels)}`); + + // Collect wheel paths + const wheelPaths = []; + for (const pkg of packages) { + const packageName = pkg.split('[')[0]; + if (localWheels[packageName]) { + const wheelPath = localWheels[packageName]; + wheelPaths.push(`"${wheelPath}${pkg.slice(packageName.length)}"`); + } else { + core.setFailed(`Package ${pkg} not found locally.`); + return; + } + } + core.debug(`Collected wheel paths: ${wheelPaths}`); + + // Collect requirements files + const requirementsArgs = requirementsFiles.map(reqFile => `-r ${reqFile}`); + core.debug(`Requirements arguments: ${requirementsArgs}`); + + // Install all wheels and requirements in one command + const installArgs = [...wheelPaths, ...requirementsArgs]; + if (installArgs.length > 0) { + core.debug(`Installing packages with arguments: ${installArgs.join(' ')}`); + for (let attempt = 0; attempt < 3; attempt++) { + try { + core.debug(`Attempt ${attempt} of 3`); + const { 
stdout, stderr } = await execAsync( + `python -m pip install ${installArgs.join(' ')}`, + { + stdio: 'inherit' + } + ); + if (stdout) { + core.debug('stdout:', stdout); + } + if (stderr) { + core.error('stderr:', stderr); + } + break; + } catch (error) { + core.error(`Attempt ${attempt + 1} failed:`, error.message); + if (attempt === 2) { + throw error; + } + const sleepTime = Math.pow(2, attempt) * 1000; + core.debug(`Waiting ${sleepTime / 1000} seconds before retry...`); + await new Promise(resolve => setTimeout(resolve, sleepTime)); + } + } + } +} + +async function run() { + try { + const packagesInput = core.getInput('packages'); + const localWheelDir = core.getInput('local_wheel_dir') || null; + const requirementsInput = core.getInput('requirements_files') || ''; + const packages = packagesInput.split(';'); + const requirementsFiles = requirementsInput + .split(';') + .filter(Boolean) + .map(reqFile => path.normalize(reqFile)); + const normalizedLocalWheelDir = localWheelDir + ? path.normalize(localWheelDir) + : null; + await installPackages(packages, normalizedLocalWheelDir, requirementsFiles); + } catch (error) { + core.setFailed(error.message); + } +} + +run(); diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/agents/agentic-workflows.agent.md b/src/resources/openvino.genai-2026.1.0.0/.github/agents/agentic-workflows.agent.md new file mode 100644 index 0000000..bbfe60b --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/agents/agentic-workflows.agent.md @@ -0,0 +1,143 @@ +--- +description: GitHub Agentic Workflows (gh-aw) - Create, debug, and upgrade AI-powered workflows with intelligent prompt routing +disable-model-invocation: true +--- + +# GitHub Agentic Workflows Agent + +This agent helps you work with **GitHub Agentic Workflows (gh-aw)**, a CLI extension for creating AI-powered workflows in natural language using markdown files. 
+ +## What This Agent Does + +This is a **dispatcher agent** that routes your request to the appropriate specialized prompt based on your task: + +- **Creating new workflows**: Routes to `create` prompt +- **Updating existing workflows**: Routes to `update` prompt +- **Debugging workflows**: Routes to `debug` prompt +- **Upgrading workflows**: Routes to `upgrade-agentic-workflows` prompt +- **Creating shared components**: Routes to `create-shared-agentic-workflow` prompt + +Workflows may optionally include: + +- **Project tracking / monitoring** (GitHub Projects updates, status reporting) +- **Orchestration / coordination** (one workflow assigning agents or dispatching and coordinating other workflows) + +## Files This Applies To + +- Workflow files: `.github/workflows/*.md` and `.github/workflows/**/*.md` +- Workflow lock files: `.github/workflows/*.lock.yml` +- Shared components: `.github/workflows/shared/*.md` +- Configuration: https://github.com/github/gh-aw/blob/v0.46.5/.github/aw/github-agentic-workflows.md + +## Problems This Solves + +- **Workflow Creation**: Design secure, validated agentic workflows with proper triggers, tools, and permissions +- **Workflow Debugging**: Analyze logs, identify missing tools, investigate failures, and fix configuration issues +- **Version Upgrades**: Migrate workflows to new gh-aw versions, apply codemods, fix breaking changes +- **Component Design**: Create reusable shared workflow components that wrap MCP servers + +## How to Use + +When you interact with this agent, it will: + +1. **Understand your intent** - Determine what kind of task you're trying to accomplish +2. **Route to the right prompt** - Load the specialized prompt file for your task +3. 
**Execute the task** - Follow the detailed instructions in the loaded prompt + +## Available Prompts + +### Create New Workflow +**Load when**: User wants to create a new workflow from scratch, add automation, or design a workflow that doesn't exist yet + +**Prompt file**: https://github.com/github/gh-aw/blob/v0.46.5/.github/aw/create-agentic-workflow.md + +**Use cases**: +- "Create a workflow that triages issues" +- "I need a workflow to label pull requests" +- "Design a weekly research automation" + +### Update Existing Workflow +**Load when**: User wants to modify, improve, or refactor an existing workflow + +**Prompt file**: https://github.com/github/gh-aw/blob/v0.46.5/.github/aw/update-agentic-workflow.md + +**Use cases**: +- "Add web-fetch tool to the issue-classifier workflow" +- "Update the PR reviewer to use discussions instead of issues" +- "Improve the prompt for the weekly-research workflow" + +### Debug Workflow +**Load when**: User needs to investigate, audit, debug, or understand a workflow, troubleshoot issues, analyze logs, or fix errors + +**Prompt file**: https://github.com/github/gh-aw/blob/v0.46.5/.github/aw/debug-agentic-workflow.md + +**Use cases**: +- "Why is this workflow failing?" 
+- "Analyze the logs for workflow X" +- "Investigate missing tool calls in run #12345" + +### Upgrade Agentic Workflows +**Load when**: User wants to upgrade workflows to a new gh-aw version or fix deprecations + +**Prompt file**: https://github.com/github/gh-aw/blob/v0.46.5/.github/aw/upgrade-agentic-workflows.md + +**Use cases**: +- "Upgrade all workflows to the latest version" +- "Fix deprecated fields in workflows" +- "Apply breaking changes from the new release" + +### Create Shared Agentic Workflow +**Load when**: User wants to create a reusable workflow component or wrap an MCP server + +**Prompt file**: https://github.com/github/gh-aw/blob/v0.46.5/.github/aw/create-shared-agentic-workflow.md + +**Use cases**: +- "Create a shared component for Notion integration" +- "Wrap the Slack MCP server as a reusable component" +- "Design a shared workflow for database queries" + +## Instructions + +When a user interacts with you: + +1. **Identify the task type** from the user's request +2. **Load the appropriate prompt** from the GitHub repository URLs listed above +3. **Follow the loaded prompt's instructions** exactly +4. 
**If uncertain**, ask clarifying questions to determine the right prompt + +## Quick Reference + +```bash +# Initialize repository for agentic workflows +gh aw init + +# Generate the lock file for a workflow +gh aw compile [workflow-name] + +# Debug workflow runs +gh aw logs [workflow-name] +gh aw audit + +# Upgrade workflows +gh aw fix --write +gh aw compile --validate +``` + +## Key Features of gh-aw + +- **Natural Language Workflows**: Write workflows in markdown with YAML frontmatter +- **AI Engine Support**: Copilot, Claude, Codex, or custom engines +- **MCP Server Integration**: Connect to Model Context Protocol servers for tools +- **Safe Outputs**: Structured communication between AI and GitHub API +- **Strict Mode**: Security-first validation and sandboxing +- **Shared Components**: Reusable workflow building blocks +- **Repo Memory**: Persistent git-backed storage for agents +- **Sandboxed Execution**: All workflows run in the Agent Workflow Firewall (AWF) sandbox, enabling full `bash` and `edit` tools by default + +## Important Notes + +- Always reference the instructions file at https://github.com/github/gh-aw/blob/v0.46.5/.github/aw/github-agentic-workflows.md for complete documentation +- Use the MCP tool `agentic-workflows` when running in GitHub Copilot Cloud +- Workflows must be compiled to `.lock.yml` files before running in GitHub Actions +- **Bash tools are enabled by default** - Don't restrict bash commands unnecessarily since workflows are sandboxed by the AWF +- Follow security best practices: minimal permissions, explicit network access, no template injection diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/aw/actions-lock.json b/src/resources/openvino.genai-2026.1.0.0/.github/aw/actions-lock.json new file mode 100644 index 0000000..801907d --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/aw/actions-lock.json @@ -0,0 +1,14 @@ +{ + "entries": { + "actions/github-script@v8": { + "repo": "actions/github-script", + 
"version": "v8", + "sha": "ed597411d8f924073f98dfc5c65a23a2325f34cd" + }, + "github/gh-aw/actions/setup@v0.46.5": { + "repo": "github/gh-aw/actions/setup", + "version": "v0.46.5", + "sha": "5a79466d65414632d47c7869b27170ade5b9404e" + } + } +} diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/components.yml b/src/resources/openvino.genai-2026.1.0.0/.github/components.yml new file mode 100644 index 0000000..db09776 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/components.yml @@ -0,0 +1,173 @@ +GHA: + revalidate: 'all' + +tokenizers: + revalidate: 'all' + +tests_dependencies: + revalidate: + - whisper + - continuous_batching + - visual_language + - LLM + - GGUF + - tokenizers + - sampling + - text_streamer + - RAG + - WWB + - video_generation + +samples_dependencies: + revalidate: + - LLM_samples + - Whisper_samples + - Image_generation_samples + - Video_generation_samples + - VLM_samples + - RAG_samples + - Speech_generation_samples + +text_streamer: + revalidate: + - LLM + - visual_language + - sampling + - whisper + - speculative_decoding + - prompt_lookup + - continuous_batching + - LLM_samples + - Whisper_samples + - VLM_samples + +sampling: + revalidate: + - LLM + - visual_language + - whisper + - continuous_batching + - speculative_decoding + - prompt_lookup + - LLM_samples + - Whisper_samples + - VLM_samples + - llm_bench + - WWB + +LoRA: + revalidate: + - LLM + - image_generation + - LLM_samples + - llm_bench + +LLM: + revalidate: + - sampling + - continuous_batching + - speculative_decoding + - prompt_lookup + - LLM_samples + - GGUF + - llm_bench + - WWB + +visual_language: + revalidate: + - VLM_samples + - llm_bench + - WWB + +whisper: + revalidate: + - Whisper_samples + - llm_bench + - WWB + +image_generation: + revalidate: + - Image_generation_samples + - llm_bench + - WWB + +video_generation: + revalidate: + - Video_generation_samples + - llm_bench + - WWB + +RAG: + revalidate: + - RAG_samples + +speech_generation: + 
revalidate: + - Speech_generation_samples + +continuous_batching: + revalidate: + - LLM + - visual_language + - speculative_decoding + - prompt_lookup + - sampling + - LLM_samples + - VLM_samples + - llm_bench + - WWB + +speculative_decoding: + revalidate: + - LLM + - LLM_samples + - llm_bench + - WWB + +prompt_lookup: + revalidate: + - LLM + - LLM_samples + - llm_bench + - WWB + +LLM_samples: + revalidate: [] + +Whisper_samples: + revalidate: [] + +Image_generation_samples: + revalidate: [] + +VLM_samples: + revalidate: [] + +RAG_samples: + revalidate: [] + +Speech_generation_samples: + revalidate: [] + +Video_generation_samples: + revalidate: [] + +GGUF: + revalidate: [] + +C_API: + revalidate: + - LLM_samples + +JS_API: + revalidate: + - LLM_samples + +llm_bench: + revalidate: [] + +WWB: + revalidate: [] + +GH_Pages_Docs: + revalidate: [] diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/copilot-instructions.md b/src/resources/openvino.genai-2026.1.0.0/.github/copilot-instructions.md new file mode 100644 index 0000000..133c01f --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/copilot-instructions.md @@ -0,0 +1,57 @@ +# OpenVINO GenAI Copilot Instructions + +## Context & Persona + +You are the OpenVINO GenAI expert. Your mission is to ensure that all code aligns with the OpenVINO GenAI existing code and guidelines. The focus should be on high-performance inference. You are deeply familiar with the ov::, ov::genai:: namespaces, generative models and pipelines architectures. + +## Expertise Areas + +1. Model Architecture Knowledge: + - Understanding of attention mechanisms, KV-cache optimization, and sampling strategies + - Understanding of transformer-based models and diffusion models +2. OpenVINO Expertise: + - Proficient with OpenVINO libraries, especially ov::genai components + - Familiar with OpenVINO performance optimization techniques +3. 
C++ Proficiency: + - Strong C++17 skills + - Familiar with best practices in memory management, concurrency, and template programming + +## General Coding Guidelines + +Follow these rules when writing, modifying, or reviewing code in this repository: + +1. Follow C++ Core Guidelines strictly. +2. Performance: avoid `dynamic_cast` in hot paths (inference loops). Use `static_cast` or redesign if the type is known. +3. Avoid copies: large data structures (like tensors) must be passed by reference or moved, not copied. +4. Pass non-fundamental values by `const` reference wherever possible. +5. Exceptions: use `OPENVINO_ASSERT(condition, ...)` for checks instead of `if` + `OPENVINO_THROW(...)` or `throw`. +6. Formatting & Safety: + - No `using namespace std;`. + - No `auto` for primitive types where it obscures readability. + - Use `const` and `constexpr` wherever possible. +7. Follow constructors and member initializer lists style instead of direct assignments in the constructor body. +8. When initial container values are known upfront, prefer initializer-list / brace-initialization over constructing an empty container and immediately inserting values. +9. Make sure the function names are descriptive. +10. Check for variables with different names but similar meaning or aliasing. +11. Avoid duplicate code. Ensure that common functionality is extracted into reusable functions or utilities. +12. Avoid pronouns in comments and names to make the statements concise. +13. Unused functions and constructors aren't allowed except for in `debug_utils.hpp`. +14. `debug_utils.hpp` must never be included. +15. Assumptions on the user's behalf aren't allowed. For example, the implementation shouldn't adjust config values silently or with a warning; it should throw an exception instead. +16. Samples: + - Avoid adding new samples unless there is a strong, clearly justified reason. + - Keep command‑line arguments in samples minimal. Prefer hardcoding values. 
+ - Ensure new samples have corresponding tests. + +## Code Review Instructions for PRs + +When performing a code review on a Pull Request, additionally follow this protocol: + +1. PR description must be aligned with [./pull_request_template.md](./pull_request_template.md) and its checklist must be filled out. If not, request the author to update the description and checklist before proceeding with the review. +2. If the documentation is updated, PR description must include a link to the corresponding documentation deployed on the fork. +3. PR description must be up to date and include all information about the changes. +4. Include C++ Core Guidelines references in review comments. +5. Python Bindings: if C++ APIs are changed, check if the corresponding Python pybind11 wrappers in src/python need updates. +6. Documentation: ensure that any new public APIs have docstrings in C++ headers and Python bindings. Ensure that new public APIs have documentation updated in /site. +7. Test Coverage: ensure that new features or changes have corresponding tests. +8. Verify that the result of every newly introduced function is used in at least one call site except for `void` functions. 
diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/dependabot.yml b/src/resources/openvino.genai-2026.1.0.0/.github/dependabot.yml new file mode 100644 index 0000000..ef0c758 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/dependabot.yml @@ -0,0 +1,42 @@ +# See help here: https://docs.github.com/en/free-pro-team@latest/github/administering-a-repository/enabling-and-disabling-version-updates + +version: 2 +updates: + # + # Github actions - CI + # + - package-ecosystem: github-actions + directory: "/" + schedule: + interval: "daily" + time: "09:00" + timezone: "Europe/Dublin" + open-pull-requests-limit: 3 + + # + # Enable version updates for npm + # + - package-ecosystem: "npm" + directory: "/" + schedule: + interval: "daily" + time: "09:00" + timezone: "Europe/Dublin" + open-pull-requests-limit: 3 + versioning-strategy: increase-if-necessary + + # + # Python dependencies + # + - package-ecosystem: "pip" + directories: + - "/" + - "./samples/" + - "./tests/python_tests/" + - "./tools/llm_bench/" + - "./tools/who_what_benchmark/" + schedule: + interval: "daily" + time: "09:00" + timezone: "Europe/Dublin" + versioning-strategy: increase-if-necessary diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/dependency_review.yml b/src/resources/openvino.genai-2026.1.0.0/.github/dependency_review.yml new file mode 100644 index 0000000..0fc24e3 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/dependency_review.yml @@ -0,0 +1,25 @@ +fail-on-severity: "low" +allow-licenses: + - "BSD-2-Clause" + - "BSD-3-Clause" + - "MIT" + - "Apache-2.0" + - "ISC" + - "BlueOak-1.0.0" + - "0BSD" + - "Python-2.0" + - "MIT-CMU" # Pillow's license + - "CC-BY-NC-4.0" + - "GPL-1.0-or-later" + - "LGPL-2.0-or-later" + - "LicenseRef-scancode-proprietary-license" +fail-on-scopes: + - "runtime" + - "development" + - "unknown" +license-check: true +vulnerability-check: true +allow-dependencies-licenses: + - "pkg:npm/thingies" # Docs site (dependency 
of dependency) + - "pkg:pypi/PyGithub" # Dependencies of the workflow rerunner script + - "pkg:pypi/psycopg2-binary" diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/labeler.yml b/src/resources/openvino.genai-2026.1.0.0/.github/labeler.yml new file mode 100644 index 0000000..fc8d18a --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/labeler.yml @@ -0,0 +1,220 @@ +'category: GHA': +- '.github/**/*' +- '.pre-commit-config.yaml' +- 'pyproject.toml' + +'category: tests dependencies': +- 'tests/python_tests/requirements.txt' + +'category: samples dependencies': +- 'samples/requirements.txt' +- 'samples/deployment-requirements.txt' +- 'samples/export-requirements.txt' + +'category: tokenizers': +- 'src/cpp/include/openvino/genai/tokenizer.hpp' +- 'src/cpp/src/tokenizer/**/*' +- 'src/python/py_tokenizer.cpp' +- 'thirdparty/openvino_tokenizers' +- 'tests/python_tests/data/tokenizer_configs.py' +- 'tests/python_tests/test_tokenizer.py' + +'category: text streamer': +- 'src/cpp/include/openvino/genai/text_streamer.hpp' +- 'src/cpp/include/openvino/genai/streamer_base.hpp' +- 'src/cpp/src/text_streamer.cpp' +- 'src/cpp/src/continuous_batching/threaded_streamer.hpp' +- 'src/python/py_streamers.cpp' +- 'tests/python_tests/test_text_streamer.py' + +'category: LLM': +- 'src/cpp/include/openvino/genai/llm_pipeline.hpp' +- 'src/cpp/src/llm/**/*' +- 'src/cpp/src/lm_encoding.hpp' +- 'src/cpp/src/lm_encoding.cpp' +- 'src/python/py_llm_pipeline.cpp' +- 'tests/python_tests/test_llm_pipeline.py' +- 'tests/python_tests/test_llm_pipeline_static.py' + +'category: visual language': +- 'src/include/openvino/genai/visual_language/**/*' +- 'src/cpp/src/visual_language/**/*' +- 'src/cpp/src/lm_encoding.hpp' +- 'src/cpp/src/lm_encoding.cpp' +- 'src/python/py_vlm_pipeline.cpp' +- 'tests/python_tests/test_vlm_pipeline.py' + +'category: RAG': +- 'src/cpp/include/openvino/genai/rag/**/*' +- 'src/cpp/src/rag/**/*' +- 'src/python/py_rag.cpp' +- 
'tests/python_tests/test_rag.py' + +'category: sampling': +- 'src/cpp/include/openvino/genai/generation_config.hpp' +- 'src/cpp/src/generation_config.cpp' +- 'src/cpp/src/sampling/**/*' +- 'src/python/py_generation_config.cpp' +- 'tests/cpp/logit_filtering.cpp' +- 'tests/cpp/generate_config.cpp' +- 'tests/cpp/sampler.cpp' +- 'tests/python_tests/test_sampling.py' +- 'tests/python_tests/test_generation_config.py' + +'category: LoRA': +- 'src/cpp/include/openvino/genai/lora_adapter.hpp' +- 'src/cpp/src/lora/**/*' +- 'src/python/py_lora_adapter.cpp' + +'category: LLM samples': +- 'samples/c/text_generation/**/*' +- 'samples/cpp/text_generation/**/*' +- 'samples/js/text_generation/**/*' +- 'samples/python/text_generation/**/*' +- 'tests/python_tests/samples/test_beam_search_causal_lm.py' +- 'tests/python_tests/samples/test_benchmark_genai.py' +- 'tests/python_tests/samples/test_chat_sample.py' +- 'tests/python_tests/samples/test_encrypted_model_causal_lm.py' +- 'tests/python_tests/samples/test_greedy_causal_lm.py' +- 'tests/python_tests/samples/test_lora.py' +- 'tests/python_tests/samples/test_multinomial_causal_lm.py' +- 'tests/python_tests/samples/test_prompt_lookup_decoding_lm.py' +- 'tests/python_tests/samples/test_react_sample.py' +- 'tests/python_tests/samples/test_speculative_decoding_lm.py' + +'category: Whisper samples': +- 'samples/cpp/whisper_speech_recognition/**/*' +- 'samples/python/whisper_speech_recognition/**/*' + +'category: Image generation samples': +- 'samples/cpp/image_generation/**/*' +- 'samples/python/image_generation/**/*' +- 'tests/python_tests/samples/test_benchmark_image_gen.py' +- 'tests/python_tests/samples/test_heterogeneous_stable_diffusion.py' +- 'tests/python_tests/samples/test_text2image.py' +- 'tests/python_tests/samples/test_image2image.py' +- 'tests/python_tests/samples/test_inpainting.py' +- 'tests/python_tests/samples/test_lora_text2image.py' + +'category: VLM samples': +- 'samples/cpp/visual_language_chat/**/*' +- 
'samples/python/visual_language_chat/**/*' +- 'tests/python_tests/samples/test_benchmark_vlm.py' +- 'tests/python_tests/samples/test_visual_language_chat.py' + +'category: RAG samples': +- 'samples/cpp/rag/**/*' +- 'samples/python/rag/**/*' +- 'tests/python_tests/samples/test_rag.py' + +'category: structured output generation': +- 'src/cpp/src/sampling/structured_output/*' + +'category: whisper': +- 'src/cpp/include/openvino/genai/whisper_generation_config.hpp' +- 'src/cpp/include/openvino/genai/whisper_pipeline.hpp' +- 'src/cpp/src/whisper/**/*' +- 'src/python/py_whisper_pipeline.cpp' +- 'tests/python_tests/test_whisper_pipeline.py' +- 'tests/python_tests/test_whisper_pipeline_static.py' +- 'tests/python_tests/samples/test_whisper_speech_recognition.py' + +'category: image generation': +- 'src/include/openvino/genai/image_generation/**/*' +- 'src/cpp/src/image_generation/**/*' +- 'src/python/py_image_generation_models.cpp' +- 'src/python/py_image_generation_pipelines.cpp' +- 'tests/python_tests/test_image_generation.py' + +'category: video generation': +- 'src/cpp/include/openvino/genai/video_generation/**/*' +- 'src/cpp/src/video_generation/**/*' +- 'src/python/py_video_generation_models.cpp' +- 'src/python/py_video_generation_pipelines.cpp' +- 'tests/python_tests/test_video_generation.py' + +'category: Video generation samples': +- 'samples/cpp/video_generation/**/*' +- 'samples/python/video_generation/**/*' + +'category: speech generation': +- 'src/include/openvino/genai/speech_generation/**/*' +- 'src/cpp/src/speech_generation/**/*' +- 'src/python/py_speech_generation.cpp' + +'category: Speech generation samples': +- 'samples/cpp/speech_generation/**/*' +- 'samples/python/speech_generation/**/*' +- 'tests/python_tests/samples/test_text2speech.py' + +'category: speculative decoding': +- 'src/cpp/src/speculative_decoding/**/*' +- 'tests/cpp/speculative_decoding.cpp' + +'category: prompt lookup': +- 'src/cpp/src/prompt_lookup/**/*' + +'category: continuous 
batching': +- 'src/cpp/include/openvino/genai/cache_eviction.hpp' +- 'src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp' +- 'src/cpp/include/openvino/genai/generation_handle.hpp' +- 'src/cpp/include/openvino/genai/scheduler_config.hpp' +- 'src/cpp/src/continuous_batching/**/*' +- 'src/python/py_continuous_batching_pipeline.cpp' +- 'tools/continuous_batching/**/*' +- 'tools/cacheviz/**/*' +- 'tests/python_tests/test_continuous_batching.py' +- 'tests/python_tests/test_kv_cache_eviction/**/*' +- 'tests/cpp/data/**/*' +- 'tests/cpp/block_allocator.cpp' +- 'tests/cpp/block_hash_store.cpp' +- 'tests/cpp/block_manager.cpp' +- 'tests/cpp/cache_eviction.cpp' +- 'tests/cpp/cache_manager.cpp' +- 'tests/cpp/scheduler.cpp' +- 'tests/python_tests/data/long_prompts.txt' +- 'tests/python_tests/data/short_prompts.txt' +- 'tests/python_tests/data/test_dataset.py' +- 'tests/python_tests/models/**/*' +- 'tests/python_tests/samples/test_continuous_batching_tools.py' + +'category: GGUF': +- 'src/cpp/src/gguf_utils/**' +- 'tests/python_tests/**' + +'category: Structured Output samples': +- 'samples/cpp/text_generation/structured_output_generation' +- 'samples/python/text_generation/structured_output_generation.py' +- 'tests/python_tests/samples/test_structured_output_sample.py' + +'category: cmake / build': +- 'cmake/**/*' +- '**/CMakeLists.txt' +- '**/*.cmake' +- 'pyproject.toml' +- 'requirements-build.txt' + +'category: Python API': +- 'src/python/**/*' + +'category: CPP API': +- 'src/cpp/include/openvino/genai/**/*' + +'category: C API': +- 'src/c/**/*' +- 'src/samples/c/**/*' + +'category: JS API': +- 'src/js/**/*' +- 'src/samples/js/**/*' + +'category: llm_bench': +- 'tools/llm_bench/**/*' +- '.github/workflows/llm_bench-python.yml' +- 'tests/python_tests/samples/test_tools_llm_benchmark.py' + +'category: WWB': +- 'tools/who_what_benchmark/**/*' + +'category: GH Pages Docs': +- 'site/**/*' diff --git 
a/src/resources/openvino.genai-2026.1.0.0/.github/pull_request_template.md b/src/resources/openvino.genai-2026.1.0.0/.github/pull_request_template.md new file mode 100644 index 0000000..93e8c0d --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/pull_request_template.md @@ -0,0 +1,16 @@ + +## Description + + + +CVS-### + + +Fixes #(issue) + +## Checklist: +- [ ] This PR follows [GenAI Contributing guidelines](https://github.com/openvinotoolkit/openvino.genai?tab=contributing-ov-file#contributing). +- [ ] Tests have been updated or added to cover the new code. +- [ ] This PR fully addresses the ticket. +- [ ] I have made corresponding changes to the documentation. diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/scripts/generate_reference_llava.py b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/generate_reference_llava.py new file mode 100644 index 0000000..f772fe0 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/generate_reference_llava.py @@ -0,0 +1,58 @@ +import argparse +from pathlib import Path +from optimum.intel.openvino import OVModelForVisualCausalLM +from transformers import AutoProcessor +from PIL import Image + +IMAGE_EXTENSIONS = [".jpg", ".jpeg", ".png", ".bmp"] + + +def main(model_path: str, images_path: str): + print(f"Selected model: {model_path}\n") + + if Path(images_path).is_file(): + image_files = [Path(images_path)] + else: + image_files = sorted( + [f for f in Path(images_path).glob("*") if f.is_file() and f.suffix.lower() in IMAGE_EXTENSIONS], + key=lambda x: x.name + ) + + if not image_files: + raise FileNotFoundError(f"No images found in '{images_path}' directory. 
Supported formats: {IMAGE_EXTENSIONS}") + + images = [] + for file in image_files: + images.append( + Image.open(file).convert("RGB") + ) + + print("Images:", image_files) + + model = OVModelForVisualCausalLM.from_pretrained(model_path, trust_remote_code=True) + processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True) + + conversation = [{ + "role": "user", + "content": [ + *[{"type": "image"} for _ in images], + {"type": "text", "text": "Describe the images."}, + ], + }] + + prompt = processor.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True) + print(prompt) + inputs = processor(text=[prompt], images=images, return_tensors="pt") + result = model.generate(**inputs, max_new_tokens=100, do_sample=False) + decoded = processor.tokenizer.batch_decode(result[:, inputs["input_ids"].shape[1]:], skip_special_tokens=True)[0] + print(decoded) + with open("ref.txt", "w") as f: + f.write(f"question:\n{decoded}\n----------\nquestion:\n") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("-m", "--model_path", type=str, required=True, help="Path to the model.") + parser.add_argument("-i", "--images_path", type=str, required=True, help="Path to the directory with images.") + args = parser.parse_args() + main(args.model_path, args.images_path) diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/__init__.py b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/argument_parser.py b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/argument_parser.py new file mode 100644 index 0000000..c3d7a46 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/argument_parser.py @@ -0,0 +1,30 @@ +# Copyright (C) 2018-2026 Intel Corporation +# SPDX-License-Identifier: 
Apache-2.0 + +import argparse +from pathlib import Path + + +def get_arguments() -> argparse.Namespace: + parser = argparse.ArgumentParser() + parser.add_argument( + "-r", "--repository-name", type=str, required=True, help="Repository name in the OWNER/REPOSITORY format" + ) + parser.add_argument("--run-id", type=int, required=True, help="Workflow Run ID") + parser.add_argument( + "--rerunner-run-id", type=int, required=True, help="Workflow Run ID for the rerunner run itself" + ) + parser.add_argument( + "--errors-to-look-for-file", + type=Path, + required=False, + help=".json file with the errors to look for in logs", + default=Path(__file__).resolve().parent.joinpath("errors_to_look_for.json"), + ) + parser.add_argument( + "--dry-run", + required=False, + action="store_true", + help="Whether to run in dry mode and not actually retrigger the pipeline and only collect and analyze logs", + ) + return parser.parse_args() diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/constants.py b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/constants.py new file mode 100644 index 0000000..800012d --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/constants.py @@ -0,0 +1,20 @@ +# Copyright (C) 2018-2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import logging +import os + + +GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN") + + +def init_logger(): + LOGLEVEL = os.environ.get("LOGLEVEL", "INFO").upper() + logging.basicConfig( + level=LOGLEVEL, format="%(asctime)s %(name)-12s %(levelname)-8s %(message)s", datefmt="%m-%d-%Y %H:%M:%S" + ) + + +init_logger() + +LOGGER = logging.getLogger("rerunner") diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/errors_to_look_for.json b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/errors_to_look_for.json new file mode 100644 index 0000000..20ee7e0 --- /dev/null +++ 
b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/errors_to_look_for.json @@ -0,0 +1,186 @@ +[ + { + "error_text": "This is a problem related to network connectivity", + "ticket": 135929 + }, + { + "error_text": "Unable to make request", + "ticket": 135715 + }, + { + "error_text": "GnuTLS recv error", + "ticket": 131918 + }, + { + "error_text": "Connection was reset", + "ticket": 131818 + }, + { + "error_text": "Failed to connect to github.com", + "ticket": 131657 + }, + { + "error_text": "retrieving gpg key timed out", + "ticket": 131538 + }, + { + "error_text": "Retry limit has been reached for chunk", + "ticket": 131537 + }, + { + "error_text": "fatal error: downloading", + "ticket": 131424 + }, + { + "error_text": "Network is unreachable", + "ticket": 130955 + }, + { + "error_text": "connection timed out", + "ticket": 130955 + }, + { + "error_text": "The requested URL returned error: 500", + "ticket": 139384 + }, + { + "error_text": "Unable to fetch some archives", + "ticket": 130965 + }, + { + "error_text": "status_string: \"Timeout was reached\"", + "ticket": 142653 + }, + { + "error_text": "ERROR 502: Bad Gateway", + "ticket": 146254 + }, + { + "error_text": "Unexpected HTTP response: 520", + "ticket": 147958 + }, + { + "error_text": "json.decoder.JSONDecodeError: Unterminated string starting at", + "ticket": 151796 + }, + { + "error_text": "Upload progress stalled", + "ticket": 152933 + }, + { + "error_text": "because the GET request got Content-Type", + "ticket": 158400 + }, + { + "error_text": "Failure when receiving data from the peer", + "ticket": 159323 + }, + { + "error_text": "HTTP response code said error", + "ticket": 159398 + }, + { + "error_text": "download failed after attempts", + "ticket": 159547 + }, + { + "error_text": "Failed to connect to github.com port 443", + "ticket": 156593 + }, + { + "error_text": "file DOWNLOAD cannot compute hash on failed download", + "ticket": 156593 + }, + { + "error_text": "lost 
communication with the server", + "ticket": 160816 + }, + { + "error_text": "the runner has received a shutdown signal", + "ticket": 160818 + }, + { + "error_text": "Timed out waiting for server startup", + "ticket": 161077 + }, + { + "error_text": "THESE PACKAGES DO NOT MATCH THE HASHES FROM THE REQUIREMENTS FILE", + "ticket": 163749 + }, + { + "error_text": "OperationStopped:", + "ticket": 171334 + }, + { + "error_text": "No connection could be made", + "ticket": 171953 + }, + { + "error_text": "connect: connection refused", + "ticket": 171446 + }, + { + "error_text": "Connection reset by peer - SSL_connect", + "ticket": 173970 + }, + { + "error_text": "Error in the HTTP2 framing layer", + "ticket": 177549 + }, + { + "error_text": "CERT_TRUST_REVOCATION_STATUS_UNKNOWN", + "ticket": 177273 + }, + { + "error_text": "urllib3.exceptions.IncompleteRead", + "ticket": 173184 + }, + { + "error_text": "Error from intermediary with HTTP status code 403", + "ticket": 181450 + }, + { + "error_text": "terminal prompts disabled", + "ticket": 181095 + }, + { + "error_text": "HTTP 500 curl 22 The requested URL returned error: 500", + "ticket": 181530 + }, + { + "error_text": "Unable to make request: ECONNREFUSED", + "ticket": 158401 + }, + { + "error_text": "Unable to make request: ECONNRESET", + "ticket": 158401 + }, + { + "error_text": "Failed to FinalizeArtifact: Failed to make request after 5 attempts: Unexpected token", + "ticket": 181539 + }, + { + "error_text": "getaddrinfo EAI_AGAIN", + "ticket": 182238 + }, + { + "error_text": "getaddrinfo failed", + "ticket": 182238 + }, + { + "error_text": "Temporary failure in name resolution", + "ticket": 182238 + }, + { + "error_text": "unable to resolve host", + "ticket": 182238 + }, + { + "error_text": "Could not resolve host:", + "ticket": 182238 + }, + { + "error_text": "SSL connect error", + "ticket": 182850 + } +] diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/log_analyzer.py 
b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/log_analyzer.py new file mode 100644 index 0000000..b786779 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/log_analyzer.py @@ -0,0 +1,123 @@ +# Copyright (C) 2018-2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import json +import re +from pathlib import Path +from typing import TypedDict + +from workflow_rerun.constants import LOGGER + + +class LogFile(TypedDict): + file_name: str + path: Path + + +class ErrorData(TypedDict): + error_text: str + ticket: int + + +class LogAnalyzer: + def __init__(self, path_to_logs: Path, path_to_errors_file: Path) -> None: + self._path_to_errors_file = path_to_errors_file + + self._errors_to_look_for: list[ErrorData] = [] + self._collect_errors_to_look_for() + + self._log_dir = path_to_logs + + self._log_files: list[LogFile] = [] + self._collect_log_files() + all_txt_log_files_pretty = "\n".join(map(lambda item: str(item["path"]), self._log_files)) + LOGGER.info(f"ALL .txt LOG FILES: \n{all_txt_log_files_pretty}") + + self.found_matching_error = False + self.found_error_ticket = None + self.matched_error_text = None + + def _collect_errors_to_look_for(self) -> None: + with open(file=self._path_to_errors_file, mode="r", encoding="utf-8") as errors_file: + errors_data = json.load(errors_file) + for error_data in errors_data: + self._errors_to_look_for.append( + ErrorData(error_text=error_data["error_text"], ticket=error_data["ticket"]) + ) + + def _collect_log_files(self) -> None: + """ + Collects the .txt log files from the log archive + + The GitHub Actions pipeline logs archive should have the following structure: + > Job_name_0 + > step_name_0.txt + > step_name_1.txt + ... + > Job_name_1 + > step_name_0.txt + > step_name_1.txt + ... + > Job_name_2 + ... + ... 
+ + We need to only analyze the `*.txt` files + """ + + for _file in Path(self._log_dir).iterdir(): + if _file.is_dir(): + for log_file in _file.iterdir(): + self._log_files.append(LogFile(file_name=log_file.name, path=log_file.resolve())) + elif _file.suffix == ".txt": + self._log_files.append(LogFile(file_name=_file.name, path=_file.resolve())) + + def _is_error_in_log(self, error_to_look_for: str, log_file_path: Path) -> bool: + """ + Searches for the error in the provided log + """ + + error_to_look_for = self._clean_up_string(error_to_look_for) + + with open(file=log_file_path, mode="r", encoding="utf-8") as log_file: + for line in log_file: + if error_to_look_for in self._clean_up_string(line): + return True + return False + + @staticmethod + def _clean_up_string(string: str) -> str: + """ + Replaces special characters with spaces in the string, strips it from leading and following spaces, + and lowers it + + for "Could not resolve host: github.com" returns "could not resolve host github com" + + This cleanup is applied to both errors to look for and logs themselves for matching + """ + return re.sub(r"[^A-Za-z0-9]+", " ", string).lower().strip() + + def analyze(self) -> None: + """ + Iterates over the known errors and tries to find them in the collected log files + """ + for error in self._errors_to_look_for: + LOGGER.info(f'LOOKING FOR "{error["error_text"]}" ERROR...') + + for log_file in self._log_files: + if self._is_error_in_log(error_to_look_for=error["error_text"], log_file_path=log_file["path"]): + LOGGER.info(f'FOUND "{error["error_text"]}" ERROR IN {log_file["path"]}. 
TICKET: {error["ticket"]}') + self.found_matching_error = True + self.found_error_ticket = error["ticket"] + self.matched_error_text = error["error_text"] + return + + +if __name__ == "__main__": + # Usage example + log_analyzer = LogAnalyzer( + path_to_logs=Path("/tmp/logs_dir"), path_to_errors_file=Path("/tmp/errors_to_look_for.json") + ) + log_analyzer.analyze() + if log_analyzer.found_matching_error: + print("found matching error, see logs above") diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/log_collector.py b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/log_collector.py new file mode 100644 index 0000000..2cd6e29 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/log_collector.py @@ -0,0 +1,100 @@ +# Copyright (C) 2018-2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from pathlib import Path +from zipfile import ZipFile +import tempfile + +import requests +from github.WorkflowRun import WorkflowRun +from workflow_rerun.constants import GITHUB_TOKEN, LOGGER + + +def collect_logs_for_run(run: WorkflowRun, logs_dir: Path, session: requests.Session) -> Path: + """ + Downloads logs of a given Workflow Run, + saves them to a specified path, and returns that path. + + We don't need successful job logs, so we remove them. + We could've just downloaded logs for failed jobs only, + but when you download all logs from a workflow run, + GitHub includes "system.txt" files for each job, which can also + contain errors on which we might want to trigger rerun. + + Example log archive structure: + . 
+ ├── 10_Pytorch Layer Tests _ PyTorch Layer Tests.txt + ├── 11_CPU functional tests _ CPU functional tests.txt + ├── 12_C++ unit tests _ C++ unit tests.txt + ├── 13_OpenVINO tokenizers extension _ OpenVINO tokenizers extension.txt + ├── C++ unit tests _ C++ unit tests + │ └── system.txt + ├── CPU functional tests _ CPU functional tests + │ └── system.txt + ├── OpenVINO tokenizers extension _ OpenVINO tokenizers extension + │ └── system.txt + ├── Pytorch Layer Tests _ PyTorch Layer Tests + └── system.txt + + Sometimes though, directories contain log files for each individual step, + IN ADDITION to the full log in root of the directory: + . + ├── 1_Build.txt + └── Build + ├── 13_Upload build logs.txt + ├── 1_Set up job.txt + ├── 24_Post Clone vcpkg.txt + ├── 25_Post Clone OpenVINO.txt + ├── 26_Stop containers.txt + ├── 27_Complete job.txt + ├── 2_Initialize containers.txt + ├── 3_Clone OpenVINO.txt + ├── 4_Get VCPKG version and put it into GitHub ENV.txt + ├── 5_Init submodules for non vcpkg dependencies.txt + ├── 6_Clone vcpkg.txt + ├── 7_System info.txt + ├── 8_Build vcpkg.txt + ├── 9_CMake - configure.txt + └── system.txt + + In that case, we need only 'system.txt' file from each directory + """ + # Get failed jobs + failed_jobs = [job for job in run.jobs() if job.conclusion == "failure"] + LOGGER.info(f"FAILED JOBS: {[job.name for job in failed_jobs]}") + + with tempfile.NamedTemporaryFile(suffix=".zip") as temp_file: + log_archive_path = Path(temp_file.name) + + # Download logs archive + with open(file=log_archive_path, mode="wb") as log_archive: + LOGGER.info(f"DOWNLOADING LOGS FOR RUN ID {run.id}") + # PyGitHub does not expose the "/repos/{owner}/{repo}/actions/runs/{run_id}/logs" endpoint so we have to use requests + LOGGER.debug(f"Downloading logs from {run.logs_url}") + response = session.get(url=run.logs_url, headers={"Authorization": f"Bearer {GITHUB_TOKEN}"}) + response.raise_for_status() + log_archive.write(response.content) + + # Unpack it + with 
tempfile.TemporaryDirectory() as temp_dir: + logs_temp_dir = Path(temp_dir) + + with ZipFile(file=log_archive_path, mode="r") as zip_file: + zip_file.extractall(logs_temp_dir) + + # Traverse the unpacked logs to find the ones of failed jobs + for job in failed_jobs: + job_filename = job.name.replace("/", "_") + LOGGER.debug(f"Looking for failed job logs with filename: {job_filename}") + + for p in logs_temp_dir.iterdir(): + # Move failed jobs' logs to the final destination + if p.is_dir() and p.name == job_filename: + LOGGER.debug(f"Keeping system.txt from directory {p} for failed job {job.name}") + (p / "system.txt").rename(logs_dir / f"{job_filename}__system.txt") + elif p.is_file() and p.name.endswith(f"{job_filename}.txt"): + LOGGER.debug(f"Keeping file {p} for failed job {job.name}") + p.rename(logs_dir / p.name) + + LOGGER.info(f"COLLECTED LOGS FOR {run.id} IN {logs_dir}") + return logs_dir diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/requirements.txt b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/requirements.txt new file mode 100644 index 0000000..7725967 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/requirements.txt @@ -0,0 +1,3 @@ +PyGithub==2.8.1 +requests==2.32.4 +psycopg2-binary==2.9.9 diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/rerunner.py b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/rerunner.py new file mode 100644 index 0000000..3433df9 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/rerunner.py @@ -0,0 +1,138 @@ +# Copyright (C) 2018-2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import sys +import tempfile +import psycopg2 +from pathlib import Path +from psycopg2 import sql + +import requests +from github import Github, Auth +from requests.adapters import HTTPAdapter +from 
requests.packages.urllib3.util.retry import Retry + +from workflow_rerun.argument_parser import get_arguments +from workflow_rerun.constants import GITHUB_TOKEN, LOGGER +from workflow_rerun.log_analyzer import LogAnalyzer +from workflow_rerun.log_collector import collect_logs_for_run + + +def record_rerun_to_db( + repository_full_name: str, run_id: int, ticket_number: int, rerunner_run_id: int, error_text: str +): + """Record the rerun event to the PostgreSQL database.""" + db_username = os.environ.get("PGUSER") + db_password = os.environ.get("PGPASSWORD") + db_host = os.environ.get("PGHOST") + db_database = os.environ.get("PGDATABASE") + db_port = os.environ.get("PGPORT") + conn = psycopg2.connect(host=db_host, port=db_port, user=db_username, password=db_password, database=db_database) + + cursor = None + try: + cursor = conn.cursor() + + insert_query = sql.SQL(""" + INSERT INTO rerunner_stats (repository_full_name, run_id, ticket_number, rerun_at, rerunner_run_id, error_text) + VALUES (%s, %s, %s, NOW() AT TIME ZONE 'UTC', %s, %s) + """) + + cursor.execute(insert_query, (repository_full_name, run_id, ticket_number, rerunner_run_id, error_text)) + conn.commit() + + LOGGER.info( + f"Successfully recorded rerun to database: repo={repository_full_name}, " + f"run_id={run_id}, ticket={ticket_number}, rerunner_run_id={rerunner_run_id}, error_text={error_text}" + ) + + except psycopg2.Error as e: + LOGGER.error(f"Failed to record rerun to database: {e}") + conn.rollback() + raise + finally: + if cursor: + cursor.close() + conn.close() + + +def rerun_failed_jobs(repository_name: str, run_id: int, session: requests.Session): + # PyGitHub does not expose the "/repos/{owner}/{repo}/actions/runs/RUN_ID/rerun-failed-jobs" endpoint + # so we have to use requests + response = session.post( + url=f"https://api.github.com/repos/{repository_name}/actions/runs/{run_id}/rerun-failed-jobs", + headers={"Authorization": f"Bearer {GITHUB_TOKEN}"}, + ) + + response.raise_for_status() + + 
LOGGER.info(f"RUN RETRIGGERED SUCCESSFULLY") + + +def analyze_and_rerun( + run, + repository_name: str, + run_id: int, + rerunner_run_id: int, + errors_file: Path, + is_dry_run: bool, + session: requests.Session, +): + with tempfile.TemporaryDirectory() as temp_dir: + logs_dir = Path(temp_dir) + collect_logs_for_run(run=run, logs_dir=logs_dir, session=session) + + log_analyzer = LogAnalyzer(path_to_logs=logs_dir, path_to_errors_file=errors_file) + log_analyzer.analyze() + + if log_analyzer.found_matching_error: + LOGGER.info(f"FOUND MATCHING ERROR, RETRIGGERING {run.html_url}") + if is_dry_run: + LOGGER.info(f"RUNNING IN DRY RUN MODE, NOT RETRIGGERING, EXITING") + return + + rerun_failed_jobs(repository_name, run_id, session) + + if log_analyzer.found_error_ticket and log_analyzer.matched_error_text: + record_rerun_to_db( + repository_name, + run_id, + log_analyzer.found_error_ticket, + rerunner_run_id, + log_analyzer.matched_error_text, + ) + else: + LOGGER.error(f"Cannot record to database: missing ticket_number or error_text") + raise ValueError("Missing ticket_number or error_text for database recording.") + else: + LOGGER.info(f"NO ERROR WAS FOUND, NOT RETRIGGERING") + + +if __name__ == "__main__": + args = get_arguments() + run_id = args.run_id + rerunner_run_id = args.rerunner_run_id + repository_name = args.repository_name + errors_file = args.errors_to_look_for_file + is_dry_run = args.dry_run + if is_dry_run: + LOGGER.info("RUNNING IN DRY RUN MODE. 
IF ERROR WILL BE FOUND, WILL NOT RETRIGGER") + + session = requests.Session() + retry_strategy = Retry(total=5, backoff_factor=3, backoff_jitter=1, status_forcelist=[429, 500, 502, 503, 504]) + session.mount("https://", HTTPAdapter(max_retries=retry_strategy)) + + github = Github(auth=Auth.Token(token=GITHUB_TOKEN)) + gh_repo = github.get_repo(full_name_or_id=repository_name) + run = gh_repo.get_workflow_run(id_=run_id) + + LOGGER.info(f"CHECKING IF RERUN IS NEEDED FOR {run.html_url} RUN IN {repository_name}.") + + # Check if the run has already been retriggered + # we do not want to fall into a loop with retriggers + if run.run_attempt > 1: + LOGGER.info(f"THERE ARE {run.run_attempt} ATTEMPTS ALREADY. NOT CHECKING LOGS AND NOT RETRIGGERING. EXITING") + sys.exit(0) + + analyze_and_rerun(run, repository_name, run_id, rerunner_run_id, errors_file, is_dry_run, session) diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/__init__.py b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/data/logs_with_error/19_Samples _ Samples.txt b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/data/logs_with_error/19_Samples _ Samples.txt new file mode 100644 index 0000000..2d508e7 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/data/logs_with_error/19_Samples _ Samples.txt @@ -0,0 +1,101 @@ + +2026-01-12T09:16:22.1413136Z [setupvars.sh] OpenVINO environment initialized +2026-01-12T09:16:22.1423345Z Python detected LC_CTYPE=C: LC_CTYPE coerced to C.UTF-8 (set another locale or PYTHONCOERCECLOCALE=0 to disable this locale coercion behavior). 
+2026-01-12T09:16:22.6461484Z ============================= test session starts ============================== +2026-01-12T09:16:22.6462288Z platform linux -- Python 3.11.14, pytest-9.0.2, pluggy-1.6.0 +2026-01-12T09:16:22.6462837Z rootdir: /__w/openvino/openvino +2026-01-12T09:16:22.6463228Z plugins: xdist-3.8.0 +2026-01-12T09:16:22.6463600Z collected 175 items +2026-01-12T09:16:22.6463808Z +2026-01-12T09:16:56.4433071Z install/tests/smoke_tests/test_benchmark_app.py ........................ [ 13%] +2026-01-12T09:18:38.0985810Z ............................................F........................... [ 54%] +2026-01-12T09:19:12.5821675Z ...................................... [ 76%] +2026-01-12T09:19:17.9814654Z install/tests/smoke_tests/test_classification_sample_async.py .......... [ 82%] +2026-01-12T09:19:19.2133678Z .. [ 83%] +2026-01-12T09:19:24.4414972Z install/tests/smoke_tests/test_hello_classification.py ......... [ 88%] +2026-01-12T09:19:26.7063811Z install/tests/smoke_tests/test_hello_nv12_input_classification.py ...... [ 92%] +2026-01-12T09:19:26.7068957Z [ 92%] +2026-01-12T09:19:26.9441675Z install/tests/smoke_tests/test_hello_query_device.py .. [ 93%] +2026-01-12T09:19:28.7302791Z install/tests/smoke_tests/test_hello_reshape_ssd.py .. [ 94%] +2026-01-12T09:19:29.6995193Z install/tests/smoke_tests/test_model_creation_sample.py ...... [ 97%] +2026-01-12T09:19:50.6909298Z install/tests/smoke_tests/test_sync_benchmark.py .. [ 98%] +2026-01-12T09:20:11.8235515Z install/tests/smoke_tests/test_throughput_benchmark.py .. 
[100%] +2026-01-12T09:20:11.8236263Z +2026-01-12T09:20:11.8236476Z =================================== FAILURES =================================== +2026-01-12T09:20:11.8237183Z _________________________ test_dynamic_shape[CPU-C++] __________________________ +2026-01-12T09:20:11.8237631Z +2026-01-12T09:20:11.8238280Z self = +2026-01-12T09:20:11.8238875Z +2026-01-12T09:20:11.8239077Z def _new_conn(self) -> socket.socket: +2026-01-12T09:20:11.8239720Z """Establish a socket connection and set nodelay settings on it. +2026-01-12T09:20:11.8240343Z +2026-01-12T09:20:11.8240679Z :return: New socket connection. +2026-01-12T09:20:11.8241106Z """ +2026-01-12T09:20:11.8241424Z try: +2026-01-12T09:20:11.8241786Z > sock = connection.create_connection( +2026-01-12T09:20:11.8242327Z (self._dns_host, self.port), +2026-01-12T09:20:11.8242817Z self.timeout, +2026-01-12T09:20:11.8243277Z source_address=self.source_address, +2026-01-12T09:20:11.8243799Z socket_options=self.socket_options, +2026-01-12T09:20:11.8244286Z ) +2026-01-12T09:20:11.8244484Z +2026-01-12T09:20:11.8244793Z /venv/lib/python3.11/site-packages/urllib3/connection.py:204: +2026-01-12T09:20:11.8245502Z _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +2026-01-12T09:20:11.8246401Z /venv/lib/python3.11/site-packages/urllib3/util/connection.py:85: in create_connection +2026-01-12T09:20:11.8347779Z raise err +2026-01-12T09:20:11.8348255Z _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +2026-01-12T09:20:11.8348630Z +2026-01-12T09:20:11.8348897Z address = ('media.githubusercontent.com', 443), timeout = None +2026-01-12T09:20:11.8349947Z source_address = None, socket_options = [(6, 1, 1)] +2026-01-12T09:20:11.8350330Z +2026-01-12T09:20:11.8350466Z def create_connection( +2026-01-12T09:20:11.8350817Z address: tuple[str, int], +2026-01-12T09:20:11.8351231Z timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT, +2026-01-12T09:20:11.8351765Z source_address: tuple[str, int] | 
None = None, +2026-01-12T09:20:11.8352331Z socket_options: _TYPE_SOCKET_OPTIONS | None = None, +2026-01-12T09:20:11.8352858Z ) -> socket.socket: +2026-01-12T09:20:11.8353311Z """Connect to *address* and return the socket object. +2026-01-12T09:20:11.8353778Z +2026-01-12T09:20:11.8354208Z Convenience function. Connect to *address* (a 2-tuple ``(host, +2026-01-12T09:20:11.8354907Z port)``) and return the socket object. Passing the optional +2026-01-12T09:20:11.8355534Z *timeout* parameter will set the timeout on the socket instance +2026-01-12T09:20:11.8356230Z before attempting to connect. If no *timeout* is supplied, the +2026-01-12T09:20:11.8357015Z global default timeout setting returned by :func:`socket.getdefaulttimeout` +2026-01-12T09:20:11.8357946Z is used. If *source_address* is set it must be a tuple of (host, port) +2026-01-12T09:20:11.8358650Z for the socket to bind as a source address before making the connection. +2026-01-12T09:20:11.8359273Z An host of '' or port 0 tells the OS to use the default. +2026-01-12T09:20:11.8359720Z """ +2026-01-12T09:20:11.8359973Z +2026-01-12T09:20:11.8360241Z host, port = address +2026-01-12T09:20:11.8360599Z if host.startswith("["): +2026-01-12T09:20:11.8360976Z host = host.strip("[]") +2026-01-12T09:20:11.8361339Z err = None +2026-01-12T09:20:11.8361618Z +2026-01-12T09:20:11.8362081Z # Using the value from allowed_gai_family() in the context of getaddrinfo lets +2026-01-12T09:20:11.8362795Z # us select whether to work with IPv4 DNS records, IPv6 records, or both. +2026-01-12T09:20:11.8363470Z # The original create_connection function always returns all records. 
+2026-01-12T09:20:11.8364027Z family = allowed_gai_family() +2026-01-12T09:20:11.8364401Z +2026-01-12T09:20:11.8364650Z try: +2026-01-12T09:20:11.8364934Z host.encode("idna") +2026-01-12T09:20:11.8365277Z except UnicodeError: +2026-01-12T09:20:11.8365789Z raise LocationParseError(f"'{host}', label empty or too long") from None +2026-01-12T09:20:11.8366345Z +2026-01-12T09:20:11.8366754Z for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM): +2026-01-12T09:20:11.8367337Z af, socktype, proto, canonname, sa = res +2026-01-12T09:20:11.8367740Z sock = None +2026-01-12T09:20:11.8368051Z try: +2026-01-12T09:20:11.8368394Z sock = socket.socket(af, socktype, proto) +2026-01-12T09:20:11.8368801Z +2026-01-12T09:20:11.8369193Z # If provided, set socket level options before connecting. +2026-01-12T09:20:11.8369736Z _set_socket_options(sock, socket_options) +2026-01-12T09:20:11.8370132Z +2026-01-12T09:20:11.8370418Z if timeout is not _DEFAULT_TIMEOUT: +2026-01-12T09:20:11.8370846Z sock.settimeout(timeout) +2026-01-12T09:20:11.8371243Z if source_address: +2026-01-12T09:20:11.8371642Z sock.bind(source_address) +2026-01-12T09:20:11.8372022Z > sock.connect(sa) +2026-01-12T09:20:11.8372425Z E OSError: [Errno 101] Network is unreachable +2026-01-12T09:20:11.8372743Z +2026-01-12T09:20:11.8373030Z /venv/lib/python3.11/site-packages/urllib3/util/connection.py:73: OSError +2026-01-12T09:20:11.8373471Z diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/data/logs_with_error/29_Build _ Build.txt b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/data/logs_with_error/29_Build _ Build.txt new file mode 100644 index 0000000..152de86 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/data/logs_with_error/29_Build _ Build.txt @@ -0,0 +1 @@ + diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/data/logs_with_error/31_Smart_CI.txt 
b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/data/logs_with_error/31_Smart_CI.txt new file mode 100644 index 0000000..152de86 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/data/logs_with_error/31_Smart_CI.txt @@ -0,0 +1 @@ + diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/data/logs_with_error/Build _ Build/system.txt b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/data/logs_with_error/Build _ Build/system.txt new file mode 100644 index 0000000..fd4a94a --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/data/logs_with_error/Build _ Build/system.txt @@ -0,0 +1,14 @@ +2026-01-12T08:52:38.1920000Z Evaluating Build.if +2026-01-12T08:52:38.1920000Z Evaluating: (success() && (!needs.smart_ci.outputs.skip_workflow)) +2026-01-12T08:52:38.1920000Z Expanded: (true && !null) +2026-01-12T08:52:38.1920000Z Result: true +2026-01-12T08:52:38.1920000Z Evaluating Build.Build.if +2026-01-12T08:52:38.1920000Z Evaluating: success() +2026-01-12T08:52:38.1920000Z Result: true +2026-01-12T08:52:38.1920000Z Requested labels: aks-linux-16-cores-32gb +2026-01-12T08:52:38.1920000Z Job defined at: openvinotoolkit/openvino/.github/workflows/job_build_linux.yml@refs/pull/33540/merge +2026-01-12T08:52:38.1920000Z Reusable workflow chain: +2026-01-12T08:52:38.1920000Z openvinotoolkit/openvino/.github/workflows/ubuntu_22.yml@refs/pull/33540/merge (4a047279a51135cf69f1113f9682335d3f75cf75) +2026-01-12T08:52:38.1920000Z -> openvinotoolkit/openvino/.github/workflows/job_build_linux.yml@refs/pull/33540/merge (4a047279a51135cf69f1113f9682335d3f75cf75) +2026-01-12T08:52:38.1920000Z Waiting for a runner to pick up this job... 
+2026-01-12T08:52:46.0940000Z Job is about to start running on the runner: aks-linux-16-cores-32gb diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/data/logs_with_error/Samples _ Samples/system.txt b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/data/logs_with_error/Samples _ Samples/system.txt new file mode 100644 index 0000000..abfcdbc --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/data/logs_with_error/Samples _ Samples/system.txt @@ -0,0 +1,14 @@ +2026-01-12T09:11:37.7200000Z Evaluating Samples.if +2026-01-12T09:11:37.7200000Z Evaluating: (success() && (fromJSON(needs.smart_ci.outputs.affected_components).samples)) +2026-01-12T09:11:37.7200000Z Expanded: (true && Object) +2026-01-12T09:11:37.7200000Z Result: Object +2026-01-12T09:11:37.7200000Z Evaluating Samples.Samples.if +2026-01-12T09:11:37.7200000Z Evaluating: success() +2026-01-12T09:11:37.7200000Z Result: true +2026-01-12T09:11:37.7270000Z Requested labels: aks-linux-4-cores-16gb +2026-01-12T09:11:37.7270000Z Job defined at: openvinotoolkit/openvino/.github/workflows/job_samples_tests.yml@refs/pull/33540/merge +2026-01-12T09:11:37.7270000Z Reusable workflow chain: +2026-01-12T09:11:37.7270000Z openvinotoolkit/openvino/.github/workflows/ubuntu_22.yml@refs/pull/33540/merge (4a047279a51135cf69f1113f9682335d3f75cf75) +2026-01-12T09:11:37.7270000Z -> openvinotoolkit/openvino/.github/workflows/job_samples_tests.yml@refs/pull/33540/merge (4a047279a51135cf69f1113f9682335d3f75cf75) +2026-01-12T09:11:37.7270000Z Waiting for a runner to pick up this job... 
+2026-01-12T09:11:44.4120000Z Job is about to start running on the runner: aks-linux-4-cores-16gb diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/data/logs_with_error/Smart_CI/system.txt b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/data/logs_with_error/Smart_CI/system.txt new file mode 100644 index 0000000..544dcf9 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/data/logs_with_error/Smart_CI/system.txt @@ -0,0 +1,9 @@ +2026-01-12T08:47:49.8520000Z Evaluating Smart_CI.if +2026-01-12T08:47:49.8520000Z Evaluating: (success() && (((github.event.pull_request.draft == false) || (github.run_attempt > 1)))) +2026-01-12T08:47:49.8520000Z Expanded: (true && ((false == false) || (github.run_attempt > 1))) +2026-01-12T08:47:49.8520000Z Result: true +2026-01-12T08:47:49.8540000Z Requested labels: ubuntu-latest +2026-01-12T08:47:49.8540000Z Job defined at: openvinotoolkit/openvino/.github/workflows/ubuntu_22.yml@refs/pull/33540/merge +2026-01-12T08:47:49.8540000Z Waiting for a runner to pick up this job... +2026-01-12T08:47:54.3890000Z Job is waiting for a hosted runner to come online. 
+2026-01-12T08:47:54.3890000Z Job is about to start running on the hosted runner: GitHub Actions 1004737693 diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/data/logs_wo_error/dir_should_be_empty.txt b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/data/logs_wo_error/dir_should_be_empty.txt new file mode 100644 index 0000000..21e5fab --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/data/logs_wo_error/dir_should_be_empty.txt @@ -0,0 +1 @@ +script should NOT download logs for successful jobs diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/integration_test.py b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/integration_test.py new file mode 100644 index 0000000..345fdf2 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/integration_test.py @@ -0,0 +1,127 @@ +# Copyright (C) 2018-2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +""" +Integration tests +""" + +import unittest +from pathlib import Path +from datetime import datetime, timedelta +import os +import tempfile +import shutil +from unittest.mock import patch, MagicMock + +import requests +from github import Github, Auth +from requests.adapters import HTTPAdapter +from requests.packages.urllib3.util.retry import Retry + +from workflow_rerun.log_analyzer import LogAnalyzer +from workflow_rerun.log_collector import collect_logs_for_run +from workflow_rerun.rerunner import analyze_and_rerun + + +class IntegrationTest(unittest.TestCase): + """ + A class for testing integration between LogAnalyzer and log_collection + """ + + @classmethod + def setUpClass(cls) -> None: + cls._cwd = Path(__file__).parent + cls.errors_to_look_for_file = cls._cwd.parent.joinpath("errors_to_look_for.json") + cls.test_logs_with_error_dir = cls._cwd.joinpath("data", "logs_with_error") + + 
cls.session = requests.Session() + retry_strategy = Retry(total=5, backoff_factor=3, backoff_jitter=1, status_forcelist=[429, 500, 502, 503, 504]) + cls.session.mount("https://github.com", HTTPAdapter(max_retries=retry_strategy)) + + # Only create a GitHub client/run if token is available (otherwise tests should be offline) + cls.github = None + cls.wf_run = None + token = os.environ.get("GITHUB_TOKEN") + if token: + cls.github = Github(auth=Auth.Token(token=token)) + gh_repo = cls.github.get_repo(full_name_or_id="openvinotoolkit/openvino") + + oldest_allowed_date = (datetime.now() - timedelta(days=7)).strftime("%Y-%m-%d") + cls.wf_run = gh_repo.get_workflow_runs(status="failure", created=f">={oldest_allowed_date}")[0] + print(f"Workflow run for testing: {cls.wf_run}", flush=True) + + def setUp(self): + print(f'\nIn test: "{self._testMethodName}"', flush=True) + + @unittest.skipUnless(os.environ.get("GITHUB_TOKEN"), "GITHUB_TOKEN not set; skipping live GitHub integration test") + def test_log_collection_and_analysis(self) -> None: + """ + Ensure logs collected by collect_logs_for_run are analyzed by LogAnalyzer + """ + + with tempfile.TemporaryDirectory() as temp_dir: + logs_dir = Path(temp_dir) + collect_logs_for_run(run=self.wf_run, logs_dir=logs_dir, session=self.session) + + analyzer = LogAnalyzer( + path_to_logs=logs_dir, + path_to_errors_file=self.errors_to_look_for_file, + ) + self.assertTrue(len(analyzer._log_files) > 0, "Failed run log files should be collected for failed jobs") + analyzer.analyze() + if analyzer.found_matching_error: + print(f"Found matching error, ticket: {analyzer.found_error_ticket}") + + def test_analyze_and_rerun_records_to_db_offline(self) -> None: + """Offline integration-style test: uses local logs and mocks all network/DB side effects.""" + + def fake_collect_logs_for_run(*, run, logs_dir: Path, session): + # Populate the temp logs_dir with our checked-in test logs. 
+ for p in self.test_logs_with_error_dir.rglob("*"): + if p.is_file(): + rel = p.relative_to(self.test_logs_with_error_dir) + dst = logs_dir / rel + dst.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(p, dst) + + mock_run = MagicMock() + mock_run.html_url = "https://github.com/example/repo/actions/runs/123" + + mock_session = MagicMock() + + repository_name = "openvinotoolkit/openvino" + run_id = 123 + rerunner_run_id = 456 + + # ruff wants to use parenthesis here, but it's not supported by Python 3.8 + # which is used on the runner where these tests run + # fmt: off + with patch('workflow_rerun.rerunner.collect_logs_for_run', + side_effect=fake_collect_logs_for_run) as collect_mock, \ + patch('workflow_rerun.rerunner.rerun_failed_jobs') as rerun_mock, \ + patch('workflow_rerun.rerunner.record_rerun_to_db') as record_mock: + # fmt: on + analyze_and_rerun( + run=mock_run, + repository_name=repository_name, + run_id=run_id, + rerunner_run_id=rerunner_run_id, + errors_file=self.errors_to_look_for_file, + is_dry_run=False, + session=mock_session, + ) + + collect_mock.assert_called_once() + rerun_mock.assert_called_once_with(repository_name, run_id, mock_session) + record_mock.assert_called_once() + + # Basic sanity on record_rerun_to_db args + args = record_mock.call_args[0] + self.assertEqual(args[0], repository_name) + self.assertEqual(args[1], run_id) + self.assertEqual(args[3], rerunner_run_id) + + @classmethod + def tearDownClass(cls) -> None: + if cls.github is not None: + cls.github.close() diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/log_analyzer_test.py b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/log_analyzer_test.py new file mode 100644 index 0000000..4e80336 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/log_analyzer_test.py @@ -0,0 +1,94 @@ +# Copyright (C) 2018-2026 Intel Corporation +# SPDX-License-Identifier: 
Apache-2.0 + +""" +LogAnalyzer tests +""" + +import unittest +from pathlib import Path + + +from workflow_rerun.log_analyzer import LogAnalyzer + + +class LogAnalyzerTest(unittest.TestCase): + """ + A class for testing LogAnalyzer + """ + + def setUp(self) -> None: + print(f'\nIn test: "{self._testMethodName}"', flush=True) + self._cwd = Path(__file__).parent + self.logs_dir_with_error = self._cwd.joinpath("data").joinpath("logs_with_error") + self.logs_dir_wo_error = self._cwd.joinpath("data").joinpath("logs_wo_error") + self.errors_to_look_for_file = self._cwd.parent.joinpath("errors_to_look_for.json") + + def test_log_analyzer_instantiation(self) -> None: + """ + Ensure LogAnalyzer is instantiated correctly. + """ + analyzer = LogAnalyzer( + path_to_logs=self.logs_dir_wo_error, + path_to_errors_file=self.errors_to_look_for_file, + ) + self.assertTrue( + hasattr(analyzer, "_errors_to_look_for"), + "Analyzer should have _errors_to_look_for", + ) + self.assertTrue(hasattr(analyzer, "_log_files"), "Analyzer should have _log_files") + + for error_data in analyzer._errors_to_look_for: + self.assertTrue(error_data["error_text"], "Each error_data should have text") + self.assertTrue(error_data["ticket"], "Each error_data should have ticket") + + for log_file in analyzer._log_files: + self.assertTrue(log_file["file_name"], "Each log_file should have file_name") + self.assertTrue(log_file["path"], "Each log_file should have path") + + def test_string_cleanup(self) -> None: + """ + Ensure log cleanup function returns correct results + """ + analyzer = LogAnalyzer( + path_to_logs=self.logs_dir_wo_error, + path_to_errors_file=self.errors_to_look_for_file, + ) + + data = ( + "Connection was reset", + "Failed to connect to github.com", + "Could not resolve host: github.com", + ) + expected = ( + "connection was reset", + "failed to connect to github com", + "could not resolve host github com", + ) + + for input_str, expected_str in zip(data, expected): + 
self.assertEqual(analyzer._clean_up_string(string=input_str), expected_str) + + def test_analyzer_with_error(self) -> None: + """ + Ensure LogAnalyzer can find an error + """ + analyzer = LogAnalyzer( + path_to_logs=self.logs_dir_with_error, + path_to_errors_file=self.errors_to_look_for_file, + ) + analyzer.analyze() + self.assertTrue(analyzer.found_matching_error) + self.assertEqual(analyzer.found_error_ticket, 130955) + self.assertEqual(analyzer.matched_error_text, "Network is unreachable") + + def test_analyzer_wo_error(self) -> None: + """ + Ensure LogAnalyzer does not find an error in the log files w/o errors + """ + analyzer = LogAnalyzer( + path_to_logs=self.logs_dir_wo_error, + path_to_errors_file=self.errors_to_look_for_file, + ) + analyzer.analyze() + self.assertFalse(analyzer.found_matching_error) diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/log_collector_test.py b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/log_collector_test.py new file mode 100644 index 0000000..5d19551 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/scripts/workflow_rerun/tests/log_collector_test.py @@ -0,0 +1,69 @@ +# Copyright (C) 2018-2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +""" +log collector tests +""" + +import os +import unittest +import tempfile +from pathlib import Path +from datetime import datetime, timedelta + +import requests +from github import Github, Auth +from requests.adapters import HTTPAdapter +from requests.packages.urllib3.util.retry import Retry + +from workflow_rerun.log_collector import collect_logs_for_run + + +class LogCollectorTest(unittest.TestCase): + """ + A class for testing log collection + """ + + @classmethod + def setUpClass(cls) -> None: + cls.session = requests.Session() + retry_strategy = Retry(total=5, backoff_factor=3, backoff_jitter=1, status_forcelist=[429, 500, 502, 503, 504]) + cls.session.mount("https://github.com", 
HTTPAdapter(max_retries=retry_strategy)) + + cls.github = Github(auth=Auth.Token(token=os.environ.get("GITHUB_TOKEN"))) + gh_repo = cls.github.get_repo(full_name_or_id="openvinotoolkit/openvino") + + # Looking for reference workflow runs. + # Their "created_at" time should be within 60 days - the log retention window + oldest_allowed_date = (datetime.now() - timedelta(days=7)).strftime("%Y-%m-%d") + + cls.successful_workflow_run = gh_repo.get_workflow_runs(status="success", created=f">={oldest_allowed_date}")[0] + print(f"Successful workflow run for testing: {cls.successful_workflow_run}", flush=True) + + cls.failed_workflow_run = gh_repo.get_workflow_runs(status="failure", created=f">={oldest_allowed_date}")[0] + print(f"Failed workflow run for testing: {cls.failed_workflow_run}", flush=True) + + def setUp(self): + print(f'\nIn test: "{self._testMethodName}"', flush=True) + + def test_failed_logs_are_collected(self) -> None: + """ + Ensure only logs for failed jobs are collected + """ + with tempfile.TemporaryDirectory() as temp_dir: + logs_dir = Path(temp_dir) + collect_logs_for_run(run=self.failed_workflow_run, logs_dir=logs_dir, session=self.session) + self.assertTrue(any(logs_dir.iterdir()), "Logs directory should not be empty for failed runs") + + def test_successful_logs_are_not_collected(self) -> None: + """ + Ensure logs for successful jobs are not collected + """ + with tempfile.TemporaryDirectory() as temp_dir: + logs_dir = Path(temp_dir) + collect_logs_for_run(run=self.successful_workflow_run, logs_dir=logs_dir, session=self.session) + self.assertFalse(any(logs_dir.iterdir()), "Logs directory should be empty for successful runs") + + @classmethod + def tearDownClass(cls) -> None: + cls.github.close() diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/workflows/assign_issue.yml b/src/resources/openvino.genai-2026.1.0.0/.github/workflows/assign_issue.yml new file mode 100644 index 0000000..4a4579e --- /dev/null +++ 
b/src/resources/openvino.genai-2026.1.0.0/.github/workflows/assign_issue.yml @@ -0,0 +1,25 @@ +name: Take Issue + +on: + issue_comment: + types: + - created + - edited + +permissions: read-all # Required by https://github.com/ossf/scorecard/blob/e23b8ad91fd6a64a0a971ca4fc0a4d1650725615/docs/checks.md#token-permissions + +jobs: + take-issue: + name: Take issue + runs-on: ubuntu-latest + permissions: + issues: write + timeout-minutes: 10 + steps: + - name: take an issue + uses: bdougie/take-action@v1.6.1 + with: + message: Thank you for looking into this issue! Please let us know if you have any questions or require any help. + issueCurrentlyAssignedMessage: Thanks for being interested in this issue. It looks like this ticket is already assigned to a contributor. Please communicate with the assigned contributor to confirm the status of the issue. + trigger: .take + token: ${{ secrets.GITHUB_TOKEN }} diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/workflows/ci-doctor.lock.yml b/src/resources/openvino.genai-2026.1.0.0/.github/workflows/ci-doctor.lock.yml new file mode 100644 index 0000000..78ada3d --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/workflows/ci-doctor.lock.yml @@ -0,0 +1,1233 @@ +# +# ___ _ _ +# / _ \ | | (_) +# | |_| | __ _ ___ _ __ | |_ _ ___ +# | _ |/ _` |/ _ \ '_ \| __| |/ __| +# | | | | (_| | __/ | | | |_| | (__ +# \_| |_/\__, |\___|_| |_|\__|_|\___| +# __/ | +# _ _ |___/ +# | | | | / _| | +# | | | | ___ _ __ _ __| |_| | _____ ____ +# | |/\| |/ _ \ '__| |/ /| _| |/ _ \ \ /\ / / ___| +# \ /\ / (_) | | | | ( | | | | (_) \ V V /\__ \ +# \/ \/ \___/|_| |_|\_\|_| |_|\___/ \_/\_/ |___/ +# +# This file was automatically generated by gh-aw (v0.46.5). DO NOT EDIT. +# +# To update this file, edit githubnext/agentics/workflows/ci-doctor.md@0aa94a6e40aeaf131118476bc6a07e55c4ceb147 and run: +# gh aw compile +# Not all edits will cause changes to this file. 
+# +# For more information: https://github.github.com/gh-aw/introduction/overview/ +# +# This workflow is an automated CI failure investigator that triggers when monitored workflows fail. +# Performs deep analysis of GitHub Actions workflow failures to identify root causes, +# patterns, and provide actionable remediation steps. Analyzes logs, error messages, +# and workflow configuration to help diagnose and resolve CI issues efficiently. +# +# Source: githubnext/agentics/workflows/ci-doctor.md@0aa94a6e40aeaf131118476bc6a07e55c4ceb147 +# +# gh-aw-metadata: {"schema_version":"v1","frontmatter_hash":"ab6f1bebb48e4555bacc8c6f7173e8338fc34d0c282b41e256a19c678a675565","compiler_version":"v0.46.5"} + +name: "CI Failure Doctor" +"on": + workflow_dispatch: + inputs: + run_id: + description: Workflow run ID to investigate (for manual testing) + required: false + +permissions: {} + +concurrency: + group: "gh-aw-${{ github.workflow }}" + +run-name: "CI Failure Doctor" + +jobs: + activation: + needs: pre_activation + if: > + (needs.pre_activation.outputs.activated == 'true') && (github.event_name == 'workflow_dispatch' || (github.event.workflow_run.conclusion == 'failure' && + (github.event.workflow_run.head_branch == 'master' || github.event.workflow_run.event == 'pull_request'))) + runs-on: ubuntu-slim + permissions: + contents: read + outputs: + comment_id: "" + comment_repo: "" + steps: + - name: Setup Scripts + uses: github/gh-aw/actions/setup@5a79466d65414632d47c7869b27170ade5b9404e # v0.46.5 + with: + destination: /opt/gh-aw/actions + - name: Validate context variables + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + with: + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/validate_context_variables.cjs'); + await main(); + - name: Checkout .github and .agents folders + uses: 
actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + sparse-checkout: | + .github + .agents + fetch-depth: 1 + persist-credentials: false + - name: Check workflow file timestamps + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_WORKFLOW_FILE: "ci-doctor.lock.yml" + with: + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/check_workflow_timestamp_api.cjs'); + await main(); + - name: Create prompt with built-in context + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + GH_AW_SAFE_OUTPUTS: ${{ env.GH_AW_SAFE_OUTPUTS }} + GH_AW_GITHUB_ACTOR: ${{ github.actor }} + GH_AW_GITHUB_EVENT_COMMENT_ID: ${{ github.event.comment.id }} + GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: ${{ github.event.discussion.number }} + GH_AW_GITHUB_EVENT_INPUTS_RUN_ID: ${{ github.event.inputs.run_id }} + GH_AW_GITHUB_EVENT_ISSUE_NUMBER: ${{ github.event.issue.number }} + GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER: ${{ github.event.pull_request.number }} + GH_AW_GITHUB_EVENT_WORKFLOW_RUN_CONCLUSION: ${{ github.event.workflow_run.conclusion }} + GH_AW_GITHUB_EVENT_WORKFLOW_RUN_EVENT: ${{ github.event.workflow_run.event }} + GH_AW_GITHUB_EVENT_WORKFLOW_RUN_HEAD_SHA: ${{ github.event.workflow_run.head_sha }} + GH_AW_GITHUB_EVENT_WORKFLOW_RUN_HTML_URL: ${{ github.event.workflow_run.html_url }} + GH_AW_GITHUB_EVENT_WORKFLOW_RUN_ID: ${{ github.event.workflow_run.id }} + GH_AW_GITHUB_EVENT_WORKFLOW_RUN_RUN_NUMBER: ${{ github.event.workflow_run.run_number }} + GH_AW_GITHUB_REPOSITORY: ${{ github.repository }} + GH_AW_GITHUB_RUN_ID: ${{ github.run_id }} + GH_AW_GITHUB_WORKSPACE: ${{ github.workspace }} + run: | + bash /opt/gh-aw/actions/create_prompt_first.sh + cat << 'GH_AW_PROMPT_EOF' > "$GH_AW_PROMPT" + + GH_AW_PROMPT_EOF + cat "/opt/gh-aw/prompts/xpia.md" >> "$GH_AW_PROMPT" + cat 
"/opt/gh-aw/prompts/temp_folder_prompt.md" >> "$GH_AW_PROMPT" + cat "/opt/gh-aw/prompts/markdown.md" >> "$GH_AW_PROMPT" + cat "/opt/gh-aw/prompts/cache_memory_prompt.md" >> "$GH_AW_PROMPT" + cat << 'GH_AW_PROMPT_EOF' >> "$GH_AW_PROMPT" + + GitHub API Access Instructions + + The gh CLI is NOT authenticated. Do NOT use gh commands for GitHub operations. + + + To create or modify GitHub resources (issues, discussions, pull requests, etc.), you MUST call the appropriate safe output tool. Simply writing content will NOT work - the workflow requires actual tool calls. + + Temporary IDs: Some safe output tools support a temporary ID field (usually named temporary_id) so you can reference newly-created items elsewhere in the SAME agent output (for example, using #aw_abc1 in a later body). + + **IMPORTANT - temporary_id format rules:** + - If you DON'T need to reference the item later, OMIT the temporary_id field entirely (it will be auto-generated if needed) + - If you DO need cross-references/chaining, you MUST match this EXACT validation regex: /^aw_[A-Za-z0-9]{3,8}$/i + - Format: aw_ prefix followed by 3 to 8 alphanumeric characters (A-Z, a-z, 0-9, case-insensitive) + - Valid alphanumeric characters: ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 + - INVALID examples: aw_ab (too short), aw_123456789 (too long), aw_test-id (contains hyphen), aw_id_123 (contains underscore) + - VALID examples: aw_abc, aw_abc1, aw_Test123, aw_A1B2C3D4, aw_12345678 + - To generate valid IDs: use 3-8 random alphanumeric characters or omit the field to let the system auto-generate + + Do NOT invent other aw_* formats — downstream steps will reject them with validation errors matching against /^aw_[A-Za-z0-9]{3,8}$/i. + + Discover available tools from the safeoutputs MCP server. + + **Critical**: Tool calls write structured data that downstream jobs process. Without tool calls, follow-up actions will be skipped. 
+ + **Note**: If you made no other safe output tool calls during this workflow execution, call the "noop" tool to provide a status message indicating completion or that no actions were needed. + + + + The following GitHub context information is available for this workflow: + {{#if __GH_AW_GITHUB_ACTOR__ }} + - **actor**: __GH_AW_GITHUB_ACTOR__ + {{/if}} + {{#if __GH_AW_GITHUB_REPOSITORY__ }} + - **repository**: __GH_AW_GITHUB_REPOSITORY__ + {{/if}} + {{#if __GH_AW_GITHUB_WORKSPACE__ }} + - **workspace**: __GH_AW_GITHUB_WORKSPACE__ + {{/if}} + {{#if __GH_AW_GITHUB_EVENT_ISSUE_NUMBER__ }} + - **issue-number**: #__GH_AW_GITHUB_EVENT_ISSUE_NUMBER__ + {{/if}} + {{#if __GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER__ }} + - **discussion-number**: #__GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER__ + {{/if}} + {{#if __GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER__ }} + - **pull-request-number**: #__GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER__ + {{/if}} + {{#if __GH_AW_GITHUB_EVENT_COMMENT_ID__ }} + - **comment-id**: __GH_AW_GITHUB_EVENT_COMMENT_ID__ + {{/if}} + {{#if __GH_AW_GITHUB_RUN_ID__ }} + - **workflow-run-id**: __GH_AW_GITHUB_RUN_ID__ + {{/if}} + + + GH_AW_PROMPT_EOF + cat << 'GH_AW_PROMPT_EOF' >> "$GH_AW_PROMPT" + + GH_AW_PROMPT_EOF + cat << 'GH_AW_PROMPT_EOF' >> "$GH_AW_PROMPT" + {{#runtime-import .github/workflows/ci-doctor.md}} + GH_AW_PROMPT_EOF + - name: Interpolate variables and render templates + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + GH_AW_GITHUB_EVENT_INPUTS_RUN_ID: ${{ github.event.inputs.run_id }} + GH_AW_GITHUB_EVENT_WORKFLOW_RUN_CONCLUSION: ${{ github.event.workflow_run.conclusion }} + GH_AW_GITHUB_EVENT_WORKFLOW_RUN_EVENT: ${{ github.event.workflow_run.event }} + GH_AW_GITHUB_EVENT_WORKFLOW_RUN_HEAD_SHA: ${{ github.event.workflow_run.head_sha }} + GH_AW_GITHUB_EVENT_WORKFLOW_RUN_HTML_URL: ${{ github.event.workflow_run.html_url }} + GH_AW_GITHUB_EVENT_WORKFLOW_RUN_ID: ${{ 
github.event.workflow_run.id }} + GH_AW_GITHUB_EVENT_WORKFLOW_RUN_RUN_NUMBER: ${{ github.event.workflow_run.run_number }} + GH_AW_GITHUB_REPOSITORY: ${{ github.repository }} + with: + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/interpolate_prompt.cjs'); + await main(); + - name: Substitute placeholders + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + GH_AW_ALLOWED_EXTENSIONS: '' + GH_AW_CACHE_DESCRIPTION: '' + GH_AW_CACHE_DIR: '/tmp/gh-aw/cache-memory/' + GH_AW_GITHUB_ACTOR: ${{ github.actor }} + GH_AW_GITHUB_EVENT_COMMENT_ID: ${{ github.event.comment.id }} + GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: ${{ github.event.discussion.number }} + GH_AW_GITHUB_EVENT_INPUTS_RUN_ID: ${{ github.event.inputs.run_id }} + GH_AW_GITHUB_EVENT_ISSUE_NUMBER: ${{ github.event.issue.number }} + GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER: ${{ github.event.pull_request.number }} + GH_AW_GITHUB_EVENT_WORKFLOW_RUN_CONCLUSION: ${{ github.event.workflow_run.conclusion }} + GH_AW_GITHUB_EVENT_WORKFLOW_RUN_EVENT: ${{ github.event.workflow_run.event }} + GH_AW_GITHUB_EVENT_WORKFLOW_RUN_HEAD_SHA: ${{ github.event.workflow_run.head_sha }} + GH_AW_GITHUB_EVENT_WORKFLOW_RUN_HTML_URL: ${{ github.event.workflow_run.html_url }} + GH_AW_GITHUB_EVENT_WORKFLOW_RUN_ID: ${{ github.event.workflow_run.id }} + GH_AW_GITHUB_EVENT_WORKFLOW_RUN_RUN_NUMBER: ${{ github.event.workflow_run.run_number }} + GH_AW_GITHUB_REPOSITORY: ${{ github.repository }} + GH_AW_GITHUB_RUN_ID: ${{ github.run_id }} + GH_AW_GITHUB_WORKSPACE: ${{ github.workspace }} + GH_AW_NEEDS_PRE_ACTIVATION_OUTPUTS_ACTIVATED: ${{ needs.pre_activation.outputs.activated }} + GH_AW_NEEDS_PRE_ACTIVATION_OUTPUTS_MATCHED_COMMAND: ${{ needs.pre_activation.outputs.matched_command }} + with: + script: | + const { setupGlobals } = 
require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + + const substitutePlaceholders = require('/opt/gh-aw/actions/substitute_placeholders.cjs'); + + // Call the substitution function + return await substitutePlaceholders({ + file: process.env.GH_AW_PROMPT, + substitutions: { + GH_AW_ALLOWED_EXTENSIONS: process.env.GH_AW_ALLOWED_EXTENSIONS, + GH_AW_CACHE_DESCRIPTION: process.env.GH_AW_CACHE_DESCRIPTION, + GH_AW_CACHE_DIR: process.env.GH_AW_CACHE_DIR, + GH_AW_GITHUB_ACTOR: process.env.GH_AW_GITHUB_ACTOR, + GH_AW_GITHUB_EVENT_COMMENT_ID: process.env.GH_AW_GITHUB_EVENT_COMMENT_ID, + GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: process.env.GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER, + GH_AW_GITHUB_EVENT_INPUTS_RUN_ID: process.env.GH_AW_GITHUB_EVENT_INPUTS_RUN_ID, + GH_AW_GITHUB_EVENT_ISSUE_NUMBER: process.env.GH_AW_GITHUB_EVENT_ISSUE_NUMBER, + GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER: process.env.GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER, + GH_AW_GITHUB_EVENT_WORKFLOW_RUN_CONCLUSION: process.env.GH_AW_GITHUB_EVENT_WORKFLOW_RUN_CONCLUSION, + GH_AW_GITHUB_EVENT_WORKFLOW_RUN_EVENT: process.env.GH_AW_GITHUB_EVENT_WORKFLOW_RUN_EVENT, + GH_AW_GITHUB_EVENT_WORKFLOW_RUN_HEAD_SHA: process.env.GH_AW_GITHUB_EVENT_WORKFLOW_RUN_HEAD_SHA, + GH_AW_GITHUB_EVENT_WORKFLOW_RUN_HTML_URL: process.env.GH_AW_GITHUB_EVENT_WORKFLOW_RUN_HTML_URL, + GH_AW_GITHUB_EVENT_WORKFLOW_RUN_ID: process.env.GH_AW_GITHUB_EVENT_WORKFLOW_RUN_ID, + GH_AW_GITHUB_EVENT_WORKFLOW_RUN_RUN_NUMBER: process.env.GH_AW_GITHUB_EVENT_WORKFLOW_RUN_RUN_NUMBER, + GH_AW_GITHUB_REPOSITORY: process.env.GH_AW_GITHUB_REPOSITORY, + GH_AW_GITHUB_RUN_ID: process.env.GH_AW_GITHUB_RUN_ID, + GH_AW_GITHUB_WORKSPACE: process.env.GH_AW_GITHUB_WORKSPACE, + GH_AW_NEEDS_PRE_ACTIVATION_OUTPUTS_ACTIVATED: process.env.GH_AW_NEEDS_PRE_ACTIVATION_OUTPUTS_ACTIVATED, + GH_AW_NEEDS_PRE_ACTIVATION_OUTPUTS_MATCHED_COMMAND: process.env.GH_AW_NEEDS_PRE_ACTIVATION_OUTPUTS_MATCHED_COMMAND + } + }); + - name: Validate prompt 
placeholders + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + run: bash /opt/gh-aw/actions/validate_prompt_placeholders.sh + - name: Print prompt + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + run: bash /opt/gh-aw/actions/print_prompt_summary.sh + - name: Upload prompt artifact + if: success() + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6 + with: + name: prompt + path: /tmp/gh-aw/aw-prompts/prompt.txt + retention-days: 1 + + agent: + needs: activation + runs-on: ubuntu-latest + permissions: read-all + concurrency: + group: "gh-aw-copilot-${{ github.workflow }}" + env: + DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} + GH_AW_ASSETS_ALLOWED_EXTS: "" + GH_AW_ASSETS_BRANCH: "" + GH_AW_ASSETS_MAX_SIZE_KB: 0 + GH_AW_MCP_LOG_DIR: /tmp/gh-aw/mcp-logs/safeoutputs + GH_AW_SAFE_OUTPUTS: /opt/gh-aw/safeoutputs/outputs.jsonl + GH_AW_SAFE_OUTPUTS_CONFIG_PATH: /opt/gh-aw/safeoutputs/config.json + GH_AW_SAFE_OUTPUTS_TOOLS_PATH: /opt/gh-aw/safeoutputs/tools.json + GH_AW_WORKFLOW_ID_SANITIZED: cidoctor + outputs: + checkout_pr_success: ${{ steps.checkout-pr.outputs.checkout_pr_success || 'true' }} + has_patch: ${{ steps.collect_output.outputs.has_patch }} + model: ${{ steps.generate_aw_info.outputs.model }} + output: ${{ steps.collect_output.outputs.output }} + output_types: ${{ steps.collect_output.outputs.output_types }} + secret_verification_result: ${{ steps.validate-secret.outputs.verification_result }} + steps: + - name: Setup Scripts + uses: github/gh-aw/actions/setup@5a79466d65414632d47c7869b27170ade5b9404e # v0.46.5 + with: + destination: /opt/gh-aw/actions + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + - name: Create gh-aw temp directory + run: bash /opt/gh-aw/actions/create_gh_aw_tmp_dir.sh + # Cache memory file share configuration from frontmatter processed below + - name: Create cache-memory directory + run: 
bash /opt/gh-aw/actions/create_cache_memory_dir.sh + - name: Restore cache-memory file share data + uses: actions/cache/restore@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + with: + key: memory-${{ env.GH_AW_WORKFLOW_ID_SANITIZED }}-${{ github.run_id }} + path: /tmp/gh-aw/cache-memory + restore-keys: | + memory-${{ env.GH_AW_WORKFLOW_ID_SANITIZED }}- + - name: Configure Git credentials + env: + REPO_NAME: ${{ github.repository }} + SERVER_URL: ${{ github.server_url }} + run: | + git config --global user.email "github-actions[bot]@users.noreply.github.com" + git config --global user.name "github-actions[bot]" + # Re-authenticate git with GitHub token + SERVER_URL_STRIPPED="${SERVER_URL#https://}" + git remote set-url origin "https://x-access-token:${{ github.token }}@${SERVER_URL_STRIPPED}/${REPO_NAME}.git" + echo "Git configured with standard GitHub Actions identity" + - name: Checkout PR branch + id: checkout-pr + if: | + github.event.pull_request + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + with: + github-token: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/checkout_pr_branch.cjs'); + await main(); + - name: Generate agentic run info + id: generate_aw_info + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + with: + script: | + const fs = require('fs'); + + const awInfo = { + engine_id: "copilot", + engine_name: "GitHub Copilot CLI", + model: process.env.GH_AW_MODEL_AGENT_COPILOT || "", + version: "", + agent_version: "0.0.411", + cli_version: "v0.46.5", + workflow_name: "CI Failure Doctor", + experimental: false, + supports_tools_allowlist: true, + 
run_id: context.runId, + run_number: context.runNumber, + run_attempt: process.env.GITHUB_RUN_ATTEMPT, + repository: context.repo.owner + '/' + context.repo.repo, + ref: context.ref, + sha: context.sha, + actor: context.actor, + event_name: context.eventName, + staged: false, + allowed_domains: ["defaults"], + firewall_enabled: true, + awf_version: "v0.20.1", + awmg_version: "v0.1.4", + steps: { + firewall: "squid" + }, + created_at: new Date().toISOString() + }; + + // Write to /tmp/gh-aw directory to avoid inclusion in PR + const tmpPath = '/tmp/gh-aw/aw_info.json'; + fs.writeFileSync(tmpPath, JSON.stringify(awInfo, null, 2)); + console.log('Generated aw_info.json at:', tmpPath); + console.log(JSON.stringify(awInfo, null, 2)); + + // Set model as output for reuse in other steps/jobs + core.setOutput('model', awInfo.model); + - name: Validate COPILOT_GITHUB_TOKEN secret + id: validate-secret + run: /opt/gh-aw/actions/validate_multi_secret.sh COPILOT_GITHUB_TOKEN 'GitHub Copilot CLI' https://github.github.com/gh-aw/reference/engines/#github-copilot-default + env: + COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + - name: Install GitHub Copilot CLI + run: /opt/gh-aw/actions/install_copilot_cli.sh 0.0.411 + - name: Install awf binary + run: bash /opt/gh-aw/actions/install_awf_binary.sh v0.20.1 + - name: Determine automatic lockdown mode for GitHub MCP Server + id: determine-automatic-lockdown + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_GITHUB_TOKEN: ${{ secrets.GH_AW_GITHUB_TOKEN }} + GH_AW_GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN }} + with: + script: | + const determineAutomaticLockdown = require('/opt/gh-aw/actions/determine_automatic_lockdown.cjs'); + await determineAutomaticLockdown(github, context, core); + - name: Download container images + run: bash /opt/gh-aw/actions/download_docker_images.sh ghcr.io/github/gh-aw-firewall/agent:0.20.1 
ghcr.io/github/gh-aw-firewall/api-proxy:0.20.1 ghcr.io/github/gh-aw-firewall/squid:0.20.1 ghcr.io/github/gh-aw-mcpg:v0.1.4 ghcr.io/github/github-mcp-server:v0.30.3 node:lts-alpine + - name: Write Safe Outputs Config + run: | + mkdir -p /opt/gh-aw/safeoutputs + mkdir -p /tmp/gh-aw/safeoutputs + mkdir -p /tmp/gh-aw/mcp-logs/safeoutputs + cat > /opt/gh-aw/safeoutputs/config.json << 'GH_AW_SAFE_OUTPUTS_CONFIG_EOF' + {"add_comment":{"max":1},"create_issue":{"max":1},"missing_data":{},"missing_tool":{},"noop":{"max":1}} + GH_AW_SAFE_OUTPUTS_CONFIG_EOF + cat > /opt/gh-aw/safeoutputs/tools.json << 'GH_AW_SAFE_OUTPUTS_TOOLS_EOF' + [ + { + "description": "Create a new GitHub issue for tracking bugs, feature requests, or tasks. Use this for actionable work items that need assignment, labeling, and status tracking. For reports, announcements, or status updates that don't require task tracking, use create_discussion instead. CONSTRAINTS: Maximum 1 issue(s) can be created. Title will be prefixed with \"${{ github.workflow }}\". Labels [automation ci] will be automatically added.", + "inputSchema": { + "additionalProperties": false, + "properties": { + "body": { + "description": "Detailed issue description in Markdown. Do NOT repeat the title as a heading since it already appears as the issue's h1. Include context, reproduction steps, or acceptance criteria as appropriate.", + "type": "string" + }, + "labels": { + "description": "Labels to categorize the issue (e.g., 'bug', 'enhancement'). Labels must exist in the repository.", + "items": { + "type": "string" + }, + "type": "array" + }, + "parent": { + "description": "Parent issue number for creating sub-issues. This is the numeric ID from the GitHub URL (e.g., 42 in github.com/owner/repo/issues/42). 
Can also be a temporary_id (e.g., 'aw_abc123', 'aw_Test123') from a previously created issue in the same workflow run.", + "type": [ + "number", + "string" + ] + }, + "temporary_id": { + "description": "Unique temporary identifier for referencing this issue before it's created. Format: 'aw_' followed by 3 to 8 alphanumeric characters (e.g., 'aw_abc1', 'aw_Test123'). Use '#aw_ID' in body text to reference other issues by their temporary_id; these are replaced with actual issue numbers after creation.", + "pattern": "^aw_[A-Za-z0-9]{3,8}$", + "type": "string" + }, + "title": { + "description": "Concise issue title summarizing the bug, feature, or task. The title appears as the main heading, so keep it brief and descriptive.", + "type": "string" + } + }, + "required": [ + "title", + "body" + ], + "type": "object" + }, + "name": "create_issue" + }, + { + "description": "Add a comment to an existing GitHub issue, pull request, or discussion. Use this to provide feedback, answer questions, or add information to an existing conversation. For creating new items, use create_issue, create_discussion, or create_pull_request instead. IMPORTANT: Comments are subject to validation constraints enforced by the MCP server - maximum 65536 characters for the complete comment (including footer which is added automatically), 10 mentions (@username), and 50 links. Exceeding these limits will result in an immediate error with specific guidance. NOTE: By default, this tool requires discussions:write permission. If your GitHub App lacks Discussions permission, set 'discussions: false' in the workflow's safe-outputs.add-comment configuration to exclude this permission. CONSTRAINTS: Maximum 1 comment(s) can be added.", + "inputSchema": { + "additionalProperties": false, + "properties": { + "body": { + "description": "The comment text in Markdown format. This is the 'body' field - do not use 'comment_body' or other variations. 
Provide helpful, relevant information that adds value to the conversation. CONSTRAINTS: The complete comment (your body text + automatically added footer) must not exceed 65536 characters total. Maximum 10 mentions (@username), maximum 50 links (http/https URLs). A footer (~200-500 characters) is automatically appended with workflow attribution, so leave adequate space. If these limits are exceeded, the tool call will fail with a detailed error message indicating which constraint was violated.", + "type": "string" + }, + "item_number": { + "description": "The issue, pull request, or discussion number to comment on. This is the numeric ID from the GitHub URL (e.g., 123 in github.com/owner/repo/issues/123). If omitted, the tool will attempt to resolve the target from the current workflow context (triggering issue, PR, or discussion).", + "type": "number" + } + }, + "required": [ + "body" + ], + "type": "object" + }, + "name": "add_comment" + }, + { + "description": "Report that a tool or capability needed to complete the task is not available, or share any information you deem important about missing functionality or limitations. Use this when you cannot accomplish what was requested because the required functionality is missing or access is restricted.", + "inputSchema": { + "additionalProperties": false, + "properties": { + "alternatives": { + "description": "Any workarounds, manual steps, or alternative approaches the user could take (max 256 characters).", + "type": "string" + }, + "reason": { + "description": "Explanation of why this tool is needed or what information you want to share about the limitation (max 256 characters).", + "type": "string" + }, + "tool": { + "description": "Optional: Name or description of the missing tool or capability (max 128 characters). 
Be specific about what functionality is needed.", + "type": "string" + } + }, + "required": [ + "reason" + ], + "type": "object" + }, + "name": "missing_tool" + }, + { + "description": "Log a transparency message when no significant actions are needed. Use this to confirm workflow completion and provide visibility when analysis is complete but no changes or outputs are required (e.g., 'No issues found', 'All checks passed'). This ensures the workflow produces human-visible output even when no other actions are taken.", + "inputSchema": { + "additionalProperties": false, + "properties": { + "message": { + "description": "Status or completion message to log. Should explain what was analyzed and the outcome (e.g., 'Code review complete - no issues found', 'Analysis complete - all tests passing').", + "type": "string" + } + }, + "required": [ + "message" + ], + "type": "object" + }, + "name": "noop" + }, + { + "description": "Report that data or information needed to complete the task is not available. Use this when you cannot accomplish what was requested because required data, context, or information is missing.", + "inputSchema": { + "additionalProperties": false, + "properties": { + "alternatives": { + "description": "Any workarounds, manual steps, or alternative approaches the user could take (max 256 characters).", + "type": "string" + }, + "context": { + "description": "Additional context about the missing data or where it should come from (max 256 characters).", + "type": "string" + }, + "data_type": { + "description": "Type or description of the missing data or information (max 128 characters). 
Be specific about what data is needed.", + "type": "string" + }, + "reason": { + "description": "Explanation of why this data is needed to complete the task (max 256 characters).", + "type": "string" + } + }, + "required": [], + "type": "object" + }, + "name": "missing_data" + } + ] + GH_AW_SAFE_OUTPUTS_TOOLS_EOF + cat > /opt/gh-aw/safeoutputs/validation.json << 'GH_AW_SAFE_OUTPUTS_VALIDATION_EOF' + { + "add_comment": { + "defaultMax": 1, + "fields": { + "body": { + "required": true, + "type": "string", + "sanitize": true, + "maxLength": 65000 + }, + "item_number": { + "issueOrPRNumber": true + } + } + }, + "create_issue": { + "defaultMax": 1, + "fields": { + "body": { + "required": true, + "type": "string", + "sanitize": true, + "maxLength": 65000 + }, + "labels": { + "type": "array", + "itemType": "string", + "itemSanitize": true, + "itemMaxLength": 128 + }, + "parent": { + "issueOrPRNumber": true + }, + "repo": { + "type": "string", + "maxLength": 256 + }, + "temporary_id": { + "type": "string" + }, + "title": { + "required": true, + "type": "string", + "sanitize": true, + "maxLength": 128 + } + } + }, + "missing_tool": { + "defaultMax": 20, + "fields": { + "alternatives": { + "type": "string", + "sanitize": true, + "maxLength": 512 + }, + "reason": { + "required": true, + "type": "string", + "sanitize": true, + "maxLength": 256 + }, + "tool": { + "type": "string", + "sanitize": true, + "maxLength": 128 + } + } + }, + "noop": { + "defaultMax": 1, + "fields": { + "message": { + "required": true, + "type": "string", + "sanitize": true, + "maxLength": 65000 + } + } + } + } + GH_AW_SAFE_OUTPUTS_VALIDATION_EOF + - name: Generate Safe Outputs MCP Server Config + id: safe-outputs-config + run: | + # Generate a secure random API key (360 bits of entropy, 40+ chars) + # Mask immediately to prevent timing vulnerabilities + API_KEY=$(openssl rand -base64 45 | tr -d '/+=') + echo "::add-mask::${API_KEY}" + + PORT=3001 + + # Set outputs for next steps + { + echo 
"safe_outputs_api_key=${API_KEY}" + echo "safe_outputs_port=${PORT}" + } >> "$GITHUB_OUTPUT" + + echo "Safe Outputs MCP server will run on port ${PORT}" + + - name: Start Safe Outputs MCP HTTP Server + id: safe-outputs-start + env: + DEBUG: '*' + GH_AW_SAFE_OUTPUTS_PORT: ${{ steps.safe-outputs-config.outputs.safe_outputs_port }} + GH_AW_SAFE_OUTPUTS_API_KEY: ${{ steps.safe-outputs-config.outputs.safe_outputs_api_key }} + GH_AW_SAFE_OUTPUTS_TOOLS_PATH: /opt/gh-aw/safeoutputs/tools.json + GH_AW_SAFE_OUTPUTS_CONFIG_PATH: /opt/gh-aw/safeoutputs/config.json + GH_AW_MCP_LOG_DIR: /tmp/gh-aw/mcp-logs/safeoutputs + run: | + # Environment variables are set above to prevent template injection + export DEBUG + export GH_AW_SAFE_OUTPUTS_PORT + export GH_AW_SAFE_OUTPUTS_API_KEY + export GH_AW_SAFE_OUTPUTS_TOOLS_PATH + export GH_AW_SAFE_OUTPUTS_CONFIG_PATH + export GH_AW_MCP_LOG_DIR + + bash /opt/gh-aw/actions/start_safe_outputs_server.sh + + - name: Start MCP Gateway + id: start-mcp-gateway + env: + GH_AW_SAFE_OUTPUTS: ${{ env.GH_AW_SAFE_OUTPUTS }} + GH_AW_SAFE_OUTPUTS_API_KEY: ${{ steps.safe-outputs-start.outputs.api_key }} + GH_AW_SAFE_OUTPUTS_PORT: ${{ steps.safe-outputs-start.outputs.port }} + GITHUB_MCP_LOCKDOWN: ${{ steps.determine-automatic-lockdown.outputs.lockdown == 'true' && '1' || '0' }} + GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + run: | + set -eo pipefail + mkdir -p /tmp/gh-aw/mcp-config + + # Export gateway environment variables for MCP config and gateway script + export MCP_GATEWAY_PORT="80" + export MCP_GATEWAY_DOMAIN="host.docker.internal" + MCP_GATEWAY_API_KEY=$(openssl rand -base64 45 | tr -d '/+=') + echo "::add-mask::${MCP_GATEWAY_API_KEY}" + export MCP_GATEWAY_API_KEY + export MCP_GATEWAY_PAYLOAD_DIR="/tmp/gh-aw/mcp-payloads" + mkdir -p "${MCP_GATEWAY_PAYLOAD_DIR}" + export DEBUG="*" + + export GH_AW_ENGINE="copilot" + export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm 
--network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_LOCKDOWN -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.1.4' + + mkdir -p /home/runner/.copilot + cat << GH_AW_MCP_CONFIG_EOF | bash /opt/gh-aw/actions/start_mcp_gateway.sh + { + "mcpServers": { + "github": { + "type": "stdio", + "container": "ghcr.io/github/github-mcp-server:v0.30.3", + "env": { + "GITHUB_LOCKDOWN_MODE": "$GITHUB_MCP_LOCKDOWN", + "GITHUB_PERSONAL_ACCESS_TOKEN": "\${GITHUB_MCP_SERVER_TOKEN}", + "GITHUB_READ_ONLY": "1", + "GITHUB_TOOLSETS": "context,repos,issues,pull_requests" + } + }, + "safeoutputs": { + "type": "http", + "url": "http://host.docker.internal:$GH_AW_SAFE_OUTPUTS_PORT", + "headers": { + "Authorization": "\${GH_AW_SAFE_OUTPUTS_API_KEY}" + } + } + }, + "gateway": { + "port": $MCP_GATEWAY_PORT, + "domain": "${MCP_GATEWAY_DOMAIN}", + "apiKey": "${MCP_GATEWAY_API_KEY}", + "payloadDir": "${MCP_GATEWAY_PAYLOAD_DIR}" + } + } + GH_AW_MCP_CONFIG_EOF + - name: Generate workflow overview + uses: 
actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + with: + script: | + const { generateWorkflowOverview } = require('/opt/gh-aw/actions/generate_workflow_overview.cjs'); + await generateWorkflowOverview(core); + - name: Download prompt artifact + uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6 + with: + name: prompt + path: /tmp/gh-aw/aw-prompts + - name: Clean git credentials + run: bash /opt/gh-aw/actions/clean_git_credentials.sh + - name: Execute GitHub Copilot CLI + id: agentic_execution + # Copilot CLI tool arguments (sorted): + timeout-minutes: 10 + run: | + set -o pipefail + sudo -E awf --env-all --container-workdir "${GITHUB_WORKSPACE}" --allow-domains api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --enable-host-access --image-tag 0.20.1 --skip-pull --enable-api-proxy \ + -- /bin/bash -c '/usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --add-dir "${GITHUB_WORKSPACE}" --disable-builtin-mcps --allow-all-tools --add-dir /tmp/gh-aw/cache-memory/ --allow-all-paths --share 
/tmp/gh-aw/sandbox/agent/logs/conversation.md --prompt "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)"${GH_AW_MODEL_AGENT_COPILOT:+ --model "$GH_AW_MODEL_AGENT_COPILOT"}' 2>&1 | tee -a /tmp/gh-aw/agent-stdio.log + env: + COPILOT_AGENT_RUNNER_TYPE: STANDALONE + COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + GH_AW_MCP_CONFIG: /home/runner/.copilot/mcp-config.json + GH_AW_MODEL_AGENT_COPILOT: ${{ vars.GH_AW_MODEL_AGENT_COPILOT || '' }} + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + GH_AW_SAFE_OUTPUTS: ${{ env.GH_AW_SAFE_OUTPUTS }} + GITHUB_HEAD_REF: ${{ github.head_ref }} + GITHUB_REF_NAME: ${{ github.ref_name }} + GITHUB_STEP_SUMMARY: ${{ env.GITHUB_STEP_SUMMARY }} + GITHUB_WORKSPACE: ${{ github.workspace }} + XDG_CONFIG_HOME: /home/runner + - name: Configure Git credentials + env: + REPO_NAME: ${{ github.repository }} + SERVER_URL: ${{ github.server_url }} + run: | + git config --global user.email "github-actions[bot]@users.noreply.github.com" + git config --global user.name "github-actions[bot]" + # Re-authenticate git with GitHub token + SERVER_URL_STRIPPED="${SERVER_URL#https://}" + git remote set-url origin "https://x-access-token:${{ github.token }}@${SERVER_URL_STRIPPED}/${REPO_NAME}.git" + echo "Git configured with standard GitHub Actions identity" + - name: Copy Copilot session state files to logs + if: always() + continue-on-error: true + run: | + # Copy Copilot session state files to logs folder for artifact collection + # This ensures they are in /tmp/gh-aw/ where secret redaction can scan them + SESSION_STATE_DIR="$HOME/.copilot/session-state" + LOGS_DIR="/tmp/gh-aw/sandbox/agent/logs" + + if [ -d "$SESSION_STATE_DIR" ]; then + echo "Copying Copilot session state files from $SESSION_STATE_DIR to $LOGS_DIR" + mkdir -p "$LOGS_DIR" + cp -v "$SESSION_STATE_DIR"/*.jsonl "$LOGS_DIR/" 2>/dev/null || true + echo "Session state files copied successfully" + else + echo "No session-state directory found at $SESSION_STATE_DIR" + fi + - name: Stop MCP 
Gateway + if: always() + continue-on-error: true + env: + MCP_GATEWAY_PORT: ${{ steps.start-mcp-gateway.outputs.gateway-port }} + MCP_GATEWAY_API_KEY: ${{ steps.start-mcp-gateway.outputs.gateway-api-key }} + GATEWAY_PID: ${{ steps.start-mcp-gateway.outputs.gateway-pid }} + run: | + bash /opt/gh-aw/actions/stop_mcp_gateway.sh "$GATEWAY_PID" + - name: Redact secrets in logs + if: always() + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + with: + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/redact_secrets.cjs'); + await main(); + env: + GH_AW_SECRET_NAMES: 'COPILOT_GITHUB_TOKEN,GH_AW_GITHUB_MCP_SERVER_TOKEN,GH_AW_GITHUB_TOKEN,GITHUB_TOKEN' + SECRET_COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + SECRET_GH_AW_GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN }} + SECRET_GH_AW_GITHUB_TOKEN: ${{ secrets.GH_AW_GITHUB_TOKEN }} + SECRET_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Upload Safe Outputs + if: always() + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6 + with: + name: safe-output + path: ${{ env.GH_AW_SAFE_OUTPUTS }} + if-no-files-found: warn + - name: Ingest agent output + id: collect_output + if: always() + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_SAFE_OUTPUTS: ${{ env.GH_AW_SAFE_OUTPUTS }} + GH_AW_ALLOWED_DOMAINS: 
"api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com" + GITHUB_SERVER_URL: ${{ github.server_url }} + GITHUB_API_URL: ${{ github.api_url }} + with: + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/collect_ndjson_output.cjs'); + await main(); + - name: Upload sanitized agent output + if: always() && env.GH_AW_AGENT_OUTPUT + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6 + with: + name: agent-output + path: ${{ env.GH_AW_AGENT_OUTPUT }} + if-no-files-found: warn + - name: Upload engine output files + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6 + with: + name: agent_outputs + path: | + /tmp/gh-aw/sandbox/agent/logs/ + /tmp/gh-aw/redacted-urls.log + if-no-files-found: ignore + - name: Parse agent logs for step summary + if: always() + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: /tmp/gh-aw/sandbox/agent/logs/ + with: + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + 
setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/parse_copilot_log.cjs'); + await main(); + - name: Parse MCP Gateway logs for step summary + if: always() + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + with: + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/parse_mcp_gateway_log.cjs'); + await main(); + - name: Print firewall logs + if: always() + continue-on-error: true + env: + AWF_LOGS_DIR: /tmp/gh-aw/sandbox/firewall/logs + run: | + # Fix permissions on firewall logs so they can be uploaded as artifacts + # AWF runs with sudo, creating files owned by root + sudo chmod -R a+r /tmp/gh-aw/sandbox/firewall/logs 2>/dev/null || true + # Only run awf logs summary if awf command exists (it may not be installed if workflow failed before install step) + if command -v awf &> /dev/null; then + awf logs summary | tee -a "$GITHUB_STEP_SUMMARY" + else + echo 'AWF binary not installed, skipping firewall log summary' + fi + - name: Upload cache-memory data as artifact + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6 + if: always() + with: + name: cache-memory + path: /tmp/gh-aw/cache-memory + - name: Upload agent artifacts + if: always() + continue-on-error: true + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6 + with: + name: agent-artifacts + path: | + /tmp/gh-aw/aw-prompts/prompt.txt + /tmp/gh-aw/aw_info.json + /tmp/gh-aw/mcp-logs/ + /tmp/gh-aw/sandbox/firewall/logs/ + /tmp/gh-aw/agent-stdio.log + /tmp/gh-aw/agent/ + if-no-files-found: ignore + + conclusion: + needs: + - activation + - agent + - detection + - safe_outputs + - update_cache_memory + if: (always()) && (needs.agent.result != 'skipped') + runs-on: ubuntu-slim + permissions: + contents: read + discussions: write + issues: write + outputs: + 
noop_message: ${{ steps.noop.outputs.noop_message }} + tools_reported: ${{ steps.missing_tool.outputs.tools_reported }} + total_count: ${{ steps.missing_tool.outputs.total_count }} + steps: + - name: Setup Scripts + uses: github/gh-aw/actions/setup@5a79466d65414632d47c7869b27170ade5b9404e # v0.46.5 + with: + destination: /opt/gh-aw/actions + - name: Download agent output artifact + continue-on-error: true + uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6 + with: + name: agent-output + path: /tmp/gh-aw/safeoutputs/ + - name: Setup agent output environment variable + run: | + mkdir -p /tmp/gh-aw/safeoutputs/ + find "/tmp/gh-aw/safeoutputs/" -type f -print + echo "GH_AW_AGENT_OUTPUT=/tmp/gh-aw/safeoutputs/agent_output.json" >> "$GITHUB_ENV" + - name: Process No-Op Messages + id: noop + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: ${{ env.GH_AW_AGENT_OUTPUT }} + GH_AW_NOOP_MAX: 1 + GH_AW_WORKFLOW_NAME: "CI Failure Doctor" + GH_AW_WORKFLOW_SOURCE: "githubnext/agentics/workflows/ci-doctor.md@0aa94a6e40aeaf131118476bc6a07e55c4ceb147" + GH_AW_WORKFLOW_SOURCE_URL: "${{ github.server_url }}/githubnext/agentics/tree/0aa94a6e40aeaf131118476bc6a07e55c4ceb147/workflows/ci-doctor.md" + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/noop.cjs'); + await main(); + - name: Record Missing Tool + id: missing_tool + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: ${{ env.GH_AW_AGENT_OUTPUT }} + GH_AW_WORKFLOW_NAME: "CI Failure Doctor" + GH_AW_WORKFLOW_SOURCE: "githubnext/agentics/workflows/ci-doctor.md@0aa94a6e40aeaf131118476bc6a07e55c4ceb147" + GH_AW_WORKFLOW_SOURCE_URL: "${{ github.server_url 
}}/githubnext/agentics/tree/0aa94a6e40aeaf131118476bc6a07e55c4ceb147/workflows/ci-doctor.md" + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/missing_tool.cjs'); + await main(); + - name: Handle Agent Failure + id: handle_agent_failure + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: ${{ env.GH_AW_AGENT_OUTPUT }} + GH_AW_WORKFLOW_NAME: "CI Failure Doctor" + GH_AW_WORKFLOW_SOURCE: "githubnext/agentics/workflows/ci-doctor.md@0aa94a6e40aeaf131118476bc6a07e55c4ceb147" + GH_AW_WORKFLOW_SOURCE_URL: "${{ github.server_url }}/githubnext/agentics/tree/0aa94a6e40aeaf131118476bc6a07e55c4ceb147/workflows/ci-doctor.md" + GH_AW_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + GH_AW_AGENT_CONCLUSION: ${{ needs.agent.result }} + GH_AW_WORKFLOW_ID: "ci-doctor" + GH_AW_SECRET_VERIFICATION_RESULT: ${{ needs.agent.outputs.secret_verification_result }} + GH_AW_CHECKOUT_PR_SUCCESS: ${{ needs.agent.outputs.checkout_pr_success }} + GH_AW_GROUP_REPORTS: "false" + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/handle_agent_failure.cjs'); + await main(); + - name: Handle No-Op Message + id: handle_noop_message + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: ${{ env.GH_AW_AGENT_OUTPUT }} + GH_AW_WORKFLOW_NAME: "CI Failure Doctor" + GH_AW_WORKFLOW_SOURCE: "githubnext/agentics/workflows/ci-doctor.md@0aa94a6e40aeaf131118476bc6a07e55c4ceb147" + GH_AW_WORKFLOW_SOURCE_URL: "${{ github.server_url 
}}/githubnext/agentics/tree/0aa94a6e40aeaf131118476bc6a07e55c4ceb147/workflows/ci-doctor.md" + GH_AW_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + GH_AW_AGENT_CONCLUSION: ${{ needs.agent.result }} + GH_AW_NOOP_MESSAGE: ${{ steps.noop.outputs.noop_message }} + GH_AW_NOOP_REPORT_AS_ISSUE: "true" + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/handle_noop_message.cjs'); + await main(); + + detection: + needs: agent + if: needs.agent.outputs.output_types != '' || needs.agent.outputs.has_patch == 'true' + runs-on: ubuntu-latest + permissions: {} + concurrency: + group: "gh-aw-copilot-${{ github.workflow }}" + timeout-minutes: 10 + outputs: + success: ${{ steps.parse_results.outputs.success }} + steps: + - name: Setup Scripts + uses: github/gh-aw/actions/setup@5a79466d65414632d47c7869b27170ade5b9404e # v0.46.5 + with: + destination: /opt/gh-aw/actions + - name: Download agent artifacts + continue-on-error: true + uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6 + with: + name: agent-artifacts + path: /tmp/gh-aw/threat-detection/ + - name: Download agent output artifact + continue-on-error: true + uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6 + with: + name: agent-output + path: /tmp/gh-aw/threat-detection/ + - name: Print agent output types + env: + AGENT_OUTPUT_TYPES: ${{ needs.agent.outputs.output_types }} + run: | + echo "Agent output-types: $AGENT_OUTPUT_TYPES" + - name: Setup threat detection + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + WORKFLOW_NAME: "CI Failure Doctor" + WORKFLOW_DESCRIPTION: "This workflow is an automated CI failure investigator that triggers when monitored workflows fail.\nPerforms 
deep analysis of GitHub Actions workflow failures to identify root causes,\npatterns, and provide actionable remediation steps. Analyzes logs, error messages,\nand workflow configuration to help diagnose and resolve CI issues efficiently." + HAS_PATCH: ${{ needs.agent.outputs.has_patch }} + with: + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/setup_threat_detection.cjs'); + await main(); + - name: Ensure threat-detection directory and log + run: | + mkdir -p /tmp/gh-aw/threat-detection + touch /tmp/gh-aw/threat-detection/detection.log + - name: Validate COPILOT_GITHUB_TOKEN secret + id: validate-secret + run: /opt/gh-aw/actions/validate_multi_secret.sh COPILOT_GITHUB_TOKEN 'GitHub Copilot CLI' https://github.github.com/gh-aw/reference/engines/#github-copilot-default + env: + COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + - name: Install GitHub Copilot CLI + run: /opt/gh-aw/actions/install_copilot_cli.sh 0.0.411 + - name: Execute GitHub Copilot CLI + id: agentic_execution + # Copilot CLI tool arguments (sorted): + # --allow-tool shell(cat) + # --allow-tool shell(grep) + # --allow-tool shell(head) + # --allow-tool shell(jq) + # --allow-tool shell(ls) + # --allow-tool shell(tail) + # --allow-tool shell(wc) + timeout-minutes: 20 + run: | + set -o pipefail + COPILOT_CLI_INSTRUCTION="$(cat /tmp/gh-aw/aw-prompts/prompt.txt)" + mkdir -p /tmp/ + mkdir -p /tmp/gh-aw/ + mkdir -p /tmp/gh-aw/agent/ + mkdir -p /tmp/gh-aw/sandbox/agent/logs/ + copilot --add-dir /tmp/ --add-dir /tmp/gh-aw/ --add-dir /tmp/gh-aw/agent/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --disable-builtin-mcps --allow-tool 'shell(cat)' --allow-tool 'shell(grep)' --allow-tool 'shell(head)' --allow-tool 'shell(jq)' --allow-tool 'shell(ls)' --allow-tool 'shell(tail)' --allow-tool 'shell(wc)' --share /tmp/gh-aw/sandbox/agent/logs/conversation.md 
--prompt "$COPILOT_CLI_INSTRUCTION"${GH_AW_MODEL_DETECTION_COPILOT:+ --model "$GH_AW_MODEL_DETECTION_COPILOT"} 2>&1 | tee /tmp/gh-aw/threat-detection/detection.log + env: + COPILOT_AGENT_RUNNER_TYPE: STANDALONE + COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + GH_AW_MODEL_DETECTION_COPILOT: ${{ vars.GH_AW_MODEL_DETECTION_COPILOT || '' }} + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + GITHUB_HEAD_REF: ${{ github.head_ref }} + GITHUB_REF_NAME: ${{ github.ref_name }} + GITHUB_STEP_SUMMARY: ${{ env.GITHUB_STEP_SUMMARY }} + GITHUB_WORKSPACE: ${{ github.workspace }} + XDG_CONFIG_HOME: /home/runner + - name: Parse threat detection results + id: parse_results + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + with: + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/parse_threat_detection_results.cjs'); + await main(); + - name: Upload threat detection log + if: always() + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6 + with: + name: threat-detection.log + path: /tmp/gh-aw/threat-detection/detection.log + if-no-files-found: ignore + + pre_activation: + if: > + ${{ github.event_name == 'workflow_dispatch' || (github.event.workflow_run.conclusion == 'failure' && + (github.event.workflow_run.head_branch == 'master' || github.event.workflow_run.event == 'pull_request')) }} + runs-on: ubuntu-slim + permissions: + actions: read + outputs: + activated: ${{ steps.check_rate_limit.outputs.rate_limit_ok == 'true' }} + steps: + - name: Setup Scripts + uses: github/gh-aw/actions/setup@5a79466d65414632d47c7869b27170ade5b9404e # v0.46.5 + with: + destination: /opt/gh-aw/actions + - name: Check user rate limit + id: check_rate_limit + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_RATE_LIMIT_MAX: "5" + GH_AW_RATE_LIMIT_WINDOW: "60" + 
GH_AW_RATE_LIMIT_EVENTS: "workflow_dispatch" + GH_AW_RATE_LIMIT_IGNORED_ROLES: "admin,maintain,write" + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/check_rate_limit.cjs'); + await main(); + + safe_outputs: + needs: + - agent + - detection + if: ((!cancelled()) && (needs.agent.result != 'skipped')) && (needs.detection.outputs.success == 'true') + runs-on: ubuntu-slim + permissions: + contents: read + discussions: write + issues: write + timeout-minutes: 15 + env: + GH_AW_ENGINE_ID: "copilot" + GH_AW_WORKFLOW_ID: "ci-doctor" + GH_AW_WORKFLOW_NAME: "CI Failure Doctor" + GH_AW_WORKFLOW_SOURCE: "githubnext/agentics/workflows/ci-doctor.md@0aa94a6e40aeaf131118476bc6a07e55c4ceb147" + GH_AW_WORKFLOW_SOURCE_URL: "${{ github.server_url }}/githubnext/agentics/tree/0aa94a6e40aeaf131118476bc6a07e55c4ceb147/workflows/ci-doctor.md" + outputs: + create_discussion_error_count: ${{ steps.process_safe_outputs.outputs.create_discussion_error_count }} + create_discussion_errors: ${{ steps.process_safe_outputs.outputs.create_discussion_errors }} + process_safe_outputs_processed_count: ${{ steps.process_safe_outputs.outputs.processed_count }} + process_safe_outputs_temporary_id_map: ${{ steps.process_safe_outputs.outputs.temporary_id_map }} + steps: + - name: Setup Scripts + uses: github/gh-aw/actions/setup@5a79466d65414632d47c7869b27170ade5b9404e # v0.46.5 + with: + destination: /opt/gh-aw/actions + - name: Download agent output artifact + continue-on-error: true + uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6 + with: + name: agent-output + path: /tmp/gh-aw/safeoutputs/ + - name: Setup agent output environment variable + run: | + mkdir -p /tmp/gh-aw/safeoutputs/ + find "/tmp/gh-aw/safeoutputs/" -type f -print + echo 
"GH_AW_AGENT_OUTPUT=/tmp/gh-aw/safeoutputs/agent_output.json" >> "$GITHUB_ENV" + - name: Process Safe Outputs + id: process_safe_outputs + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: ${{ env.GH_AW_AGENT_OUTPUT }} + GH_AW_SAFE_OUTPUTS_HANDLER_CONFIG: "{\"add_comment\":{\"max\":1},\"create_issue\":{\"labels\":[\"automation\",\"ci\"],\"max\":1,\"title_prefix\":\"${{ github.workflow }}\"},\"missing_data\":{},\"missing_tool\":{}}" + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/safe_output_handler_manager.cjs'); + await main(); + - name: Upload safe output items manifest + if: always() + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6 + with: + name: safe-output-items + path: /tmp/safe-output-items.jsonl + if-no-files-found: warn + + update_cache_memory: + needs: + - agent + - detection + if: always() && needs.detection.outputs.success == 'true' + runs-on: ubuntu-latest + permissions: {} + steps: + - name: Setup Scripts + uses: github/gh-aw/actions/setup@5a79466d65414632d47c7869b27170ade5b9404e # v0.46.5 + with: + destination: /opt/gh-aw/actions + - name: Download cache-memory artifact (default) + id: download_cache_default + uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6 + continue-on-error: true + with: + name: cache-memory + path: /tmp/gh-aw/cache-memory + - name: Check if cache-memory folder has content (default) + id: check_cache_default + shell: bash + run: | + if [ -d "/tmp/gh-aw/cache-memory" ] && [ "$(ls -A /tmp/gh-aw/cache-memory 2>/dev/null)" ]; then + echo "has_content=true" >> $GITHUB_OUTPUT + else + echo "has_content=false" >> $GITHUB_OUTPUT + fi + - name: Save cache-memory to cache (default) + if: 
steps.check_cache_default.outputs.has_content == 'true' + uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + with: + key: memory-${{ env.GH_AW_WORKFLOW_ID_SANITIZED }}-${{ github.run_id }} + path: /tmp/gh-aw/cache-memory diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/workflows/ci-doctor.md b/src/resources/openvino.genai-2026.1.0.0/.github/workflows/ci-doctor.md new file mode 100644 index 0000000..52085ba --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/workflows/ci-doctor.md @@ -0,0 +1,218 @@ +--- +description: | + This workflow is an automated CI failure investigator that triggers when monitored workflows fail. + Performs deep analysis of GitHub Actions workflow failures to identify root causes, + patterns, and provide actionable remediation steps. Analyzes logs, error messages, + and workflow configuration to help diagnose and resolve CI issues efficiently. + +on: + workflow_dispatch: + inputs: + run_id: + description: "Workflow run ID to investigate (for manual testing)" + required: false +# Disable automatic triggering on workflow_run events during manual testing. 
+# workflow_run: +# workflows: +# - "Linux (Ubuntu 22.04, Python 3.11)" +# types: +# - completed + +rate-limit: + max: 5 # Maximum runs per window + window: 60 # Time window in minutes + +# Only trigger for failures on master or PRs targeting master +# Allow workflow_dispatch for manual testing +if: ${{ github.event_name == 'workflow_dispatch' || (github.event.workflow_run.conclusion == 'failure' && (github.event.workflow_run.head_branch == 'master' || github.event.workflow_run.event == 'pull_request')) }} + +permissions: read-all + +network: defaults + +safe-outputs: + create-issue: + title-prefix: "${{ github.workflow }}" + labels: [automation, ci] + add-comment: + +tools: + cache-memory: true + web-fetch: + +timeout-minutes: 10 + +source: githubnext/agentics/workflows/ci-doctor.md@0aa94a6e40aeaf131118476bc6a07e55c4ceb147 +--- + +# CI Failure Doctor + +You are the CI Failure Doctor, an expert investigative agent that analyzes failed GitHub Actions workflows to identify root causes and patterns. Your goal is to conduct a deep investigation when the CI workflow fails. + +## Current Context + +- **Repository**: ${{ github.repository }} +- **Workflow Run**: ${{ github.event.workflow_run.id }} +- **Conclusion**: ${{ github.event.workflow_run.conclusion }} +- **Run URL**: ${{ github.event.workflow_run.html_url }} +- **Head SHA**: ${{ github.event.workflow_run.head_sha }} + +## Investigation Protocol + +**Trigger detection:** + +- If triggered by `workflow_run` event: ONLY proceed if `${{ github.event.workflow_run.conclusion }}` is `failure` or `cancelled`. Exit immediately if successful. +- If triggered by `workflow_run` event and the run was on a **pull request**: verify `github.event.workflow_run.pull_requests[0].base.ref` is `master`. Exit immediately if the PR targets a different base branch. +- If triggered by `workflow_dispatch` event: check if `${{ github.event.inputs.run_id }}` is provided, use that run ID to fetch the workflow run details. 
If no `run_id` is provided, exit immediately. + +### Phase 1: Initial Triage + +1. **Verify Failure**: Check that `${{ github.event.workflow_run.conclusion }}` is `failure` or `cancelled` +2. **Get Workflow Details**: Use `get_workflow_run` to get full details of the failed run. **Do NOT use curl or shell commands to fetch workflow data — always use the dedicated tools.** +3. **List Jobs**: Use `list_workflow_jobs` to identify which specific jobs failed +4. **Quick Assessment**: Determine if this is a new type of failure or a recurring pattern + +### Phase 2: Deep Log Analysis + +1. **Retrieve Logs**: Use `get_job_logs` with `failed_only=true` to get logs from all failed jobs. **This step is mandatory — do not skip it or substitute with source code analysis.** +2. **Pattern Recognition**: Analyze logs for: + - Error messages and stack traces + - Dependency installation failures + - Test failures with specific patterns + - Infrastructure or runner issues + - Timeout patterns + - Memory or resource constraints +3. **Extract Key Information**: + - Primary error messages + - File paths and line numbers where failures occurred + - Test names that failed + - Dependency versions involved + - Timing patterns + +### Phase 3: Historical Context Analysis + +1. **Search Investigation History**: Use file-based storage to search for similar failures: + - Read from cached investigation files in `/tmp/memory/investigations/` + - Parse previous failure patterns and solutions + - Look for recurring error signatures +2. **Issue History**: Search existing issues for related problems +3. **Commit Analysis**: Examine the commit that triggered the failure +4. **PR Context**: If triggered by a PR, analyze the changed files + +### Phase 4: Root Cause Investigation + +1. 
**Categorize Failure Type**: + - **Code Issues**: Syntax errors, logic bugs, test failures + - **Infrastructure**: Runner issues, network problems, resource constraints + - **Dependencies**: Version conflicts, missing packages, outdated libraries + - **Configuration**: Workflow configuration, environment variables + - **Flaky Tests**: Intermittent failures, timing issues + - **External Services**: Third-party API failures, downstream dependencies + +2. **Deep Dive Analysis**: + - For test failures: Identify specific test methods and assertions + - For build failures: Analyze compilation errors and missing dependencies + - For infrastructure issues: Check runner logs and resource usage + - For timeout issues: Identify slow operations and bottlenecks + +### Phase 5: Pattern Storage and Knowledge Building + +1. **Store Investigation**: Save structured investigation data to files: + - Write investigation report to `/tmp/memory/investigations/<timestamp>-<run-id>.json` + - Store error patterns in `/tmp/memory/patterns/` + - Maintain an index file of all investigations for fast searching +2. **Update Pattern Database**: Enhance knowledge with new findings by updating pattern files +3. **Save Artifacts**: Store detailed logs and analysis in the cached directories + +### Phase 6: Looking for existing issues + +1. **Convert the report to a search query** + - Use any advanced search features in GitHub Issues to find related issues + - Look for keywords, error messages, and patterns in existing issues +2. **Judge each matching issue for relevance** + - Analyze the content of the issues found by the search and judge if they are similar to this issue. +3. **Add issue comment to duplicate issue and finish** + - If you find a duplicate issue, add a comment with your findings and close the investigation. + - Do NOT open a new issue since you found a duplicate already (skip next phases). + +### Phase 7: Reporting and Recommendations + +1. 
**Create Investigation Report**: Generate a comprehensive analysis including: + - **Executive Summary**: Quick overview of the failure + - **Root Cause**: Detailed explanation of what went wrong + - **Reproduction Steps**: How to reproduce the issue locally + - **Recommended Actions**: Specific steps to fix the issue + - **Prevention Strategies**: How to avoid similar failures + - **AI Team Self-Improvement**: Give a short set of additional prompting instructions to copy-and-paste into instructions.md for AI coding agents to help prevent this type of failure in future + - **Historical Context**: Similar past failures and their resolutions +2. **Actionable Deliverables**: + - Create an issue with investigation results (if warranted) + - Comment on related PR with analysis (if PR-triggered) + - Provide specific file locations and line numbers for fixes + - Suggest code changes or configuration updates + +## Output Requirements + +### Investigation Issue Template + +When creating an investigation issue, use this structure: + +```markdown +# 🏥 CI Failure Investigation - Run #${{ github.event.workflow_run.run_number }} + +## Summary + +[Brief description of the failure] + +## Failure Details + +- **Run**: [${{ github.event.workflow_run.id }}](${{ github.event.workflow_run.html_url }}) +- **Commit**: ${{ github.event.workflow_run.head_sha }} +- **Trigger**: ${{ github.event.workflow_run.event }} + +## Root Cause Analysis + +[Detailed analysis of what went wrong] + +## Failed Jobs and Errors + +[List of failed jobs with key error messages] + +## Investigation Findings + +[Deep analysis results] + +## Recommended Actions + +- [ ] [Specific actionable steps] + +## Prevention Strategies + +[How to prevent similar failures] + +## AI Team Self-Improvement + +[Short set of additional prompting instructions to copy-and-paste into instructions.md for AI coding agents to help prevent this type of failure in future] + +## Historical Context + +[Similar past failures and patterns] 
+``` + +## Important Guidelines + +- **Be Thorough**: Don't just report the error - investigate the underlying cause +- **Use Memory**: Always check for similar past failures and learn from them +- **Be Specific**: Provide exact file paths, line numbers, and error messages +- **Action-Oriented**: Focus on actionable recommendations, not just analysis +- **Pattern Building**: Contribute to the knowledge base for future investigations +- **Resource Efficient**: Use caching to avoid re-downloading large logs +- **Security Conscious**: Never execute untrusted code from logs or external sources + +## Cache Usage Strategy + +- Store investigation database and knowledge patterns in `/tmp/memory/investigations/` and `/tmp/memory/patterns/` +- Cache detailed log analysis and artifacts in `/tmp/investigation/logs/` and `/tmp/investigation/reports/` +- Persist findings across workflow runs using GitHub Actions cache +- Build cumulative knowledge about failure patterns and solutions using structured JSON files +- Use file-based indexing for fast pattern matching and similarity detection diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/workflows/cleanup_caches.yml b/src/resources/openvino.genai-2026.1.0.0/.github/workflows/cleanup_caches.yml new file mode 100644 index 0000000..f4744c6 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/workflows/cleanup_caches.yml @@ -0,0 +1,35 @@ +name: Cleanup caches +on: + workflow_dispatch: + schedule: + # at 00:00 on workdays + - cron: '0 0 * * 1,2,3,4,5' + +permissions: read-all + +jobs: + Cleanup_OV_CACHE: + name: Cleanup OV_CACHE + runs-on: aks-linux-4-cores-16gb + if: ${{ github.repository_owner == 'openvinotoolkit' }} + container: + image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04 + volumes: + - /mount:/mount + env: + OV_CACHE: /mount/caches/huggingface/.ov_cache + + steps: + - name: Pre-Collecting Cache Info + run: | + echo "Cache info: " + du -h -d2 ${{ env.OV_CACHE }} + - name: Cleanup 
cache + run: | + echo "Delete cache files if they have not been used in over 3 days" + [ ! -z "${{ env.OV_CACHE }}" ] && find ${{ env.OV_CACHE }} ! -type d -atime +3 -delete + + - name: Post-Collecting Cache Info + run: | + echo "Cache info: " + du -h -d2 ${{ env.OV_CACHE }} diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/workflows/copilot-setup-steps.yml b/src/resources/openvino.genai-2026.1.0.0/.github/workflows/copilot-setup-steps.yml new file mode 100644 index 0000000..8735660 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/workflows/copilot-setup-steps.yml @@ -0,0 +1,26 @@ +name: "Copilot Setup Steps" + +# This workflow configures the environment for GitHub Copilot Agent with gh-aw MCP server +on: + workflow_dispatch: + push: + paths: + - .github/workflows/copilot-setup-steps.yml + +jobs: + # The job MUST be called 'copilot-setup-steps' to be recognized by GitHub Copilot Agent + copilot-setup-steps: + runs-on: ubuntu-latest + + # Set minimal permissions for setup steps + # Copilot Agent receives its own token with appropriate permissions + permissions: + contents: read + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Install gh-aw extension + uses: github/gh-aw/actions/setup-cli@v0.46.5 + with: + version: v0.46.5 diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/workflows/coverity.yml b/src/resources/openvino.genai-2026.1.0.0/.github/workflows/coverity.yml new file mode 100644 index 0000000..2fa1790 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/workflows/coverity.yml @@ -0,0 +1,187 @@ +name: Coverity (Ubuntu 22.04, Python 3.11) +on: + workflow_dispatch: + schedule: + # run daily at 00:00 + - cron: '0 0 * * *' + pull_request: + paths: + - '.github/workflows/coverity.yml' + +permissions: read-all # Required by https://github.com/ossf/scorecard/blob/e23b8ad91fd6a64a0a971ca4fc0a4d1650725615/docs/checks.md#token-permissions + +concurrency: + group: ${{ github.ref 
}}-genai-cov-linux + cancel-in-progress: true + +env: + PYTHON_VERSION: '3.11' + OV_BRANCH: ${{ github.base_ref || github.event.merge_group.base_ref || github.ref }} + +jobs: + openvino_download: + name: Download OpenVINO + outputs: + status: ${{ steps.openvino_download.outcome }} + ov_artifact_name: ${{ steps.openvino_download.outputs.ov_artifact_name }} + ov_wheel_source: ${{ steps.openvino_download.outputs.ov_wheel_source }} + docker_tag: ${{ steps.get_docker_tag.outputs.docker_tag }} + timeout-minutes: 10 + defaults: + run: + shell: bash + runs-on: aks-linux-medium + container: + image: 'openvinogithubactions.azurecr.io/openvino_provider:0.1.0' + volumes: + - /mount:/mount + - ${{ github.workspace }}:${{ github.workspace }} + + steps: + - uses: openvinotoolkit/openvino/.github/actions/openvino_provider@master + id: openvino_download + with: + platform: ubuntu22 + commit_packages_to_provide: wheels + revision: latest_available_commit + # Set specific revision and uncomment to use OV from its PR build: + # branch_name: master + # event_name: pull_request + + - name: Clone docker tag from OpenVINO repo + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + repository: 'openvinotoolkit/openvino' + path: 'openvino' + ref: ${{ env.OV_BRANCH }} + sparse-checkout: | + .github/dockerfiles/docker_tag + + - name: Save docker tag to output + id: get_docker_tag + run: | + docker_tag=$(cat openvino/.github/dockerfiles/docker_tag) + echo "docker_tag=$docker_tag" >> $GITHUB_OUTPUT + + coverity_build: + name: Build for coverity + needs: [ openvino_download ] + timeout-minutes: 20 + defaults: + run: + shell: bash + runs-on: aks-linux-16-cores-64gb + container: + image: openvinogithubactions.azurecr.io/ov_build/ubuntu_22_04_x64:${{ needs.openvino_download.outputs.docker_tag }} + volumes: + - /mount:/mount + options: -v ${{ github.workspace }}:${{ github.workspace }} + env: + CMAKE_GENERATOR: Unix Makefiles + OV_INSTALL_DIR: ${{ github.workspace }}/ov 
+ INSTALL_DIR: ${{ github.workspace }}/install + BUILD_DIR: ${{ github.workspace }}/build + BUILD_TYPE: Release + COV_TOOL_DIR: ${{ github.workspace }}/coverity_tool + + steps: + - name: Clone openvino.genai + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + path: openvino.genai + + - name: Download OpenVINO package + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + name: ${{ needs.openvino_download.outputs.ov_artifact_name }} + path: ${{ env.OV_INSTALL_DIR }} + merge-multiple: true + + - name: Restore Coverity Tool + if: github.event_name == 'pull_request' + id: cache-coverity + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + with: + path: ${{ env.COV_TOOL_DIR }} + key: coverity-${{ runner.os }}-${{ github.sha }} + restore-keys: coverity-${{ runner.os }} + + - name: Download coverity tool + if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' + run: | + wget -q https://scan.coverity.com/download/linux64 --post-data "token=${{ secrets.COVERITY_SECRET_TOKEN }}&project=openvino.genai" -O coverity_tool.tgz + mkdir -p ${{ env.COV_TOOL_DIR }} + pigz -dc coverity_tool.tgz | tar --strip-components=1 -xf - -C ${{ env.COV_TOOL_DIR }} + + - name: Create config file for coverity build + run: | + ${{ env.COV_TOOL_DIR }}/bin/cov-configure --delete-compiler-config template-python-config-0 + ${{ env.COV_TOOL_DIR }}/bin/cov-configure --python --no-capture-config-files --version 3 + + - name: Create build.sh + run: | + echo """ + mkdir -p ${{ github.workspace }}/build + cmake -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DBUILD_TOKENIZERS=NO -DOpenVINO_DIR=${OV_INSTALL_DIR}/runtime/cmake/ -DCMAKE_C_COMPILER_LAUNCHER= -DCMAKE_CXX_COMPILER_LAUNCHER= -B${BUILD_DIR} ${{ github.workspace }}/openvino.genai + cmake --build ${BUILD_DIR} --config ${BUILD_TYPE} --parallel $(nproc) + """ > build.sh + + - name: Build for coverity + run: | + ${{ env.COV_TOOL_DIR }}/bin/cov-build --config 
${{ env.COV_TOOL_DIR }}/config/coverity_config.xml --tmpdir cov_temp --dir ${BUILD_DIR}/cov-int sh build.sh + + - name: Pack for analysis submission + run: tar -cvf - cov-int | pigz > openvino-genai.tgz + working-directory: ${{ env.BUILD_DIR }} + + - name: Submit to coverity + if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' + run: | + apt-get update && apt-get install -y curl jq + pushd ${BUILD_DIR} + curl -X POST -d token=${{ secrets.COVERITY_SECRET_TOKEN }} \ + -d email=${{ secrets.COVERITY_USER }} \ + -d file_name="openvino-genai.tgz" \ + -d version="${{ github.sha }}" \ + -d description="https://github.com/openvinotoolkit/openvino.genai/actions/runs/${{ github.run_id }}" \ + https://scan.coverity.com/projects/30357/builds/init | tee response + + upload_url=$(jq -r '.url' response) + build_id=$(jq -r '.build_id' response) + + curl -X PUT \ + --header 'Content-Type: application/json' \ + --upload-file openvino-genai.tgz \ + $upload_url + + curl -X PUT \ + -d token=${{ secrets.COVERITY_SECRET_TOKEN }} \ + https://scan.coverity.com/projects/30357/builds/$build_id/enqueue + popd + + - name: Show Coverity configure logs + continue-on-error: true + run: ${{ env.COV_TOOL_DIR }}/bin/cov-configure -c ${{ env.COV_TOOL_DIR }}/config/coverity_config.xml -lscc text + + - name: Save Coverity Tool + if: always() && github.event_name == 'schedule' + uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + with: + key: coverity-${{ runner.os }}-${{ github.sha }} + path: ${{ env.COV_TOOL_DIR }} + + - name: Upload Coverity build log + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + if: always() + with: + name: coverity_logs + path: ${{ env.BUILD_DIR }}/cov-int/build-log.txt + if-no-files-found: 'error' + + - name: Upload Coverity build archive + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + if: always() + with: + name: coverity_archive + path: ${{ env.BUILD_DIR 
}}/openvino-genai.tgz + if-no-files-found: 'error' diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/workflows/deploy_gh_pages.yml b/src/resources/openvino.genai-2026.1.0.0/.github/workflows/deploy_gh_pages.yml new file mode 100644 index 0000000..d5f4ad3 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/workflows/deploy_gh_pages.yml @@ -0,0 +1,61 @@ +name: Deploy Docs to GitHub Pages + +on: + workflow_dispatch: + push: + branches: + - master + paths: + - 'site/**' + +concurrency: + group: 'pages' + cancel-in-progress: true + +permissions: + contents: read + +jobs: + build_assets: + runs-on: ubuntu-22.04 + permissions: + contents: write + steps: + - name: Checkout code + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + lfs: true + + - name: Setup Node.js + uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0 + with: + node-version: 20 + + - name: Install Node.js dependencies + working-directory: ./site + shell: bash + run: npm ci + + - name: Build static for GitHub Pages + working-directory: ./site + shell: bash + run: npm run build + + - name: Upload pages artifact + uses: actions/upload-pages-artifact@7b1f4a764d45c48632c6b24a0339c27f5614fb0b # v4.0.0 + with: + path: ./site/build + + deploy_github_pages: + runs-on: ubuntu-22.04 + needs: build_assets + permissions: + pages: write + id-token: write + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@d6db90164ac5ed86f2b6aed7e0febac5b3c0c03e # v4.0.5 diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/workflows/labeler.yml b/src/resources/openvino.genai-2026.1.0.0/.github/workflows/labeler.yml new file mode 100644 index 0000000..2f25c4e --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/workflows/labeler.yml @@ -0,0 +1,21 @@ +name: "Pull Request Labeler" +on: +- pull_request_target + 
+permissions: read-all # Required by https://github.com/ossf/scorecard/blob/e23b8ad91fd6a64a0a971ca4fc0a4d1650725615/docs/checks.md#token-permissions + +jobs: + triage: + permissions: + contents: read + pull-requests: write + issues: write + runs-on: ubuntu-latest + steps: + - uses: akladiev/labeler@eeac5941e7fb6f980d47e038ac0665168851c874 # v4.3.1 + with: + repo-token: "${{ secrets.GITHUB_TOKEN }}" + configuration-path: '.github/labeler.yml' + sync-labels: 'true' + dot: 'true' + non-matching-label: 'no-match-files' diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/workflows/lint.yml b/src/resources/openvino.genai-2026.1.0.0/.github/workflows/lint.yml new file mode 100644 index 0000000..b63fa09 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/workflows/lint.yml @@ -0,0 +1,40 @@ +name: Lint + +on: + pull_request: + branches: + - master + push: + branches: + - master + +permissions: read-all + +env: + OV_BRANCH: master + +jobs: + lint: + name: Lint Changed Files + runs-on: ubuntu-latest + timeout-minutes: 30 + defaults: + run: + shell: bash + + steps: + - name: Checkout code + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + fetch-depth: 0 + submodules: recursive + + - name: Set up Python + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + python-version: "3.10" + + - name: Run pre-commit on changed files + uses: pre-commit/action@v3.0.1 + with: + extra_args: --from-ref origin/${{ github.base_ref || 'master' }} --to-ref HEAD diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/workflows/linux.yml b/src/resources/openvino.genai-2026.1.0.0/.github/workflows/linux.yml new file mode 100644 index 0000000..87e7fe2 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/workflows/linux.yml @@ -0,0 +1,1056 @@ +name: Linux (Ubuntu 22.04, Python 3.11) +on: + workflow_dispatch: + pull_request: + merge_group: + push: + branches: + - master + - 'releases/**' + 
+permissions: read-all # Required by https://github.com/ossf/scorecard/blob/e23b8ad91fd6a64a0a971ca4fc0a4d1650725615/docs/checks.md#token-permissions + +concurrency: + # github.ref is not unique in post-commit + group: ${{ github.event_name == 'push' && github.run_id || github.ref }}-linux + cancel-in-progress: true + +env: + PYTHON_VERSION: '3.11' + OV_BRANCH: ${{ github.base_ref || github.event.merge_group.base_ref || github.ref }} + CMAKE_CXX_COMPILER_LAUNCHER: sccache + CMAKE_C_COMPILER_LAUNCHER: sccache + SCCACHE_IGNORE_SERVER_IO_ERROR: 1 + SCCACHE_SERVER_PORT: 35555 + SCCACHE_CACHE_SIZE: 30G + SCCACHE_AZURE_KEY_PREFIX: genai/ubuntu/22_04/x64 + HF_HOME: /mount/caches/huggingface/lin + # The mount directory leads to an issue with the load_dataset lock file: either the lock cannot be released or there is a deadlock. Ticket: 181288 + HF_DATASETS_CACHE: /tmp/.hf_cache/datasets/ + OV_CACHE: /mount/caches/huggingface/.ov_cache/lin/ + OPENVINO_LOG_LEVEL: 4 + GENAI_ARCHIVE_NAME: genai.tar.gz + GENAI_SAMPLES_NAME: genai_samples.tar.gz + ARTIFACTS_SHARE: '/mount/build-artifacts' + BASE_PRODUCT_TYPE: public_linux_ubuntu_22_04_x86_64 + GENAI_WHEELS_ARTIFACT_NAME: 'genai_wheels' + GENAI_ARCHIVE_ARTIFACT_BASE_NAME: 'genai_archive' + +jobs: + smart_ci: + name: Smart CI + runs-on: ubuntu-latest + outputs: + affected_components: "${{ steps.smart_ci.outputs.affected_components }}" + changed_components: "${{ steps.smart_ci.outputs.changed_components }}" + skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" + steps: + - name: checkout action + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + timeout-minutes: 15 + with: + sparse-checkout: .github + + - name: Get affected components + id: smart_ci + uses: openvinotoolkit/openvino/.github/actions/smart-ci@36a8f092d3250e7a2a365f0445e61297d91c358e + with: + repository: ${{ github.repository }} + pr: ${{ github.event.number }} + commit_sha: ${{ github.sha }} + ref_name: ${{ github.ref_name }} + component_pattern: 
"category: ((?!Python API|CPP API).*)|Structured Output" + repo_token: ${{ secrets.GITHUB_TOKEN }} + skip_when_only_listed_labels_set: 'GH Pages Docs' + skip_when_only_listed_files_changed: '*.md,*.rst,*.png,*.jpg,*.svg,*.gif' + + - name: Show affected components + run: echo "${{ toJSON(steps.smart_ci.outputs.affected_components) }}" + shell: bash + + openvino_download: + needs: smart_ci + if: ${{ github.event_name != 'merge_group' && needs.smart_ci.outputs.skip_workflow != 'True' }} + name: Download OpenVINO + outputs: + status: ${{ steps.openvino_download.outcome }} + ov_artifact_name: ${{ steps.openvino_download.outputs.ov_artifact_name }} + ov_wheel_source: ${{ steps.openvino_download.outputs.ov_wheel_source }} + ov_version: ${{ steps.openvino_download.outputs.ov_version }} + docker_tag: ${{ steps.get_docker_tag.outputs.docker_tag }} + timeout-minutes: 10 + defaults: + run: + shell: bash + runs-on: aks-linux-medium + container: + image: 'openvinogithubactions.azurecr.io/openvino_provider:0.1.0' + volumes: + - /mount:/mount + - ${{ github.workspace }}:${{ github.workspace }} + + steps: + - uses: openvinotoolkit/openvino/.github/actions/openvino_provider@master + id: openvino_download + with: + platform: ubuntu22 + commit_packages_to_provide: wheels,developer_package.tar.gz,openvino_node_npm_package.tar.gz + revision: latest_available_commit + # Set specific revision and uncomment to use OV from its PR build: + # branch_name: master + # event_name: pull_request + + - name: Clone docker tag from OpenVINO repo + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + repository: 'openvinotoolkit/openvino' + path: 'openvino' + ref: ${{ env.OV_BRANCH }} + sparse-checkout: | + .github/dockerfiles/docker_tag + + - name: Save docker tag to output + id: get_docker_tag + run: | + docker_tag=$(cat openvino/.github/dockerfiles/docker_tag) + echo "docker_tag=$docker_tag" >> $GITHUB_OUTPUT + + genai_build_cmake: + name: Build Archive - ${{ 
matrix.build-type }} + strategy: + fail-fast: false + matrix: + build-type: [Release] + needs: [ openvino_download ] + timeout-minutes: 45 + defaults: + run: + shell: bash + runs-on: aks-linux-4-cores-16gb + container: + image: openvinogithubactions.azurecr.io/ov_build/ubuntu_22_04_x64:${{ needs.openvino_download.outputs.docker_tag }} + volumes: + - /mount:/mount + - ${{ github.workspace }}:${{ github.workspace }} + options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING + env: + CMAKE_GENERATOR: Unix Makefiles + OV_INSTALL_DIR: ${{ github.workspace }}/ov + INSTALL_DIR: ${{ github.workspace }}/install + INSTALL_TOOLS_DIR: ${{ github.workspace }}/tools + INSTALL_TESTS_DIR: ${{ github.workspace }}/tests + BUILD_DIR: ${{ github.workspace }}/build + SRC_DIR: ${{ github.workspace }}/src + MANIFEST_PATH: ${{ github.workspace }}/manifest.yml + + steps: + - name: Clone openvino.genai + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + path: ${{ env.SRC_DIR }} + submodules: recursive + + - name: Download OpenVINO package + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + name: ${{ needs.openvino_download.outputs.ov_artifact_name }} + path: ${{ env.OV_INSTALL_DIR }} + merge-multiple: true + + - name: Generate product manifest + id: create_manifest + uses: openvinotoolkit/openvino/.github/actions/create_manifest@master + with: + repos: ${{ env.SRC_DIR }} + product_type: ${{ env.BASE_PRODUCT_TYPE }}_${{ matrix.build-type }} + target_arch: 'x86_64' + build_type: ${{ matrix.build-type }} + save_to: ${{ env.MANIFEST_PATH }} + + - name: CMake Build + run: | + apt update + apt install -y libgtk2.0-dev pkg-config ffmpeg libavformat-dev libavcodec-dev libswscale-dev libavutil-dev + source ${{ env.OV_INSTALL_DIR }}/setupvars.sh + + cmake -DOpenVINODeveloperPackage_DIR=${{ env.OV_INSTALL_DIR }}/developer_package/cmake \ + -DENABLE_PYTHON=OFF \ + -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \ + 
-DCMAKE_CXX_COMPILER_LAUNCHER=sccache \ + -DCMAKE_C_COMPILER_LAUNCHER=sccache \ + -S ${{ env.SRC_DIR }} \ + -B ${{ env.BUILD_DIR }} + cmake --build ${{ env.BUILD_DIR }} --config ${{ matrix.build-type }} --parallel $(nproc) --verbose + + # Need to remove the default Python 3.11 in /venv from the Path so that CMake uses the correct one from newly created venv$py_version + export PATH=${PATH/:\/venv\/bin/} + + for py_version in "3.10" "3.11" "3.12" "3.13" + do + rm -rf ${{ env.BUILD_DIR }}/CMakeCache.txt + + echo "Creating venv for python$py_version" + python$py_version -m venv venv$py_version + source venv$py_version/bin/activate + echo "PATH: $PATH" + + echo "Configuring cmake for python$py_version" + cmake -DOpenVINODeveloperPackage_DIR=${{ env.OV_INSTALL_DIR }}/developer_package/cmake \ + -DENABLE_PYTHON=ON \ + -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \ + -DCMAKE_CXX_COMPILER_LAUNCHER=sccache \ + -DCMAKE_C_COMPILER_LAUNCHER=sccache \ + -S ${{ env.SRC_DIR }} \ + -B ${{ env.BUILD_DIR }} + echo "Configuring cmake done for python$py_version" + + cmake --build ${{ env.BUILD_DIR }} --config ${{ matrix.build-type }} --parallel $(nproc) --verbose + cmake --install ${{ env.BUILD_DIR }} --config ${{ matrix.build-type }} --prefix ${{ env.INSTALL_DIR }} + deactivate + done + + cmake --install ${{ env.BUILD_DIR }} --config ${{ matrix.build-type }} --prefix ${{ env.INSTALL_TOOLS_DIR }} --component tools_bin + cmake --install ${{ env.BUILD_DIR }} --config ${{ matrix.build-type }} --prefix ${{ env.INSTALL_TESTS_DIR }} --component tests + + - name: Pack Artifacts + run: tar -cvf - * | pigz > ${{ env.BUILD_DIR }}/${{ env.GENAI_ARCHIVE_NAME }} + working-directory: ${{ env.INSTALL_DIR }} + + - name: Upload Archive Distribution Package + if: always() + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: genai_archive_${{ matrix.build-type }} + path: ${{ env.BUILD_DIR }}/${{ env.GENAI_ARCHIVE_NAME }} + if-no-files-found: 'error' + + - 
name: Upload Tools + if: always() + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: genai_tools_${{ matrix.build-type }} + path: ${{ env.INSTALL_TOOLS_DIR }} + if-no-files-found: 'error' + + - name: Upload Tests + if: always() + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: genai_tests_${{ matrix.build-type }} + path: ${{ env.INSTALL_TESTS_DIR }} + if-no-files-found: 'error' + + - name: Upload manifest + if: always() + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: manifest_${{ matrix.build-type }} + path: ${{ env.MANIFEST_PATH }} + if-no-files-found: 'error' + + genai_build_wheels: + name: Build Tokenizers & WWB Wheels + needs: [ openvino_download ] + timeout-minutes: 30 + defaults: + run: + shell: bash + runs-on: aks-linux-4-cores-16gb + container: + image: openvinogithubactions.azurecr.io/ov_build/ubuntu_22_04_x64:${{ needs.openvino_download.outputs.docker_tag }} + volumes: + - /mount:/mount + - ${{ github.workspace }}:${{ github.workspace }} + options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING + env: + CMAKE_GENERATOR: Ninja + OV_INSTALL_DIR: ${{ github.workspace }}/ov + INSTALL_DIR: ${{ github.workspace }}/install + WHEELS_DIR: ${{ github.workspace }}/install/wheels + SRC_DIR: ${{ github.workspace }}/src + + steps: + - name: Clone openvino.genai + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + path: ${{ env.SRC_DIR }} + submodules: recursive + + - name: Download OpenVINO package + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + name: ${{ needs.openvino_download.outputs.ov_artifact_name }} + path: ${{ env.OV_INSTALL_DIR }} + merge-multiple: true + + - name: Build Tokenizers Wheel + run: | + python -m pip wheel -v --no-deps --wheel-dir ${{ env.WHEELS_DIR }} \ + --config-settings='override=wheel.build_tag="${{ 
github.run_number }}"' \ + --config-settings=override=cross.arch="manylinux_2_31_x86_64" \ + --config-settings=cmake.args="-DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache" \ + ${{ needs.openvino_download.outputs.ov_wheel_source }} \ + ${{ env.SRC_DIR }}/thirdparty/openvino_tokenizers + working-directory: ${{ env.OV_INSTALL_DIR }} + + - name: Build WWB Wheel + run: python -m pip wheel -v --no-deps --wheel-dir ${{ env.WHEELS_DIR }} ${{ env.SRC_DIR }}/tools/who_what_benchmark + working-directory: ${{ env.OV_INSTALL_DIR }} + + - name: Upload Wheels + if: ${{ always() }} + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: genai_wheels + path: ${{ env.INSTALL_DIR }} + if-no-files-found: 'error' + + genai_build_genai_wheel: + name: Build GenAI Wheel - Python ${{ matrix.python-version }} + needs: [ openvino_download ] + timeout-minutes: 60 + strategy: + fail-fast: false + matrix: + python-version: ['3.10', '3.11', '3.12', '3.13'] + defaults: + run: + shell: bash + runs-on: aks-linux-8-cores-16gb + container: + image: openvinogithubactions.azurecr.io/ov_build/ubuntu_22_04_x64:${{ needs.openvino_download.outputs.docker_tag }} + volumes: + - /mount:/mount + - ${{ github.workspace }}:${{ github.workspace }} + options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING + env: + CMAKE_GENERATOR: Ninja + OV_INSTALL_DIR: ${{ github.workspace }}/ov + INSTALL_DIR: ${{ github.workspace }}/install + WHEELS_DIR: ${{ github.workspace }}/install/wheels + SRC_DIR: ${{ github.workspace }}/src + OpenVINODeveloperPackage_DIR: ${{ github.workspace }}/ov/developer_package/cmake + steps: + - name: Clone openvino.genai + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + path: ${{ env.SRC_DIR }} + submodules: recursive + + - name: Download OpenVINO package + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + name: ${{ 
needs.openvino_download.outputs.ov_artifact_name }} + path: ${{ env.OV_INSTALL_DIR }} + merge-multiple: true + + - name: Set CI environment + id: create_manifest + uses: openvinotoolkit/openvino/.github/actions/create_manifest@master + with: + repos: ${{ env.SRC_DIR }} + product_type: ${{ env.BASE_PRODUCT_TYPE }}_Release + target_arch: 'x86_64' + build_type: Release + save_to: ${{ github.workspace }} + + - name: Clean sccache stats + run: ${SCCACHE_PATH} --zero-stats + + - name: Build GenAI Wheel + run: | + python_exec_path=$(python${{ matrix.python-version }} -c "import sys; print(sys.executable)") + $python_exec_path -m pip wheel -v --no-deps --wheel-dir ${{ env.WHEELS_DIR }} \ + --config-settings=override=cross.arch="manylinux_2_31_x86_64" \ + --config-settings=cmake.args="-DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache" \ + ${{ needs.openvino_download.outputs.ov_wheel_source }} \ + ${{ env.SRC_DIR }} + working-directory: ${{ env.OV_INSTALL_DIR }} + + - name: Show sccache stats + run: ${SCCACHE_PATH} --show-stats + + - name: Upload GenAI Wheel + if: ${{ always() }} + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: genai_wheel_python_${{ matrix.python-version }} + path: ${{ env.INSTALL_DIR }} + if-no-files-found: 'error' + + store_artifacts: + name: Store build artifacts + strategy: + matrix: + build-type: [Release] + needs: [openvino_download, genai_build_wheels, genai_build_genai_wheel, genai_build_cmake] + timeout-minutes: 10 + defaults: + run: + shell: bash + runs-on: aks-linux-medium + container: + image: openvinogithubactions.azurecr.io/library/python:3.12-slim + volumes: + - /mount:/mount + - ${{ github.workspace }}:${{ github.workspace }} + env: + CPACK_PATH: ${{ github.workspace }}/ov_genai + CPACK_PACKAGE: ${{ github.workspace }}/ov_genai.tar.gz + WHEEL_PACKAGE: ${{ github.workspace }}/wheels + MANIFEST_PATH: ${{ github.workspace }}/manifest.yml + + steps: + - name: Download genai 
package + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + name: ${{ env.GENAI_ARCHIVE_ARTIFACT_BASE_NAME }}_${{ matrix.build-type }} + path: ${{ env.CPACK_PATH }} + + - name: Pack Artifacts + run: tar -czvf ${{ env.CPACK_PACKAGE }} * + working-directory: ${{ env.CPACK_PATH }} + + - name: Download manifest and wheels + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + pattern: '{genai_wheels,genai_wheel_python_*,manifest_${{ matrix.build-type }}}' + path: ${{ github.workspace }} + merge-multiple: true + + - name: Store ${{ matrix.build-type }} artifacts to a shared drive + id: store_artifacts + if: ${{ always() }} + uses: openvinotoolkit/openvino/.github/actions/store_artifacts@master + with: + artifacts: | + ${{ env.CPACK_PACKAGE }} + ${{ env.WHEEL_PACKAGE }} + ${{ env.MANIFEST_PATH }} + storage_dir: ${{ env.BASE_PRODUCT_TYPE }}_${{ matrix.build-type }} + storage_root: ${{ env.ARTIFACTS_SHARE }} + product_name: ${{ github.event.repository.name }} + + genai_build_samples: + name: Build Samples - ${{ matrix.build-type }} + strategy: + fail-fast: false + matrix: + build-type: [Release] + needs: [ openvino_download, genai_build_cmake ] + timeout-minutes: 30 + defaults: + run: + shell: bash + runs-on: aks-linux-4-cores-16gb + container: + image: openvinogithubactions.azurecr.io/ov_build/ubuntu_22_04_x64:${{ needs.openvino_download.outputs.docker_tag }} + volumes: + - /mount:/mount + - ${{ github.workspace }}:${{ github.workspace }} + options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING + env: + CMAKE_GENERATOR: Unix Makefiles + OV_INSTALL_DIR: ${{ github.workspace }}/ov + INSTALL_DIR: ${{ github.workspace }}/install + BUILD_DIR: ${{ github.workspace }}/build + SRC_DIR: ${{ github.workspace }}/src + + steps: + - name: Clone openvino.genai + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + path: ${{ env.SRC_DIR }} + submodules: recursive + + - 
name: Download Build Artifacts + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + pattern: "{${{ needs.openvino_download.outputs.ov_artifact_name }},genai_archive_${{ matrix.build-type }}}" + path: ${{ env.OV_INSTALL_DIR }} + merge-multiple: true + + - name: Extract Artifacts + run: pigz -dc ${{ env.GENAI_ARCHIVE_NAME }} | tar -xf - -C ${{ env.OV_INSTALL_DIR }} + working-directory: ${{ env.OV_INSTALL_DIR }} + + - name: Build Samples (Release) + if: ${{ 'Release' == matrix.build-type }} + run: | + chmod +x ${{ env.OV_INSTALL_DIR }}/samples/cpp/build_samples.sh + ${{ env.OV_INSTALL_DIR }}/samples/cpp/build_samples.sh -i ${{ env.INSTALL_DIR }} + chmod +x ${{ env.OV_INSTALL_DIR }}/samples/c/build_samples.sh + ${{ env.OV_INSTALL_DIR }}/samples/c/build_samples.sh -i ${{ env.INSTALL_DIR }} + + - name: Build Samples (${{ matrix.build-type }}) + if: ${{ 'Release' != matrix.build-type }} + run: | + source ${{ env.OV_INSTALL_DIR }}/setupvars.sh + cmake -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \ + -DCMAKE_CXX_COMPILER_LAUNCHER=sccache \ + -DCMAKE_C_COMPILER_LAUNCHER=sccache \ + -S ${{ env.OV_INSTALL_DIR }}/samples/cpp/ -B ${{ env.BUILD_DIR }} + cmake --build ${{ env.BUILD_DIR }} --config ${{ matrix.build-type }} --parallel $(nproc) + cmake --install ${{ env.BUILD_DIR }} --config ${{ matrix.build-type }} --component samples_bin --prefix ${{ env.INSTALL_DIR }} + + - name: Pack Artifacts + run: tar -cvf - * | pigz > ${{ env.INSTALL_DIR }}/${{ env.GENAI_SAMPLES_NAME }} + working-directory: ${{ env.INSTALL_DIR }} + + - name: Upload Samples Build Package + if: ${{ always() }} + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: genai_samples_${{ matrix.build-type }} + path: ${{ env.INSTALL_DIR }}/*.tar.gz + if-no-files-found: 'error' + + genai_build_nodejs: + name: Build Node.js bindings + strategy: + fail-fast: false + matrix: + build-type: [Release] + needs: [ openvino_download ] + timeout-minutes: 
20 + defaults: + run: + shell: bash + runs-on: aks-linux-4-cores-16gb + container: + image: openvinogithubactions.azurecr.io/ov_build/ubuntu_22_04_x64:${{ needs.openvino_download.outputs.docker_tag }} + volumes: + - /mount:/mount + - ${{ github.workspace }}:${{ github.workspace }} + options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING + + env: + SRC_DIR: ${{ github.workspace }}/openvino.genai + BUILD_DIR: ${{ github.workspace }}/build + INSTALL_DIR: ${{ github.workspace }}/openvino.genai/src/js/bin + OV_INSTALL_DIR: ${{ github.workspace }}/ov + + steps: + - name: Clone openvino.genai + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + submodules: recursive + path: ${{ env.SRC_DIR }} + + - name: Download OpenVINO package + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + name: ${{ needs.openvino_download.outputs.ov_artifact_name }} + path: ${{ env.OV_INSTALL_DIR }} + merge-multiple: true + + - name: Build GenAI Node.js bindings + run: | + source ${{ env.OV_INSTALL_DIR }}/setupvars.sh + cmake -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \ + -DCMAKE_CXX_COMPILER_LAUNCHER=sccache \ + -DCMAKE_C_COMPILER_LAUNCHER=sccache \ + -DENABLE_JS=ON -DCPACK_GENERATOR=NPM \ + -DENABLE_PYTHON=OFF -DENABLE_WHEEL=OFF \ + -S ${{ env.SRC_DIR }} -B ${{ env.BUILD_DIR }} + cmake --build ${{ env.BUILD_DIR }} --config ${{ matrix.build-type }} --parallel $(nproc) --verbose + cmake --install ${{ env.BUILD_DIR }} --config ${{ matrix.build-type }} --prefix ${{ env.INSTALL_DIR }} + + - name: Upload Node.js bindings Build Package + if: always() + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: genai_nodejs_bindings + path: ${{ env.INSTALL_DIR }} + if-no-files-found: 'error' + + genai_tests_wheel: + name: Python (${{ matrix.test.name}}) Tests (wheel) + needs: [ smart_ci, openvino_download, genai_build_wheels, genai_build_genai_wheel ] + timeout-minutes: ${{ 
matrix.test.timeout }} + strategy: + fail-fast: false + matrix: + test: + - name: 'Whisper' + # TODO: skip some tests temporarily until the dataset issue https://github.com/huggingface/datasets/issues/7647 is fixed + cmd: 'python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py ./tests/python_tests/test_whisper_pipeline_static.py -k "not test_smoke[sample_from_dataset0 and not test_whisper_constructors[sample_from_dataset0 and not test_max_new_tokens[sample_from_dataset0 and not test_language_mode[language and not test_task_mode[sample_from_dataset0 and not test_language_autodetect[sample_from_dataset0 and not test_whisper_config_constructor and not test_language_autodetect[sample_from_dataset1 and not test_language_autodetect[sample_from_dataset2 and not test_initial_prompt_hotwords[sample_from_dataset0 and not test_random_sampling[sample_from_dataset0"' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).whisper.test }} + timeout: 45 + - name: 'Cacheopt E2E (Part 1)' + cmd: 'python -m pytest -v ./tests/python_tests/test_kv_cache_eviction/test_kv_cache_eviction_1.py' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test }} + timeout: 180 + - name: 'Cacheopt E2E (Part 2)' + cmd: 'python -m pytest -v ./tests/python_tests/test_kv_cache_eviction/test_kv_cache_eviction_2.py' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test }} + timeout: 360 + - name: 'LLM & VLM' + cmd: 'python -m pytest -v ./tests/python_tests/test_llm_pipeline.py tests/python_tests/test_llm_pipeline_static.py ./tests/python_tests/test_vlm_pipeline.py tests/python_tests/test_structured_output.py tests/python_tests/test_image_generation.py tests/python_tests/test_image_generation_multi_call.py --override-ini cache_dir=/mount/caches/pytest/' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || 
fromJSON(needs.smart_ci.outputs.affected_components).LLM.test || fromJSON(needs.smart_ci.outputs.affected_components).Image_generation.test }} + timeout: 180 + - name: 'Video Generation' + cmd: 'python -m pytest -v ./tests/python_tests/test_video_generation.py --override-ini cache_dir=/mount/caches/pytest/' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).video_generation.test }} + timeout: 60 + - name: 'GGUF Reader tests' + cmd: 'python -m pytest -v ./tests/python_tests/test_gguf_reader.py' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).GGUF.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }} + timeout: 360 + - name: 'Tokenizer tests' + cmd: 'python -m pytest -v ./tests/python_tests/test_tokenizer.py' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).tokenizers.test }} + timeout: 60 + - name: 'API tests' + cmd: 'python -m pytest -v ./tests/python_tests/test_continuous_batching.py -k "not eagle3" ./tests/python_tests/test_generation_config.py ./tests/python_tests/test_sampling.py ./tests/python_tests/test_text_streamer.py' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test || fromJSON(needs.smart_ci.outputs.affected_components).sampling.test || fromJSON(needs.smart_ci.outputs.affected_components).text_streamer.test }} + timeout: 60 + - name: 'Rag tests' + cmd: 'python -m pytest -v ./tests/python_tests/test_rag.py' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).RAG.test }} + timeout: 30 + - name: 'WWB tests' + cmd: 'python -m pytest -v ./tools/who_what_benchmark/tests -m "not nanollava"' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).WWB.test }} + timeout: 120 + - name: 'EAGLE3 speculative decoding tests' + cmd: | + python -m pytest -v ./tests/python_tests/test_continuous_batching.py -k "eagle3" + run_condition: ${{ 
fromJSON(needs.smart_ci.outputs.affected_components).speculative_decoding.test }} + timeout: 90 + - name: 'WWB tests (nanollava)' + cmd: | + python -m pip install transformers==4.48.0 diffusers==0.35.2 + python -m pytest -v ./tools/who_what_benchmark/tests -m nanollava + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).WWB.test }} + timeout: 90 + - name: 'VLM (MiniCPM-o-2_6)' + cmd: | + python -m pip install transformers==4.51.3 + python -m pytest -s -v tests/python_tests/test_vlm_pipeline.py --override-ini cache_dir=/mount/caches/pytest/ -k "MiniCPM-o-2_6" + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test }} + timeout: 60 + - name: 'VLM (qwen3-vl)' + cmd: | + python -m pip install transformers==4.57.0 git+https://github.com/huggingface/optimum-intel.git@0566b76f094d4c3084e06d29a248b39a1bff3fa4 + python -m pytest -s -v tests/python_tests/test_vlm_pipeline.py --override-ini cache_dir=/mount/caches/pytest/ -k "qwen3-vl" + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test }} + timeout: 60 + defaults: + run: + shell: bash + runs-on: aks-linux-8-cores-32gb + container: + image: openvinogithubactions.azurecr.io/ov_test/ubuntu_22_04_x64:${{ needs.openvino_download.outputs.docker_tag }} + volumes: + - /mount:/mount + - ${{ github.workspace }}:${{ github.workspace }} + options: -e HF_TOKEN + + env: + INSTALL_DIR: ${{ github.workspace }}/install + SRC_DIR: ${{ github.workspace }}/src + BUILD_DIR: ${{ github.workspace }}/build + + steps: + - name: Clone openvino.genai + if: ${{ matrix.test.run_condition }} + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + path: ${{ env.SRC_DIR }} + submodules: recursive + + - name: Download Build Artifacts + if: ${{ matrix.test.run_condition }} + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + pattern: "{${{ needs.openvino_download.outputs.ov_artifact_name 
}},genai_wheels,genai_wheel_python_*}" + path: ${{ env.INSTALL_DIR }} + merge-multiple: true + + - name: Install GenAI Wheels + if: ${{ matrix.test.run_condition }} + uses: ./src/.github/actions/install_wheel + with: + packages: "openvino;openvino_tokenizers[transformers];openvino_genai;whowhatbench" + requirements_files: "${{ env.SRC_DIR }}/tests/python_tests/requirements.txt" + local_wheel_dir: ${{ env.INSTALL_DIR }}/wheels + + - name: Tests + if: ${{ matrix.test.run_condition }} + run: ${{ matrix.test.cmd }} + working-directory: ${{ env.SRC_DIR }} + + genai_samples_tests: + name: Samples ${{ matrix.test.name }} (${{ matrix.build-type }}) + needs: [ smart_ci, openvino_download, genai_build_cmake, genai_build_wheels, genai_build_genai_wheel, genai_build_samples, genai_build_nodejs ] + strategy: + fail-fast: false + matrix: + build-type: [Release] + test: + - name: 'LLM' + marker: 'llm' + cmd: 'tests/python_tests/samples' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).LLM_samples.test }} + runner: 'aks-linux-4-cores-16gb' + - name: 'Whisper' + marker: 'whisper' + cmd: 'tests/python_tests/samples' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).Whisper_samples.test }} + runner: 'aks-linux-4-cores-16gb' + - name: 'dreamlike_anime_1_0' + marker: 'dreamlike_anime_1_0' + cmd: 'tests/python_tests/samples' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).Image_generation_samples.test }} + runner: 'aks-linux-8-cores-32gb' + - name: 'LCM_Dreamshaper_v7_int8_ov' + marker: 'LCM_Dreamshaper_v7_int8_ov' + cmd: 'tests/python_tests/samples' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).Image_generation_samples.test }} + runner: 'aks-linux-4-cores-16gb' + # Test hangs on Linux. 
Ticket: 181387 + # - name: 'VLM' + # marker: 'vlm' + # cmd: 'tests/python_tests/samples' + # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).VLM_samples.test }} + # runner: 'aks-linux-8-cores-32gb' + - name: 'Rag' + marker: 'rag' + cmd: 'tests/python_tests/samples' + runner: 'aks-linux-4-cores-16gb' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).RAG_samples.test }} + - name: 'Speech generation' + marker: 'speech_generation' + cmd: 'tests/python_tests/samples' + runner: 'aks-linux-4-cores-16gb' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).Speech_generation_samples.test }} + - name: 'Eagle3 decoding' + marker: 'eagle3_decoding' + cmd: 'tests/python_tests/samples' + runner: 'aks-linux-4-cores-16gb' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).speculative_decoding.test }} + + timeout-minutes: 120 + defaults: + run: + shell: bash + runs-on: ${{ matrix.test.runner }} + container: + image: openvinogithubactions.azurecr.io/ov_test/ubuntu_22_04_x64:${{ needs.openvino_download.outputs.docker_tag }} + volumes: + - /mount:/mount + - ${{ github.workspace }}:${{ github.workspace }} + options: -e HF_TOKEN + env: + INSTALL_DIR: ${{ github.workspace }}/ov + SRC_DIR: ${{ github.workspace }}/src + BUILD_DIR: ${{ github.workspace }}/build + # The debug logging includes messages about the time it takes to read the GGUF model. + # These messages differ from run to run, so we cannot compare the results of the CPP, Python, and JavaScript parts. 
+ OPENVINO_LOG_LEVEL: 1 + + steps: + - name: Clone openvino.genai + if: ${{ matrix.test.run_condition }} + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + path: ${{ env.SRC_DIR }} + submodules: recursive + + - name: Download Build Artifacts + if: ${{ matrix.test.run_condition }} + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + pattern: "{${{ needs.openvino_download.outputs.ov_artifact_name }},genai_archive_${{ matrix.build-type }},genai_samples_${{ matrix.build-type }},genai_wheels,genai_wheel_python_*}" + path: ${{ env.INSTALL_DIR }} + merge-multiple: true + + - name: Download GenAI JS Bindings Artifacts + if: ${{ matrix.test.run_condition }} + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + name: genai_nodejs_bindings + path: ${{ env.SRC_DIR }}/src/js/bin + merge-multiple: true + + - name: Extract Artifacts + if: ${{ matrix.test.run_condition }} + run: | + pigz -dc ${{ env.GENAI_ARCHIVE_NAME }} | tar -xf - -C ${{ env.INSTALL_DIR }} + pigz -dc ${{ env.GENAI_SAMPLES_NAME }} | tar -xf - -C ${{ env.INSTALL_DIR }} + working-directory: ${{ env.INSTALL_DIR }} + + - name: Install GenAI wheels + if: ${{ matrix.test.run_condition }} + uses: ./src/.github/actions/install_wheel + with: + packages: "openvino;openvino_tokenizers[transformers];openvino_genai[testing]" + requirements_files: "${{ env.SRC_DIR }}/samples/requirements.txt" + local_wheel_dir: ${{ env.INSTALL_DIR }}/wheels + + - name: Setup Node + if: ${{ matrix.test.run_condition }} + uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0 + with: + node-version: 21 + + - name: Install GenAI NPM package + if: ${{ matrix.test.run_condition }} + working-directory: ${{ env.SRC_DIR }}/src/js + run: | + npm install ${{ env.INSTALL_DIR }}/openvino_node_npm_package/openvino-node-* --ignore-scripts + cp -R ${{ env.INSTALL_DIR }}/openvino_node_npm_package/bin node_modules/openvino-node/bin + npm 
install --verbose + + - name: Install NPM dependencies for samples + if: ${{ matrix.test.run_condition }} + working-directory: ${{ env.SRC_DIR }}/samples/js/text_generation + run: | + npm install ${{ env.SRC_DIR }}/src/js + npm install --verbose + + - name: Test Samples (Python and C++) + if: ${{ matrix.test.run_condition }} + run: python -m pytest -vvs ${{ env.SRC_DIR }}/${{ matrix.test.cmd }} -m "${{ matrix.test.marker }}" + env: + LD_LIBRARY_PATH: "${{ env.INSTALL_DIR }}/runtime/lib/intel64:${{ env.INSTALL_DIR }}/runtime/3rdparty/tbb/lib:$LD_LIBRARY_PATH" # Required for C++ samples + SAMPLES_PY_DIR: "${{ env.INSTALL_DIR }}/samples/python" + SAMPLES_JS_DIR: "${{ env.SRC_DIR }}/samples/js" + SAMPLES_CPP_DIR: "${{ env.INSTALL_DIR }}/samples_bin" + SAMPLES_C_DIR: "${{ env.INSTALL_DIR }}/samples_bin" + + genai_tools_tests: + name: Tools ${{ matrix.build-type }} + strategy: + fail-fast: false + matrix: + build-type: [Release] + needs: [ smart_ci, openvino_download, genai_build_cmake, genai_build_wheels, genai_build_genai_wheel ] + if: ${{ fromJSON(needs.smart_ci.outputs.affected_components).llm_bench || fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching }} + timeout-minutes: 90 + defaults: + run: + shell: bash + runs-on: aks-linux-4-cores-16gb + container: + image: openvinogithubactions.azurecr.io/ov_test/ubuntu_22_04_x64:${{ needs.openvino_download.outputs.docker_tag }} + volumes: + - /mount:/mount + - ${{ github.workspace }}:${{ github.workspace }} + options: -e HF_TOKEN + + env: + INSTALL_DIR: ${{ github.workspace }}/ov + SRC_DIR: ${{ github.workspace }}/src + BUILD_DIR: ${{ github.workspace }}/build + + steps: + - name: Clone openvino.genai + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + path: ${{ env.SRC_DIR }} + submodules: recursive + + - name: Download Build Artifacts + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + pattern: "{${{ 
needs.openvino_download.outputs.ov_artifact_name }},genai_archive_${{ matrix.build-type }},genai_tools_${{ matrix.build-type }},genai_tests_${{ matrix.build-type }},genai_wheels,genai_wheel_python_*}" + path: ${{ env.INSTALL_DIR }} + merge-multiple: true + + - name: Extract Artifacts + run: | + pigz -dc ${{ env.GENAI_ARCHIVE_NAME }} | tar -xf - -C ${{ env.INSTALL_DIR }} + working-directory: ${{ env.INSTALL_DIR }} + + - name: Fix C++ samples permissions + run: chmod +x ${{ env.INSTALL_DIR }}/samples_bin/* + + - name: Install GenAI wheels + uses: ./src/.github/actions/install_wheel + with: + packages: "openvino;openvino_tokenizers[transformers];openvino_genai[testing]" + requirements_files: "${{ env.SRC_DIR }}/samples/requirements.txt;${{ env.SRC_DIR }}/tools/llm_bench/requirements.txt" + local_wheel_dir: ${{ env.INSTALL_DIR }}/wheels + + - name: gtests unit tests + if: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching }} + run: | + source ${{ env.INSTALL_DIR }}/setupvars.sh + chmod +x ${{ env.INSTALL_DIR }}/tests/tests_continuous_batching + ${{ env.INSTALL_DIR }}/tests/tests_continuous_batching --gtest_filter="-AddSecondInputTest.*" + + - name: Test Continuous Batching Tools + if: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching }} + run: | + source ${{ env.INSTALL_DIR }}/setupvars.sh + python -m pytest -vs ${{ env.SRC_DIR }}/tests/python_tests/samples/test_continuous_batching_tools.py -m "samples" + env: + SAMPLES_CPP_DIR: "${{ env.INSTALL_DIR }}/samples_bin" + + - name: Test LLM Benchmark Tools + if: ${{ fromJSON(needs.smart_ci.outputs.affected_components).llm_bench }} + run: | + source ${{ env.INSTALL_DIR }}/setupvars.sh + python -m pytest -vs ${{ env.SRC_DIR }}/tests/python_tests/samples/test_tools_llm_benchmark.py -m "samples" + env: + SAMPLES_PY_DIR: "${{ env.SRC_DIR }}/tools" + + genai_nodejs_tests: + name: Node.js bindings tests + needs: [ smart_ci, openvino_download, genai_build_wheels, 
genai_build_genai_wheel, genai_build_nodejs ] + if: ${{ fromJSON(needs.smart_ci.outputs.affected_components).JS_API }} + timeout-minutes: 20 + defaults: + run: + shell: bash + runs-on: aks-linux-4-cores-16gb + container: + image: openvinogithubactions.azurecr.io/ov_test/ubuntu_22_04_x64:${{ needs.openvino_download.outputs.docker_tag }} + volumes: + - /mount:/mount + - ${{ github.workspace }}:${{ github.workspace }} + options: -e HF_TOKEN + + env: + OV_INSTALL_DIR: ${{ github.workspace }}/ov + SRC_DIR: ${{ github.workspace }}/openvino.genai + INSTALL_DIR: ${{ github.workspace }}/openvino.genai/src/js/bin + PY_INSTALL_DIR: ${{ github.workspace }}/install + NODE_VERSION: 21 + + steps: + - name: Clone openvino.genai + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + path: ${{ env.SRC_DIR }} + submodules: recursive + + - name: Download OpenVINO Artifacts + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + name: ${{ needs.openvino_download.outputs.ov_artifact_name }} + path: ${{ env.OV_INSTALL_DIR }} + merge-multiple: true + + - name: Download GenAI JS Bildings Artifacts + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + name: genai_nodejs_bindings + path: ${{ env.INSTALL_DIR }} + merge-multiple: true + + - name: Setup Node ${{ env.NODE_VERSION }} + uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0 + with: + node-version: ${{ env.NODE_VERSION }} + + - name: Download Python wheels for JS tests + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + pattern: "{${{ needs.openvino_download.outputs.ov_artifact_name }},genai_wheels,genai_wheel_python_*}" + path: ${{ env.PY_INSTALL_DIR }} + merge-multiple: true + + - name: Install OpenVINO GenAI Python packages (from wheels) + uses: ./openvino.genai/.github/actions/install_wheel + with: + packages: "openvino;openvino_tokenizers[transformers];openvino_genai" + 
requirements_files: "${{ env.SRC_DIR }}/tests/python_tests/requirements.txt" + local_wheel_dir: ${{ env.PY_INSTALL_DIR }}/wheels + + # JS packages use the OpenVINO and OpenVINO GenAI libraries from the bin directory. + # Here we emulate the installation of the openvino-node package from NPM. The latest + # release of the openvino-node package is installed, and we need to update the binaries + # in the node_modules/openvino-node/bin directory to work correctly with GenAI + - name: Install npm package tests dependencies + working-directory: ${{ env.SRC_DIR }}/src/js + run: | + npm install ${{ env.OV_INSTALL_DIR }}/openvino_node_npm_package/openvino-node-* --ignore-scripts + cp -R ${{ env.OV_INSTALL_DIR }}/openvino_node_npm_package/bin node_modules/openvino-node/bin + npm install --verbose + + - name: Check lint + working-directory: ${{ env.SRC_DIR }}/src/js + run: npm run lint + + - name: Run npm package tests + working-directory: ${{ env.SRC_DIR }}/src/js + run: npm test + + genai_xgrammar_off_tests: + name: Build & Test when ENABLE_XGRAMMAR=OFF + needs: [smart_ci, openvino_download, genai_build_wheels] + timeout-minutes: 45 + defaults: + run: + shell: bash + runs-on: aks-linux-4-cores-16gb + container: + image: openvinogithubactions.azurecr.io/ov_build/ubuntu_22_04_x64:${{ needs.openvino_download.outputs.docker_tag }} + volumes: + - /mount:/mount + - ${{ github.workspace }}:${{ github.workspace }} + options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING + env: + CMAKE_GENERATOR: Unix Makefiles + INSTALL_DIR: ${{ github.workspace }}/ov + SRC_DIR: ${{ github.workspace }}/src + BUILD_DIR: ${{ github.workspace }}/build_xgrammar + + steps: + - name: Clone openvino.genai + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + path: ${{ env.SRC_DIR }} + submodules: recursive + + - name: Download Build Artifacts + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + pattern: "{${{ needs.openvino_download.outputs.ov_artifact_name
}},genai_wheels}" + path: ${{ env.INSTALL_DIR }} + merge-multiple: true + + - name: CMake Build without XGrammar + run: | + source ${{ env.INSTALL_DIR }}/setupvars.sh + cmake -DOpenVINODeveloperPackage_DIR=${{ env.INSTALL_DIR }}/developer_package/cmake \ + -DENABLE_PYTHON=ON \ + -DENABLE_XGRAMMAR=OFF \ + -DBUILD_TOKENIZERS=OFF \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_CXX_COMPILER_LAUNCHER=sccache \ + -DCMAKE_C_COMPILER_LAUNCHER=sccache \ + -S ${{ env.SRC_DIR}} \ + -B ${{ env.BUILD_DIR }} + cmake --build ${{ env.BUILD_DIR}} --config Release --parallel $(nproc) --target py_openvino_genai --verbose + + - name: Install dependencies + uses: ./src/.github/actions/install_wheel + with: + packages: openvino;openvino_tokenizers[transformers] + requirements_files: "${{ env.SRC_DIR }}/tests/python_tests/requirements.txt" + local_wheel_dir: ${{ env.INSTALL_DIR }}/wheels + + - name: Run test_llm_pipeline.py when -DENABLE_XGRAMMAR=OFF + env: + PYTHONPATH: "${{ env.BUILD_DIR }}:" + run: | + source ${{ env.INSTALL_DIR }}/setupvars.sh + python3 -m pytest -v ${{ env.SRC_DIR }}/tests/python_tests/test_llm_pipeline.py -k "not test_perf_metrics_with_structured_output" + + Overall_Status: + name: ci/gha_overall_status_linux + needs: [smart_ci, openvino_download, genai_build_cmake, genai_build_wheels, genai_build_genai_wheel, genai_build_samples, genai_build_nodejs, genai_tests_wheel, genai_samples_tests, genai_tools_tests, genai_nodejs_tests, genai_xgrammar_off_tests] + if: ${{ always() }} + runs-on: ubuntu-latest + steps: + - name: Check status of all jobs + if: >- + ${{ + contains(needs.*.result, 'failure') || + contains(needs.*.result, 'cancelled') + }} + run: exit 1 diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/workflows/mac.yml b/src/resources/openvino.genai-2026.1.0.0/.github/workflows/mac.yml new file mode 100644 index 0000000..4ee2724 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/workflows/mac.yml @@ -0,0 +1,784 @@ +name: macOS (14, Python
3.11) +on: + workflow_dispatch: + pull_request: + merge_group: + push: + branches: + - master + - 'releases/**' + +permissions: read-all # Required by https://github.com/ossf/scorecard/blob/e23b8ad91fd6a64a0a971ca4fc0a4d1650725615/docs/checks.md#token-permissions + +concurrency: + # github.ref is not unique in post-commit + group: ${{ github.event_name == 'push' && github.run_id || github.ref }}-mac + cancel-in-progress: true + +env: + MACOSX_DEPLOYMENT_TARGET: '11.0' + PYTHON_VERSION: '3.11' + TARGET_BRANCH: ${{ github.base_ref || github.event.merge_group.base_ref || github.ref }} + BASE_PRODUCT_TYPE: public_macos_arm64 + CCACHE_MAXSIZE: 500Mi + HF_HOME: ~/.cache/hf + OV_CACHE: ~/.cache/ov_cache/ + CLEANUP_CACHE: 1 + OPENVINO_LOG_LEVEL: 4 + +jobs: + smart_ci: + name: Smart CI + runs-on: ubuntu-latest + outputs: + affected_components: "${{ steps.smart_ci.outputs.affected_components }}" + changed_components: "${{ steps.smart_ci.outputs.changed_components }}" + skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" + steps: + - name: checkout action + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + timeout-minutes: 15 + with: + sparse-checkout: .github + + - name: Get affected components + id: smart_ci + uses: openvinotoolkit/openvino/.github/actions/smart-ci@36a8f092d3250e7a2a365f0445e61297d91c358e + with: + repository: ${{ github.repository }} + pr: ${{ github.event.number }} + commit_sha: ${{ github.sha }} + ref_name: ${{ github.ref_name }} + component_pattern: "category: ((?!Python API|CPP API).*)" + repo_token: ${{ secrets.GITHUB_TOKEN }} + skip_when_only_listed_labels_set: 'GH Pages Docs' + skip_when_only_listed_files_changed: '*.md,*.rst,*.png,*.jpg,*.svg,*.gif' + + - name: Show affected components + run: echo "${{ toJSON(steps.smart_ci.outputs.affected_components) }}" + shell: bash + + openvino_download: + needs: smart_ci + if: ${{ github.event_name != 'merge_group' && needs.smart_ci.outputs.skip_workflow != 'True' }} + name: 
Download OpenVINO + outputs: + status: ${{ steps.openvino_download.outcome }} + ov_artifact_name: ${{ steps.openvino_download.outputs.ov_artifact_name }} + ov_wheel_source: ${{ steps.openvino_download.outputs.ov_wheel_source }} + ov_version: ${{ steps.openvino_download.outputs.ov_version }} + timeout-minutes: 10 + defaults: + run: + shell: bash + runs-on: aks-linux-medium + container: + image: 'openvinogithubactions.azurecr.io/openvino_provider:0.1.0' + volumes: + - /mount:/mount + - ${{ github.workspace }}:${{ github.workspace }} + + steps: + - uses: openvinotoolkit/openvino/.github/actions/openvino_provider@master + id: openvino_download + with: + platform: macos_14_7 + arch: 'arm64' + commit_packages_to_provide: wheels,developer_package.tar.gz,openvino_node_npm_package.tar.gz + revision: latest_available_commit + # Set specific revision and uncomment to use OV from its PR build: + # branch_name: master + # event_name: pull_request + + genai_build_cmake: + name: Build cpack - ${{ matrix.build-type }} + strategy: + fail-fast: false + matrix: + build-type: [Release] + needs: [ openvino_download ] + timeout-minutes: 45 + defaults: + run: + shell: bash + runs-on: macos-14 + env: + CMAKE_GENERATOR: Ninja + CMAKE_CXX_COMPILER_LAUNCHER: ccache + CMAKE_C_COMPILER_LAUNCHER: ccache + CCACHE_DIR: ${{ github.workspace }}/ccache + OV_INSTALL_DIR: ${{ github.workspace }}/ov + INSTALL_DIR: ${{ github.workspace }}/install + INSTALL_TOOLS_DIR: ${{ github.workspace }}/tools + INSTALL_TESTS_DIR: ${{ github.workspace }}/tests + BUILD_DIR: ${{ github.workspace }}/build + SRC_DIR: ${{ github.workspace }}/src + MANIFEST_PATH: ${{ github.workspace }}/manifest.yml + + steps: + - name: Clone openvino.genai + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + path: ${{ env.SRC_DIR }} + submodules: recursive + + - name: Download OpenVINO package + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + name: ${{ 
needs.openvino_download.outputs.ov_artifact_name }} + path: ${{ env.OV_INSTALL_DIR }} + merge-multiple: true + + - name: Setup Python ${{ env.PYTHON_VERSION }} + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + python-version: ${{ env.PYTHON_VERSION }} + cache: 'pip' + + - name: Install build dependencies + run: brew install coreutils ccache + + - name: Setup ccache + id: ccache-restore + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + with: + key: ${{ runner.os }}-${{ runner.arch }}-ccache-${{ env.TARGET_BRANCH }}-${{ matrix.build-type }}-cpack-${{ github.sha }} + restore-keys: | + ${{ runner.os }}-${{ runner.arch }}-ccache-${{ env.TARGET_BRANCH }}-${{ matrix.build-type }}-cpack + path: ${{ env.CCACHE_DIR }} + + - name: Generate product manifest + id: create_manifest + uses: openvinotoolkit/openvino/.github/actions/create_manifest@master + with: + repos: ${{ env.SRC_DIR }} + product_type: ${{ env.BASE_PRODUCT_TYPE }}_${{ matrix.build-type }} + target_arch: 'arm64' + build_type: ${{ matrix.build-type }} + save_to: ${{ env.MANIFEST_PATH }} + + - name: Clean ccache stats + run: ccache --zero-stats --show-config + + - name: CMake Build + run: | + source ${{ env.OV_INSTALL_DIR }}/setupvars.sh + cmake -DOpenVINODeveloperPackage_DIR=${{ env.OV_INSTALL_DIR }}/developer_package/cmake \ + -DENABLE_PYTHON=ON \ + -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \ + -S ${{ env.SRC_DIR }} \ + -B ${{ env.BUILD_DIR }} + cmake --build ${{ env.BUILD_DIR}} --config ${{ matrix.build-type }} --parallel $(nproc) --verbose + cmake --install ${{ env.BUILD_DIR }} --config ${{ matrix.build-type }} --prefix ${{ env.INSTALL_DIR }} + cmake --install ${{ env.BUILD_DIR }} --config ${{ matrix.build-type }} --prefix ${{ env.INSTALL_TOOLS_DIR }} --component tools_bin + cmake --install ${{ env.BUILD_DIR }} --config ${{ matrix.build-type }} --prefix ${{ env.INSTALL_TESTS_DIR }} --component tests + + - name: Show ccache stats + run: ccache 
--show-stats + + - name: Save ccache + if: always() && steps.ccache-restore.outputs.cache-hit != 'true' && github.event_name == 'push' + uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + with: + key: ${{ steps.ccache-restore.outputs.cache-primary-key }} + path: ${{ env.CCACHE_DIR }} + - name: Upload Archive Distribution Package + if: always() + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: genai_cpack_${{ matrix.build-type }} + path: ${{ env.INSTALL_DIR }} + if-no-files-found: 'error' + + - name: Upload Tools + if: always() + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: genai_tools_${{ matrix.build-type }} + path: ${{ env.INSTALL_TOOLS_DIR }} + if-no-files-found: 'error' + + - name: Upload Tests + if: always() + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: genai_tests_${{ matrix.build-type }} + path: ${{ env.INSTALL_TESTS_DIR }} + if-no-files-found: 'error' + + - name: Upload manifest + if: always() + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: manifest_${{ matrix.build-type }} + path: ${{ env.MANIFEST_PATH }} + if-no-files-found: 'error' + + genai_build_wheel: + name: Build Wheel + needs: [ openvino_download ] + timeout-minutes: 90 + defaults: + run: + shell: bash + runs-on: macos-14-xlarge + env: + CCACHE_DIR: ${{ github.workspace }}/ccache + CMAKE_CXX_COMPILER_LAUNCHER: ccache + CMAKE_C_COMPILER_LAUNCHER: ccache + OV_INSTALL_DIR: ${{ github.workspace }}/ov + INSTALL_DIR: ${{ github.workspace }}/install + WHEELS_DIR: ${{ github.workspace }}/install/wheels + SRC_DIR: ${{ github.workspace }}/src + OpenVINODeveloperPackage_DIR: ${{ github.workspace }}/ov/developer_package/cmake + + steps: + - name: Clone openvino.genai + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + path: ${{ env.SRC_DIR }} + 
submodules: recursive + + - name: Download OpenVINO package + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + name: ${{ needs.openvino_download.outputs.ov_artifact_name }} + path: ${{ env.OV_INSTALL_DIR }} + merge-multiple: true + + - name: Setup Python ${{ env.PYTHON_VERSION }} + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + python-version: ${{ env.PYTHON_VERSION }} + cache: 'pip' + + - name: Install build dependencies + run: brew install coreutils ccache + + - name: Setup ccache + id: ccache-restore + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + with: + key: ${{ runner.os }}-${{ runner.arch }}-ccache-${{ env.TARGET_BRANCH }}-wheel-${{ github.sha }} + restore-keys: | + ${{ runner.os }}-${{ runner.arch }}-ccache-${{ env.TARGET_BRANCH }}-wheel + path: ${{ env.CCACHE_DIR }} + + - name: Set CI environment + id: create_manifest + uses: openvinotoolkit/openvino/.github/actions/create_manifest@master + with: + repos: ${{ env.SRC_DIR }} + product_type: ${{ env.BASE_PRODUCT_TYPE }}_Release + target_arch: 'arm64' + build_type: Release + save_to: ${{ github.workspace }} + + - name: Clean ccache stats + run: ccache --zero-stats --show-config + + - name: Build Tokenizers Wheel + run: | + python -m pip wheel -v --no-deps --wheel-dir ${{ env.WHEELS_DIR }} \ + --config-settings='override=wheel.build_tag="${{ github.run_number }}"' \ + ${{ needs.openvino_download.outputs.ov_wheel_source }} \ + ${{ env.SRC_DIR }}/thirdparty/openvino_tokenizers + working-directory: ${{ env.OV_INSTALL_DIR }} + + - name: Build GenAI Wheel + run: | + python -m pip wheel -v --no-deps --wheel-dir ${{ env.WHEELS_DIR }} \ + ${{ needs.openvino_download.outputs.ov_wheel_source }} \ + ${{ env.SRC_DIR }} + working-directory: ${{ env.OV_INSTALL_DIR }} + + - name: Build WWB Wheel + run: python -m pip wheel -v --no-deps --wheel-dir ${{ env.WHEELS_DIR }} ${{ env.SRC_DIR }}/tools/who_what_benchmark
working-directory: ${{ env.OV_INSTALL_DIR }} + + - name: Save ccache + if: always() && steps.ccache-restore.outputs.cache-hit != 'true' && github.event_name == 'push' + uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + with: + key: ${{ steps.ccache-restore.outputs.cache-primary-key }} + path: ${{ env.CCACHE_DIR }} + + - name: Upload Wheels + if: always() + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: genai_wheels + path: ${{ env.INSTALL_DIR }} + if-no-files-found: 'error' + + genai_build_samples: + name: Build Samples - ${{ matrix.build-type }} + strategy: + fail-fast: false + matrix: + build-type: [Release] + needs: [ openvino_download, genai_build_cmake ] + timeout-minutes: 30 + defaults: + run: + shell: bash + runs-on: macos-14 + env: + OV_INSTALL_DIR: ${{ github.workspace }}/ov + INSTALL_DIR: ${{ github.workspace }}/install + BUILD_DIR: ${{ github.workspace }}/build + SRC_DIR: ${{ github.workspace }}/src + CMAKE_POLICY_VERSION_MINIMUM: 3.5 + + steps: + - name: Clone openvino.genai + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + path: ${{ env.SRC_DIR }} + submodules: recursive + + - name: Download Build Artifacts + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + pattern: "{${{ needs.openvino_download.outputs.ov_artifact_name }},genai_cpack_${{ matrix.build-type }}}" + path: ${{ env.OV_INSTALL_DIR }} + merge-multiple: true + + - name: Build Samples (Release) + if: ${{ 'Release' == matrix.build-type }} + run: | + chmod +x ${{ env.OV_INSTALL_DIR }}/samples/cpp/build_samples.sh + ${{ env.OV_INSTALL_DIR }}/samples/cpp/build_samples.sh -i ${{ env.INSTALL_DIR }} + chmod +x ${{ env.OV_INSTALL_DIR }}/samples/c/build_samples.sh + ${{ env.OV_INSTALL_DIR }}/samples/c/build_samples.sh -i ${{ env.INSTALL_DIR }} + + - name: Build Samples (${{ matrix.build-type }}) + if: ${{ 'Release' != matrix.build-type }} + run: | + source ${{ 
env.OV_INSTALL_DIR }}/setupvars.sh + cmake -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} -S ${{ env.OV_INSTALL_DIR }}/samples/cpp/ -B ${{ env.BUILD_DIR }} + cmake --build ${{ env.BUILD_DIR }} --config ${{ matrix.build-type }} --parallel $(nproc) + cmake --install ${{ env.BUILD_DIR }} --config ${{ matrix.build-type }} --component samples_bin --prefix ${{ env.INSTALL_DIR }} + + - name: Upload Samples Build Package + if: ${{ always() }} + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: genai_samples_${{ matrix.build-type }} + path: ${{ env.INSTALL_DIR }} + if-no-files-found: 'error' + + genai_build_nodejs: + name: Build Node.js bindings + strategy: + fail-fast: false + matrix: + build-type: [Release] + needs: [ openvino_download ] + timeout-minutes: 50 + defaults: + run: + shell: bash + runs-on: macos-14 + + env: + SRC_DIR: ${{ github.workspace }}/openvino.genai + BUILD_DIR: ${{ github.workspace }}/build + INSTALL_DIR: ${{ github.workspace }}/openvino.genai/src/js/bin + OV_INSTALL_DIR: ${{ github.workspace }}/ov + + steps: + - name: Clone openvino.genai + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + submodules: recursive + path: ${{ env.SRC_DIR }} + + - name: Download OpenVINO package + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + name: ${{ needs.openvino_download.outputs.ov_artifact_name }} + path: ${{ env.OV_INSTALL_DIR }} + merge-multiple: true + + - name: Install build dependencies + run: brew install coreutils + + - name: Build GenAI Node.js bindings + run: | + source ${{ env.OV_INSTALL_DIR }}/setupvars.sh + cmake -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \ + -DENABLE_JS=ON -DCPACK_GENERATOR=NPM \ + -DENABLE_PYTHON=OFF -DENABLE_WHEEL=OFF \ + -S ${{ env.SRC_DIR }} -B ${{ env.BUILD_DIR }} + cmake --build ${{ env.BUILD_DIR }} --config ${{ matrix.build-type }} --parallel $(nproc) --verbose + cmake --install ${{ env.BUILD_DIR }} --config ${{ 
matrix.build-type }} --prefix ${{ env.INSTALL_DIR }} + + - name: Upload Node.js bindings Build Package + if: always() + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: genai_nodejs_bindings + path: ${{ env.INSTALL_DIR }} + if-no-files-found: 'error' + + genai_tests_wheel: + name: Python (${{ matrix.test.name}}) Tests (wheel) + needs: [ smart_ci, openvino_download, genai_build_wheel ] + timeout-minutes: ${{ matrix.test.timeout }} + strategy: + fail-fast: false + matrix: + test: + - name: 'Whisper' + # TODO: skip some tests temporary until https://github.com/huggingface/datasets/issues/7647 dataset is fixed + cmd: 'python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py -k "not test_smoke[sample_from_dataset0 and not test_whisper_constructors[sample_from_dataset0 and not test_max_new_tokens[sample_from_dataset0 and not test_language_mode[language and not test_task_mode[sample_from_dataset0 and not test_language_autodetect[sample_from_dataset0 and not test_whisper_config_constructor and not test_language_autodetect[sample_from_dataset1 and not test_language_autodetect[sample_from_dataset2 and not test_initial_prompt_hotwords[sample_from_dataset0 and not test_random_sampling[sample_from_dataset0"' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).whisper.test }} + timeout: 120 + # Only supported on X64 or ARM with SVE support + # - name: 'Cacheopt E2E (Part 1)' + # cmd: 'python -m pytest -v tests/python_tests/test_kv_cache_eviction/test_kv_cache_eviction_1.py' + # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test }} + # timeout: 180 + # Only supported on X64 or ARM with SVE support + # - name: 'Cacheopt E2E (Part 2)' + # cmd: 'python -m pytest -v tests/python_tests/test_kv_cache_eviction/test_kv_cache_eviction_2.py' + # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test }} + # timeout: 240 + # 
Only supported on X64 or ARM with SVE support + # - name: 'LLM & VLM' + # cmd: 'tests/python_tests/test_llm_pipeline.py tests/python_tests/test_llm_pipeline_static.py tests/python_tests/test_vlm_pipeline.py tests/python_tests/test_structured_output.py tests/python_tests/test_image_generation.py tests/python_tests/test_image_generation_multi_call.py' + # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test || fromJSON(needs.smart_ci.outputs.affected_components).Image_generation.test }} + # timeout: 180 + - name: 'GGUF Reader tests' + cmd: 'python -m pytest -v ./tests/python_tests/test_gguf_reader.py' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).GGUF.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }} + timeout: 360 + - name: 'Tokenizer tests' + cmd: 'python -m pytest -v ./tests/python_tests/test_tokenizer.py' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).tokenizers.test }} + timeout: 60 + # Only supported on X64 or ARM with SVE support + # - name: 'API tests' + # cmd: 'tests/python_tests/test_continuous_batching.py tests/python_tests/test_generation_config.py tests/python_tests/test_sampling.py tests/python_tests/test_text_streamer.py' + # run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test || fromJSON(needs.smart_ci.outputs.affected_components).sampling.test || fromJSON(needs.smart_ci.outputs.affected_components).text_streamer.test }} + # timeout: 60 + - name: 'Rag tests' + cmd: 'python -m pytest -v ./tests/python_tests/test_rag.py' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).RAG.test }} + timeout: 30 + - name: 'WWB tests' + cmd: 'python -m pytest -v ./tools/who_what_benchmark/tests -m "not nanollava"' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).WWB.test }} + timeout: 120 + - name: 
'WWB tests (nanollava)' + cmd: | + python -m pip install transformers==4.48.0 diffusers==0.35.2 + python -m pytest -v ./tools/who_what_benchmark/tests -m nanollava + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).WWB.test }} + timeout: 90 + defaults: + run: + shell: bash + runs-on: macos-14 + env: + INSTALL_DIR: ${{ github.workspace }}/install + SRC_DIR: ${{ github.workspace }}/src + BUILD_DIR: ${{ github.workspace }}/build + HF_HOME: ${{ github.workspace }}/hf_cache + + steps: + - name: Clone openvino.genai + if: ${{ matrix.test.run_condition }} + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + path: ${{ env.SRC_DIR }} + submodules: recursive + + - name: Download Build Artifacts + if: ${{ matrix.test.run_condition }} + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + pattern: "{${{ needs.openvino_download.outputs.ov_artifact_name }},genai_wheels}" + path: ${{ env.INSTALL_DIR }} + merge-multiple: true + + - name: Setup Python ${{ env.PYTHON_VERSION }} + if: ${{ matrix.test.run_condition }} + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + python-version: ${{ env.PYTHON_VERSION }} + cache: 'pip' + + - name: Install GenAI Wheels + if: ${{ matrix.test.run_condition }} + uses: ./src/.github/actions/install_wheel + with: + packages: "openvino;openvino_tokenizers[transformers];openvino_genai;whowhatbench" + requirements_files: "${{ env.SRC_DIR }}/tests/python_tests/requirements.txt" + local_wheel_dir: ${{ env.INSTALL_DIR }}/wheels + + - name: Tests + if: ${{ matrix.test.run_condition }} + run: ${{ matrix.test.cmd }} + working-directory: ${{ env.SRC_DIR }} + + genai_samples_tests: + name: Samples ${{ matrix.test.name }} (${{ matrix.build-type }}) + strategy: + fail-fast: false + matrix: + build-type: [Release] + test: + - name: 'LLM' + marker: 'llm' + cmd: 'tests/python_tests/samples' + run_condition: ${{ 
fromJSON(needs.smart_ci.outputs.affected_components).LLM_samples.test }} + - name: 'Whisper' + marker: 'whisper' + cmd: 'tests/python_tests/samples' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).Whisper_samples.test }} + - name: 'Rag' + marker: 'rag' + cmd: 'tests/python_tests/samples' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).RAG_samples.test }} + - name: 'Speech generation' + marker: 'speech_generation' + cmd: 'tests/python_tests/samples' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).Speech_generation_samples.test }} + + needs: [ smart_ci, openvino_download, genai_build_cmake, genai_build_wheel, genai_build_samples, genai_build_nodejs ] + timeout-minutes: 120 + defaults: + run: + shell: bash + runs-on: macos-14 + env: + INSTALL_DIR: ${{ github.workspace }}/ov + SRC_DIR: ${{ github.workspace }}/src + BUILD_DIR: ${{ github.workspace }}/build + # The debug logging includes messages about the time it takes to read the GGUF model. + # These messages differ from run to run, so we cannot compare the results of the CPP, Python, and JavaScript parts. 
+ OPENVINO_LOG_LEVEL: 1 + + steps: + - name: Clone openvino.genai + if: ${{ matrix.test.run_condition }} + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + path: ${{ env.SRC_DIR }} + submodules: recursive + + - name: Download Build Artifacts + if: ${{ matrix.test.run_condition }} + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + pattern: "{${{ needs.openvino_download.outputs.ov_artifact_name }},genai_cpack_${{ matrix.build-type }},genai_samples_${{ matrix.build-type }},genai_wheels}" + path: ${{ env.INSTALL_DIR }} + merge-multiple: true + + - name: Download GenAI JS Bildings Artifacts + if: ${{ matrix.test.run_condition }} + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + name: genai_nodejs_bindings + path: ${{ env.SRC_DIR }}/src/js/bin + merge-multiple: true + + - name: Setup Python ${{ env.PYTHON_VERSION }} + if: ${{ matrix.test.run_condition }} + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + python-version: ${{ env.PYTHON_VERSION }} + cache: 'pip' + + - name: Install GenAI wheels + if: ${{ matrix.test.run_condition }} + uses: ./src/.github/actions/install_wheel + with: + packages: "openvino;openvino_tokenizers[transformers];openvino_genai[testing]" + requirements_files: "${{ env.SRC_DIR }}/samples/requirements.txt" + local_wheel_dir: ${{ env.INSTALL_DIR }}/wheels + + - name: Fix C++ samples permissions + if: ${{ matrix.test.run_condition }} + run: chmod +x ${{ env.INSTALL_DIR }}/samples_bin/* + + - name: Setup NodeJS + if: ${{ matrix.test.run_condition }} + uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0 + with: + node-version: 21 + + - name: Install GenAI NPM package + if: ${{ matrix.test.run_condition }} + working-directory: ${{ env.SRC_DIR }}/src/js + run: | + npm install ${{ env.INSTALL_DIR }}/openvino_node_npm_package/openvino-node-* --ignore-scripts + cp -R ${{ env.INSTALL_DIR 
}}/openvino_node_npm_package/bin node_modules/openvino-node/bin + npm install --verbose + + - name: Install NPM dependencies for samples + if: ${{ matrix.test.run_condition }} + working-directory: ${{ env.SRC_DIR }}/samples/js/text_generation + run: | + npm install ${{ env.SRC_DIR }}/src/js + npm install --verbose + + - name: Test Samples (Python and C++) + if: ${{ matrix.test.run_condition }} + run: | + source ${{ env.INSTALL_DIR }}/setupvars.sh + python -m pytest -vs ${{ env.SRC_DIR }}/${{ matrix.test.cmd }} -m "${{ matrix.test.marker }}" + env: + SAMPLES_PY_DIR: "${{ env.INSTALL_DIR }}/samples/python" + SAMPLES_JS_DIR: "${{ env.SRC_DIR }}/samples/js" + SAMPLES_CPP_DIR: "${{ env.INSTALL_DIR }}/samples_bin" + SAMPLES_C_DIR: "${{ env.INSTALL_DIR }}/samples_bin" + + genai_tools_tests: + name: Tools ${{ matrix.build-type }} + if: ${{ false }} # Only supported on X64 or ARM with SVE support + strategy: + fail-fast: false + matrix: + build-type: [Release] + needs: [ smart_ci, openvino_download, genai_build_cmake, genai_build_wheel ] + timeout-minutes: 90 + defaults: + run: + shell: bash + runs-on: macos-14 + env: + INSTALL_DIR: ${{ github.workspace }}/ov + SRC_DIR: ${{ github.workspace }}/src + BUILD_DIR: ${{ github.workspace }}/build + + steps: + - name: Clone openvino.genai + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + path: ${{ env.SRC_DIR }} + submodules: recursive + + - name: Download Build Artifacts + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + pattern: "{${{ needs.openvino_download.outputs.ov_artifact_name }},genai_cpack_${{ matrix.build-type }},genai_tools_${{ matrix.build-type }},genai_tests_${{ matrix.build-type }},genai_wheels}" + path: ${{ env.INSTALL_DIR }} + merge-multiple: true + + - name: Fix C++ samples permissions + run: chmod +x ${{ env.INSTALL_DIR }}/samples_bin/* + + - name: Setup Python ${{ env.PYTHON_VERSION }} + uses:
actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + python-version: ${{ env.PYTHON_VERSION }} + cache: 'pip' + + - name: Install GenAI wheels + uses: ./src/.github/actions/install_wheel + with: + packages: "openvino;openvino_tokenizers[transformers];openvino_genai[testing]" + requirements_files: "${{ env.SRC_DIR }}/samples/requirements.txt" + local_wheel_dir: ${{ env.INSTALL_DIR }}/wheels + + - name: gtests unit tests + run: | + source ${{ env.INSTALL_DIR }}/setupvars.sh + chmod +x ${{ env.INSTALL_DIR }}/tests/tests_continuous_batching + ${{ env.INSTALL_DIR }}/tests/tests_continuous_batching --gtest_filter="-AddSecondInputTest.*" + + - name: Test C++ Tools + run: | + source ${{ env.INSTALL_DIR }}/setupvars.sh + python -m pytest -vs ${{ env.SRC_DIR }}/tests/python_tests/samples/test_continuous_batching_tools.py -m "samples" + env: + SAMPLES_CPP_DIR: "${{ env.INSTALL_DIR }}/samples_bin" + + genai_nodejs_tests: + name: Node.js bindings tests + needs: [ smart_ci, openvino_download, genai_build_wheel, genai_build_nodejs ] + # if: ${{ fromJSON(needs.smart_ci.outputs.affected_components).JS_API }} + # nodejs tests fails on mac with: "mutex lock failed: Invalid argument" + # ticket: 179949 + if: false + timeout-minutes: 20 + defaults: + run: + shell: bash + runs-on: macos-14 + + env: + SRC_DIR: ${{ github.workspace }}/openvino.genai + INSTALL_DIR: ${{ github.workspace }}/install + NODE_VERSION: 21 + + steps: + - name: Clone openvino.genai + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + path: ${{ env.SRC_DIR }} + submodules: recursive + + - name: Download OpenVINO Artifacts + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + pattern: "{${{ needs.openvino_download.outputs.ov_artifact_name }},genai_wheels,genai_wheel_python_*}" + path: ${{ env.INSTALL_DIR }} + merge-multiple: true + + - name: Download GenAI JS Bildings Artifacts + uses: 
akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + name: genai_nodejs_bindings + path: ${{ env.SRC_DIR }}/src/js/bin + merge-multiple: true + + - name: Setup Node ${{ env.NODE_VERSION }} + uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0 + with: + node-version: ${{ env.NODE_VERSION }} + + - name: Setup Python ${{ env.PYTHON_VERSION }} + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + python-version: ${{ env.PYTHON_VERSION }} + cache: 'pip' + + # JS packages use the OpenVINO and OpenVINO GenAI libraries from the bin directory. + # Here we emulate the installation of the openvino-node package from NPM. The latest + # release of the openvino-node package is installed, and we need to update the binaries + # in the node_modules/openvino-node/bin directory to work correctly with GenAI + - name: Install npm package tests dependencies + working-directory: ${{ env.SRC_DIR }}/src/js + run: | + npm install ${{ env.INSTALL_DIR }}/openvino_node_npm_package/openvino-node-* --ignore-scripts + cp -R ${{ env.INSTALL_DIR }}/openvino_node_npm_package/bin node_modules/openvino-node/bin + npm install --verbose + + - name: Install OpenVINO GenAI Python packages (from wheels) + uses: ./openvino.genai/.github/actions/install_wheel + with: + packages: "openvino;openvino_tokenizers[transformers];openvino_genai" + requirements_files: "${{ env.SRC_DIR }}/tests/python_tests/requirements.txt" + local_wheel_dir: ${{ env.INSTALL_DIR }}/wheels + + - name: Run npm package tests + working-directory: ${{ env.SRC_DIR }}/src/js + run: npm test + + Overall_Status: + name: ci/gha_overall_status_macos + needs: [smart_ci, openvino_download, genai_build_cmake, genai_build_wheel, genai_build_samples, genai_tests_wheel, genai_samples_tests, genai_tools_tests, genai_build_nodejs, genai_nodejs_tests] + if: ${{ always() }} + runs-on: ubuntu-latest + steps: + - name: Check status of all jobs + if: >- + ${{ + 
contains(needs.*.result, 'failure') || + contains(needs.*.result, 'cancelled') + }} + run: exit 1 diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/workflows/manylinux_2_28.yml b/src/resources/openvino.genai-2026.1.0.0/.github/workflows/manylinux_2_28.yml new file mode 100644 index 0000000..f457b98 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/workflows/manylinux_2_28.yml @@ -0,0 +1,700 @@ +name: Manylinux 2_28 +on: + workflow_dispatch: + pull_request: + merge_group: + push: + branches: + - master + - 'releases/**' + +permissions: read-all # Required by https://github.com/ossf/scorecard/blob/e23b8ad91fd6a64a0a971ca4fc0a4d1650725615/docs/checks.md#token-permissions + +concurrency: + # github.ref is not unique in post-commit + group: ${{ github.event_name == 'push' && github.run_id || github.ref }}-manylinux-2-28 + cancel-in-progress: true + +env: + PYTHON_VERSION: '3.11' + OV_BRANCH: ${{ github.base_ref || github.event.merge_group.base_ref || github.ref }} + CMAKE_CXX_COMPILER_LAUNCHER: sccache + CMAKE_C_COMPILER_LAUNCHER: sccache + SCCACHE_IGNORE_SERVER_IO_ERROR: 1 + SCCACHE_SERVER_PORT: 35555 + SCCACHE_CACHE_SIZE: 30G + SCCACHE_AZURE_KEY_PREFIX: genai/manylinux_2_28 + HF_HOME: /mount/caches/huggingface/lin + # mount directory leads to issue with load_dataset lock file. It either cannot be released or there is a deadlock. 
Ticket: 181288 + HF_DATASETS_CACHE: /tmp/.hf_cache/datasets/ + OV_CACHE: /mount/caches/huggingface/.ov_cache/lin/ + OPENVINO_LOG_LEVEL: 4 + GENAI_ARCHIVE_NAME: genai.tar.gz + GENAI_SAMPLES_NAME: genai_samples.tar.gz + ARTIFACTS_SHARE: '/mount/build-artifacts' + BASE_PRODUCT_TYPE: public_manylinux_2_28_x86_64 + GENAI_WHEELS_ARTIFACT_NAME: 'genai_wheels' + GENAI_ARCHIVE_ARTIFACT_BASE_NAME: 'genai_archive' + +jobs: + smart_ci: + name: Smart CI + runs-on: ubuntu-latest + outputs: + affected_components: "${{ steps.smart_ci.outputs.affected_components }}" + changed_components: "${{ steps.smart_ci.outputs.changed_components }}" + skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" + steps: + - name: checkout action + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + timeout-minutes: 15 + with: + sparse-checkout: .github + + - name: Get affected components + id: smart_ci + uses: openvinotoolkit/openvino/.github/actions/smart-ci@36a8f092d3250e7a2a365f0445e61297d91c358e + with: + repository: ${{ github.repository }} + pr: ${{ github.event.number }} + commit_sha: ${{ github.sha }} + ref_name: ${{ github.ref_name }} + component_pattern: "category: ((?!Python API|CPP API).*)|Structured Output" + repo_token: ${{ secrets.GITHUB_TOKEN }} + skip_when_only_listed_labels_set: 'GH Pages Docs' + skip_when_only_listed_files_changed: '*.md,*.rst,*.png,*.jpg,*.svg,*.gif' + + - name: Show affected components + run: echo "${{ toJSON(steps.smart_ci.outputs.affected_components) }}" + shell: bash + + openvino_download: + needs: smart_ci + if: ${{ github.event_name != 'merge_group' && needs.smart_ci.outputs.skip_workflow != 'True' }} + name: Download OpenVINO + outputs: + status: ${{ steps.openvino_download.outcome }} + ov_artifact_name: ${{ steps.openvino_download.outputs.ov_artifact_name }} + ov_wheel_source: ${{ steps.openvino_download.outputs.ov_wheel_source }} + ov_version: ${{ steps.openvino_download.outputs.ov_version }} + docker_tag: ${{ 
steps.get_docker_tag.outputs.docker_tag }} + timeout-minutes: 10 + defaults: + run: + shell: bash + runs-on: aks-linux-medium + container: + image: 'openvinogithubactions.azurecr.io/openvino_provider:0.1.0' + volumes: + - /mount:/mount + - ${{ github.workspace }}:${{ github.workspace }} + + steps: + - uses: openvinotoolkit/openvino/.github/actions/openvino_provider@master + id: openvino_download + with: + platform: almalinux8 + commit_packages_to_provide: wheels,developer_package.tar.gz,openvino_node_npm_package.tar.gz + revision: latest_available_commit + # Set specific revision and uncomment to use OV from its PR build: + # branch_name: master + # event_name: pull_request + + - name: Clone docker tag from OpenVINO repo + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + repository: 'openvinotoolkit/openvino' + path: 'openvino' + ref: ${{ env.OV_BRANCH }} + sparse-checkout: | + .github/dockerfiles/docker_tag + + - name: Save docker tag to output + id: get_docker_tag + run: | + docker_tag=$(cat openvino/.github/dockerfiles/docker_tag) + echo "docker_tag=$docker_tag" >> $GITHUB_OUTPUT + + genai_build_cmake: + name: Build Archive - ${{ matrix.build-type }} + strategy: + fail-fast: false + matrix: + build-type: [Release] + needs: [ openvino_download ] + timeout-minutes: 45 + defaults: + run: + shell: bash + runs-on: aks-linux-4-cores-16gb + container: + image: openvinogithubactions.azurecr.io/ov_build/manylinux_2_28:${{ needs.openvino_download.outputs.docker_tag }} + volumes: + - /mount:/mount + - ${{ github.workspace }}:${{ github.workspace }} + options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING + env: + CMAKE_GENERATOR: Unix Makefiles + OV_INSTALL_DIR: ${{ github.workspace }}/ov + INSTALL_DIR: ${{ github.workspace }}/install + INSTALL_TOOLS_DIR: ${{ github.workspace }}/tools + INSTALL_TESTS_DIR: ${{ github.workspace }}/tests + BUILD_DIR: ${{ github.workspace }}/build + SRC_DIR: ${{ github.workspace }}/src + 
MANIFEST_PATH: ${{ github.workspace }}/manifest.yml + steps: + - name: Clone openvino.genai + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + path: ${{ env.SRC_DIR }} + submodules: recursive + + - name: Download OpenVINO package + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + name: ${{ needs.openvino_download.outputs.ov_artifact_name }} + path: ${{ env.OV_INSTALL_DIR }} + merge-multiple: true + + - name: Generate product manifest + id: create_manifest + uses: openvinotoolkit/openvino/.github/actions/create_manifest@master + with: + repos: ${{ env.SRC_DIR }} + product_type: ${{ env.BASE_PRODUCT_TYPE }}_${{ matrix.build-type }} + target_arch: 'x86_64' + build_type: ${{ matrix.build-type }} + save_to: ${{ env.MANIFEST_PATH }} + + - name: CMake Build + run: | + source ${{ env.OV_INSTALL_DIR }}/setupvars.sh + /usr/bin/cmake -DOpenVINODeveloperPackage_DIR=${{ env.OV_INSTALL_DIR }}/developer_package/cmake \ + -DENABLE_PYTHON=OFF \ + -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \ + -S ${{ env.SRC_DIR }} \ + -B ${{ env.BUILD_DIR }} + /usr/bin/cmake --build ${{ env.BUILD_DIR }} --config ${{ matrix.build-type }} --parallel $(nproc) --verbose + for py_version in "3.10" "3.11" "3.12" "3.13" + do + rm -rf ${{ env.BUILD_DIR }}/CMakeCache.txt + + python_exec_path=$(python$py_version -c "import sys; print(sys.executable)") + + echo "Configuring cmake for python$py_version" + /usr/bin/cmake -DOpenVINODeveloperPackage_DIR=${{ env.OV_INSTALL_DIR }}/developer_package/cmake \ + -DENABLE_PYTHON=ON \ + -DPython3_EXECUTABLE=${python_exec_path} \ + -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \ + -S ${{ env.SRC_DIR }} \ + -B ${{ env.BUILD_DIR }} + echo "Configuring cmake done for python$py_version" + + /usr/bin/cmake --build ${{ env.BUILD_DIR }} --config ${{ matrix.build-type }} --parallel $(nproc) --verbose + /usr/bin/cmake --install ${{ env.BUILD_DIR }} --config ${{ matrix.build-type }} --prefix ${{ env.INSTALL_DIR }} 
+ done + /usr/bin/cmake --install ${{ env.BUILD_DIR }} --config ${{ matrix.build-type }} --prefix ${{ env.INSTALL_TOOLS_DIR }} --component tools_bin + /usr/bin/cmake --install ${{ env.BUILD_DIR }} --config ${{ matrix.build-type }} --prefix ${{ env.INSTALL_TESTS_DIR }} --component tests + env: + CXXFLAGS: "-Wno-dangling-reference -fno-lto" # bug in gcc-14: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107532, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113485 + CFLAGS: "-Wno-dangling-reference -fno-lto" # bug in gcc-14: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107532, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113485 + LDFLAGS: "-fno-lto" # bug in gcc-14: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113485 + + - name: Pack Artifacts + run: tar -cvf - * | pigz > ${{ env.BUILD_DIR }}/${{ env.GENAI_ARCHIVE_NAME }} + working-directory: ${{ env.INSTALL_DIR }} + + - name: Upload Archive Distribution Package + if: always() + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: genai_archive_${{ matrix.build-type }} + path: ${{ env.BUILD_DIR }}/${{ env.GENAI_ARCHIVE_NAME }} + if-no-files-found: 'error' + + - name: Upload Tools + if: always() + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: genai_tools_${{ matrix.build-type }} + path: ${{ env.INSTALL_TOOLS_DIR }} + if-no-files-found: 'error' + + - name: Upload Tests + if: always() + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: genai_tests_${{ matrix.build-type }} + path: ${{ env.INSTALL_TESTS_DIR }} + if-no-files-found: 'error' + + - name: Upload manifest + if: always() + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: manifest_${{ matrix.build-type }} + path: ${{ env.MANIFEST_PATH }} + if-no-files-found: 'error' + + genai_build_wheels: + name: Build Tokenizers & WWB Wheels + needs: [ openvino_download ] + timeout-minutes: 30 + 
defaults: + run: + shell: bash + runs-on: aks-linux-4-cores-16gb + container: + image: openvinogithubactions.azurecr.io/ov_build/manylinux_2_28:${{ needs.openvino_download.outputs.docker_tag }} + volumes: + - /mount:/mount + - ${{ github.workspace }}:${{ github.workspace }} + options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING + env: + CMAKE_GENERATOR: Ninja + OV_INSTALL_DIR: ${{ github.workspace }}/ov + INSTALL_DIR: ${{ github.workspace }}/install + WHEELS_DIR: ${{ github.workspace }}/install/wheels + SRC_DIR: ${{ github.workspace }}/src + + steps: + - name: Clone openvino.genai + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + path: ${{ env.SRC_DIR }} + submodules: recursive + + - name: Download OpenVINO package + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + name: ${{ needs.openvino_download.outputs.ov_artifact_name }} + path: ${{ env.OV_INSTALL_DIR }} + merge-multiple: true + + - name: Build Tokenizers Wheel + run: | + python -m pip wheel -v --no-deps --wheel-dir ${{ env.WHEELS_DIR }} \ + --config-settings='override=wheel.build_tag="${{ github.run_number }}"' \ + ${{ needs.openvino_download.outputs.ov_wheel_source }} \ + ${{ env.SRC_DIR }}/thirdparty/openvino_tokenizers + working-directory: ${{ env.OV_INSTALL_DIR }} + + - name: Build WWB Wheel + run: python -m pip wheel -v --no-deps --wheel-dir ${{ env.WHEELS_DIR }} ${{ env.SRC_DIR }}/tools/who_what_benchmark + working-directory: ${{ env.OV_INSTALL_DIR }} + + - name: Upload Wheels + if: ${{ always() }} + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: genai_wheels + path: ${{ env.INSTALL_DIR }} + if-no-files-found: 'error' + + genai_build_genai_wheel: + name: Build GenAI Wheel - Python ${{ matrix.python-version }} + needs: [ openvino_download ] + timeout-minutes: 60 + strategy: + fail-fast: false + matrix: + python-version: ['3.10', '3.11', '3.12', '3.13'] + defaults: 
+ run: + shell: bash + runs-on: aks-linux-8-cores-16gb + container: + image: openvinogithubactions.azurecr.io/ov_build/manylinux_2_28:${{ needs.openvino_download.outputs.docker_tag }} + volumes: + - /mount:/mount + - ${{ github.workspace }}:${{ github.workspace }} + options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING + env: + CMAKE_GENERATOR: Ninja + OV_INSTALL_DIR: ${{ github.workspace }}/ov + INSTALL_DIR: ${{ github.workspace }}/install + WHEELS_DIR: ${{ github.workspace }}/install/wheels + SRC_DIR: ${{ github.workspace }}/src + OpenVINODeveloperPackage_DIR: ${{ github.workspace }}/ov/developer_package/cmake + + steps: + - name: Clone openvino.genai + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + path: ${{ env.SRC_DIR }} + submodules: recursive + + - name: Download OpenVINO package + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + name: ${{ needs.openvino_download.outputs.ov_artifact_name }} + path: ${{ env.OV_INSTALL_DIR }} + merge-multiple: true + + - name: Set CI environment + id: create_manifest + uses: openvinotoolkit/openvino/.github/actions/create_manifest@master + with: + repos: ${{ env.SRC_DIR }} + product_type: ${{ env.BASE_PRODUCT_TYPE }}_Release + target_arch: 'x86_64' + build_type: Release + save_to: ${{ github.workspace }} + + - name: Build GenAI Wheel + run: | + python_exec_path=$(python${{ matrix.python-version }} -c "import sys; print(sys.executable)") + $python_exec_path -m pip wheel -v --no-deps --wheel-dir ${{ env.WHEELS_DIR }} \ + ${{ needs.openvino_download.outputs.ov_wheel_source }} \ + ${{ env.SRC_DIR }} + working-directory: ${{ env.OV_INSTALL_DIR }} + + - name: Upload GenAI Wheel + if: ${{ always() }} + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: genai_wheel_python_${{ matrix.python-version }} + path: ${{ env.INSTALL_DIR }} + if-no-files-found: 'error' + + genai_build_nodejs: + name: Build 
Node.js bindings + strategy: + fail-fast: false + matrix: + build-type: [Release] + needs: [ openvino_download ] + timeout-minutes: 20 + defaults: + run: + shell: bash + runs-on: aks-linux-4-cores-16gb + container: + image: openvinogithubactions.azurecr.io/ov_build/manylinux_2_28:${{ needs.openvino_download.outputs.docker_tag }} + volumes: + - /mount:/mount + - ${{ github.workspace }}:${{ github.workspace }} + options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING + env: + OV_INSTALL_DIR: ${{ github.workspace }}/ov + SRC_DIR: ${{ github.workspace }}/src + BUILD_DIR: ${{ github.workspace }}/build + INSTALL_DIR: ${{ github.workspace }}/openvino.genai/src/js/bin + steps: + - name: Clone openvino.genai + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + path: ${{ env.SRC_DIR }} + submodules: recursive + + - name: Download OpenVINO package + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + name: ${{ needs.openvino_download.outputs.ov_artifact_name }} + path: ${{ env.OV_INSTALL_DIR }} + merge-multiple: true + + - name: Cmake GenAI Node.js bindings + run: | + source ${{ env.OV_INSTALL_DIR }}/setupvars.sh + /usr/bin/cmake -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \ + -DENABLE_JS=ON -DCPACK_GENERATOR=NPM \ + -DENABLE_PYTHON=OFF -DENABLE_WHEEL=OFF -DENABLE_SAMPLES=OFF \ + -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \ + -S ${{ env.SRC_DIR }} -B ${{ env.BUILD_DIR }} + env: + CXXFLAGS: "-Wno-dangling-reference -fno-lto" # bug in gcc-14: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107532, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113485 + CFLAGS: "-Wno-dangling-reference -fno-lto" # bug in gcc-14: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107532, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113485 + LDFLAGS: "-fno-lto" # bug in gcc-14: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113485 + + - name: Build GenAI Node.js bindings + run: /usr/bin/cmake --build ${{ env.BUILD_DIR }} 
--config ${{ matrix.build-type }} --parallel $(nproc) --verbose + + - name: Install GenAI Node.js bindings + run: /usr/bin/cmake --install ${{ env.BUILD_DIR }} --config ${{ matrix.build-type }} --prefix ${{ env.INSTALL_DIR }} + + - name: Upload Node.js bindings Build Package + if: always() + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: genai_nodejs_bindings + path: ${{ env.INSTALL_DIR }} + if-no-files-found: 'error' + + store_artifacts: + name: Store build artifacts + strategy: + matrix: + build-type: [Release] + needs: [openvino_download, genai_build_wheels, genai_build_genai_wheel, genai_build_cmake, genai_build_nodejs] + timeout-minutes: 10 + defaults: + run: + shell: bash + runs-on: aks-linux-medium + container: + image: openvinogithubactions.azurecr.io/library/python:3.12-slim + volumes: + - /mount:/mount + - ${{ github.workspace }}:${{ github.workspace }} + env: + CPACK_PATH: ${{ github.workspace }}/ov_genai + CPACK_PACKAGE: ${{ github.workspace }}/ov_genai.tar.gz + WHEEL_PACKAGE: ${{ github.workspace }}/wheels + MANIFEST_PATH: ${{ github.workspace }}/manifest.yml + NODEJS_PATH: ${{ github.workspace }}/genai_nodejs_bindings + NODEJS_PACKAGE: ${{ github.workspace }}/genai_nodejs_bindings.tar.gz + + steps: + - name: Download genai package + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + name: ${{ env.GENAI_ARCHIVE_ARTIFACT_BASE_NAME }}_${{ matrix.build-type }} + path: ${{ env.CPACK_PATH }} + + - name: Pack Artifacts + run: tar -czvf ${{ env.CPACK_PACKAGE }} * + working-directory: ${{ env.CPACK_PATH }} + + - name: Download manifest and wheels + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + pattern: '{genai_wheels,genai_wheel_python_*,manifest_${{ matrix.build-type }}}' + path: ${{ github.workspace }} + merge-multiple: true + + - name: Download genai package + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + 
with: + name: genai_nodejs_bindings + path: ${{ env.NODEJS_PATH }} + + - name: Pack Artifacts + run: tar -czvf ${{ env.NODEJS_PACKAGE }} * + working-directory: ${{ env.NODEJS_PATH }} + + - name: Store ${{ matrix.build_type }} artifacts to a shared drive + id: store_artifacts + if: ${{ always() }} + uses: openvinotoolkit/openvino/.github/actions/store_artifacts@master + with: + artifacts: | + ${{ env.CPACK_PACKAGE }} + ${{ env.WHEEL_PACKAGE }} + ${{ env.MANIFEST_PATH }} + ${{ env.NODEJS_PACKAGE }} + storage_dir: ${{ env.BASE_PRODUCT_TYPE }}_${{ matrix.build-type }} + storage_root: ${{ env.ARTIFACTS_SHARE }} + product_name: ${{ github.event.repository.name }} + + genai_tests_wheel: + name: Python (${{ matrix.test.name}}) Tests (wheel) + needs: [ smart_ci, openvino_download, genai_build_wheels, genai_build_genai_wheel ] + timeout-minutes: ${{ matrix.test.timeout }} + strategy: + fail-fast: false + matrix: + test: + - name: 'Whisper' + # TODO: skip some tests temporarily until https://github.com/huggingface/datasets/issues/7647 dataset is fixed + cmd: 'python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py ./tests/python_tests/test_whisper_pipeline_static.py -k "not test_smoke[sample_from_dataset0 and not test_whisper_constructors[sample_from_dataset0 and not test_max_new_tokens[sample_from_dataset0 and not test_language_mode[language and not test_task_mode[sample_from_dataset0 and not test_language_autodetect[sample_from_dataset0 and not test_whisper_config_constructor and not test_language_autodetect[sample_from_dataset1 and not test_language_autodetect[sample_from_dataset2 and not test_initial_prompt_hotwords[sample_from_dataset0 and not test_random_sampling[sample_from_dataset0"' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).whisper.test }} + timeout: 120 + - name: 'Cacheopt E2E (Part 1)' + cmd: 'python -m pytest -v ./tests/python_tests/test_kv_cache_eviction/test_kv_cache_eviction_1.py' + run_condition: ${{ 
fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test }} + timeout: 180 + - name: 'Cacheopt E2E (Part 2)' + cmd: 'python -m pytest -v ./tests/python_tests/test_kv_cache_eviction/test_kv_cache_eviction_2.py' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test }} + timeout: 360 + - name: 'LLM & VLM' + cmd: 'python -m pytest -v ./tests/python_tests/test_llm_pipeline.py ./tests/python_tests/test_llm_pipeline_static.py ./tests/python_tests/test_vlm_pipeline.py ./tests/python_tests/test_structured_output.py ./tests/python_tests/test_image_generation.py ./tests/python_tests/test_image_generation_multi_call.py --override-ini cache_dir=/mount/caches/pytest/' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test || fromJSON(needs.smart_ci.outputs.affected_components).Image_generation.test }} + timeout: 180 + - name: 'Video Generation' + cmd: 'python -m pytest -v ./tests/python_tests/test_video_generation.py --override-ini cache_dir=/mount/caches/pytest/' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).video_generation.test }} + timeout: 60 + - name: 'GGUF Reader tests' + cmd: 'python -m pytest -v ./tests/python_tests/test_gguf_reader.py' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).GGUF.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }} + timeout: 360 + - name: 'Tokenizer tests' + cmd: 'python -m pytest -v ./tests/python_tests/test_tokenizer.py' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).tokenizers.test }} + timeout: 60 + - name: 'API tests' + cmd: 'python -m pytest -v ./tests/python_tests/test_continuous_batching.py -k "not eagle3" ./tests/python_tests/test_generation_config.py ./tests/python_tests/test_sampling.py ./tests/python_tests/test_text_streamer.py' + run_condition: ${{ 
fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test || fromJSON(needs.smart_ci.outputs.affected_components).sampling.test || fromJSON(needs.smart_ci.outputs.affected_components).text_streamer.test }} + timeout: 60 + - name: 'Rag tests' + cmd: 'python -m pytest -v ./tests/python_tests/test_rag.py' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).RAG.test }} + timeout: 30 + - name: 'WWB tests' + cmd: 'python -m pytest -v ./tools/who_what_benchmark/tests -m "not nanollava"' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).WWB.test }} + timeout: 120 + - name: 'EAGLE3 speculative decoding tests' + cmd: | + python -m pytest -v ./tests/python_tests/test_continuous_batching.py -k "eagle3" + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).speculative_decoding.test }} + timeout: 90 + - name: 'WWB tests (nanollava)' + cmd: | + python -m pip install transformers==4.48.0 diffusers==0.35.2 + python -m pytest -v ./tools/who_what_benchmark/tests -m nanollava + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).WWB.test }} + timeout: 90 + - name: 'VLM (MiniCPM-o-2_6)' + cmd: | + python -m pip install transformers==4.51.3 + python -m pytest -s -v tests/python_tests/test_vlm_pipeline.py --override-ini cache_dir=/mount/caches/pytest/ -k "MiniCPM-o-2_6" + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test }} + timeout: 60 + - name: 'VLM (qwen3-vl)' + cmd: | + python -m pip install transformers==4.57.0 git+https://github.com/huggingface/optimum-intel.git@0566b76f094d4c3084e06d29a248b39a1bff3fa4 + python -m pytest -s -v tests/python_tests/test_vlm_pipeline.py --override-ini cache_dir=/mount/caches/pytest/ -k "qwen3-vl" + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test }} + timeout: 60 + defaults: + run: + shell: bash + runs-on: aks-linux-8-cores-32gb + container: + image: 
openvinogithubactions.azurecr.io/ov_test/ubuntu_22_04_x64:${{ needs.openvino_download.outputs.docker_tag }} + volumes: + - /mount:/mount + - ${{ github.workspace }}:${{ github.workspace }} + options: -e HF_TOKEN + + env: + INSTALL_DIR: ${{ github.workspace }}/install + SRC_DIR: ${{ github.workspace }}/src + BUILD_DIR: ${{ github.workspace }}/build + + steps: + - name: Clone openvino.genai + if: ${{ matrix.test.run_condition }} + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + path: ${{ env.SRC_DIR }} + submodules: recursive + + - name: Download Build Artifacts + if: ${{ matrix.test.run_condition }} + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + pattern: "{${{ needs.openvino_download.outputs.ov_artifact_name }},genai_wheels,genai_wheel_python_*}" + path: ${{ env.INSTALL_DIR }} + merge-multiple: true + + - name: Install GenAI Wheels + if: ${{ matrix.test.run_condition }} + uses: ./src/.github/actions/install_wheel + with: + packages: "openvino;openvino_tokenizers[transformers];openvino_genai;whowhatbench" + requirements_files: "${{ env.SRC_DIR }}/tests/python_tests/requirements.txt" + local_wheel_dir: ${{ env.INSTALL_DIR }}/wheels + + - name: Tests + if: ${{ matrix.test.run_condition }} + run: ${{ matrix.test.cmd }} + working-directory: ${{ env.SRC_DIR }} + + genai_nodejs_tests: + name: Node.js bindings tests + needs: [ smart_ci, openvino_download, genai_build_wheels, genai_build_genai_wheel, genai_build_nodejs ] + if: ${{ fromJSON(needs.smart_ci.outputs.affected_components).JS_API }} + timeout-minutes: 20 + defaults: + run: + shell: bash + runs-on: aks-linux-4-cores-16gb + container: + image: openvinogithubactions.azurecr.io/ov_test/ubuntu_22_04_x64:${{ needs.openvino_download.outputs.docker_tag }} + volumes: + - /mount:/mount + - ${{ github.workspace }}:${{ github.workspace }} + options: -e HF_TOKEN + + env: + OV_INSTALL_DIR: ${{ github.workspace }}/ov + SRC_DIR: ${{ github.workspace }}/src + 
NODE_VERSION: 22 + PY_INSTALL_DIR: ${{ github.workspace }}/install + + steps: + - name: Clone openvino.genai + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + path: ${{ env.SRC_DIR }} + submodules: recursive + + - name: Download OpenVINO Artifacts + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + name: ${{ needs.openvino_download.outputs.ov_artifact_name }} + path: ${{ env.OV_INSTALL_DIR }} + merge-multiple: true + + - name: Download GenAI JS Bildings Artifacts + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + name: genai_nodejs_bindings + path: ${{ env.SRC_DIR }}/src/js/bin + merge-multiple: true + + - name: Setup Node ${{ env.NODE_VERSION }} + uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0 + with: + node-version: ${{ env.NODE_VERSION }} + + - name: Download Python wheels for JS tests + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + pattern: "{${{ needs.openvino_download.outputs.ov_artifact_name }},genai_wheels,genai_wheel_python_*}" + path: ${{ env.PY_INSTALL_DIR }} + merge-multiple: true + + - name: Install OpenVINO GenAI Python packages (from wheels) + uses: ./src/.github/actions/install_wheel + with: + packages: "openvino;openvino_tokenizers[transformers];openvino_genai" + requirements_files: "${{ env.SRC_DIR }}/tests/python_tests/requirements.txt" + local_wheel_dir: ${{ env.PY_INSTALL_DIR }}/wheels + + # JS packages use the OpenVINO and OpenVINO GenAI libraries from the bin directory. + # Here we emulate the installation of the openvino-node package from NPM. 
The latest + # release of the openvino-node package is installed, and we need to update the binaries + # in the node_modules/openvino-node/bin directory to work correctly with GenAI + - name: Install npm package tests dependencies + working-directory: ${{ env.SRC_DIR }}/src/js + run: | + npm install ${{ env.OV_INSTALL_DIR }}/openvino_node_npm_package/openvino-node-* --ignore-scripts + cp -R ${{ env.OV_INSTALL_DIR }}/openvino_node_npm_package/bin node_modules/openvino-node/bin + npm install --verbose + + - name: Check lint + working-directory: ${{ env.SRC_DIR }}/src/js + run: npm run lint + + - name: Run npm package tests + working-directory: ${{ env.SRC_DIR }}/src/js + run: npm test + + Overall_Status: + name: ci/gha_overall_status_manylinux_2_28 + needs: [smart_ci, openvino_download, genai_build_cmake, genai_build_wheels, genai_build_genai_wheel, genai_build_nodejs, genai_tests_wheel, genai_nodejs_tests] + if: ${{ always() }} + runs-on: ubuntu-latest + steps: + - name: Check status of all jobs + if: >- + ${{ + contains(needs.*.result, 'failure') || + contains(needs.*.result, 'cancelled') + }} + run: exit 1 diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/workflows/sdl.yml b/src/resources/openvino.genai-2026.1.0.0/.github/workflows/sdl.yml new file mode 100644 index 0000000..7505ced --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/workflows/sdl.yml @@ -0,0 +1,73 @@ +name: SDL tests +on: + workflow_dispatch: + pull_request: + push: + branches: + - master + - 'releases/**' + +permissions: read-all + +concurrency: + # github.ref is not unique in post-commit + group: ${{ github.event_name == 'push' && github.run_id || github.ref }}-linux-sdl + cancel-in-progress: true + +env: + PYTHON_VERSION: '3.11' + +jobs: + sdl_tests: + name: SDL tests + timeout-minutes: 10 + defaults: + run: + shell: bash + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + security-events: write + + steps: + - name: Clone sources and tests + 
uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + + - name: Setup Python ${{ env.PYTHON_VERSION }} + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + python-version: ${{ env.PYTHON_VERSION }} + cache: 'pip' + + - name: Install Python tests dependencies + run: | + python3 -m pip install flake8 pytest black bandit + + - name: Lint with flake8 (WWB) + run: | + # stop the build if there are Python syntax errors or undefined names + python -m flake8 . --config=./setup.cfg + working-directory: ${{ github.workspace }}/tools/who_what_benchmark + + - name: Lint with flake8 (LLM) + run: | + # stop the build if there are Python syntax errors or undefined names + python -m flake8 . --config=./setup.cfg + working-directory: ${{ github.workspace }}/tools/llm_bench + + - name: Bandit tests + run: python -m bandit --recursive --configfile bandit.yml . + + - name: Run Trivy vulnerability scanner in fs mode + uses: aquasecurity/trivy-action@e368e328979b113139d6f9068e03accaed98a518 # v0.34.1 + with: + scan-type: 'fs' + scan-ref: '.' 
+ version: v0.69.2 + + - name: Dependency Review + if: ${{ github.event_name == 'pull_request' }} + uses: actions/dependency-review-action@3c4e3dcb1aa7874d2c16be7d79418e9b7efd6261 # v4.8.2 + with: + config-file: './.github/dependency_review.yml' diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/workflows/stale.yml b/src/resources/openvino.genai-2026.1.0.0/.github/workflows/stale.yml new file mode 100644 index 0000000..12f0905 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/workflows/stale.yml @@ -0,0 +1,28 @@ +name: Mark and close stale PRs +on: + schedule: + - cron: "0 0 * * 1" # Weekly on Monday at midnight UTC + workflow_dispatch: + +jobs: + stale: + runs-on: ubuntu-latest + permissions: + issues: read + pull-requests: write + if: ${{ github.repository_owner == 'openvinotoolkit' }} + steps: + - uses: actions/stale@997185467fa4f803885201cee163a9f38240193d # v10.1.1 + with: + # PR configuration + days-before-pr-stale: 14 + days-before-pr-close: 7 + exempt-pr-labels: "keep-open" + stale-pr-message: "This PR will be closed in a week because of 2 weeks of no activity." + close-pr-message: "Closing due to inactivity. Feel free to reopen if you plan to continue working on this." 
+ + # Disable issue handling + days-before-issue-stale: -1 + days-before-issue-close: -1 + + ascending: true diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/workflows/windows.yml b/src/resources/openvino.genai-2026.1.0.0/.github/workflows/windows.yml new file mode 100644 index 0000000..1da458d --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/workflows/windows.yml @@ -0,0 +1,1037 @@ +name: Windows (VS 2022, Python 3.11) +on: + workflow_dispatch: + pull_request: + merge_group: + push: + branches: + - master + - 'releases/**' + +permissions: read-all # Required by https://github.com/ossf/scorecard/blob/e23b8ad91fd6a64a0a971ca4fc0a4d1650725615/docs/checks.md#token-permissions + +concurrency: + # github.ref is not unique in post-commit + group: ${{ github.event_name == 'push' && github.run_id || github.ref }}-windows + cancel-in-progress: true + +env: + PYTHON_VERSION: '3.11' + TARGET_BRANCH: 'master' + PYTHONIOENCODING: utf8 + CMAKE_CXX_COMPILER_LAUNCHER: ccache + CMAKE_C_COMPILER_LAUNCHER: ccache + CCACHE_MAXSIZE: 500Mi + HF_HOME: C:/mount/caches/huggingface/win + OV_CACHE: C:/mount/caches/huggingface/.ov_cache/win/775cf1/ + OPENVINO_LOG_LEVEL: 2 # Windows fails with out of memory because of too verbose logging + ARTIFACTS_SHARE: '/mount/build-artifacts' + BASE_PRODUCT_TYPE: public_windows_vs2022 + GENAI_WHEELS_ARTIFACT_NAME: 'genai_wheels' + GENAI_ARCHIVE_ARTIFACT_BASE_NAME: 'genai_cpack' + +jobs: + smart_ci: + name: Smart CI + runs-on: ubuntu-latest + outputs: + affected_components: "${{ steps.smart_ci.outputs.affected_components }}" + changed_components: "${{ steps.smart_ci.outputs.changed_components }}" + skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" + steps: + - name: checkout action + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + timeout-minutes: 15 + with: + sparse-checkout: .github + + - name: Get affected components + id: smart_ci + uses: 
openvinotoolkit/openvino/.github/actions/smart-ci@36a8f092d3250e7a2a365f0445e61297d91c358e + with: + repository: ${{ github.repository }} + pr: ${{ github.event.number }} + commit_sha: ${{ github.sha }} + ref_name: ${{ github.ref_name }} + component_pattern: "category: ((?!Python API|CPP API).*)" + repo_token: ${{ secrets.GITHUB_TOKEN }} + skip_when_only_listed_labels_set: 'GH Pages Docs' + skip_when_only_listed_files_changed: '*.md,*.rst,*.png,*.jpg,*.svg,*.gif' + + - name: Show affected components + run: echo "${{ toJSON(steps.smart_ci.outputs.affected_components) }}" + shell: bash + + openvino_download: + needs: smart_ci + if: ${{ github.event_name != 'merge_group' && needs.smart_ci.outputs.skip_workflow != 'True' }} + name: Download prebuilt OpenVINO + outputs: + status: ${{ steps.openvino_download.outcome }} + ov_artifact_name: ${{ steps.openvino_download.outputs.ov_artifact_name }} + ov_wheel_source: ${{ steps.openvino_download.outputs.ov_wheel_source }} + ov_version: ${{ steps.openvino_download.outputs.ov_version }} + timeout-minutes: 10 + defaults: + run: + shell: bash + runs-on: aks-linux-medium + container: + image: 'openvinogithubactions.azurecr.io/openvino_provider:0.1.0' + volumes: + - /mount:/mount + - ${{ github.workspace }}:${{ github.workspace }} + continue-on-error: true + + steps: + - uses: openvinotoolkit/openvino/.github/actions/openvino_provider@master + id: openvino_download + with: + platform: windows + commit_packages_to_provide: wheels,openvino_node_npm_package.zip + revision: latest_available_commit + # Set specific revision and uncomment to use OV from its PR build: + # branch_name: master + # event_name: pull_request + + genai_build_cpack: + name: genai cpack (${{ matrix.build-type }}) + strategy: + matrix: + build-type: [Release, Debug] + needs: [ openvino_download ] + timeout-minutes: 80 + defaults: + run: + shell: pwsh + runs-on: aks-win-8-cores-16gb-build + env: + CMAKE_GENERATOR: 'Ninja' # Ninja is the only Windows native generator 
supported by ccache + OV_INSTALL_DIR: ${{ github.workspace }}\install\ov + GENAI_INSTALL_DIR: ${{ github.workspace }}\install\genai + INSTALL_TOOLS_DIR: ${{ github.workspace }}\tools + INSTALL_TESTS_DIR: ${{ github.workspace }}\tests + SRC_DIR: ${{ github.workspace }}\src\genai + BUILD_DIR: ${{ github.workspace }}\build\genai + CCACHE_DIR: ${{ github.workspace }}\ccache + MANIFEST_PATH: ${{ github.workspace }}\manifest.yml + + steps: + - name: Clone genai + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + submodules: recursive + path: ${{ env.SRC_DIR }} + + - name: Setup Python 3.10 + if: ${{ matrix.build-type != 'Debug' }} + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + python-version: '3.10' + + - name: Setup Python 3.11 + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + python-version: '3.11' + + - name: Setup Python 3.12 + if: ${{ matrix.build-type != 'Debug' }} + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + python-version: '3.12' + + - name: Setup Python 3.13 + if: ${{ matrix.build-type != 'Debug' }} + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + python-version: '3.13' + + - name: Download OpenVINO package + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + name: ${{ needs.openvino_download.outputs.ov_artifact_name }} + path: ${{ env.OV_INSTALL_DIR }} + merge-multiple: true + + # + # Build + # + + - name: Download and install ninja + run: | + Invoke-WebRequest https://github.com/ninja-build/ninja/releases/download/v1.11.1/ninja-win.zip -OutFile ninja-win.zip -MaximumRetryCount 10 + Expand-Archive -Force ninja-win.zip + # Add it to the GitHub Path so it would be available in the subsequent steps + Add-Content -Path $env:GITHUB_PATH -Value "${{ github.workspace }}/ninja-win" + + - name: Download and install ccache + run: | + 
Invoke-WebRequest -Uri 'https://github.com/ccache/ccache/releases/download/v4.9.1/ccache-4.9.1-windows-x86_64.zip' -OutFile 'ccache.zip' + Expand-Archive -Path 'ccache.zip' -DestinationPath 'C:\temp\ccache' + Move-Item -Path 'C:\temp\ccache\*' -Destination 'C:\ccache' + Add-Content -Path $env:GITHUB_PATH -Value "C:\ccache" + + - name: Setup ccache + id: ccache-restore + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + with: + key: ${{ runner.os }}-${{ runner.arch }}-ccache-${{ env.TARGET_BRANCH }}-${{ matrix.build-type }}-cpack-${{ github.sha }} + restore-keys: | + ${{ runner.os }}-${{ runner.arch }}-ccache-${{ env.TARGET_BRANCH }}-${{ matrix.build-type }}-cpack + path: ${{ env.CCACHE_DIR }} + + - name: Generate product manifest + id: create_manifest + uses: openvinotoolkit/openvino/.github/actions/create_manifest@master + with: + repos: ${{ env.SRC_DIR }} + product_type: ${{ env.BASE_PRODUCT_TYPE }}_${{ matrix.build-type }} + target_arch: 'x86_64' + build_type: ${{ matrix.build-type }} + save_to: ${{ env.MANIFEST_PATH }} + + - name: Clean ccache stats + run: ccache --zero-stats --show-config + + - name: Configure Developer Command Prompt for Microsoft Visual C++ + uses: ilammy/msvc-dev-cmd@0b201ec74fa43914dc39ae48a89fd1d8cb592756 # v1.13.0 + with: + toolset: 14.42 # v2022 + + - name: CMake Build + shell: pwsh + run: | + ${{ env.OV_INSTALL_DIR }}/setupvars.ps1 + if ( "${{ matrix.build-type }}" -ne "Debug" ) { + $pyVersions = '3.10', '3.11', '3.12', '3.13' + } else { + $pyVersions = '3.11' + } + + foreach ($pyVersion in $pyVersions) { + Remove-Item -Path "${{ env.BUILD_DIR }}/CMakeCache.txt" -Force -ErrorAction SilentlyContinue + $pythonCommand = "py -$pyVersion -c `"import sys; print(f'{sys.executable}')`"" + $pythonExecutablePath = & cmd /c $pythonCommand + + cmake -DPython3_EXECUTABLE="$pythonExecutablePath" -DOpenVINODeveloperPackage_DIR=${{ env.OV_INSTALL_DIR }}/developer_package/cmake -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} -S ${{ 
env.SRC_DIR }} -B ${{ env.BUILD_DIR }} && + cmake --build ${{ env.BUILD_DIR }} --parallel $ENV:NUMBER_OF_PROCESSORS --config ${{ matrix.build-type }} --verbose && + cmake --install ${{ env.BUILD_DIR }} --config=${{ matrix.build-type }} --prefix=${{ env.GENAI_INSTALL_DIR }} + if ($LASTEXITCODE -ne 0) { + Write-Host "Failed to build bindings for Python $pyVersion" + exit 1 + } + } + + cmake --install ${{ env.BUILD_DIR }} --config=${{ matrix.build-type }} --prefix=${{ env.INSTALL_TOOLS_DIR }} --component tools_bin + cmake --install ${{ env.BUILD_DIR }} --config=${{ matrix.build-type }} --prefix=${{ env.INSTALL_TESTS_DIR }} --component tests + env: + CMAKE_TLS_VERIFY: 0 + + - name: Show ccache stats + run: ccache --show-stats + + # + # Upload build artifacts + # + + - name: Pack Artifacts + run: | + $file=Get-ChildItem -Path "${{ env.GENAI_INSTALL_DIR }}" + $compress = @{ + Path = $file + CompressionLevel = "Optimal" + DestinationPath = "${{ env.BUILD_DIR }}/${{ env.GENAI_ARCHIVE_ARTIFACT_BASE_NAME }}.zip" + } + Compress-Archive @compress + + - name: Save ccache + if: always() && steps.ccache-restore.outputs.cache-hit != 'true' && github.event_name == 'push' + uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + with: + key: ${{ steps.ccache-restore.outputs.cache-primary-key }} + path: ${{ env.CCACHE_DIR }} + + - name: Upload cpack package + if: ${{ always() }} + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: genai_cpack_${{ matrix.build-type }} + path: ${{ env.BUILD_DIR }}/*.zip + if-no-files-found: 'error' + + - name: Upload Tools + if: always() + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: genai_tools_${{ matrix.build-type }} + path: ${{ env.INSTALL_TOOLS_DIR }} + if-no-files-found: 'error' + + - name: Upload Tests + if: always() + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: genai_tests_${{ 
matrix.build-type }} + path: ${{ env.INSTALL_TESTS_DIR }} + if-no-files-found: 'error' + + - name: Upload manifest + if: always() + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: manifest_${{ matrix.build-type }} + path: ${{ env.MANIFEST_PATH }} + if-no-files-found: 'error' + + genai_build_wheels: + name: Build Tokenizers & WWB Wheels + needs: [ openvino_download ] + timeout-minutes: 30 + defaults: + run: + shell: pwsh + runs-on: aks-win-8-cores-16gb-build + env: + CMAKE_GENERATOR: Ninja + OV_INSTALL_DIR: ${{ github.workspace }}\ov + SRC_DIR: ${{ github.workspace }}\src + BUILD_DIR: ${{ github.workspace }}\build + INSTALL_DIR: ${{ github.workspace }}\genai + WHEELS_DIR: ${{ github.workspace }}\genai\wheels + CCACHE_DIR: ${{ github.workspace }}\ccache + + steps: + - name: Clone openvino.genai + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + submodules: recursive + path: ${{ env.SRC_DIR }} + + - name: Setup Python 3.11 + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + python-version: '3.11' + cache: 'pip' + + - name: Download OpenVINO package + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + name: ${{ needs.openvino_download.outputs.ov_artifact_name }} + path: ${{ env.OV_INSTALL_DIR }} + merge-multiple: true + + - name: Download and install ninja + run: | + Invoke-WebRequest https://github.com/ninja-build/ninja/releases/download/v1.11.1/ninja-win.zip -OutFile ninja-win.zip -MaximumRetryCount 10 + Expand-Archive -Force ninja-win.zip + # Add it to the GitHub Path so it would be available in the subsequent steps + Add-Content -Path $env:GITHUB_PATH -Value "${{ github.workspace }}/ninja-win" + + - name: Download and install ccache + run: | + Invoke-WebRequest -Uri 'https://github.com/ccache/ccache/releases/download/v4.9.1/ccache-4.9.1-windows-x86_64.zip' -OutFile 'ccache.zip' + Expand-Archive -Path 'ccache.zip' 
-DestinationPath 'C:\temp\ccache' + Move-Item -Path 'C:\temp\ccache\*' -Destination 'C:\ccache' + Add-Content -Path $env:GITHUB_PATH -Value "C:\ccache" + + - name: Setup ccache + id: ccache-restore + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + with: + key: ${{ runner.os }}-${{ runner.arch }}-ccache-${{ env.TARGET_BRANCH }}-Release-wheels-${{ github.sha }} + restore-keys: | + ${{ runner.os }}-${{ runner.arch }}-ccache-${{ env.TARGET_BRANCH }}-Release-wheels + path: ${{ env.CCACHE_DIR }} + + - name: Clean ccache stats + run: ccache --zero-stats --show-config + + - name: Configure Developer Command Prompt for Microsoft Visual C++ + uses: ilammy/msvc-dev-cmd@0b201ec74fa43914dc39ae48a89fd1d8cb592756 # v1.13.0 + with: + toolset: 14.42 # v2022 + + - name: Build Tokenizers Wheel + run: | + python -m pip wheel -v --no-deps --wheel-dir ${{ env.WHEELS_DIR }} ` + --config-settings=override=cmake.generator='Ninja' ` + --config-settings=override=cmake.build_path='${{ env.BUILD_DIR }}/tokenizers' ` + ${{ needs.openvino_download.outputs.ov_wheel_source }} ` + ${{ env.SRC_DIR }}/thirdparty/openvino_tokenizers + working-directory: ${{ env.OV_INSTALL_DIR }} + + - name: Build WWB Wheel + run: python -m pip wheel -v --no-deps --wheel-dir ${{ env.WHEELS_DIR }} ${{ env.SRC_DIR }}/tools/who_what_benchmark + working-directory: ${{ env.OV_INSTALL_DIR }} + + - name: Show ccache stats + run: ccache --show-stats + + - name: Save ccache + if: always() && steps.ccache-restore.outputs.cache-hit != 'true' && github.event_name == 'push' + uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + with: + key: ${{ steps.ccache-restore.outputs.cache-primary-key }} + path: ${{ env.CCACHE_DIR }} + + - name: Upload wheels + if: ${{ always() }} + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: genai_wheels + path: ${{ env.INSTALL_DIR }} + if-no-files-found: 'error' + + genai_build_genai_wheel: + name: Build 
GenAI Wheel - Python ${{ matrix.python-version }} + needs: [ openvino_download ] + timeout-minutes: 60 + strategy: + fail-fast: false + matrix: + python-version: ['3.10', '3.11', '3.12', '3.13'] + defaults: + run: + shell: pwsh + runs-on: aks-win-8-cores-16gb-build + env: + CMAKE_GENERATOR: Ninja + OV_INSTALL_DIR: ${{ github.workspace }}\ov + SRC_DIR: ${{ github.workspace }}\src + BUILD_DIR: ${{ github.workspace }}\build + INSTALL_DIR: ${{ github.workspace }}\genai + WHEELS_DIR: ${{ github.workspace }}\genai\wheels + CCACHE_DIR: ${{ github.workspace }}\ccache + OpenVINODeveloperPackage_DIR: ${{ github.workspace }}\install\ov\developer_package\cmake + + steps: + - name: Clone openvino.genai + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + submodules: recursive + path: ${{ env.SRC_DIR }} + + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + + - name: Download OpenVINO package + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + name: ${{ needs.openvino_download.outputs.ov_artifact_name }} + path: ${{ env.OV_INSTALL_DIR }} + merge-multiple: true + + - name: Download and install ninja + run: | + Invoke-WebRequest https://github.com/ninja-build/ninja/releases/download/v1.11.1/ninja-win.zip -OutFile ninja-win.zip -MaximumRetryCount 10 + Expand-Archive -Force ninja-win.zip + # Add it to the GitHub Path so it would be available in the subsequent steps + Add-Content -Path $env:GITHUB_PATH -Value "${{ github.workspace }}/ninja-win" + + - name: Download and install ccache + run: | + Invoke-WebRequest -Uri 'https://github.com/ccache/ccache/releases/download/v4.9.1/ccache-4.9.1-windows-x86_64.zip' -OutFile 'ccache.zip' + Expand-Archive -Path 'ccache.zip' -DestinationPath 'C:\temp\ccache' + Move-Item -Path 'C:\temp\ccache\*' -Destination 'C:\ccache' + Add-Content 
-Path $env:GITHUB_PATH -Value "C:\ccache" + + - name: Setup ccache + id: ccache-restore + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + with: + key: ${{ runner.os }}-${{ runner.arch }}-ccache-${{ env.TARGET_BRANCH }}-Release-genai-wheel-${{ matrix.python-version }}-${{ github.sha }} + restore-keys: | + ${{ runner.os }}-${{ runner.arch }}-ccache-${{ env.TARGET_BRANCH }}-Release-genai-wheel-${{ matrix.python-version }} + path: ${{ env.CCACHE_DIR }} + + - name: Set CI environment + id: create_manifest + uses: openvinotoolkit/openvino/.github/actions/create_manifest@master + with: + repos: ${{ env.SRC_DIR }} + product_type: ${{ env.BASE_PRODUCT_TYPE }}_Release + target_arch: 'x86_64' + build_type: Release + save_to: ${{ github.workspace }} + + - name: Clean ccache stats + run: ccache --zero-stats --show-config + + - name: Configure Developer Command Prompt for Microsoft Visual C++ + uses: ilammy/msvc-dev-cmd@0b201ec74fa43914dc39ae48a89fd1d8cb592756 # v1.13.0 + with: + toolset: 14.42 # v2022 + + - name: Build genai wheel + run: | + $pythonCommand = "py -${{ matrix.python-version }} -c `"import sys; print(f'{sys.executable}')`"" + $pythonExecutablePath = & cmd /c $pythonCommand + + & $pythonExecutablePath -m pip wheel -v --no-deps --wheel-dir ${{ env.WHEELS_DIR }} ` + --config-settings=override=cmake.generator='Ninja' ` + --config-settings=override=cmake.build_path='${{ env.BUILD_DIR }}/genai' ` + --config-settings='override=wheel.build_tag="${{ github.run_number }}"' ` + ${{ needs.openvino_download.outputs.ov_wheel_source }} ` + ${{ env.SRC_DIR }} + working-directory: ${{ env.OV_INSTALL_DIR }} + + - name: Show ccache stats + run: ccache --show-stats + + - name: Save ccache + if: always() && steps.ccache-restore.outputs.cache-hit != 'true' && github.event_name == 'push' + uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + with: + key: ${{ steps.ccache-restore.outputs.cache-primary-key }} + path: ${{ env.CCACHE_DIR }} + 
+ - name: Upload GenAI wheel + if: ${{ always() }} + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: genai_wheel_python_${{ matrix.python-version }} + path: ${{ env.INSTALL_DIR }} + if-no-files-found: 'error' + + store_artifacts: + name: Store build artifacts + strategy: + matrix: + build-type: [Release] + needs: [openvino_download, genai_build_wheels, genai_build_genai_wheel, genai_build_cpack] + timeout-minutes: 10 + defaults: + run: + shell: bash + runs-on: aks-linux-medium + container: + image: openvinogithubactions.azurecr.io/library/python:3.12-slim + volumes: + - /mount:/mount + - ${{ github.workspace }}:${{ github.workspace }} + env: + WHEEL_PACKAGE: ${{ github.workspace }}/wheels + MANIFEST_PATH: ${{ github.workspace }}/manifest.yml + + steps: + - name: Download genai package + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + name: ${{ env.GENAI_ARCHIVE_ARTIFACT_BASE_NAME }}_${{ matrix.build-type }} + path: ${{ github.workspace }} + + - name: Download manifest and wheels + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + pattern: '{genai_wheels,genai_wheel_python_*,manifest_${{ matrix.build-type }}}' + path: ${{ github.workspace }} + merge-multiple: true + + - name: Store ${{ matrix.build-type }} artifacts to a shared drive + id: store_artifacts + if: ${{ always() }} + uses: openvinotoolkit/openvino/.github/actions/store_artifacts@master + with: + artifacts: | + ${{ github.workspace }}/${{ env.GENAI_ARCHIVE_ARTIFACT_BASE_NAME }}.zip + ${{ env.WHEEL_PACKAGE }} + ${{ env.MANIFEST_PATH }} + storage_dir: ${{ env.BASE_PRODUCT_TYPE }}_${{ matrix.build-type }} + storage_root: ${{ env.ARTIFACTS_SHARE }} + product_name: ${{ github.event.repository.name }} + + genai_build_samples: + name: Build Samples - ${{ matrix.build-type }} + strategy: + fail-fast: false + matrix: + build-type: [Release, Debug] + needs: [ openvino_download, genai_build_cpack ] +
timeout-minutes: 70 + defaults: + run: + shell: pwsh + runs-on: aks-win-4-cores-8gb-build + env: + OV_INSTALL_DIR: ${{ github.workspace }}/install/ov + SRC_DIR: ${{ github.workspace }}/src + BUILD_DIR: ${{ github.workspace }}/build + INSTALL_DIR: ${{ github.workspace }}/install/genai + + steps: + - name: Clone openvino.genai + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + path: ${{ env.SRC_DIR }} + + - name: Download Build Artifacts + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + pattern: "{${{ needs.openvino_download.outputs.ov_artifact_name }},genai_cpack_${{ matrix.build-type }}}" + path: ${{ env.OV_INSTALL_DIR }} + merge-multiple: true + + - name: Extract Artifacts + run: Expand-Archive -Path ${{ env.OV_INSTALL_DIR }}/${{ env.GENAI_ARCHIVE_ARTIFACT_BASE_NAME }}.zip -DestinationPath ${{ env.OV_INSTALL_DIR }} + + - name: Build Samples (Release) + if: ${{ 'Release' == matrix.build-type }} + run: | + & ${{ env.OV_INSTALL_DIR }}/samples/cpp/build_samples.ps1 -i ${{ env.INSTALL_DIR }} + & ${{ env.OV_INSTALL_DIR }}/samples/c/build_samples.ps1 -i ${{ env.INSTALL_DIR }} + + - name: Build Samples (Debug) + if: ${{ 'Release' != matrix.build-type }} + run: | + . 
"${{ env.OV_INSTALL_DIR }}/setupvars.ps1" + cmake -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} -S ${{ env.OV_INSTALL_DIR }}/samples/cpp -B ${{ env.BUILD_DIR }} + cmake --build ${{ env.BUILD_DIR }} --config ${{ matrix.build-type }} --parallel $ENV:NUMBER_OF_PROCESSORS + cmake --install ${{ env.BUILD_DIR }} --config ${{ matrix.build-type }} --component samples_bin --prefix ${{ env.INSTALL_DIR }} + + - name: Upload Samples Build Package + if: always() + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: genai_samples_${{ matrix.build-type }} + path: ${{ env.INSTALL_DIR }} + if-no-files-found: 'error' + + genai_build_nodejs: + name: Build Node.js bindings + needs: [ openvino_download ] + timeout-minutes: 90 + defaults: + run: + shell: pwsh + runs-on: aks-win-4-cores-8gb-build + + env: + OV_INSTALL_DIR: ${{ github.workspace }}/ov + SRC_DIR: ${{ github.workspace }}/openvino.genai + INSTALL_DIR: ${{ github.workspace }}/openvino.genai/src/js/bin + BUILD_DIR: ${{ github.workspace }}/build + + steps: + - name: Clone openvino.genai + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + submodules: recursive + path: ${{ env.SRC_DIR }} + + - name: Download OpenVINO package + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + name: ${{ needs.openvino_download.outputs.ov_artifact_name }} + path: ${{ env.OV_INSTALL_DIR }} + merge-multiple: true + + - name: Setup Python ${{ env.PYTHON_VERSION }} + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + python-version: ${{ env.PYTHON_VERSION }} + cache: 'pip' + + - name: Build GenAI Node.js bindings + run: | + . 
"${{ env.OV_INSTALL_DIR }}/setupvars.ps1" + cmake -DCMAKE_BUILD_TYPE=Release ` + -DENABLE_JS=ON -DCPACK_GENERATOR=NPM ` + -DENABLE_PYTHON=OFF -DENABLE_WHEEL=OFF ` + -S ${{ env.SRC_DIR }} -B ${{ env.BUILD_DIR }} + cmake --build ${{ env.BUILD_DIR }} --config Release --parallel --verbose + cmake --install ${{ env.BUILD_DIR }} --config Release --prefix ${{ env.INSTALL_DIR }} + + - name: Upload Node.js bindings Build Package + if: always() + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: genai_nodejs_bindings + path: ${{ env.INSTALL_DIR }} + if-no-files-found: 'error' + + genai_tests_wheel: + name: Python (${{ matrix.test.name}}) Tests (wheel) + needs: [ smart_ci, openvino_download, genai_build_wheels, genai_build_genai_wheel ] + timeout-minutes: ${{ matrix.test.timeout }} + strategy: + fail-fast: false + matrix: + test: + - name: 'Whisper' + # TODO: skip some tests temporary untill https://github.com/huggingface/datasets/issues/7647 dataset is fixed + cmd: 'python -m pytest -s -v tests/python_tests/test_whisper_pipeline.py tests/python_tests/test_whisper_pipeline_static.py -k "not test_smoke[sample_from_dataset0 and not test_whisper_constructors[sample_from_dataset0 and not test_max_new_tokens[sample_from_dataset0 and not test_language_mode[language and not test_task_mode[sample_from_dataset0 and not test_language_autodetect[sample_from_dataset0 and not test_whisper_config_constructor and not test_language_autodetect[sample_from_dataset1 and not test_language_autodetect[sample_from_dataset2 and not test_initial_prompt_hotwords[sample_from_dataset0 and not test_random_sampling[sample_from_dataset0"' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).whisper.test }} + timeout: 120 + - name: 'Cacheopt E2E (Part 1)' + cmd: 'python -m pytest -s -v tests/python_tests/test_kv_cache_eviction/test_kv_cache_eviction_1.py' + run_condition: ${{ 
fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test }} + timeout: 180 + - name: 'Cacheopt E2E (Part 2)' + cmd: 'python -m pytest -s -v tests/python_tests/test_kv_cache_eviction/test_kv_cache_eviction_2.py' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test }} + timeout: 360 + - name: 'LLM & VLM' + cmd: 'python -m pytest -s -v tests/python_tests/test_llm_pipeline.py tests/python_tests/test_llm_pipeline_static.py tests/python_tests/test_vlm_pipeline.py tests/python_tests/test_structured_output.py tests/python_tests/test_image_generation.py tests/python_tests/test_image_generation_multi_call.py --override-ini cache_dir=/mount/caches/pytest/' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test || fromJSON(needs.smart_ci.outputs.affected_components).Image_generation.test }} + timeout: 180 + - name: 'Video Generation' + cmd: 'python -m pytest -s -v tests/python_tests/test_video_generation.py --override-ini cache_dir=/mount/caches/pytest/' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).video_generation.test }} + timeout: 60 + - name: 'GGUF Reader tests' + cmd: 'python -m pytest -s -v tests/python_tests/test_gguf_reader.py' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).GGUF.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }} + timeout: 360 + - name: 'Tokenizer tests' + cmd: 'python -m pytest -s -v tests/python_tests/test_tokenizer.py' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).tokenizers.test }} + timeout: 60 + - name: 'API tests' + cmd: 'python -m pytest -s -v tests/python_tests/test_continuous_batching.py -k "not eagle3" tests/python_tests/test_generation_config.py tests/python_tests/test_sampling.py tests/python_tests/test_text_streamer.py' + run_condition: ${{ 
fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test || fromJSON(needs.smart_ci.outputs.affected_components).sampling.test || fromJSON(needs.smart_ci.outputs.affected_components).text_streamer.test }} + timeout: 60 + - name: 'Rag tests' + cmd: 'python -m pytest -s -v tests/python_tests/test_rag.py' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).RAG.test }} + timeout: 30 + - name: 'WWB tests' + cmd: 'python -m pytest -s -v tools/who_what_benchmark/tests -m "not nanollava"' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).WWB.test }} + timeout: 120 + - name: 'EAGLE3 speculative decoding tests' + cmd: | + python -m pytest -v ./tests/python_tests/test_continuous_batching.py -k "eagle3" + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).speculative_decoding.test }} + timeout: 90 + - name: 'WWB tests (nanollava)' + cmd: | + python -m pip install transformers==4.48.0 diffusers==0.35.2 + python -m pytest -v ./tools/who_what_benchmark/tests -m nanollava + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).WWB.test }} + timeout: 90 + - name: 'VLM (MiniCPM-o-2_6)' + cmd: | + python -m pip install transformers==4.51.3 + python -m pytest -s -v tests/python_tests/test_vlm_pipeline.py --override-ini cache_dir=/mount/caches/pytest/ -k "MiniCPM-o-2_6" + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test }} + timeout: 60 + - name: 'VLM (qwen3-vl)' + cmd: | + python -m pip install transformers==4.57.0 git+https://github.com/huggingface/optimum-intel.git@0566b76f094d4c3084e06d29a248b39a1bff3fa4 + python -m pytest -s -v tests/python_tests/test_vlm_pipeline.py --override-ini cache_dir=/mount/caches/pytest/ -k "qwen3-vl" + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test }} + timeout: 60 + defaults: + run: + shell: pwsh + runs-on: aks-win-16-cores-32gb-test + env: + INSTALL_DIR: ${{ 
github.workspace }}/install + SRC_DIR: ${{ github.workspace }}/src + BUILD_DIR: ${{ github.workspace }}/build + + steps: + - name: Clone openvino.genai + if: ${{ matrix.test.run_condition }} + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + path: ${{ env.SRC_DIR }} + submodules: recursive + + - name: Download Build Artifacts + if: ${{ matrix.test.run_condition }} + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + pattern: "{${{ needs.openvino_download.outputs.ov_artifact_name }},genai_wheels,genai_wheel_python_*}" + path: ${{ env.INSTALL_DIR }} + merge-multiple: true + + - name: Setup Python ${{ env.PYTHON_VERSION }} + if: ${{ matrix.test.run_condition }} + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + python-version: ${{ env.PYTHON_VERSION }} + cache: 'pip' + + - name: Install GenAI Wheels + if: ${{ matrix.test.run_condition }} + uses: ./src/.github/actions/install_wheel + with: + packages: "openvino;openvino_tokenizers[transformers];openvino_genai;whowhatbench" + requirements_files: "${{ env.SRC_DIR }}/tests/python_tests/requirements.txt" + local_wheel_dir: ${{ env.INSTALL_DIR }}/wheels + + - name: Tests + if: ${{ matrix.test.run_condition }} + run: ${{ matrix.test.cmd }} + working-directory: ${{ env.SRC_DIR }} + + genai_samples_tests: + name: Samples ${{ matrix.test.name }} (${{ matrix.build-type }}) + strategy: + fail-fast: false + matrix: + build-type: [Release] + test: + - name: 'LLM' + marker: 'llm' + cmd: 'tests/python_tests/samples' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).LLM_samples.test }} + runner: 'aks-win-16-cores-32gb-test' + - name: 'Whisper' + marker: 'whisper' + cmd: 'tests/python_tests/samples' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).Whisper_samples.test }} + runner: 'aks-win-4-cores-8gb-test' + - name: 'dreamlike_anime_1_0' + marker: 'dreamlike_anime_1_0' + cmd: 
'tests/python_tests/samples' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).Image_generation_samples.test }} + runner: 'aks-win-8-cores-32gb-test' + - name: 'LCM_Dreamshaper_v7_int8_ov' + marker: 'LCM_Dreamshaper_v7_int8_ov' + cmd: 'tests/python_tests/samples' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).Image_generation_samples.test }} + runner: 'aks-win-8-cores-16gb-test' + - name: 'Rag' + marker: 'rag' + cmd: 'tests/python_tests/samples' + runner: 'aks-win-4-cores-8gb-test' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).RAG_samples.test }} + - name: 'Speech generation' + marker: 'speech_generation' + cmd: 'tests/python_tests/samples' + runner: 'aks-win-4-cores-8gb-test' + run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).Speech_generation_samples.test }} + + needs: [ smart_ci, openvino_download, genai_build_cpack, genai_build_wheels, genai_build_genai_wheel, genai_build_samples, genai_build_nodejs ] + timeout-minutes: 120 + defaults: + run: + shell: pwsh + runs-on: ${{ matrix.test.runner }} + env: + INSTALL_DIR: ${{ github.workspace }}/install + SRC_DIR: ${{ github.workspace }}/src + BUILD_DIR: ${{ github.workspace }}/build + # The debug logging includes messages about the time it takes to read the GGUF model. + # These messages differ from run to run, so we cannot compare the results of the CPP, Python, and JavaScript parts. 
+ OPENVINO_LOG_LEVEL: 1 + + steps: + - name: Clone openvino.genai + if: ${{ matrix.test.run_condition }} + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + path: ${{ env.SRC_DIR }} + + - name: Download Build Artifacts + if: ${{ matrix.test.run_condition }} + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + pattern: "{${{ needs.openvino_download.outputs.ov_artifact_name }},genai_cpack_${{ matrix.build-type }},genai_samples_${{ matrix.build-type }},genai_wheels,genai_wheel_python_*}" + path: ${{ env.INSTALL_DIR }} + merge-multiple: true + + - name: Extract Artifacts + if: ${{ matrix.test.run_condition }} + run: Expand-Archive -Path ${{ env.INSTALL_DIR }}/${{ env.GENAI_ARCHIVE_ARTIFACT_BASE_NAME }}.zip -DestinationPath ${{ env.INSTALL_DIR }} + + - name: Download GenAI JS Bindings Artifacts + if: ${{ matrix.test.run_condition }} + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + name: genai_nodejs_bindings + path: ${{ env.SRC_DIR }}/src/js/bin + merge-multiple: true + + - name: Setup Python ${{ env.PYTHON_VERSION }} + if: ${{ matrix.test.run_condition }} + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + python-version: ${{ env.PYTHON_VERSION }} + cache: 'pip' + + - name: Install GenAI wheels + if: ${{ matrix.test.run_condition }} + uses: ./src/.github/actions/install_wheel + with: + packages: "openvino;openvino_tokenizers[transformers];openvino_genai[testing]" + requirements_files: "${{ env.SRC_DIR }}/samples/requirements.txt" + local_wheel_dir: ${{ env.INSTALL_DIR }}/wheels + + - name: Setup NodeJS + if: ${{ matrix.test.run_condition }} + uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0 + with: + node-version: 21 + + - name: Install GenAI NPM package + if: ${{ matrix.test.run_condition }} + working-directory: ${{ env.SRC_DIR }}/src/js + run: | + npm install $(Resolve-Path -Path "${{ env.INSTALL_DIR 
}}/openvino_node_npm_package/openvino-node-*") --ignore-scripts + Copy-Item -Recurse ${{ env.INSTALL_DIR }}/openvino_node_npm_package/bin node_modules/openvino-node/bin + npm install --verbose + + - name: Install NPM dependencies for samples + if: ${{ matrix.test.run_condition }} + working-directory: ${{ env.SRC_DIR }}/samples/js/text_generation + run: | + npm install ${{ env.SRC_DIR }}/src/js + npm install --verbose + + - name: Test Samples (Python and C++) + if: ${{ matrix.test.run_condition }} + run: python -m pytest -vs ${{ env.SRC_DIR }}/${{ matrix.test.cmd }} -m "${{ matrix.test.marker }}" + env: + PATH: "${{ env.INSTALL_DIR }}/runtime/bin/intel64/${{ matrix.build-type }};${{ env.INSTALL_DIR }}/runtime/3rdparty/tbb/bin;%PATH%" # Required for C++ samples + SAMPLES_PY_DIR: "${{ env.INSTALL_DIR }}/samples/python" + SAMPLES_JS_DIR: "${{ env.SRC_DIR }}/samples/js" + SAMPLES_CPP_DIR: "${{ env.INSTALL_DIR }}/samples_bin" + SAMPLES_C_DIR: "${{ env.INSTALL_DIR }}/samples_bin" + + genai_tools_tests: + name: Tools tests (${{ matrix.build-type }}) + strategy: + fail-fast: false + matrix: + build-type: [Release] + needs: [ smart_ci, openvino_download, genai_build_cpack, genai_build_wheels, genai_build_genai_wheel ] + if: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching }} + timeout-minutes: 90 + defaults: + run: + shell: pwsh + runs-on: aks-win-8-cores-16gb-test + env: + INSTALL_DIR: ${{ github.workspace }}/install + SRC_DIR: ${{ github.workspace }}/src + BUILD_DIR: ${{ github.workspace }}/build + + steps: + - name: Clone openvino.genai + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + path: ${{ env.SRC_DIR }} + + - name: Download Build Artifacts + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + pattern: "{${{ needs.openvino_download.outputs.ov_artifact_name }},genai_cpack_${{ matrix.build-type }},genai_tools_${{ matrix.build-type }},genai_tests_${{ matrix.build-type 
}},genai_wheels,genai_wheel_python_*}" + path: ${{ env.INSTALL_DIR }} + merge-multiple: true + + - name: Extract Artifacts + run: Expand-Archive -Path ${{ env.INSTALL_DIR }}/${{ env.GENAI_ARCHIVE_ARTIFACT_BASE_NAME }}.zip -DestinationPath ${{ env.INSTALL_DIR }} + + - name: Setup Python ${{ env.PYTHON_VERSION }} + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + python-version: ${{ env.PYTHON_VERSION }} + cache: 'pip' + + - name: Install GenAI wheels + uses: ./src/.github/actions/install_wheel + with: + packages: "openvino;openvino_tokenizers[transformers];openvino_genai[testing]" + requirements_files: "${{ env.SRC_DIR }}/samples/requirements.txt" + local_wheel_dir: ${{ env.INSTALL_DIR }}/wheels + + - name: gtests unit tests + run: | + . "${{ env.INSTALL_DIR }}/setupvars.ps1" + & "${{ env.INSTALL_DIR }}/tests/tests_continuous_batching.exe" --gtest_filter="-AddSecondInputTest.*" + + - name: Test C++ Tools + run: | + . "${{ env.INSTALL_DIR }}/setupvars.ps1" + python -m pytest -vs ${{ env.SRC_DIR }}/tests/python_tests/samples/test_continuous_batching_tools.py -m "samples" + env: + SAMPLES_CPP_DIR: "${{ env.INSTALL_DIR }}/samples_bin" + + genai_nodejs_tests: + name: Node.js bindings tests + needs: [ smart_ci, openvino_download, genai_build_wheels, genai_build_genai_wheel, genai_build_nodejs ] + if: ${{ fromJSON(needs.smart_ci.outputs.affected_components).JS_API }} + timeout-minutes: 20 + defaults: + run: + shell: pwsh + runs-on: windows-2022 + + env: + SRC_DIR: ${{ github.workspace }}/openvino.genai + INSTALL_DIR: ${{ github.workspace }}/install + NODE_VERSION: 21 + + steps: + - name: Clone openvino.genai + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + path: ${{ env.SRC_DIR }} + submodules: recursive + + - name: Download build artifacts (OpenVINO + wheels) + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + pattern: "{${{ 
needs.openvino_download.outputs.ov_artifact_name }},genai_wheels,genai_wheel_python_*}" + path: ${{ env.INSTALL_DIR }} + merge-multiple: true + + - name: Download GenAI JS Bindings Artifacts + uses: akashchi/download-artifact@d59a9c15fec3fdb7c9adf09464124d00f9c11415 + with: + name: genai_nodejs_bindings + path: ${{ env.SRC_DIR }}/src/js/bin + merge-multiple: true + + - name: Setup Node ${{ env.NODE_VERSION }} + uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0 + with: + node-version: ${{ env.NODE_VERSION }} + + - name: Setup Python + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + with: + python-version: '3.11' + cache: 'pip' + + # JS packages use the OpenVINO and OpenVINO GenAI libraries from the bin directory. + # Here we emulate the installation of the openvino-node package from NPM. The latest + # release of the openvino-node package is installed, and we need to update the binaries + # in the node_modules/openvino-node/bin directory to work correctly with GenAI + - name: Install npm package tests dependencies + working-directory: ${{ env.SRC_DIR }}/src/js + run: | + npm install $(Resolve-Path -Path "${{ env.INSTALL_DIR }}/openvino_node_npm_package/openvino-node-*") --ignore-scripts + Copy-Item -Recurse ${{ env.INSTALL_DIR }}/openvino_node_npm_package/bin node_modules/openvino-node/bin + npm install --verbose + + - name: Install OpenVINO GenAI Python packages (from wheels) + uses: ./openvino.genai/.github/actions/install_wheel + with: + packages: "openvino;openvino_tokenizers[transformers];openvino_genai" + requirements_files: "${{ env.SRC_DIR }}/tests/python_tests/requirements.txt" + local_wheel_dir: ${{ env.INSTALL_DIR }}/wheels + + - name: Run npm package tests + working-directory: ${{ env.SRC_DIR }}/src/js + run: npm test + + Overall_Status: + name: ci/gha_overall_status_windows + needs: [smart_ci, openvino_download, genai_build_cpack, genai_build_wheels, genai_build_genai_wheel, genai_build_samples, 
genai_tests_wheel, genai_tools_tests, genai_samples_tests, genai_build_nodejs, genai_nodejs_tests] + if: ${{ always() }} + runs-on: ubuntu-latest + steps: + - name: Check status of all jobs + if: >- + ${{ + contains(needs.*.result, 'failure') || + contains(needs.*.result, 'cancelled') + }} + run: exit 1 diff --git a/src/resources/openvino.genai-2026.1.0.0/.github/workflows/workflow_rerunner.yml b/src/resources/openvino.genai-2026.1.0.0/.github/workflows/workflow_rerunner.yml new file mode 100644 index 0000000..18ba60f --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.github/workflows/workflow_rerunner.yml @@ -0,0 +1,100 @@ +name: Rerun Workflow with Known Errors + +on: + workflow_run: + workflows: + - "Coverity (Ubuntu 22.04, Python 3.11)" + - "Linux (Ubuntu 22.04, Python 3.11)" + - "macOS (14, Python 3.11)" + - "Manylinux 2_28" + - "Windows (VS 2022, Python 3.11)" + types: + - completed + pull_request: + paths: + - '.github/workflows/workflow_rerunner.yml' + - '.github/scripts/workflow_rerun/**' + +permissions: read-all + +jobs: + rerun: + name: Rerun Workflow + # Run only for the failed workflows in openvinotoolkit org + if: ${{ github.event.workflow_run.conclusion == 'failure' && github.repository_owner == 'openvinotoolkit' }} + runs-on: aks-linux-small + permissions: + actions: write + contents: read + statuses: read + checks: read + steps: + - name: Checkout + uses: ababushk/checkout@dd591a6a2ac25618db4eda86e7e0d938f88cf01b # cherry_pick_retries + timeout-minutes: 15 + with: + sparse-checkout: '.github/scripts/workflow_rerun' + + - name: Install deps + working-directory: ${{ github.workspace }}/.github/scripts/workflow_rerun + run: pip3 install -r requirements.txt + + - name: Dump GitHub context + env: + GITHUB_CONTEXT: ${{ toJson(github) }} + run: echo "$GITHUB_CONTEXT" + + - name: Rerun + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PGHOST: ${{ secrets.METRICS_DATABASE_HOST }} + PGUSER: ${{ secrets.METRICS_DATABASE_USERNAME }} + PGPASSWORD: 
${{ secrets.METRICS_DATABASE_PASSWORD }} + PGDATABASE: ${{ secrets.METRICS_DATABASE_NAME }} + PGPORT: 5432 + run: | + export PYTHONPATH=${{ github.workspace }}/.github/scripts/workflow_rerun:${{ github.workspace }}/.github/scripts:$PYTHONPATH + python3 ${{ github.workspace }}/.github/scripts/workflow_rerun/rerunner.py \ + --run-id ${{ github.event.workflow_run.id }} \ + --rerunner-run-id ${{ github.run_id }} \ + --repository-name ${GITHUB_REPOSITORY} + + rerunner_tests: + name: Rerunner Tests + if: ${{ github.event_name == 'pull_request' && github.repository_owner == 'openvinotoolkit' }} + runs-on: aks-linux-small + permissions: + contents: read + actions: read + steps: + - name: Checkout + uses: ababushk/checkout@dd591a6a2ac25618db4eda86e7e0d938f88cf01b # cherry_pick_retries + timeout-minutes: 15 + with: + sparse-checkout: '.github/scripts/workflow_rerun' + lfs: false + + - name: Install deps + working-directory: ${{ github.workspace }}/.github/scripts/workflow_rerun + run: pip3 install -r requirements.txt + + - name: Test Rerunner (Tests) + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + working-directory: ${{ github.workspace }}/.github/scripts/workflow_rerun + run: | + export PYTHONPATH=${{ github.workspace }}/.github/scripts/workflow_rerun:${{ github.workspace }}/.github/scripts:$PYTHONPATH + python3 -m unittest tests/*_test.py + + - name: Test Rerunner (CLI) + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + working-directory: ${{ github.workspace }}/.github/scripts/workflow_rerun + run: | + export PYTHONPATH=${{ github.workspace }}/.github/scripts/workflow_rerun:${{ github.workspace }}/.github/scripts:$PYTHONPATH + + # Need to get a run id with successful status for log analyzing + # cannot lock a run id as logs get deleted after some time + run_id=$(python3 -c "from github import Github, Auth; import os; from datetime import datetime, timedelta, timezone; github=Github(auth=Auth.Token(token=os.environ.get('GITHUB_TOKEN'))); repo = 
github.get_repo('${GITHUB_REPOSITORY}'); cutoff_date=(datetime.now(timezone.utc)-timedelta(days=14)).date().isoformat(); runs=repo.get_workflow_runs(status='success', created=f'>={cutoff_date}'); print(runs[0].id)") + + python3 rerunner.py --repository-name ${GITHUB_REPOSITORY} --run-id $run_id --rerunner-run-id ${{ github.run_id }} --dry-run diff --git a/src/resources/openvino.genai-2026.1.0.0/.gitignore b/src/resources/openvino.genai-2026.1.0.0/.gitignore new file mode 100644 index 0000000..1ce5d24 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.gitignore @@ -0,0 +1,46 @@ +# build/artifact dirs +[Bb]uild*/ + +# but ensure we don't skip __init__.py and __main__.py +!__init__.py +!__main__.py + +# don't skip GitHub Actions files and directories +!.github/** + +# developer tools +*.idea +.vscode +.vs/ +.vsconan/ +.DS_Store +**/tags +compile_commands.json +.local_vimrc +.gdb_history +.vimspector.json +doc/ +temp/ +.repo/ +CMakeLists.txt.user +CMakeUserPresets.json +*.env +ov_cache/ + +*.project +*.cproject +*.pydevproject +*.settings +*/gen/ +*.swp +/config.xml + +# Python-specific +*.?env* +*.pyc +__pycache__ +.py-build-cmake_cache +*.egg-info + +# CodeQL artifacts +_codeql_detected_source_root diff --git a/src/resources/openvino.genai-2026.1.0.0/.gitmodules b/src/resources/openvino.genai-2026.1.0.0/.gitmodules new file mode 100644 index 0000000..f72fd83 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.gitmodules @@ -0,0 +1,3 @@ +[submodule "thirdparty/openvino_tokenizers"] + path = thirdparty/openvino_tokenizers + url = https://github.com/openvinotoolkit/openvino_tokenizers.git diff --git a/src/resources/openvino.genai-2026.1.0.0/.pre-commit-config.yaml b/src/resources/openvino.genai-2026.1.0.0/.pre-commit-config.yaml new file mode 100644 index 0000000..2dc2415 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/.pre-commit-config.yaml @@ -0,0 +1,35 @@ +exclude: | + (?x)( + \.pyi$| + ^tools/llm_bench/prompts/| + 
^tools/who_what_benchmark/whowhatbench/prompts/ + ) +repos: + - repo: meta + hooks: + - id: identity + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v6.0.0 + hooks: + - id: trailing-whitespace + exclude: '\.(py|c|cpp|h|hpp)$' + - id: end-of-file-fixer + - id: check-merge-conflict + - id: check-case-conflict + - id: check-symlinks + - id: detect-private-key + - id: mixed-line-ending + args: ["--fix=lf"] + - id: check-ast + - id: check-yaml + - id: check-toml + - id: check-added-large-files + args: ["--maxkb=1000"] + - repo: https://github.com/akaihola/darker + rev: v3.0.0 + hooks: + - id: darker + args: ["--formatter=ruff"] + additional_dependencies: + - ruff==0.14.4 + - tomli==2.3.0 diff --git a/src/resources/openvino.genai-2026.1.0.0/CMakeLists.txt b/src/resources/openvino.genai-2026.1.0.0/CMakeLists.txt new file mode 100644 index 0000000..3fdda5d --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/CMakeLists.txt @@ -0,0 +1,132 @@ +# Copyright (C) 2018-2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +cmake_minimum_required(VERSION 3.23.0) # The requirement comes from Jinja2Cpp + +# Multi config generators such as Visual Studio ignore CMAKE_BUILD_TYPE. Multi config generators are configured with +# CMAKE_CONFIGURATION_TYPES, but limiting options in it completely removes such build options +get_property(GENERATOR_IS_MULTI_CONFIG_VAR GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG) +if(CMAKE_GENERATOR STREQUAL "Ninja Multi-Config") + # 'Ninja Multi-Config' specific, see: + # https://cmake.org/cmake/help/latest/variable/CMAKE_DEFAULT_BUILD_TYPE.html + set(CMAKE_DEFAULT_BUILD_TYPE "Release" CACHE STRING "CMake default build type") +elseif(NOT GENERATOR_IS_MULTI_CONFIG_VAR AND NOT DEFINED CMAKE_BUILD_TYPE) + message(STATUS "CMAKE_BUILD_TYPE is not defined, 'Release' will be used") + # Setting CMAKE_BUILD_TYPE as CACHE must go before project(). 
Otherwise project() sets its value and set() has no effect + set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel ...") +endif() + +if(POLICY CMP0135) + cmake_policy(SET CMP0135 NEW) +endif() + +if(POLICY CMP0169) + cmake_policy(SET CMP0169 OLD) +endif() + +if(UNIX AND NOT (APPLE OR ANDROID OR CYGWIN)) + set(LINUX ON) +endif() + +project(OpenVINOGenAI + VERSION 2026.1.0.0 + DESCRIPTION "OpenVINO GenAI" + HOMEPAGE_URL "https://github.com/openvinotoolkit/openvino.genai" + LANGUAGES CXX C) + +if(NOT DEFINED Python3_FIND_VIRTUALENV) + set(Python3_FIND_VIRTUALENV FIRST) +endif() + +# Looking for OpenVINO in the python distribution. It doesn't work for cross-compiling build +if(NOT CMAKE_CROSSCOMPILING) + find_package(Python3 QUIET COMPONENTS Interpreter) + if(Python3_Interpreter_FOUND) + execute_process( + COMMAND ${Python3_EXECUTABLE} -c "from openvino.utils import get_cmake_path; print(get_cmake_path(), end='')" + OUTPUT_VARIABLE OpenVINO_DIR_PY + ERROR_QUIET + ) + endif() +endif() + +# Find OpenVINODeveloperPackage first to compile with SDL flags +set(OV_COMPATIBILITY_VERSION ${OpenVINOGenAI_VERSION_MAJOR}.${OpenVINOGenAI_VERSION_MINOR}.${OpenVINOGenAI_VERSION_PATCH}) +find_package(OpenVINODeveloperPackage ${OV_COMPATIBILITY_VERSION} QUIET + COMPONENTS Runtime Threading + PATHS "${OpenVINO_DIR}") +if(NOT OpenVINODeveloperPackage_FOUND) + find_package(OpenVINO ${OV_COMPATIBILITY_VERSION} REQUIRED + COMPONENTS Runtime Threading + PATHS "${OpenVINO_DIR_PY}") +endif() + +include(cmake/features.cmake) +include(cmake/version.cmake) +include(cmake/vs_version.cmake) + +if(ENABLE_PYTHON) + # the following two calls are required for cross-compilation + if(OpenVINODeveloperPackage_FOUND) + ov_find_python3(REQUIRED) + ov_detect_python_module_extension() + else() + if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18) + find_package(Python3 REQUIRED COMPONENTS Interpreter Development.Module) + else() + 
find_package(Python3 REQUIRED COMPONENTS Interpreter Development) + endif() + endif() +endif() + +if(WIN32 OR APPLE) + set(CMAKE_DEBUG_POSTFIX "d") +endif() + +# Workaround for an MSVC compiler issue in some versions of Visual Studio 2022. +# The issue involves a null dereference to a mutex. For details, refer to link https://github.com/microsoft/STL/wiki/Changelog#vs-2022-1710 +if(MSVC AND MSVC_VERSION GREATER_EQUAL 1930 AND MSVC_VERSION LESS 1941) + add_compile_definitions(_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR) +endif() + +add_subdirectory(thirdparty) +add_subdirectory(src) +if(EXISTS "${OpenVINOGenAI_SOURCE_DIR}/samples") + add_subdirectory(samples) +endif() +if(EXISTS "${OpenVINOGenAI_SOURCE_DIR}/tools/continuous_batching" AND ENABLE_TOOLS) + add_subdirectory(tools/continuous_batching) +endif() +if(EXISTS "${OpenVINOGenAI_SOURCE_DIR}/tests/cpp" AND ENABLE_TESTS) + add_subdirectory(tests/cpp) +endif() + +install(FILES LICENSE DESTINATION docs/licensing COMPONENT licensing_genai RENAME LICENSE-GENAI) +install(FILES third-party-programs.txt DESTINATION docs/licensing COMPONENT licensing_genai RENAME third-party-programs-genai.txt) +if(NOT DEFINED CPACK_ARCHIVE_COMPONENT_INSTALL) + set(CPACK_ARCHIVE_COMPONENT_INSTALL ON) +endif() +set(CPACK_INCLUDE_TOPLEVEL_DIRECTORY OFF) +# Workaround https://gitlab.kitware.com/cmake/cmake/-/issues/2614 +set(CPACK_COMPONENTS_ALL core_genai core_genai_dev core_c_genai core_c_genai_dev cpp_samples_genai licensing_genai openvino_tokenizers openvino_tokenizers_docs) +if(ENABLE_PYTHON) + list(APPEND CPACK_COMPONENTS_ALL pygenai_${Python3_VERSION_MAJOR}_${Python3_VERSION_MINOR}) + if(NOT ENABLE_GIL_PYTHON_API) + if(Python3_VERSION VERSION_LESS "3.13") + message(FATAL_ERROR "Disabling GIL requires Python >= 3.13, but found Python ${Python3_VERSION}") + else() + set(Python3_FIND_ABI "ANY" "ANY" "ANY" "ON") + endif() + endif() +endif() +if(ENABLE_JS) + list(APPEND CPACK_COMPONENTS_ALL genai_node_addon) +endif() +if(WIN32 AND NOT DEFINED 
CPACK_GENERATOR) + set(CPACK_GENERATOR "ZIP") +endif() +if(CPACK_GENERATOR STREQUAL "NPM") + set(CPACK_GENERATOR "TGZ") +endif() +include(CPack) diff --git a/src/resources/openvino.genai-2026.1.0.0/Jenkinsfile b/src/resources/openvino.genai-2026.1.0.0/Jenkinsfile new file mode 100644 index 0000000..151e69a --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/Jenkinsfile @@ -0,0 +1,22 @@ +#!groovy + +properties([ + parameters([ + booleanParam(defaultValue: false, + description: 'Cancel the rest of parallel stages if one of them fails and return status immediately', + name: 'failFast'), + booleanParam(defaultValue: true, + description: 'Whether to propagate commit status to GitHub', + name: 'propagateStatus'), + booleanParam(defaultValue: false, + description: 'If true, forces running pre-commit scope', + name: 'forceRunPrecommitScope'), + string(defaultValue: '', + description: 'Pipeline shared library version (branch/tag/commit). Determined automatically if empty', + name: 'library_version') + ]) +]) + +loadOpenVinoLibrary { + entrypoint(this) +} diff --git a/src/resources/openvino.genai-2026.1.0.0/LICENSE b/src/resources/openvino.genai-2026.1.0.0/LICENSE new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/src/resources/openvino.genai-2026.1.0.0/README.md b/src/resources/openvino.genai-2026.1.0.0/README.md new file mode 100644 index 0000000..548cbf0 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/README.md @@ -0,0 +1,103 @@ +
+ +![OpenVINO GenAI](/site/static/img/openvino-genai-logo-gradient.svg) + +[Getting Started](#getting-started) • +[AI Scenarios](#ai-scenarios) • +[Optimization Methods](#optimization-methods) • +[Documentation](https://openvinotoolkit.github.io/openvino.genai/) + +[![GitHub Release](https://img.shields.io/github/v/release/openvinotoolkit/openvino.genai?color=green)](https://github.com/openvinotoolkit/openvino.genai/releases) +[![PyPI Downloads](https://static.pepy.tech/badge/openvino.genai)](https://pypi.org/project/openvino.genai/) +![Python](https://img.shields.io/badge/python-3.10+-green) +![OS](https://img.shields.io/badge/OS-Linux_|_Windows_|_MacOS-blue) + +![](/site/static/img/openvino-genai-workflow.svg) + +
+ +OpenVINO™ GenAI is a library of the most popular Generative AI model pipelines, optimized execution methods, and samples that run on top of highly performant [OpenVINO Runtime](https://github.com/openvinotoolkit/openvino). + +This library is friendly to PC and laptop execution, and optimized for resource consumption. It requires no external dependencies to run generative models as it already includes all the core functionality (e.g. tokenization via [`openvino-tokenizers`](https://github.com/openvinotoolkit/openvino_tokenizers)). + +![Text generation using LLaMa 3.2 model running on Intel ARC770 dGPU](./samples/generation.gif) + +
+ +## Getting Started + +* [Introduction to OpenVINO™ GenAI](https://openvinotoolkit.github.io/openvino.genai/docs/getting-started/introduction) +* [Install OpenVINO™ GenAI](https://openvinotoolkit.github.io/openvino.genai/docs/getting-started/installation) +* [Build OpenVINO™ GenAI](/src/docs/BUILD.md) +* [Supported Models](https://openvinotoolkit.github.io/openvino.genai/docs/supported-models/) +* [Model Preparation Guide](https://openvinotoolkit.github.io/openvino.genai/docs/category/model-preparation) + +Explore blogs to setup your first hands-on experience with OpenVINO GenAI: + +* [How to Build OpenVINO™ GenAI APP in C++](https://medium.com/openvino-toolkit/how-to-build-openvino-genai-app-in-c-32dcbe42fa67) +* [How to run Llama 3.2 locally with OpenVINO™](https://medium.com/openvino-toolkit/how-to-run-llama-3-2-locally-with-openvino-60a0f3674549) + + + +## Quick Start + +1. Install OpenVINO GenAI from PyPI: + ```sh + pip install openvino-genai + ``` +2. Obtain model, e.g. export model to OpenVINO IR format from Hugging Face (see [Model Preparation Guide](https://openvinotoolkit.github.io/openvino.genai/docs/category/model-preparation) for more details): + ```sh + optimum-cli export openvino --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 --weight-format int4 --trust-remote-code TinyLlama_1_1b_v1_ov + ``` +3. 
Run inference: + ```python + import openvino_genai as ov_genai + + pipe = ov_genai.LLMPipeline("TinyLlama_1_1b_v1_ov", "CPU") # Use CPU or GPU as devices without any other code change + print(pipe.generate("What is OpenVINO?", max_new_tokens=100)) + ``` + +## Supported Generative AI Scenarios + +OpenVINO™ GenAI library provides very lightweight C++ and Python APIs to run the following Generative AI Scenarios: + - [Text generation using Large Language Models (LLMs)](https://openvinotoolkit.github.io/openvino.genai/docs/use-cases/text-generation/) - Chat with local Llama, Phi, Qwen and other models + - [Image processing using Visual Language Model (VLMs)](https://openvinotoolkit.github.io/openvino.genai/docs/use-cases/image-processing/) - Analyze images/videos with LLaVa, MiniCPM-V and other models + - [Image generation using Diffusers](https://openvinotoolkit.github.io/openvino.genai/docs/use-cases/image-generation/) - Generate images with Stable Diffusion & Flux models + - [Speech recognition using Whisper](https://openvinotoolkit.github.io/openvino.genai/docs/use-cases/speech-recognition/) - Convert speech to text using Whisper models + - [Speech generation using SpeechT5](https://openvinotoolkit.github.io/openvino.genai/docs/use-cases/speech-generation/) - Convert text to speech using SpeechT5 TTS models + - [Semantic search using Text Embedding](https://openvinotoolkit.github.io/openvino.genai/docs/use-cases/text-embedding) - Compute embeddings for documents and queries to enable efficient retrieval in RAG workflows + - [Text Rerank for Retrieval-Augmented Generation (RAG)](https://openvinotoolkit.github.io/openvino.genai/docs/use-cases/text-rerank) - Analyze the relevance and accuracy of documents and queries for your RAG workflows + +Library efficiently supports LoRA adapters for Text and Image generation scenarios: +- Load multiple adapters per model +- Select active adapters for every generation +- Mix multiple adapters with coefficients via alpha blending + 
+All scenarios are run on top of OpenVINO Runtime that supports inference on CPU, GPU and NPU. See [here](https://docs.openvino.ai/2026/about-openvino/release-notes-openvino/system-requirements.html) for platform support matrix. + + + +## Supported Generative AI Optimization Methods + +OpenVINO™ GenAI library provides a transparent way to use state-of-the-art generation optimizations: +- Speculative decoding that employs two models of different sizes and uses the large model to periodically correct the results of the small model. See [here](https://pytorch.org/blog/hitchhikers-guide-speculative-decoding/) for more detailed overview +- KVCache token eviction algorithm that reduces the size of the KVCache by pruning less impacting tokens. +- Sparse attention, which accelerates prefill by attending only to the most important regions of the attention matrix. OpenVINO GenAI currently supports two modes: Tri-shape and XAttention. See [here](https://openvinotoolkit.github.io/openvino.genai/docs/concepts/optimization-techniques/sparse-attention-prefill) for more details. + +Additionally, OpenVINO™ GenAI library implements a continuous batching approach to use OpenVINO within LLM serving. The continuous batching library could be used in LLM serving frameworks and supports the following features: +- Prefix caching that caches fragments of previous generation requests and corresponding KVCache entries internally and uses them in case of repeated query. + +Continuous batching functionality is used within OpenVINO Model Server (OVMS) to serve LLMs, see [here](https://docs.openvino.ai/2026/model-server/ovms_what_is_openvino_model_server.html) for more details. 
+ + +## Additional Resources + +- [OpenVINO Generative AI workflow](https://docs.openvino.ai/2026/openvino-workflow-generative.html) +- [Optimum Intel and OpenVINO](https://huggingface.co/docs/optimum/intel/openvino/export) +- [OpenVINO Notebooks with GenAI](https://openvinotoolkit.github.io/openvino_notebooks/?libraries=OpenVINO+GenAI) + +## License + +The OpenVINO™ GenAI repository is licensed under [Apache License Version 2.0](LICENSE). +By contributing to the project, you agree to the license and copyright terms therein and release +your contribution under these terms. diff --git a/src/resources/openvino.genai-2026.1.0.0/SECURITY.md b/src/resources/openvino.genai-2026.1.0.0/SECURITY.md new file mode 100644 index 0000000..eb482d9 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/SECURITY.md @@ -0,0 +1,12 @@ +# Security Policy + +## Report a Vulnerability + +Please report security issues or vulnerabilities to the [Intel® Security Center]. + +For more information on how Intel® works to resolve security issues, see +[Vulnerability Handling Guidelines]. + +[Intel® Security Center]:https://www.intel.com/security + +[Vulnerability Handling Guidelines]:https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html diff --git a/src/resources/openvino.genai-2026.1.0.0/bandit.yml b/src/resources/openvino.genai-2026.1.0.0/bandit.yml new file mode 100644 index 0000000..bdd324b --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/bandit.yml @@ -0,0 +1,398 @@ +### This config may optionally select a subset of tests to run or skip by +### filling out the 'tests' and 'skips' lists given below. If no tests are +### specified for inclusion then it is assumed all tests are desired. The skips +### set will remove specific tests from the include set. This can be controlled +### using the -t/-s CLI options. 
Note that the same test ID should not appear +### in both 'tests' and 'skips', this would be nonsensical and is detected by +### Bandit at runtime. + +# Available tests: +# B101 : assert_used +# B102 : exec_used +# B103 : set_bad_file_permissions +# B104 : hardcoded_bind_all_interfaces +# B105 : hardcoded_password_string +# B106 : hardcoded_password_funcarg +# B107 : hardcoded_password_default +# B108 : hardcoded_tmp_directory +# B110 : try_except_pass +# B112 : try_except_continue +# B201 : flask_debug_true +# B301 : pickle +# B302 : marshal +# B303 : md5 +# B304 : ciphers +# B305 : cipher_modes +# B306 : mktemp_q +# B307 : eval +# B308 : mark_safe +# B310 : urllib_urlopen +# B311 : random +# B312 : telnetlib +# B313 : xml_bad_cElementTree +# B314 : xml_bad_ElementTree +# B315 : xml_bad_expatreader +# B316 : xml_bad_expatbuilder +# B317 : xml_bad_sax +# B318 : xml_bad_minidom +# B319 : xml_bad_pulldom +# B320 : xml_bad_etree +# B321 : ftplib +# B323 : unverified_context +# B324 : hashlib_new_insecure_functions +# B401 : import_telnetlib +# B402 : import_ftplib +# B403 : import_pickle +# B404 : import_subprocess +# B405 : import_xml_etree +# B406 : import_xml_sax +# B407 : import_xml_expat +# B408 : import_xml_minidom +# B409 : import_xml_pulldom +# B410 : import_lxml +# B411 : import_xmlrpclib +# B412 : import_httpoxy +# B413 : import_pycrypto +# B501 : request_with_no_cert_validation +# B502 : ssl_with_bad_version +# B503 : ssl_with_bad_defaults +# B504 : ssl_with_no_version +# B505 : weak_cryptographic_key +# B506 : yaml_load +# B507 : ssh_no_host_key_verification +# B601 : paramiko_calls +# B602 : subprocess_popen_with_shell_equals_true +# B603 : subprocess_without_shell_equals_true +# B604 : any_other_function_with_shell_equals_true +# B605 : start_process_with_a_shell +# B606 : start_process_with_no_shell +# B607 : start_process_with_partial_path +# B608 : hardcoded_sql_expressions +# B609 : linux_commands_wildcard_injection +# B610 : django_extra_used +# 
B611 : django_rawsql_used +# B701 : jinja2_autoescape_false +# B702 : use_of_mako_templates +# B703 : django_mark_safe + +# (optional) list included test IDs here, eg '[B101, B406]': +# IPAS Required Checkers. Do not disable these +# Additional checkers may be added if desired +tests: + [ 'B301', 'B302', 'B303', 'B304', 'B305', 'B306', 'B308', 'B310', 'B311', 'B312', 'B313', 'B314', 'B315', 'B316', 'B317', 'B318', 'B319', 'B321', 'B323', 'B324', 'B401', 'B402', 'B403', 'B404', 'B405', 'B406', 'B407', 'B408', 'B409', 'B411', 'B412', 'B413'] + +# (optional) list skipped test IDs here, eg '[B101, B406]': +# The following checkers are not required but be added to tests list if desired +skips: + [ 'B101', 'B102', 'B103', 'B104', 'B105', 'B106', 'B107', 'B108', 'B110', 'B112', 'B201', 'B501', 'B502', 'B503', 'B504', 'B505', 'B506', 'B507', 'B601', 'B602', 'B603', 'B604', 'B605', 'B606', 'B607', 'B608', 'B609', 'B610', 'B611', 'B701', 'B702', 'B703'] + +### (optional) plugin settings - some test plugins require configuration data +### that may be given here, per-plugin. All bandit test plugins have a built in +### set of sensible defaults and these will be used if no configuration is +### provided. It is not necessary to provide settings for every (or any) plugin +### if the defaults are acceptable. 
+ +any_other_function_with_shell_equals_true: + no_shell: + - os.execl + - os.execle + - os.execlp + - os.execlpe + - os.execv + - os.execve + - os.execvp + - os.execvpe + - os.spawnl + - os.spawnle + - os.spawnlp + - os.spawnlpe + - os.spawnv + - os.spawnve + - os.spawnvp + - os.spawnvpe + - os.startfile + shell: + - os.system + - os.popen + - os.popen2 + - os.popen3 + - os.popen4 + - popen2.popen2 + - popen2.popen3 + - popen2.popen4 + - popen2.Popen3 + - popen2.Popen4 + - commands.getoutput + - commands.getstatusoutput + subprocess: + - subprocess.Popen + - subprocess.call + - subprocess.check_call + - subprocess.check_output + - subprocess.run +assert_used: + skips: ["tools/who_what_benchmark/tests/test_*.py"] +hardcoded_tmp_directory: + tmp_dirs: + - /tmp + - /var/tmp + - /dev/shm +linux_commands_wildcard_injection: + no_shell: + - os.execl + - os.execle + - os.execlp + - os.execlpe + - os.execv + - os.execve + - os.execvp + - os.execvpe + - os.spawnl + - os.spawnle + - os.spawnlp + - os.spawnlpe + - os.spawnv + - os.spawnve + - os.spawnvp + - os.spawnvpe + - os.startfile + shell: + - os.system + - os.popen + - os.popen2 + - os.popen3 + - os.popen4 + - popen2.popen2 + - popen2.popen3 + - popen2.popen4 + - popen2.Popen3 + - popen2.Popen4 + - commands.getoutput + - commands.getstatusoutput + subprocess: + - subprocess.Popen + - subprocess.call + - subprocess.check_call + - subprocess.check_output + - subprocess.run +ssl_with_bad_defaults: + bad_protocol_versions: + - PROTOCOL_SSLv2 + - SSLv2_METHOD + - SSLv23_METHOD + - PROTOCOL_SSLv3 + - PROTOCOL_TLSv1 + - SSLv3_METHOD + - TLSv1_METHOD +ssl_with_bad_version: + bad_protocol_versions: + - PROTOCOL_SSLv2 + - SSLv2_METHOD + - SSLv23_METHOD + - PROTOCOL_SSLv3 + - PROTOCOL_TLSv1 + - SSLv3_METHOD + - TLSv1_METHOD +start_process_with_a_shell: + no_shell: + - os.execl + - os.execle + - os.execlp + - os.execlpe + - os.execv + - os.execve + - os.execvp + - os.execvpe + - os.spawnl + - os.spawnle + - os.spawnlp + - 
os.spawnlpe + - os.spawnv + - os.spawnve + - os.spawnvp + - os.spawnvpe + - os.startfile + shell: + - os.system + - os.popen + - os.popen2 + - os.popen3 + - os.popen4 + - popen2.popen2 + - popen2.popen3 + - popen2.popen4 + - popen2.Popen3 + - popen2.Popen4 + - commands.getoutput + - commands.getstatusoutput + subprocess: + - subprocess.Popen + - subprocess.call + - subprocess.check_call + - subprocess.check_output + - subprocess.run +start_process_with_no_shell: + no_shell: + - os.execl + - os.execle + - os.execlp + - os.execlpe + - os.execv + - os.execve + - os.execvp + - os.execvpe + - os.spawnl + - os.spawnle + - os.spawnlp + - os.spawnlpe + - os.spawnv + - os.spawnve + - os.spawnvp + - os.spawnvpe + - os.startfile + shell: + - os.system + - os.popen + - os.popen2 + - os.popen3 + - os.popen4 + - popen2.popen2 + - popen2.popen3 + - popen2.popen4 + - popen2.Popen3 + - popen2.Popen4 + - commands.getoutput + - commands.getstatusoutput + subprocess: + - subprocess.Popen + - subprocess.call + - subprocess.check_call + - subprocess.check_output + - subprocess.run +start_process_with_partial_path: + no_shell: + - os.execl + - os.execle + - os.execlp + - os.execlpe + - os.execv + - os.execve + - os.execvp + - os.execvpe + - os.spawnl + - os.spawnle + - os.spawnlp + - os.spawnlpe + - os.spawnv + - os.spawnve + - os.spawnvp + - os.spawnvpe + - os.startfile + shell: + - os.system + - os.popen + - os.popen2 + - os.popen3 + - os.popen4 + - popen2.popen2 + - popen2.popen3 + - popen2.popen4 + - popen2.Popen3 + - popen2.Popen4 + - commands.getoutput + - commands.getstatusoutput + subprocess: + - subprocess.Popen + - subprocess.call + - subprocess.check_call + - subprocess.check_output + - subprocess.run +subprocess_popen_with_shell_equals_true: + no_shell: + - os.execl + - os.execle + - os.execlp + - os.execlpe + - os.execv + - os.execve + - os.execvp + - os.execvpe + - os.spawnl + - os.spawnle + - os.spawnlp + - os.spawnlpe + - os.spawnv + - os.spawnve + - os.spawnvp + - 
os.spawnvpe + - os.startfile + shell: + - os.system + - os.popen + - os.popen2 + - os.popen3 + - os.popen4 + - popen2.popen2 + - popen2.popen3 + - popen2.popen4 + - popen2.Popen3 + - popen2.Popen4 + - commands.getoutput + - commands.getstatusoutput + subprocess: + - subprocess.Popen + - subprocess.call + - subprocess.check_call + - subprocess.check_output + - subprocess.run +subprocess_without_shell_equals_true: + no_shell: + - os.execl + - os.execle + - os.execlp + - os.execlpe + - os.execv + - os.execve + - os.execvp + - os.execvpe + - os.spawnl + - os.spawnle + - os.spawnlp + - os.spawnlpe + - os.spawnv + - os.spawnve + - os.spawnvp + - os.spawnvpe + - os.startfile + shell: + - os.system + - os.popen + - os.popen2 + - os.popen3 + - os.popen4 + - popen2.popen2 + - popen2.popen3 + - popen2.popen4 + - popen2.Popen3 + - popen2.Popen4 + - commands.getoutput + - commands.getstatusoutput + subprocess: + - subprocess.Popen + - subprocess.call + - subprocess.check_call + - subprocess.check_output + - subprocess.run +try_except_continue: + check_typed_exception: false +try_except_pass: + check_typed_exception: false +weak_cryptographic_key: + weak_key_size_dsa_high: 1024 + weak_key_size_dsa_medium: 2048 + weak_key_size_ec_high: 160 + weak_key_size_ec_medium: 224 + weak_key_size_rsa_high: 1024 + weak_key_size_rsa_medium: 2048 +exclude_dirs: + - thirdparty diff --git a/src/resources/openvino.genai-2026.1.0.0/cmake/features.cmake b/src/resources/openvino.genai-2026.1.0.0/cmake/features.cmake new file mode 100644 index 0000000..c456105 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/cmake/features.cmake @@ -0,0 +1,21 @@ +# Copyright (C) 2018-2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +option(ENABLE_PYTHON "Enable Python API build" ON) +option(ENABLE_GIL_PYTHON_API "Build Python API with Global Interpreter Lock" ON) +option(ENABLE_JS "Enable JS API build" OFF) +option(ENABLE_SAMPLES "Enable samples build" ON) +option(ENABLE_TESTS "Enable 
tests build" ON) +option(ENABLE_TOOLS "Enable tools build" ON) +option(ENABLE_GGUF "Enable support for GGUF format" ON) +option(ENABLE_XGRAMMAR "Enable support for structured output generation with xgrammar backend" ON) + +# Disable building samples for NPM package +if(CPACK_GENERATOR STREQUAL "NPM") + set(ENABLE_SAMPLES OFF) + set(ENABLE_PYTHON OFF) + set(ENABLE_JS ON) +else() + set(ENABLE_JS OFF) +endif() diff --git a/src/resources/openvino.genai-2026.1.0.0/cmake/templates/OpenVINOGenAIConfig.cmake.in b/src/resources/openvino.genai-2026.1.0.0/cmake/templates/OpenVINOGenAIConfig.cmake.in new file mode 100644 index 0000000..c1f9c86 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/cmake/templates/OpenVINOGenAIConfig.cmake.in @@ -0,0 +1,10 @@ +@PACKAGE_INIT@ + +include(CMakeFindDependencyMacro) +find_dependency(OpenVINO COMPONENTS Runtime) + +if(NOT TARGET openvino_genai) + include("${CMAKE_CURRENT_LIST_DIR}/OpenVINOGenAITargets.cmake") +endif() + +check_required_components(OpenVINOGenAI) diff --git a/src/resources/openvino.genai-2026.1.0.0/cmake/templates/version.cpp.in b/src/resources/openvino.genai-2026.1.0.0/cmake/templates/version.cpp.in new file mode 100644 index 0000000..812cbd6 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/cmake/templates/version.cpp.in @@ -0,0 +1,19 @@ +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "openvino/genai/version.hpp" + +namespace ov { +namespace genai { + +const Version get_version() { + const static Version version = { + "@OpenVINOGenAI_FULL_VERSION@", + "OpenVINO GenAI version", + }; + + return version; +} + +} // namespace genai +} // namespace ov diff --git a/src/resources/openvino.genai-2026.1.0.0/cmake/templates/version.hpp.in b/src/resources/openvino.genai-2026.1.0.0/cmake/templates/version.hpp.in new file mode 100644 index 0000000..d04429b --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/cmake/templates/version.hpp.in @@ -0,0 +1,34 @@ +// 
Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "openvino/core/version.hpp" +#include "openvino/genai/visibility.hpp" + +/** + * OpenVINO GenAI major version + */ +#define OPENVINO_GENAI_VERSION_MAJOR @OpenVINOGenAI_VERSION_MAJOR@ + +/** + * OpenVINO GenAI minor version + */ +#define OPENVINO_GENAI_VERSION_MINOR @OpenVINOGenAI_VERSION_MINOR@ + +/** + * OpenVINO GenAI patch version + */ +#define OPENVINO_GENAI_VERSION_PATCH @OpenVINOGenAI_VERSION_PATCH@ + +namespace ov { +namespace genai { + +/** + * Returns OpenVINO GenAI full version including git commit and hash information in form of: + * ...--[-] + */ +OPENVINO_EXTERN_C OPENVINO_GENAI_EXPORTS const ov::Version OPENVINO_CDECL get_version(); + +} // namespace genai +} // namespace ov diff --git a/src/resources/openvino.genai-2026.1.0.0/cmake/templates/vs_version.rc.in b/src/resources/openvino.genai-2026.1.0.0/cmake/templates/vs_version.rc.in new file mode 100644 index 0000000..63b732c --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/cmake/templates/vs_version.rc.in @@ -0,0 +1,33 @@ +#include + +VS_VERSION_INFO VERSIONINFO + FILEVERSION @PROJECT_VERSION_MAJOR@,@PROJECT_VERSION_MINOR@,@PROJECT_VERSION_PATCH@,@PROJECT_VERSION_TWEAK@ + PRODUCTVERSION @PROJECT_VERSION_MAJOR@,@PROJECT_VERSION_MINOR@,@PROJECT_VERSION_PATCH@,@PROJECT_VERSION_TWEAK@ + FILEFLAGSMASK VS_FFI_FILEFLAGSMASK +#ifdef _DEBUG + FILEFLAGS 1 +#else + FILEFLAGS 0 +#endif + FILEOS VOS__WINDOWS32 + FILETYPE VFT_DLL + FILESUBTYPE 0 +BEGIN + BLOCK "StringFileInfo" + BEGIN + BLOCK "040904E4" + BEGIN + VALUE "CompanyName", "@PROJECT_COMPANY_NAME@\0" + VALUE "FileDescription", "@PROJECT_DESCRIPTION@\0" + VALUE "FileVersion", "@PROJECT_VERSION@\0" + VALUE "LegalCopyright", "@PROJECT_COPYRIGHT@\0" + VALUE "ProductName", "@PROJECT_PRODUCT_NAME@\0" + VALUE "ProductVersion", "@OpenVINOGenAI_FULL_VERSION@\0" + VALUE "Comments", "@PROJECT_COMMENTS@\0" + END + END + BLOCK "VarFileInfo" + BEGIN 
+ VALUE "Translation", 0x0409, 1252 + END +END diff --git a/src/resources/openvino.genai-2026.1.0.0/cmake/version.cmake b/src/resources/openvino.genai-2026.1.0.0/cmake/version.cmake new file mode 100644 index 0000000..951b527 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/cmake/version.cmake @@ -0,0 +1,72 @@ +# Copyright (C) 2018-2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +find_package(Git QUIET) + +function(ov_genai_branch_name VAR) + if(GIT_FOUND) + execute_process( + COMMAND ${GIT_EXECUTABLE} rev-parse --abbrev-ref HEAD + WORKING_DIRECTORY ${OpenVINOGenAI_SOURCE_DIR} + OUTPUT_VARIABLE GIT_BRANCH + RESULT_VARIABLE EXIT_CODE + OUTPUT_STRIP_TRAILING_WHITESPACE) + if(EXIT_CODE EQUAL 0) + set(${VAR} ${GIT_BRANCH} PARENT_SCOPE) + endif() + endif() +endfunction() + +function(ov_genai_commit_hash VAR) + if(GIT_FOUND) + execute_process( + COMMAND ${GIT_EXECUTABLE} rev-parse --short=11 HEAD + WORKING_DIRECTORY ${OpenVINOGenAI_SOURCE_DIR} + OUTPUT_VARIABLE GIT_COMMIT_HASH + RESULT_VARIABLE EXIT_CODE + OUTPUT_STRIP_TRAILING_WHITESPACE) + if(EXIT_CODE EQUAL 0) + set(${VAR} ${GIT_COMMIT_HASH} PARENT_SCOPE) + endif() + endif() +endfunction() + +function(ov_genai_commit_number VAR) + set(GIT_COMMIT_NUMBER_FOUND OFF) + if(GIT_FOUND) + execute_process( + COMMAND ${GIT_EXECUTABLE} rev-list --count HEAD + WORKING_DIRECTORY ${OpenVINOGenAI_SOURCE_DIR} + OUTPUT_VARIABLE GIT_COMMIT_NUMBER + RESULT_VARIABLE EXIT_CODE + OUTPUT_STRIP_TRAILING_WHITESPACE) + if(EXIT_CODE EQUAL 0) + set(GIT_COMMIT_NUMBER_FOUND ON) + set(${VAR} ${GIT_COMMIT_NUMBER} PARENT_SCOPE) + endif() + endif() + if(NOT GIT_COMMIT_NUMBER_FOUND) + # set zeros since git is not available + set(${VAR} "000" PARENT_SCOPE) + endif() +endfunction() + +function(ov_genai_full_version full_version) + if(GIT_FOUND) + ov_genai_branch_name(GIT_BRANCH) + ov_genai_commit_hash(GIT_COMMIT_HASH) + ov_genai_commit_number(GIT_COMMIT_NUMBER) + + if(NOT GIT_BRANCH MATCHES "^(master|HEAD)$") + 
set(GIT_BRANCH_POSTFIX "-${GIT_BRANCH}") + endif() + + set(${full_version} "${OpenVINOGenAI_VERSION}-${GIT_COMMIT_NUMBER}-${GIT_COMMIT_HASH}${GIT_BRANCH_POSTFIX}" PARENT_SCOPE) + else() + set(${full_version} "${OpenVINOGenAI_VERSION}" PARENT_SCOPE) + endif() +endfunction() + +ov_genai_full_version(OpenVINOGenAI_FULL_VERSION) +message(STATUS "OpenVINO GenAI full version: ${OpenVINOGenAI_FULL_VERSION}") diff --git a/src/resources/openvino.genai-2026.1.0.0/cmake/vs_version.cmake b/src/resources/openvino.genai-2026.1.0.0/cmake/vs_version.cmake new file mode 100644 index 0000000..e236426 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/cmake/vs_version.cmake @@ -0,0 +1,18 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +set(PROJECT_COMPANY_NAME "Intel Corporation") +set(PROJECT_PRODUCT_NAME "OpenVINO GenAI") +set(PROJECT_COPYRIGHT "Copyright (C) 2018-2025, Intel Corporation") +set(PROJECT_COMMENTS "https://docs.openvino.ai/") + +# This function generates a version resource (.rc) file from a template and adds it to the given target. 
+function(add_vs_version_resource TARGET_NAME) + set(VS_VERSION_TEMPLATE "${PROJECT_SOURCE_DIR}/cmake/templates/vs_version.rc.in") + set(VS_VERSION_OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/vs_version.rc") + + configure_file("${VS_VERSION_TEMPLATE}" "${VS_VERSION_OUTPUT}" @ONLY) + + target_sources(${TARGET_NAME} PRIVATE "${VS_VERSION_OUTPUT}") +endfunction() diff --git a/src/resources/openvino.genai-2026.1.0.0/pyproject.toml b/src/resources/openvino.genai-2026.1.0.0/pyproject.toml new file mode 100644 index 0000000..fdea9fc --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/pyproject.toml @@ -0,0 +1,74 @@ +[project] +name = "openvino-genai" +version = "2026.1.0.0" +description = "Library of the most popular Generative AI model pipelines, optimized execution methods, and samples" +requires-python = ">=3.10" +readme = { file = "src/README.md", content-type="text/markdown" } +license = { "file" = "LICENSE" } +authors = [ + { name = "OpenVINO Developers", email = "openvino@intel.com" }, +] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "License :: OSI Approved :: Apache Software License", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Software Development :: Libraries :: Python Modules", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Operating System :: Unix", + "Operating System :: POSIX :: Linux", + "Operating System :: Microsoft :: Windows", + "Operating System :: MacOS", + "Programming Language :: C++", + "Programming Language :: C", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: Implementation :: CPython" +] +dependencies = [ + "openvino_tokenizers~=2026.1.0.0.dev" +] +[project.optional-dependencies] +testing = ["pytest>=6.0"] + +[tool.py-build-cmake.module] 
+directory = "src/python" + +[tool.py-build-cmake.sdist] +include = ["CMakeLists.txt", "LICENSE", "third-party-programs.txt", "SECURITY.md", "cmake", "src", "thirdparty"] + +[tool.py-build-cmake.cmake] +minimum_version = "3.23" +build_type = "Release" +config = ["Release"] +find_python3 = true +build_args = ["--parallel", "--target", "py_openvino_genai_stub"] +install_args = ["--strip"] +install_components = ["wheel_genai"] +options = {"ENABLE_PYTHON" = "ON", "BUILD_TOKENIZERS" = "OFF", "ENABLE_SAMPLES" = "OFF", "ENABLE_TESTS" = "OFF", "ENABLE_TOOLS" = "OFF", "CMAKE_SKIP_INSTALL_RPATH" = "OFF"} + +[build-system] +requires = [ + "py-build-cmake==0.5.0", + "openvino~=2026.1.0.0.dev", + "pybind11-stubgen==2.5.5", + "cmake~=3.23.0; platform_system != 'Darwin' or platform_machine == 'x86_64'", + "cmake~=4.2.1; platform_system == 'Darwin' and platform_machine == 'arm64'", +] +build-backend = "py_build_cmake.build" + +[tool.ruff] +line-length = 120 +indent-width = 4 +target-version = "py310" + +[tool.ruff.format] +quote-style = "double" +indent-style = "space" +skip-magic-trailing-comma = false +line-ending = "auto" +docstring-code-format = false +docstring-code-line-length = "dynamic" diff --git a/src/resources/openvino.genai-2026.1.0.0/requirements-build.txt b/src/resources/openvino.genai-2026.1.0.0/requirements-build.txt new file mode 100644 index 0000000..b06dcfa --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/requirements-build.txt @@ -0,0 +1,3 @@ +cmake~=3.23.0; platform_system != 'Darwin' or platform_machine == 'x86_64' +cmake~=4.2.1; platform_system == 'Darwin' and platform_machine == 'arm64' +pybind11-stubgen==2.5.5 diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/CMakeLists.txt b/src/resources/openvino.genai-2026.1.0.0/samples/CMakeLists.txt new file mode 100644 index 0000000..185a831 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/CMakeLists.txt @@ -0,0 +1,51 @@ +# Copyright (C) 2018-2026 Intel Corporation +# 
SPDX-License-Identifier: Apache-2.0 +# +if(ENABLE_SAMPLES) + add_subdirectory(cpp/text_generation) + add_subdirectory(cpp/image_generation) + add_subdirectory(cpp/video_generation) + add_subdirectory(cpp/speech_generation) + add_subdirectory(cpp/visual_language_chat) + add_subdirectory(cpp/whisper_speech_recognition) + add_subdirectory(cpp/rag) + add_subdirectory(c/text_generation) + add_subdirectory(c/whisper_speech_recognition) + add_subdirectory(c/visual_language_chat) +endif() + +install(FILES + deployment-requirements.txt + export-requirements.txt + requirements.txt + DESTINATION samples + COMPONENT cpp_samples_genai) + +install(FILES cpp/fetch_opencv.cmake + DESTINATION samples/cpp COMPONENT cpp_samples_genai) + +install(DIRECTORY + cpp/text_generation + cpp/image_generation + cpp/video_generation + cpp/speech_generation + cpp/visual_language_chat + cpp/whisper_speech_recognition + cpp/rag + DESTINATION samples/cpp COMPONENT cpp_samples_genai) + +install(DIRECTORY + python/text_generation + python/image_generation + python/speech_generation + python/visual_language_chat + python/whisper_speech_recognition + python/rag + DESTINATION samples/python COMPONENT cpp_samples_genai + USE_SOURCE_PERMISSIONS) + +install(DIRECTORY + c/text_generation + c/whisper_speech_recognition + c/visual_language_chat + DESTINATION samples/c COMPONENT cpp_samples_genai) diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/c/text_generation/CMakeLists.txt b/src/resources/openvino.genai-2026.1.0.0/samples/c/text_generation/CMakeLists.txt new file mode 100644 index 0000000..f1364e6 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/c/text_generation/CMakeLists.txt @@ -0,0 +1,32 @@ +# Copyright (C) 2023-2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +find_package(OpenVINOGenAI REQUIRED + PATHS + "${CMAKE_BINARY_DIR}" # Reuse the package from the build. + ${OpenVINO_DIR} # GenAI may be installed alongside OpenVINO.
+ NO_CMAKE_FIND_ROOT_PATH +) + +function(add_sample_executable target_name) + add_executable(${target_name} ${target_name}.c) + # Specifies that the source file should be compiled as a C source file + set_source_files_properties(${target_name}.c PROPERTIES LANGUAGE C) + target_link_libraries(${target_name} PRIVATE openvino::genai::c) + set_target_properties(${target_name} PROPERTIES + # Ensure out-of-box LC_RPATH on macOS with SIP + INSTALL_RPATH_USE_LINK_PATH ON) + install(TARGETS ${target_name} + RUNTIME DESTINATION samples_bin/ + COMPONENT samples_bin + EXCLUDE_FROM_ALL) +endfunction() + +set (SAMPLE_LIST + greedy_causal_lm_c + chat_sample_c + benchmark_genai_c) + +foreach(sample IN LISTS SAMPLE_LIST) + add_sample_executable(${sample}) +endforeach() diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/c/text_generation/README.md b/src/resources/openvino.genai-2026.1.0.0/samples/c/text_generation/README.md new file mode 100644 index 0000000..f0a5bd1 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/c/text_generation/README.md @@ -0,0 +1,87 @@ +# OpenVINO GenAI Text Generation C Samples + +## Table of Contents +1. [Download OpenVINO GenAI](#download-openvino-genai) +2. [Build Samples](#build-samples) +3. [Download and Convert the Model and Tokenizers](#download-and-convert-the-model-and-tokenizers) +4. [Sample Descriptions](#sample-descriptions) +5. [Support and Contribution](#support-and-contribution) + +## Download OpenVINO GenAI + +Download and extract [OpenVINO GenAI Archive](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html?PACKAGE=OPENVINO_GENAI&VERSION=NIGHTLY&OP_SYSTEM=WINDOWS&DISTRIBUTION=ARCHIVE) Visit the OpenVINO Download Page. 
+ + +## Build Samples +Set up the environment and build the samples. Linux and macOS: +```sh +source <INSTALL_DIR>/setupvars.sh +./<INSTALL_DIR>/samples/c/build_samples.sh +``` +Windows Command Prompt: +```sh +<INSTALL_DIR>\setupvars.bat +<INSTALL_DIR>\samples\c\build_samples_msvc.bat +``` +Windows PowerShell: +```sh +.<INSTALL_DIR>\setupvars.ps1 +.<INSTALL_DIR>\samples\c\build_samples.ps1 +``` + +## Download and convert the model and tokenizers +The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. +Install [../../export-requirements.txt](../../export-requirements.txt) if model conversion is required. +```sh +pip install --upgrade-strategy eager -r ../../export-requirements.txt +optimum-cli export openvino --model <model> <output_folder> +``` +If a converted model in OpenVINO IR format is available in the [OpenVINO optimized LLMs](https://huggingface.co/collections/OpenVINO/llm-6687aaa2abca3bbcec71a9bd) collection on Hugging Face, you can download it directly via huggingface-cli. +```sh +pip install huggingface-hub +huggingface-cli download <model> --local-dir <output_folder> +``` + +### Using GGUF models + +To run any samples with a GGUF model, simply provide the path to the .gguf file via the `model_dir` parameter. + +This capability is currently available in preview mode and supports a limited set of topologies, including SmolLM and Qwen2.5. For other models +and architectures, we still recommend converting the model to the IR format using the `optimum-intel` tool. + +### Sample Descriptions + +#### Chat Sample (`chat_sample_c`) +Multi-turn conversations with an interactive chat interface powered by OpenVINO. +- **Run Command:** +```sh +./chat_sample_c model_dir +``` + +#### LLMs benchmarking sample (`benchmark_genai_c`) +The sample demonstrates how to benchmark LLMs in OpenVINO GenAI using the C language. +- **Run Command:** +```sh +./benchmark_genai_c [-m MODEL] [-p PROMPT] [--nw NUM_WARMUP] [-n NUM_ITER] [--mt MAX_NEW_TOKENS] [-d DEVICE] +``` +- **Options:** +- `-m, --model`: Path to the model and tokenizers base directory. 
#define MAX_PROMPT_LENGTH 256
#define MAX_OUTPUT_LENGTH 1024

#define DEFAULT_PROMPT "The Sky is blue because"
#define DEFAULT_NUM_WARMUP 1
#define DEFAULT_NUM_ITER 3
#define DEFAULT_MAX_NEW_TOKENS 20
#define DEFAULT_DEVICE "CPU"

// Benchmark settings collected from the command line.
// String members point either at argv entries or at the DEFAULT_* literals.
typedef struct {
    const char* model;
    const char* prompt;
    size_t num_warmup;
    size_t num_iter;
    size_t max_new_tokens;
    const char* device;
} Options;

// Prints the command-line synopsis to stdout.
void print_usage() {
    printf("Usage: benchmark_genai_c [OPTIONS]\n");
    printf("Options:\n");
    printf(" -m, --model Path to model and tokenizers base directory\n");
    printf(" -p, --prompt Prompt (default: \"%s\")\n", DEFAULT_PROMPT);
    printf(" --nw, --num_warmup Number of warmup iterations (default: %d)\n", DEFAULT_NUM_WARMUP);
    printf(" -n, --num_iter Number of iterations (default: %d)\n", DEFAULT_NUM_ITER);
    printf(" --mt, --max_new_tokens Maximal number of new tokens (default: %d)\n", DEFAULT_MAX_NEW_TOKENS);
    printf(" -d, --device Device (default: %s)\n", DEFAULT_DEVICE);
    printf(" -h, --help Print usage\n");
}

// Returns non-zero when `arg` equals either spelling of an option.
static int option_is(const char* arg, const char* short_name, const char* long_name) {
    return strcmp(arg, short_name) == 0 || strcmp(arg, long_name) == 0;
}

// Parses `text` as a non-negative decimal integer that fits in size_t.
// Returns 0 and stores the value on success; returns -1 for empty input,
// any non-digit character (including a sign) or overflow.
// atoi() is deliberately not used: it turns "-1" into SIZE_MAX once stored in a
// size_t and silently maps garbage to 0.
static int parse_count(const char* text, size_t* value) {
    const size_t max_value = (size_t)-1;
    size_t accumulated = 0;

    if (*text == '\0') {
        return -1;
    }
    for (const char* p = text; *p != '\0'; ++p) {
        if (*p < '0' || *p > '9') {
            return -1;
        }
        const size_t digit = (size_t)(*p - '0');
        if (accumulated > (max_value - digit) / 10) {
            return -1;  // accumulated * 10 + digit would overflow
        }
        accumulated = accumulated * 10 + digit;
    }
    *value = accumulated;
    return 0;
}

// Fills `options` from argv; members not mentioned on the command line are left untouched.
//
// Returns  1 when parsing succeeded and the program should continue,
//          0 when help was requested (usage has been printed),
//         -1 on an unknown option, a missing option value or an invalid number.
int parse_arguments(int argc, char* argv[], Options* options) {
    for (int i = 1; i < argc; i++) {
        const char* arg = argv[i];
        const char* canonical_name = NULL;  // long spelling, used in messages
        const char** text_target = NULL;    // set for string-valued options
        size_t* count_target = NULL;        // set for numeric options

        if (option_is(arg, "-m", "--model")) {
            canonical_name = "--model";
            text_target = &options->model;
        } else if (option_is(arg, "-p", "--prompt")) {
            canonical_name = "--prompt";
            text_target = &options->prompt;
        } else if (option_is(arg, "--nw", "--num_warmup")) {
            canonical_name = "--num_warmup";
            count_target = &options->num_warmup;
        } else if (option_is(arg, "-n", "--num_iter")) {
            canonical_name = "--num_iter";
            count_target = &options->num_iter;
        } else if (option_is(arg, "--mt", "--max_new_tokens")) {
            canonical_name = "--max_new_tokens";
            count_target = &options->max_new_tokens;
        } else if (option_is(arg, "-d", "--device")) {
            canonical_name = "--device";
            text_target = &options->device;
        } else if (option_is(arg, "-h", "--help")) {
            print_usage();
            return 0;
        } else {
            printf("Error: Unknown option %s\n", arg);
            return -1;
        }

        if (i + 1 >= argc) {
            printf("Error: %s requires an argument\n", canonical_name);
            return -1;
        }
        const char* value = argv[++i];

        if (text_target) {
            *text_target = value;
        } else if (parse_count(value, count_target) != 0) {
            printf("Error: %s expects a non-negative integer, got '%s'\n", canonical_name, value);
            return -1;
        }
    }
    return 1;
}
(return_status != OK) { \ + fprintf(stderr, "[ERROR] return status %d, line %d\n", return_status, __LINE__); \ + goto err; \ + } + +int main(int argc, char* argv[]) { + Options options = {.model = NULL, + .prompt = DEFAULT_PROMPT, + .num_warmup = DEFAULT_NUM_WARMUP, + .num_iter = DEFAULT_NUM_ITER, + .max_new_tokens = DEFAULT_MAX_NEW_TOKENS, + .device = DEFAULT_DEVICE}; + + int result = parse_arguments(argc, argv, &options); + if (result == 0) { + return EXIT_SUCCESS; + } else if (result == -1) { + return EXIT_FAILURE; + } + + printf("Model: %s\n", options.model ? options.model : "Not specified"); + printf("Prompt: %s\n", options.prompt); + printf("Num Warmup: %zu\n", options.num_warmup); + printf("Num Iter: %zu\n", options.num_iter); + printf("Max New Tokens: %zu\n", options.max_new_tokens); + printf("Device: %s\n", options.device); + + ov_genai_llm_pipeline* pipe = NULL; + ov_genai_generation_config* config = NULL; + ov_genai_decoded_results* results = NULL; + ov_genai_perf_metrics* metrics = NULL; + ov_genai_perf_metrics* cumulative_metrics = NULL; + + CHECK_STATUS(ov_genai_llm_pipeline_create(options.model, options.device, 0, &pipe)); + + CHECK_STATUS(ov_genai_generation_config_create(&config)); + CHECK_STATUS(ov_genai_generation_config_set_max_new_tokens(config, options.max_new_tokens)); + + for (size_t i = 0; i < options.num_warmup; i++) { + if (results) { + ov_genai_decoded_results_free(results); + results = NULL; // The end of main() would try to free it again if not NULL. + } + CHECK_STATUS(ov_genai_llm_pipeline_generate(pipe, options.prompt, config, NULL, &results)); + } + + CHECK_STATUS(ov_genai_llm_pipeline_generate(pipe, options.prompt, config, NULL, &results)); + CHECK_STATUS(ov_genai_decoded_results_get_perf_metrics(results, &cumulative_metrics)); + + if (results) { + ov_genai_decoded_results_free(results); + results = NULL; // The end of main() would try to free it again if not NULL. 
+ } + for (size_t i = 0; i < options.num_iter - 1; i++) { + CHECK_STATUS(ov_genai_llm_pipeline_generate(pipe, options.prompt, config, NULL, &results)); + CHECK_STATUS(ov_genai_decoded_results_get_perf_metrics(results, &metrics)); + CHECK_STATUS(ov_genai_perf_metrics_add_in_place(cumulative_metrics, metrics)); // metrics += _metrics + if (metrics) { + ov_genai_decoded_results_perf_metrics_free(metrics); + metrics = NULL; // The end of main() would try to free it again if not NULL. + } + if (results) { + ov_genai_decoded_results_free(results); + results = NULL; + } + } + float mean = 0.0f; + float std = 0.0f; + float load_time = 0.0f; + CHECK_STATUS(ov_genai_perf_metrics_get_load_time(cumulative_metrics, &load_time)); + printf("%.2f ms\n", load_time); + CHECK_STATUS(ov_genai_perf_metrics_get_generate_duration(cumulative_metrics, &mean, &std)); + printf("Generate time: %.2f ± %.2f ms\n", mean, std); + CHECK_STATUS(ov_genai_perf_metrics_get_tokenization_duration(cumulative_metrics, &mean, &std)); + printf("Tokenization time: %.2f ± %.2f ms\n", mean, std); + CHECK_STATUS(ov_genai_perf_metrics_get_detokenization_duration(cumulative_metrics, &mean, &std)); + printf("Detokenization time: %.2f ± %.2f ms\n", mean, std); + CHECK_STATUS(ov_genai_perf_metrics_get_ttft(cumulative_metrics, &mean, &std)); + printf("TTFT: %.2f ± %.2f ms\n", mean, std); + CHECK_STATUS(ov_genai_perf_metrics_get_tpot(cumulative_metrics, &mean, &std)); + printf("TPOT: %.2f ± %.2f ms/token\n", mean, std); + CHECK_STATUS(ov_genai_perf_metrics_get_throughput(cumulative_metrics, &mean, &std)); + printf("Throughput: %.2f ± %.2f tokens/s\n", mean, std); + +err: + if (pipe) + ov_genai_llm_pipeline_free(pipe); + if (config) + ov_genai_generation_config_free(config); + if (metrics) + ov_genai_decoded_results_perf_metrics_free(metrics); + if (cumulative_metrics) + ov_genai_decoded_results_perf_metrics_free(cumulative_metrics); + if (results) + ov_genai_decoded_results_free(results); + return EXIT_SUCCESS; +} 
// Writes `input`, escaped for use inside a JSON string literal (without the
// surrounding quotes), into `output` as a NUL-terminated string.
//
// - `"` and `\` and the control characters \b \f \n \r \t get their two-character escapes;
// - any other byte below 0x20 becomes \u00XX (upper-case hex);
// - well-formed UTF-8 multi-byte sequences are copied as a unit, all other bytes
//   are copied unchanged.
//
// Returns 0 on success. Returns -1 if an argument is NULL, `output_size` is 0, or
// the escaped text (plus terminator) does not fit; in the latter case `output`
// holds a NUL-terminated prefix made only of complete escapes/characters.
static int json_escape_string(const char* input, char* output, size_t output_size) {
    static const char hex_digits[] = "0123456789ABCDEF";

    if (!input || !output || output_size == 0) {
        return -1;
    }

    size_t in_pos = 0;
    size_t out_pos = 0;  // invariant: out_pos <= output_size - 1, so the terminator always fits

    while (input[in_pos] != '\0') {
        const unsigned char c = (unsigned char)input[in_pos];
        char piece[6];        // longest unit is "\uXXXX"
        size_t piece_len = 0;
        size_t consumed = 1;  // input bytes represented by `piece`
        char short_escape = '\0';

        switch (c) {
        case '"':
            short_escape = '"';
            break;
        case '\\':
            short_escape = '\\';
            break;
        case '\b':
            short_escape = 'b';
            break;
        case '\f':
            short_escape = 'f';
            break;
        case '\n':
            short_escape = 'n';
            break;
        case '\r':
            short_escape = 'r';
            break;
        case '\t':
            short_escape = 't';
            break;
        default:
            break;
        }

        if (short_escape != '\0') {
            piece[0] = '\\';
            piece[1] = short_escape;
            piece_len = 2;
        } else if (c < 0x20) {
            // Remaining control characters have no short form.
            piece[0] = '\\';
            piece[1] = 'u';
            piece[2] = '0';
            piece[3] = '0';
            piece[4] = hex_digits[(c >> 4) & 0x0F];
            piece[5] = hex_digits[c & 0x0F];
            piece_len = 6;
        } else {
            // Sequence length announced by the lead byte (1 for ASCII or a stray byte).
            size_t utf8_len = 1;
            if ((c & 0xE0) == 0xC0) {
                utf8_len = 2;
            } else if ((c & 0xF0) == 0xE0) {
                utf8_len = 3;
            } else if ((c & 0xF8) == 0xF0) {
                utf8_len = 4;
            }
            // Every following byte must be a continuation byte (10xxxxxx). The scan
            // stops at the first mismatch, which also covers the terminator, so it
            // never reads past the end of `input`. A malformed sequence degrades to
            // copying the lead byte alone.
            for (size_t k = 1; k < utf8_len; k++) {
                const unsigned char next = (unsigned char)input[in_pos + k];
                if ((next & 0xC0) != 0x80) {
                    utf8_len = 1;
                    break;
                }
            }
            for (size_t k = 0; k < utf8_len; k++) {
                piece[k] = input[in_pos + k];
            }
            piece_len = utf8_len;
            consumed = utf8_len;
        }

        // Single overflow-safe capacity check: the subtraction cannot wrap because
        // of the invariant on out_pos, unlike the former `output_size - 3` / `- 7`
        // comparisons, which wrapped for tiny buffers and allowed writes past the end.
        if (piece_len > output_size - 1 - out_pos) {
            output[out_pos] = '\0';
            return -1;
        }
        for (size_t k = 0; k < piece_len; k++) {
            output[out_pos++] = piece[k];
        }
        in_pos += consumed;
    }

    output[out_pos] = '\0';
    return 0;
}
argv[2] : "CPU"; // GPU, NPU can be used as well + + ov_genai_generation_config* config = NULL; + ov_genai_llm_pipeline* pipeline = NULL; + ov_genai_chat_history* chat_history = NULL; + ov_genai_decoded_results* results = NULL; + ov_genai_json_container* message_container = NULL; + ov_genai_json_container* assistant_message_container = NULL; + streamer_callback streamer = { + .callback_func = print_callback, + .args = NULL + }; + char prompt[MAX_PROMPT_LENGTH]; + char message_json[MAX_MESSAGE_JSON_LENGTH]; + char output_buffer[MAX_JSON_LENGTH]; + size_t output_size = 0; + char assistant_message_json[MAX_ASSISTANT_MESSAGE_JSON_LENGTH]; + char escaped_prompt[MAX_ESCAPED_PROMPT_LENGTH]; + char escaped_output[MAX_ESCAPED_OUTPUT_LENGTH]; + + CHECK_STATUS(ov_genai_llm_pipeline_create(models_path, device, 0, &pipeline)); + CHECK_STATUS(ov_genai_generation_config_create(&config)); + CHECK_STATUS(ov_genai_generation_config_set_max_new_tokens(config, 100)); + + CHECK_CHAT_HISTORY_STATUS(ov_genai_chat_history_create(&chat_history)); + + printf("question:\n"); + while (fgets(prompt, MAX_PROMPT_LENGTH, stdin)) { + // Remove newline character + prompt[strcspn(prompt, "\n")] = 0; + + // Skip empty lines + if (strlen(prompt) == 0) { + printf("question:\n"); + continue; + } + + if (json_escape_string(prompt, escaped_prompt, sizeof(escaped_prompt)) != 0) { + fprintf(stderr, "[ERROR] Failed to escape prompt: buffer too small\n"); + continue; + } + + int message_json_len = snprintf(message_json, sizeof(message_json), + "{\"role\": \"user\", \"content\": \"%s\"}", escaped_prompt); + if (message_json_len < 0 || (size_t)message_json_len >= sizeof(message_json)) { + fprintf(stderr, "[ERROR] Message JSON truncated: buffer too small (needed %d bytes)\n", message_json_len); + continue; + } + + if (message_container) { + ov_genai_json_container_free(message_container); + message_container = NULL; + } + CHECK_JSON_CONTAINER_STATUS(ov_genai_json_container_create_from_json_string( + 
&message_container, message_json)); + + // Push message using JsonContainer + CHECK_CHAT_HISTORY_STATUS(ov_genai_chat_history_push_back(chat_history, message_container)); + + results = NULL; + CHECK_STATUS(ov_genai_llm_pipeline_generate_with_history(pipeline, + chat_history, + config, + &streamer, + &results)); + + if (results) { + output_size = sizeof(output_buffer); + CHECK_STATUS(ov_genai_decoded_results_get_string(results, output_buffer, &output_size)); + + if (json_escape_string(output_buffer, escaped_output, sizeof(escaped_output)) != 0) { + fprintf(stderr, "[ERROR] Failed to escape output: buffer too small\n"); + ov_genai_decoded_results_free(results); + results = NULL; + continue; + } + + int assistant_message_json_len = snprintf(assistant_message_json, sizeof(assistant_message_json), + "{\"role\": \"assistant\", \"content\": \"%s\"}", escaped_output); + if (assistant_message_json_len < 0 || (size_t)assistant_message_json_len >= sizeof(assistant_message_json)) { + fprintf(stderr, "[ERROR] Assistant message JSON truncated: buffer too small (needed %d bytes)\n", assistant_message_json_len); + ov_genai_decoded_results_free(results); + results = NULL; + continue; + } + + if (assistant_message_container) { + ov_genai_json_container_free(assistant_message_container); + assistant_message_container = NULL; + } + CHECK_JSON_CONTAINER_STATUS(ov_genai_json_container_create_from_json_string( + &assistant_message_container, assistant_message_json)); + + // Push message using JsonContainer + CHECK_CHAT_HISTORY_STATUS(ov_genai_chat_history_push_back(chat_history, assistant_message_container)); + + ov_genai_decoded_results_free(results); + results = NULL; + } + + printf("\n----------\nquestion:\n"); + } + +err: + if (results) + ov_genai_decoded_results_free(results); + if (message_container) + ov_genai_json_container_free(message_container); + if (assistant_message_container) + ov_genai_json_container_free(assistant_message_container); + if (chat_history) + 
ov_genai_chat_history_free(chat_history); + if (pipeline) + ov_genai_llm_pipeline_free(pipeline); + if (config) + ov_genai_generation_config_free(config); + + return EXIT_SUCCESS; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/c/text_generation/greedy_causal_lm_c.c b/src/resources/openvino.genai-2026.1.0.0/samples/c/text_generation/greedy_causal_lm_c.c new file mode 100644 index 0000000..66e08e4 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/c/text_generation/greedy_causal_lm_c.c @@ -0,0 +1,56 @@ +// Copyright (C) 2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +#include +#include + +#include "openvino/genai/c/llm_pipeline.h" + +#define CHECK_STATUS(return_status) \ + if (return_status != OK) { \ + fprintf(stderr, "[ERROR] return status %d, line %d\n", return_status, __LINE__); \ + goto err; \ + } + +int main(int argc, char* argv[]) { + if (argc < 3) { + fprintf(stderr, "Usage: %s \"\"\n", argv[0]); + return EXIT_FAILURE; + } + const char* model_dir = argv[1]; + const char* prompt = argv[2]; + + ov_genai_llm_pipeline* pipeline = NULL; + ov_genai_generation_config* config = NULL; + ov_genai_decoded_results* results = NULL; + const char* device = "CPU"; // GPU, NPU can be used as well + char* output = NULL; // The output of the generation function. The caller is responsible for allocating and freeing + // the memory. + size_t output_size = 0; // Used to store the required size of the output buffer. + + CHECK_STATUS(ov_genai_llm_pipeline_create(model_dir, device, 0, &pipeline)); + CHECK_STATUS(ov_genai_generation_config_create(&config)); + CHECK_STATUS(ov_genai_generation_config_set_max_new_tokens(config, 100)); + CHECK_STATUS(ov_genai_llm_pipeline_generate(pipeline, prompt, config, NULL, &results)); + + // The function is called with NULL as the output to determine the required buffer size. 
+ CHECK_STATUS(ov_genai_decoded_results_get_string(results, NULL, &output_size)); + output = (char*)malloc(output_size); // Allocate memory for the output string based on the determined size. + if (!output) { + fprintf(stderr, "Failed to allocate memory for output\n"); + goto err; + } + + // Retrieve the actual output string from the results into the allocated buffer. + CHECK_STATUS(ov_genai_decoded_results_get_string(results, output, &output_size)); + printf("%s\n", output); + +err: + if (pipeline) + ov_genai_llm_pipeline_free(pipeline); + if (config) + ov_genai_generation_config_free(config); + if (output) + free(output); + + return EXIT_SUCCESS; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/c/visual_language_chat/CMakeLists.txt b/src/resources/openvino.genai-2026.1.0.0/samples/c/visual_language_chat/CMakeLists.txt new file mode 100644 index 0000000..6eb72a5 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/c/visual_language_chat/CMakeLists.txt @@ -0,0 +1,46 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +find_package(OpenVINOGenAI REQUIRED + PATHS + "${CMAKE_BINARY_DIR}" # Reuse the package from the build. + ${OpenVINO_DIR} # GenAI may be installed alogside OpenVINO. 
+ NO_CMAKE_FIND_ROOT_PATH +) + +include(FetchContent) +FetchContent_Declare( + stb + GIT_REPOSITORY https://github.com/nothings/stb.git + GIT_TAG master +) +FetchContent_MakeAvailable(stb) + +add_library(stb_image INTERFACE) +target_include_directories(stb_image INTERFACE ${stb_SOURCE_DIR}) + +# VLM Pipeline Sample +add_executable(vlm_pipeline_c vlm_pipeline.c load_image.c) + +# Specifies that the source file should be compiled as a C source file +set_source_files_properties(vlm_pipeline.c load_image.c PROPERTIES LANGUAGE C) + +target_include_directories(vlm_pipeline_c PRIVATE + ${CMAKE_SOURCE_DIR}/src/c/include + ${stb_SOURCE_DIR} +) + +target_link_libraries(vlm_pipeline_c PRIVATE openvino::genai::c stb_image) +if(UNIX AND NOT APPLE) + target_link_libraries(vlm_pipeline_c PRIVATE m) +endif() + +set_target_properties(vlm_pipeline_c PROPERTIES + # Ensure out-of-box LC_RPATH on macOS with SIP + INSTALL_RPATH_USE_LINK_PATH ON) + +# Install +install(TARGETS vlm_pipeline_c + RUNTIME DESTINATION samples_bin/ + COMPONENT samples_bin + EXCLUDE_FROM_ALL) diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/c/visual_language_chat/load_image.c b/src/resources/openvino.genai-2026.1.0.0/samples/c/visual_language_chat/load_image.c new file mode 100644 index 0000000..4b287dd --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/c/visual_language_chat/load_image.c @@ -0,0 +1,178 @@ +#include "load_image.h" +#include +#include +#include + +#ifdef _WIN32 + #define strcasecmp _stricmp +#else + #include +#endif + +#ifdef _WIN32 + #include + #define stat _stat +#else + #include +#endif + +#include "openvino/c/openvino.h" + +#define STB_IMAGE_IMPLEMENTATION +#include "stb_image.h" + +static const char* supported_extensions[] = { + ".jpg", ".jpeg", ".png", ".bmp", ".tga", ".psd", ".gif", ".hdr", ".pic", ".pnm" +}; +static const size_t num_extensions = sizeof(supported_extensions) / sizeof(supported_extensions[0]); + +static int is_supported_image(const char* 
filename) { + if (!filename) return 0; + + size_t len = strlen(filename); + for (size_t i = 0; i < num_extensions; i++) { + size_t ext_len = strlen(supported_extensions[i]); + if (len >= ext_len) { + const char* ext = filename + len - ext_len; + if (strcasecmp(ext, supported_extensions[i]) == 0) { + return 1; + } + } + } + return 0; +} + +typedef struct { + unsigned char* image_data; + int channels; + int height; + int width; +} image_allocator_t; + +static void* image_allocate(size_t bytes, size_t alignment, void* user_data) { + image_allocator_t* allocator = (image_allocator_t*)user_data; + if (allocator && allocator->image_data && + allocator->channels * allocator->height * allocator->width == (int)bytes) { + return allocator->image_data; + } + return NULL; +} + +static void image_deallocate(void* ptr, size_t bytes, size_t alignment, void* user_data) { + image_allocator_t* allocator = (image_allocator_t*)user_data; + if (allocator && allocator->image_data && + allocator->channels * allocator->height * allocator->width == (int)bytes) { + stbi_image_free(allocator->image_data); + allocator->image_data = NULL; + } +} + +#define CHECK_STATUS(return_status) \ + if (return_status != OK) { \ + fprintf(stderr, "[ERROR] return status %d, line %d\n", return_status, __LINE__); \ + goto err; \ + } + +ov_tensor_t* load_image(const char* image_path) { + if (!image_path) { + fprintf(stderr, "Error: image_path is NULL\n"); + return NULL; + } + + if (!file_exists(image_path)) { + fprintf(stderr, "Error: Image file '%s' does not exist\n", image_path); + return NULL; + } + + int width, height, channels; + const int desired_channels = 3; + + unsigned char* data = stbi_load(image_path, &width, &height, &channels, desired_channels); + if (!data) { + fprintf(stderr, "Error: Failed to load image '%s': %s\n", image_path, stbi_failure_reason()); + return NULL; + } + + image_allocator_t* allocator = (image_allocator_t*)malloc(sizeof(image_allocator_t)); + if (!allocator) { + 
fprintf(stderr, "Error: Failed to allocate memory for allocator\n"); + stbi_image_free(data); + return NULL; + } + + allocator->image_data = data; + allocator->channels = desired_channels; + allocator->height = height; + allocator->width = width; + + ov_tensor_t* tensor = NULL; + ov_element_type_e input_type = U8; + int64_t dims[4] = {1, height, width, desired_channels}; + + ov_shape_t input_shape = {.rank = 0, .dims = NULL}; + ov_shape_create(4, dims, &input_shape); + + ov_tensor_create_from_host_ptr( + input_type, + input_shape, // shape: [1, H, W, C] + data, + &tensor + ); + + free(allocator); + + return tensor; +} + +const ov_tensor_t** load_images(const char* image_path, size_t* tensor_count) { + if (!image_path || !tensor_count) { + fprintf(stderr, "Error: image_path or tensor_count is NULL\n"); + return NULL; + } + + if (!file_exists(image_path)) { + fprintf(stderr, "Error: Image file '%s' does not exist\n", image_path); + return NULL; + } + + ov_tensor_t* tensor = load_image(image_path); + if (!tensor) { + return NULL; + } + + const ov_tensor_t** tensors = (const ov_tensor_t**)malloc(sizeof(ov_tensor_t*)); + if (!tensors) { + fprintf(stderr, "Error: Failed to allocate memory for single tensor\n"); + free_tensor(tensor); + return NULL; + } + + tensors[0] = tensor; + *tensor_count = 1; + + return tensors; +} + +void free_tensor(ov_tensor_t* tensor) { + if (tensor) { + ov_tensor_free(tensor); + } +} + +void free_tensor_array(ov_tensor_t** tensors, size_t count) { + if (tensors) { + for (size_t i = 0; i < count; i++) { + if (tensors[i]) { + ov_tensor_free(tensors[i]); + } + } + free(tensors); + } +} + +int file_exists(const char* path) { + if (!path) return 0; + + struct stat buffer; + return (stat(path, &buffer) == 0); +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/c/visual_language_chat/load_image.h b/src/resources/openvino.genai-2026.1.0.0/samples/c/visual_language_chat/load_image.h new file mode 100644 index 0000000..b8458fa --- /dev/null 
+++ b/src/resources/openvino.genai-2026.1.0.0/samples/c/visual_language_chat/load_image.h @@ -0,0 +1,27 @@ +#ifndef LOAD_IMAGE_H +#define LOAD_IMAGE_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct ov_tensor ov_tensor_t; + +ov_tensor_t* load_image(const char* image_path); + +const ov_tensor_t** load_images(const char* image_path, size_t* tensor_count); + +void free_tensor(ov_tensor_t* tensor); + +void free_tensor_array(ov_tensor_t** tensors, size_t count); + +int file_exists(const char* path); + +#ifdef __cplusplus +} +#endif + +#endif // LOAD_IMAGE_H diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/c/visual_language_chat/vlm_pipeline.c b/src/resources/openvino.genai-2026.1.0.0/samples/c/visual_language_chat/vlm_pipeline.c new file mode 100644 index 0000000..703c26f --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/c/visual_language_chat/vlm_pipeline.c @@ -0,0 +1,99 @@ +// Copyright (C) 2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +/** + * @file vlm_pipeline.c + * @brief Example demonstrating how to use OpenVINO GenAI VLM Pipeline C API + */ + +#include +#include +#include +#include "openvino/genai/c/vlm_pipeline.h" +#include "load_image.h" + +#define MAX_PROMPT_LENGTH 64 + +// Callback function for streaming results +ov_genai_streaming_status_e stream_callback(const char* str, void* args) { + printf("%s", str); + fflush(stdout); + return OV_GENAI_STREAMING_STATUS_RUNNING; +} + +int main(int argc, char* argv[]) { + if (argc < 4) { + printf("Usage: %s \n", argv[0]); + printf("Example: %s ./models CPU ./image.jpg \n", argv[0]); + return -1; + } + + const char* models_path = argv[1]; + const char* device = argv[2]; + const char* image_path = argv[3]; + + size_t tensor_count; + const ov_tensor_t** tensors = load_images(image_path, &tensor_count); + + // Create VLM pipeline + ov_genai_vlm_pipeline* pipeline = NULL; + ov_genai_vlm_pipeline_create(models_path, device, 0, &pipeline); + + 
// Set up streaming callback + streamer_callback callback = { + .callback_func = stream_callback, + .args = NULL + }; + + // Generate response + ov_genai_vlm_decoded_results* results = NULL; + ov_genai_generation_config* config = NULL; + ov_genai_generation_config_create(&config); + ov_genai_generation_config_set_max_new_tokens(config, 100); + char prompt[MAX_PROMPT_LENGTH]; + + ov_genai_vlm_pipeline_start_chat(pipeline); + printf("question:\n"); + + if (fgets(prompt, MAX_PROMPT_LENGTH, stdin)) { + prompt[strcspn(prompt, "\n")] = 0; + if (strlen(prompt) > 0) { + ov_genai_vlm_pipeline_generate(pipeline, prompt, tensors, tensor_count, config, &callback, &results); + printf("\n----------\nquestion:\n"); + } + } + + while (fgets(prompt, MAX_PROMPT_LENGTH, stdin)) { + prompt[strcspn(prompt, "\n")] = 0; + if (strlen(prompt) == 0) { + continue; + } + // New images and videos can be passed at each turn + ov_genai_vlm_pipeline_generate(pipeline, prompt, NULL, 0, config, &callback, &results); + printf("\n----------\nquestion:\n"); + } + ov_genai_vlm_pipeline_finish_chat(pipeline); + + + // Get performance metrics + ov_genai_perf_metrics* metrics = NULL; + ov_genai_vlm_decoded_results_get_perf_metrics(results, &metrics); + + // Get final result string + size_t output_size = 0; + ov_genai_vlm_decoded_results_get_string(results, NULL, &output_size); + if (output_size > 0) { + char* output = (char*)malloc(output_size); + if (output) { + ov_genai_vlm_decoded_results_get_string(results, output, &output_size); + free(output); + } + } + + // Cleanup + ov_genai_vlm_decoded_results_free(results); + ov_genai_generation_config_free(config); + ov_genai_vlm_pipeline_free(pipeline); + + return 0; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/c/whisper_speech_recognition/CMakeLists.txt b/src/resources/openvino.genai-2026.1.0.0/samples/c/whisper_speech_recognition/CMakeLists.txt new file mode 100644 index 0000000..97d1632 --- /dev/null +++ 
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Build script for the Whisper speech recognition C sample.

# Locate the OpenVINO GenAI package; it is required, so configuration stops
# if it cannot be found in either of the hinted locations.
find_package(OpenVINOGenAI REQUIRED
    PATHS
        "${CMAKE_BINARY_DIR}"  # Reuse the package from the build.
        ${OpenVINO_DIR}  # GenAI may be installed alongside OpenVINO.
    NO_CMAKE_FIND_ROOT_PATH
)

# Whisper Speech Recognition Sample
add_executable(whisper_speech_recognition_c whisper_speech_recognition.c whisper_utils.c)

# Specifies that the source files should be compiled as C source files
set_source_files_properties(whisper_speech_recognition.c whisper_utils.c PROPERTIES LANGUAGE C)
# Link against the C binding target of OpenVINO GenAI.
target_link_libraries(whisper_speech_recognition_c PRIVATE openvino::genai::c)

set_target_properties(whisper_speech_recognition_c PROPERTIES
    # Ensure out-of-box LC_RPATH on macOS with SIP
    INSTALL_RPATH_USE_LINK_PATH ON)

# Install the binary into samples_bin/ as part of the optional "samples_bin"
# component; EXCLUDE_FROM_ALL keeps it out of the default install.
install(TARGETS whisper_speech_recognition_c
        RUNTIME DESTINATION samples_bin/
        COMPONENT samples_bin
        EXCLUDE_FROM_ALL)
[Support and Contribution](#support-and-contribution) + +## Download OpenVINO GenAI + +Download and extract [OpenVINO GenAI Archive](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html?PACKAGE=OPENVINO_GENAI&VERSION=NIGHTLY&OP_SYSTEM=WINDOWS&DISTRIBUTION=ARCHIVE) Visit the OpenVINO Download Page. + +## Build Samples + +Set up the environment and build the samples Linux and macOS: + +```sh +source /setupvars.sh +.//samples/c/build_samples.sh +``` + +Windows Command Prompt: + +```sh +\setupvars.bat +\samples\c\build_samples_msvc.bat +``` + +Windows PowerShell: + +```sh +.\setupvars.ps1 +.\samples\c\build_samples.ps1 +``` + +## Download and Convert the Model + +The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. + +Install [../../export-requirements.txt](../../export-requirements.txt) if model conversion is required. + +```sh +pip install --upgrade-strategy eager -r ../../export-requirements.txt +optimum-cli export openvino --trust-remote-code --model openai/whisper-tiny whisper-tiny +``` + +If a converted model in OpenVINO IR format is available in the [OpenVINO optimized models](https://huggingface.co/OpenVINO) collection on Hugging Face, you can download it directly via huggingface-cli. + +For example: + +```sh +pip install huggingface-hub +huggingface-cli download OpenVINO/whisper-tiny-int8-ov --local-dir whisper-tiny-int8-ov +``` + +## Prepare audio file + +Prepare audio file in wav format with sampling rate 16k Hz. + +You can download example audio file: https://storage.openvinotoolkit.org/models_contrib/speech/2021.2/librispeech_s5/how_are_you_doing_today.wav + +## Sample Description + +This example showcases inference of speech recognition Whisper Models using the OpenVINO GenAI C API. The sample features `ov_genai_whisper_pipeline` and uses audio files in WAV format as input. 
### Run Command

```sh
./whisper_speech_recognition_c <MODEL_DIR> "<WAV_FILE_PATH>" [DEVICE]
```
diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/c/whisper_speech_recognition/whisper_speech_recognition.c b/src/resources/openvino.genai-2026.1.0.0/samples/c/whisper_speech_recognition/whisper_speech_recognition.c new file mode 100644 index 0000000..17a3aed --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/c/whisper_speech_recognition/whisper_speech_recognition.c @@ -0,0 +1,135 @@ +// Copyright (C) 2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include +#include +#include +#include +#include + +#include "openvino/genai/c/whisper_pipeline.h" +#include "whisper_utils.h" + +#define SAMPLE_RATE_TOLERANCE 0.5f + +int main(int argc, char* argv[]) { + if (argc != 3 && argc != 4) { + fprintf(stderr, "Usage: %s \"\" [DEVICE]\n", argv[0]); + return EXIT_FAILURE; + } + + const char* model_path = argv[1]; + const char* wav_file_path = argv[2]; + const char* device = (argc == 4) ? argv[3] : "CPU"; // Default to CPU if no device is provided + + int exit_code = EXIT_SUCCESS; + + ov_genai_whisper_pipeline* pipeline = NULL; + ov_genai_whisper_generation_config* config = NULL; + ov_genai_whisper_decoded_results* results = NULL; + float* audio_data = NULL; + size_t audio_length = 0; + char* output = NULL; + size_t output_size = 0; + + if (strlen(wav_file_path) == 0 || strstr(wav_file_path, "..") != NULL) { + fprintf(stderr, "Invalid file path provided\n"); + exit_code = EXIT_FAILURE; + goto err; + } + + float file_sample_rate; + if (load_wav_file(wav_file_path, &audio_data, &audio_length, &file_sample_rate) != 0) { + exit_code = EXIT_FAILURE; + goto err; + } + + if (fabsf(file_sample_rate - 16000.0f) > SAMPLE_RATE_TOLERANCE) { + size_t resampled_length; + float* resampled_audio = resample_audio(audio_data, audio_length, file_sample_rate, 16000.0f, &resampled_length); + if (!resampled_audio) { + fprintf(stderr, "Error: Failed to resample audio\n"); + exit_code = EXIT_FAILURE; + goto err; + } + free(audio_data); + audio_data = 
resampled_audio; + audio_length = resampled_length; + } + + ov_status_e status = ov_genai_whisper_pipeline_create(model_path, device, 0, &pipeline); + if (status != OK) { + if (status == UNKNOW_EXCEPTION) { + fprintf(stderr, "Error: Failed to create Whisper pipeline. Please check:\n"); + fprintf(stderr, " - Model path exists and contains valid Whisper model files\n"); + fprintf(stderr, " - Device '%s' is available and supported\n", device); + fprintf(stderr, " - Model is compatible with OpenVINO GenAI\n"); + } + CHECK_STATUS(status); + } + + CHECK_STATUS(ov_genai_whisper_generation_config_create(&config)); + CHECK_STATUS(ov_genai_whisper_generation_config_set_task(config, "transcribe")); + CHECK_STATUS(ov_genai_whisper_generation_config_set_return_timestamps(config, true)); + CHECK_STATUS(ov_genai_whisper_pipeline_generate(pipeline, audio_data, audio_length, config, &results)); + + CHECK_STATUS(ov_genai_whisper_decoded_results_get_string(results, NULL, &output_size)); + output = (char*)malloc(output_size); + if (!output) { + fprintf(stderr, "Error: Failed to allocate memory for output\n"); + exit_code = EXIT_FAILURE; + goto err; + } + + CHECK_STATUS(ov_genai_whisper_decoded_results_get_string(results, output, &output_size)); + printf("%s\n", output); + + bool has_chunks = false; + CHECK_STATUS(ov_genai_whisper_decoded_results_has_chunks(results, &has_chunks)); + + if (has_chunks) { + size_t chunks_count = 0; + CHECK_STATUS(ov_genai_whisper_decoded_results_get_chunks_count(results, &chunks_count)); + + for (size_t i = 0; i < chunks_count; i++) { + ov_genai_whisper_decoded_result_chunk* chunk = NULL; + CHECK_STATUS(ov_genai_whisper_decoded_results_get_chunk_at(results, i, &chunk)); + + float start_ts = 0.0f, end_ts = 0.0f; + CHECK_STATUS(ov_genai_whisper_decoded_result_chunk_get_start_ts(chunk, &start_ts)); + CHECK_STATUS(ov_genai_whisper_decoded_result_chunk_get_end_ts(chunk, &end_ts)); + + size_t chunk_text_size = 0; + 
CHECK_STATUS(ov_genai_whisper_decoded_result_chunk_get_text(chunk, NULL, &chunk_text_size)); + + char* chunk_text = (char*)malloc(chunk_text_size); + if (!chunk_text) { + fprintf(stderr, "Warning: Failed to allocate memory for chunk text %zu\n", i); + ov_genai_whisper_decoded_result_chunk_free(chunk); + exit_code = EXIT_FAILURE; + goto err; + } + + CHECK_STATUS(ov_genai_whisper_decoded_result_chunk_get_text(chunk, chunk_text, &chunk_text_size)); + + printf("timestamps: [%.2f, %.2f] text: %s\n", start_ts, end_ts, chunk_text); + + free(chunk_text); + ov_genai_whisper_decoded_result_chunk_free(chunk); + } + } + +err: + if (pipeline) + ov_genai_whisper_pipeline_free(pipeline); + if (config) + ov_genai_whisper_generation_config_free(config); + if (results) + ov_genai_whisper_decoded_results_free(results); + if (output) + free(output); + if (audio_data) + free(audio_data); + + return exit_code; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/c/whisper_speech_recognition/whisper_utils.c b/src/resources/openvino.genai-2026.1.0.0/samples/c/whisper_speech_recognition/whisper_utils.c new file mode 100644 index 0000000..f779625 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/c/whisper_speech_recognition/whisper_utils.c @@ -0,0 +1,141 @@ +// Copyright (C) 2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "whisper_utils.h" + +#include +#include + + +int load_wav_file(const char* filename, float** audio_data, size_t* audio_length, float* sample_rate) { + FILE* file = fopen(filename, "rb"); + if (!file) { + fprintf(stderr, "Error: Cannot open audio file '%s'. 
", filename); + if (errno == ENOENT) { + fprintf(stderr, "File does not exist.\n"); + } else if (errno == EACCES) { + fprintf(stderr, "Permission denied.\n"); + } else { + fprintf(stderr, "Error code: %d\n", errno); + } + return -1; + } + + WAVHeader header; + if (fread(&header, sizeof(WAVHeader), 1, file) != 1) { + fprintf(stderr, "Error: Cannot read WAV header\n"); + fclose(file); + return -1; + } + + // Basic WAV validation + if (strncmp(header.chunk_id, "RIFF", 4) != 0 || strncmp(header.format, "WAVE", 4) != 0) { + fprintf(stderr, "Error: Invalid WAV file format\n"); + fclose(file); + return -1; + } + + if (header.audio_format != 1) { // PCM + fprintf(stderr, "Error: Only PCM WAV files are supported\n"); + fclose(file); + return -1; + } + + if (header.num_channels != 1) { + fprintf(stderr, "Error: Only mono audio is supported (found %d channels)\n", header.num_channels); + fclose(file); + return -1; + } + + *sample_rate = (float)header.sample_rate; + size_t num_samples = header.subchunk2_size / (header.bits_per_sample / 8); + *audio_length = num_samples; + + // Allocate memory for audio data + *audio_data = (float*)malloc(num_samples * sizeof(float)); + if (!*audio_data) { + fprintf(stderr, "Error: Cannot allocate memory for audio data\n"); + fclose(file); + return -1; + } + + // Read and convert audio data to float + if (header.bits_per_sample == 16) { + int16_t* temp_buffer = (int16_t*)malloc(num_samples * sizeof(int16_t)); + if (!temp_buffer) { + fprintf(stderr, "Error: Cannot allocate temporary buffer\n"); + free(*audio_data); + fclose(file); + return -1; + } + + if (fread(temp_buffer, sizeof(int16_t), num_samples, file) != num_samples) { + fprintf(stderr, "Error: Cannot read audio data\n"); + free(temp_buffer); + free(*audio_data); + fclose(file); + return -1; + } + + // Convert 16-bit PCM to float [-1, 1] + for (size_t i = 0; i < num_samples; i++) { + (*audio_data)[i] = temp_buffer[i] / 32768.0f; + } + + free(temp_buffer); + } else if 
// Resamples `input` from `input_rate` to `target_rate` by linear
// interpolation.
//
// input         - source samples (may be NULL only when input_length is 0)
// input_length  - number of source samples
// input_rate    - source sample rate in Hz
// target_rate   - desired sample rate in Hz
// output_length - receives the number of samples in the returned buffer
//
// Returns a malloc()ed buffer the caller must free(), or NULL on invalid
// arguments or allocation failure (in which case *output_length is 0).
// When both rates are equal the input is simply copied.
//
// No low-pass filter is applied before downsampling, so content above the
// new Nyquist frequency aliases.
float* resample_audio(const float* input,
                      size_t input_length,
                      float input_rate,
                      float target_rate,
                      size_t* output_length) {
    if (!output_length) {
        return NULL;
    }
    *output_length = 0;

    if (!input && input_length > 0) {
        return NULL;
    }

    if (input_rate == target_rate) {
        // Allocate at least one element so an empty input still yields a
        // valid (non-NULL) buffer regardless of what malloc(0) returns.
        float* copy = (float*)malloc((input_length ? input_length : 1) * sizeof(float));
        if (copy) {
            if (input_length > 0) {
                memcpy(copy, input, input_length * sizeof(float));
            }
            *output_length = input_length;
        }
        return copy;
    }

    // Rejects zero, negative and NaN rates, which would otherwise produce a
    // division by zero or a nonsensical length.
    if (!(input_rate > 0.0f) || !(target_rate > 0.0f)) {
        return NULL;
    }

    // Positions are computed in double: a float cannot represent sample
    // indices above 2^24 exactly, which distorts long recordings.
    const double ratio = (double)input_rate / (double)target_rate;
    const double scaled_length = (double)input_length / ratio;
    if (scaled_length >= (double)(SIZE_MAX / sizeof(float))) {
        return NULL;  // Result would not fit in memory / size_t.
    }

    const size_t resampled_length = (size_t)scaled_length;
    float* output = (float*)malloc((resampled_length ? resampled_length : 1) * sizeof(float));
    if (!output) {
        return NULL;
    }

    for (size_t i = 0; i < resampled_length; i++) {
        const double src_pos = (double)i * ratio;
        const size_t idx0 = (size_t)src_pos;
        const size_t idx1 = idx0 + 1;

        if (idx1 >= input_length) {
            // Past the last interpolation interval: hold the final sample.
            output[i] = input[input_length - 1];
        } else {
            const double frac = src_pos - (double)idx0;
            output[i] = (float)((double)input[idx0] * (1.0 - frac) + (double)input[idx1] * frac);
        }
    }

    *output_length = resampled_length;
    return output;
}
// Error handling macro.
//
// Evaluates `return_status` exactly ONCE and, if it is not OK, prints a
// diagnostic, sets `exit_code` to EXIT_FAILURE and jumps to the `err` label.
// The calling function must therefore declare an `int exit_code` and an
// `err:` cleanup label.
//
// The single evaluation matters: callers pass API calls directly, e.g.
// CHECK_STATUS(ov_genai_..._create(&x)), and a failing call must not be
// executed again while the message is built. The do/while(0) wrapper makes
// the macro behave as one statement (safe inside if/else without braces).
#define CHECK_STATUS(return_status)                                                              \
    do {                                                                                         \
        const ov_status_e check_status_value_ = (return_status);                                 \
        if (check_status_value_ != OK) {                                                         \
            const char* error_msg = "Unknown error";                                             \
            switch (check_status_value_) {                                                       \
            case INVALID_C_PARAM:                                                                \
                error_msg = "Invalid parameter";                                                 \
                break;                                                                           \
            case NOT_FOUND:                                                                      \
                error_msg = "Not found";                                                         \
                break;                                                                           \
            case OUT_OF_BOUNDS:                                                                  \
                error_msg = "Out of bounds";                                                     \
                break;                                                                           \
            case UNEXPECTED:                                                                     \
                error_msg = "Unexpected error";                                                  \
                break;                                                                           \
            case NOT_IMPLEMENTED:                                                                \
                error_msg = "Not implemented";                                                   \
                break;                                                                           \
            case UNKNOW_EXCEPTION:                                                               \
                error_msg = "Unknown exception";                                                 \
                break;                                                                           \
            default:                                                                             \
                break;                                                                           \
            }                                                                                    \
            fprintf(stderr,                                                                      \
                    "[ERROR] %s (status code: %d) at line %d\n",                                 \
                    error_msg,                                                                   \
                    (int)check_status_value_,                                                    \
                    __LINE__);                                                                   \
            exit_code = EXIT_FAILURE;                                                            \
            goto err;                                                                            \
        }                                                                                        \
    } while (0)

// Default values
// Sample rate, in Hz, that the sample resamples every input to.
#define DEFAULT_SAMPLE_RATE 16000.0f

// WAV file header structure.
// Mirrors the canonical 44-byte RIFF/WAVE header: the RIFF descriptor, a
// 16-byte "fmt " chunk and the header of the following chunk. It is read
// straight from the file with fread(), so the layout must not be padded;
// every field here sits on its natural alignment.
typedef struct {
    char chunk_id[4];         // "RIFF"
    uint32_t chunk_size;      // size of the rest of the file
    char format[4];           // "WAVE"
    char subchunk1_id[4];     // "fmt "
    uint32_t subchunk1_size;  // size of the fmt chunk body
    uint16_t audio_format;    // format tag, 1 = PCM
    uint16_t num_channels;    // channel count
    uint32_t sample_rate;     // samples per second
    uint32_t byte_rate;       // bytes per second
    uint16_t block_align;     // bytes per sample frame
    uint16_t bits_per_sample; // bit depth of one sample
    char subchunk2_id[4];     // "data" in a canonical file
    uint32_t subchunk2_size;  // size of the chunk body in bytes
} WAVHeader;

// Function declarations

// Loads a mono PCM WAV file into a malloc()ed float buffer.
// Returns 0 on success, -1 on failure.
int load_wav_file(const char* filename, float** audio_data, size_t* audio_length, float* sample_rate);

// Linearly resamples `input` from input_rate to target_rate.
// Returns a malloc()ed buffer (length in *output_length) or NULL on failure.
float* resample_audio(const float* input,
                      size_t input_length,
                      float input_rate,
                      float target_rate,
                      size_t* output_length);
# Copyright (C) 2025-2026 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# ov_genai_link_opencv(<target> [<component>...])
#
# Links <target> against the requested OpenCV components (default: core,
# imgproc, videoio, imgcodecs). An installed OpenCV is used when find_package
# locates one; otherwise OpenCV 4.11.0 is fetched and built as shared
# libraries with most optional modules switched off.
function(ov_genai_link_opencv target_name)
    set(required_components ${ARGN})
    if(NOT required_components)
        set(required_components core imgproc videoio imgcodecs)
    endif()

    find_package(OpenCV QUIET COMPONENTS ${required_components})

    # Fast path: a system/installed OpenCV is available.
    if(OpenCV_FOUND)
        if(OpenCV_INCLUDE_DIRS)
            target_include_directories(${target_name} PRIVATE ${OpenCV_INCLUDE_DIRS})
        endif()
        target_link_libraries(${target_name} PRIVATE ${OpenCV_LIBS})
        return()
    endif()

    # Fallback: build OpenCV from source.
    include(FetchContent)

    if(POLICY CMP0135)
        cmake_policy(SET CMP0135 NEW)
    endif()

    # Plain (non-cache) variables, visible to the OpenCV sub-build only.
    set(BUILD_SHARED_LIBS ON)
    set(WITH_FFMPEG ON)

    # Everything the samples do not need is forced OFF in the cache.
    foreach(disabled_option IN ITEMS
            WITH_PROTOBUF
            WITH_GSTREAMER
            WITH_OPENCLAMDBLAS
            WITH_OPENCLAMDFFT
            WITH_MATLAB
            HIGHGUI_ENABLE_PLUGINS
            BUILD_JAVA
            OPENCV_GAPI_GSTREAMER
            INSTALL_TESTS
            INSTALL_C_EXAMPLES
            INSTALL_PYTHON_EXAMPLES
            BUILD_TESTS
            BUILD_PERF_TESTS
            BUILD_ANDROID_EXAMPLES
            BUILD_ITT
            BUILD_opencv_java_bindings_generator
            BUILD_opencv_apps
            BUILD_opencv_calib3d
            BUILD_opencv_dnn
            BUILD_opencv_features2d
            BUILD_opencv_flann
            BUILD_opencv_gapi
            BUILD_opencv_highgui
            BUILD_opencv_ml
            BUILD_opencv_objdetect
            BUILD_opencv_photo
            BUILD_opencv_python_tests
            BUILD_opencv_stitching
            BUILD_opencv_ts)
        set(${disabled_option} OFF CACHE BOOL "" FORCE)
    endforeach()

    FetchContent_Declare(opencv
        GIT_REPOSITORY https://github.com/opencv/opencv.git
        GIT_TAG 4.11.0
        GIT_SHALLOW TRUE
    )
    FetchContent_MakeAvailable(opencv)

    # The generated config header directory comes first, then one include
    # directory and one link target per requested module.
    target_include_directories(${target_name} PRIVATE ${OPENCV_CONFIG_FILE_INCLUDE_DIR})
    set(link_targets)
    foreach(module IN LISTS required_components)
        list(APPEND link_targets opencv_${module})
        target_include_directories(${target_name} PRIVATE
            ${OPENCV_MODULE_opencv_${module}_LOCATION}/include)
    endforeach()

    # Installed binaries look for the freshly built OpenCV libraries in the
    # sibling lib/ directory.
    set(relative_rpath "")
    if(LINUX)
        set(relative_rpath "$ORIGIN/../lib")
    elseif(APPLE)
        set(relative_rpath "@loader_path/../lib")
    endif()

    if(relative_rpath)
        set_target_properties(${target_name} ${link_targets} PROPERTIES
            INSTALL_RPATH "${relative_rpath}"
            INSTALL_RPATH_USE_LINK_PATH ON)
    endif()

    target_link_libraries(${target_name} PRIVATE ${link_targets})
endfunction()
# Copyright (C) 2023-2026 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

find_package(OpenVINOGenAI REQUIRED
    PATHS
        "${CMAKE_BINARY_DIR}"  # Reuse the package from the build.
        ${OpenVINO_DIR}  # GenAI may be installed alongside OpenVINO.
    NO_CMAKE_FIND_ROOT_PATH
)

# stb_image.h is downloaded into the build directory, pinned to a commit and
# verified by hash.
file(DOWNLOAD https://raw.githubusercontent.com/nothings/stb/f75e8d1cad7d90d72ef7a4661f1b994ef78b4e31/stb_image.h ${CMAKE_BINARY_DIR}/stb_image.h
     EXPECTED_HASH MD5=27932e6fb3a2f26aee2fc33f2cb4e696)

include(FetchContent)

if(POLICY CMP0135)
    cmake_policy(SET CMP0135 NEW)
endif()

# Third-party helpers: progress bars for all samples, option parsing for the
# benchmark.
FetchContent_Declare(indicators
    URL https://github.com/p-ranav/indicators/archive/refs/tags/v2.3.tar.gz
    URL_HASH SHA256=70da7a693ff7a6a283850ab6d62acf628eea17d386488af8918576d0760aef7b)
FetchContent_Declare(cxxopts
    URL https://github.com/jarro2783/cxxopts/archive/refs/tags/v3.1.1.tar.gz
    URL_HASH SHA256=523175f792eb0ff04f9e653c90746c12655f10cb70f1d5e6d6d9491420298a08)
FetchContent_MakeAvailable(indicators cxxopts)

# ov_genai_add_image_generation_sample(<target>
#                                      SOURCES <file>...
#                                      [EXTRA_LIBS <lib>...])
#
# Declares one sample executable with the settings every sample in this
# directory shares: include paths for the local headers and for the
# downloaded stb_image.h, the GenAI and indicators libraries, RPATH handling
# and the optional "samples_bin" install component.
function(ov_genai_add_image_generation_sample target_name)
    cmake_parse_arguments(SAMPLE "" "" "SOURCES;EXTRA_LIBS" ${ARGN})

    add_executable(${target_name} ${SAMPLE_SOURCES})

    target_include_directories(${target_name} PRIVATE
        "${CMAKE_CURRENT_SOURCE_DIR}"
        "${CMAKE_BINARY_DIR}")
    target_link_libraries(${target_name} PRIVATE
        openvino::genai
        indicators::indicators
        ${SAMPLE_EXTRA_LIBS})

    set_target_properties(${target_name} PROPERTIES
        # Ensure out of box LC_RPATH on macOS with SIP
        INSTALL_RPATH_USE_LINK_PATH ON)

    install(TARGETS ${target_name}
            RUNTIME DESTINATION samples_bin/
            COMPONENT samples_bin
            EXCLUDE_FROM_ALL)
endfunction()

# Text to image samples
ov_genai_add_image_generation_sample(text2image
    SOURCES text2image.cpp imwrite.cpp)
ov_genai_add_image_generation_sample(text2image_concurrency
    SOURCES text2image_concurrency.cpp imwrite.cpp)
ov_genai_add_image_generation_sample(lora_text2image
    SOURCES lora_text2image.cpp imwrite.cpp)
ov_genai_add_image_generation_sample(taylorseer_text2image
    SOURCES taylorseer_text2image.cpp imwrite.cpp)
ov_genai_add_image_generation_sample(heterogeneous_stable_diffusion
    SOURCES heterogeneous_stable_diffusion.cpp imwrite.cpp)

# Image to image, export/import and inpainting samples (these also load
# input images)
ov_genai_add_image_generation_sample(image2image
    SOURCES image2image.cpp load_image.cpp imwrite.cpp)
ov_genai_add_image_generation_sample(image2image_concurrency
    SOURCES image2image_concurrency.cpp load_image.cpp imwrite.cpp)
ov_genai_add_image_generation_sample(stable_diffusion_export_import
    SOURCES stable_diffusion_export_import.cpp load_image.cpp imwrite.cpp)
ov_genai_add_image_generation_sample(inpainting
    SOURCES inpainting.cpp load_image.cpp imwrite.cpp)

# benchmark_image_gen
ov_genai_add_image_generation_sample(benchmark_image_gen
    SOURCES benchmark_image_gen.cpp load_image.cpp imwrite.cpp
    EXTRA_LIBS cxxopts::cxxopts)
The sample features `ov::genai::Text2ImagePipeline` and uses a text prompt as input source. + +There are several sample files: + - [`text2image.cpp`](./text2image.cpp) demonstrates basic usage of the text to image pipeline + - [`text2image_concurrency.cpp`](./text2image_concurrency.cpp) demonstrates concurrent usage of the text to image pipeline to create multiple images with different prompts + - [`lora_text2image.cpp`](./lora_text2image.cpp) shows how to apply LoRA adapters to the pipeline + - [`taylorseer_text2image.cpp`](./taylorseer_text2image.cpp) demonstrates text to image generation with TaylorSeer caching optimization for improved performance. Flux and StableDiffusion3 models are supported. + - [`heterogeneous_stable_diffusion.cpp`](./heterogeneous_stable_diffusion.cpp) shows how to assemble a heterogeneous txt2image pipeline from individual subcomponents (scheduler, text encoder, unet, vae decoder) + - [`image2image.cpp`](./image2image.cpp) demonstrates basic usage of the image to image pipeline + - [`image2image_concurrency.cpp.cpp`](./image2image_concurrency.cpp) demonstrates concurrent usage of the image to image pipeline to create multiple images with different prompts + - [`inpainting.cpp`](./inpainting.cpp) demonstrates basic usage of the inpainting pipeline + - [`benchmark_image_gen.cpp`](./benchmark_image_gen.cpp) demonstrates how to benchmark the text to image / image to image / inpainting pipeline + - [`stable_diffusion_export_import.cpp`](./stable_diffusion_export_import.cpp) demonstrates how to export and import compiled models from/to the text to image pipeline. Only the Stable Diffusion XL model is supported. 
+ +Users can change the sample code and play with the following generation parameters: + +- Change width or height of generated image +- Generate multiple images per prompt +- Adjust the number of inference steps +- Play with [guidance scale](https://huggingface.co/spaces/stabilityai/stable-diffusion/discussions/9) (read [more details](https://arxiv.org/abs/2207.12598)) +- (SD 1.x, 2.x; SD3, SDXL) Add negative prompt when guidance scale > 1 +- (SDXL, SD3, FLUX) Specify other positive prompts like `prompt_2` +- Apply multiple different LoRA adapters and mix them with different blending coefficients +- (Image to image and inpainting) Play with `strength` parameter to control how initial image is noised and reduce number of inference steps + + +> [!NOTE] +> Image generated with HuggingFace / Optimum Intel is not the same as the one generated by this C++ sample: C++ random generation with MT19937 results differ from `numpy.random.randn()` and `diffusers.utils.randn_tensor` (uses `torch.Generator` inside). So, it's expected that the Diffusers and C++ versions produce different images, because latent images are initialized differently. + +## Download and convert the models and tokenizers + +The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. + +It's not required to install [../../export-requirements.txt](../../export-requirements.txt) for deployment if the model has already been exported. + +```sh +pip install --upgrade-strategy eager -r ../../requirements.txt +optimum-cli export openvino --model dreamlike-art/dreamlike-anime-1.0 --task stable-diffusion --weight-format fp16 dreamlike_anime_1_0_ov/FP16 +``` + +## Run text to image + +Follow [Get Started with Samples](https://docs.openvino.ai/2026/get-started/learn-openvino/openvino-samples/get-started-demos.html) to run the sample. 
+ +`stable_diffusion ./dreamlike_anime_1_0_ov/FP16 'cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting'` + +### Examples + +Prompt: `cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting` + + ![](./512x512.bmp) + +### Run with threaded callback + +You can also implement a callback function in `main.cpp` that runs in a separate thread. This allows for parallel processing, enabling you to interrupt generation early if intermediate results are satisfactory or to add logs. + +Please find the template of the callback usage below. + +```cpp +ov::genai::Text2ImagePipeline pipe(models_path, device); + +auto callback = [&](size_t step, size_t num_steps, ov::Tensor& latent) -> bool { + std::cout << "Image generation step: " << step + 1 << " / " << num_steps << std::endl; + ov::Tensor img = pipe.decode(latent); // get intermediate image tensor + if (your_condition) // return true if you want to interrupt image generation + return true; + return false; +}; + +ov::Tensor image = pipe.generate(prompt, + /* other generation properties */ + ov::genai::callback(callback) +); +``` + +## Run with optional LoRA adapters + +LoRA adapters can be connected to the pipeline and modify generated images to have certain style, details or quality. Adapters are supported in Safetensors format and can be downloaded from public sources like [Civitai](https://civitai.com) or [HuggingFace](https://huggingface.co/models) or trained by the user. Adapters compatible with a base model should be used only. A weighted blend of multiple adapters can be applied by specifying multiple adapter files with corresponding alpha parameters in command line. Check `lora_text2image.cpp` source code to learn how to enable adapters and specify them in each `generate` call. 
+ +> [!NOTE] +> ### LoRA `alpha` interpretation in OpenVINO GenAI +> The OpenVINO GenAI implementation merges the traditional LoRA parameters into a **single effective scaling factor** used during inference. +> +> In this context, the `alpha` value already includes: +> - normalization by LoRA rank (`alpha / rank`) +> - any user-defined scaling factor (`weight`) +> +> This means `alpha` in GenAI should be treated as the **final scaling weight** applied to the LoRA update — not the raw `alpha` parameter from training. + +### Example: Running with a LoRA Adapter + +Here is an example how to run the sample with a single adapter. First download adapter file from https://civitai.com/models/67927/soulcard page manually and save it as `soulcard.safetensors`. Or download it from command line: + +`wget -O soulcard.safetensors https://civitai.com/api/download/models/72591` + +Then run `lora_text2image` executable: + +`./lora_text2image dreamlike_anime_1_0_ov/FP16 'curly-haired unicorn in the forest, anime, line' soulcard.safetensors 0.7` + +The sample generates two images with and without adapters applied using the same prompt: + - `lora.bmp` with adapters applied + - `baseline.bmp` without adapters applied + +Check the difference: + +With adapter | Without adapter +:---:|:---: +![](./lora.bmp) | ![](./baseline.bmp) + +## Run text to image with TaylorSeer caching optimization + +The `taylorseer_text2image` sample demonstrates how to use TaylorSeer Lite caching to accelerate text to image generation. TaylorSeer is a caching optimization technique that uses Taylor series approximation to predict intermediate outputs during diffusion inference, reducing the number of computationally expensive transformer forward passes. 
+ +Run the sample with custom parameters: + +```bash +./taylorseer_text2image ./flux.1-dev/FP16 "a beautiful sunset over mountains" +``` + +The sample generates two images with and without TaylorSeer config applied using the same prompt: + - `taylorseer.bmp` with TaylorSeer config applied + - `taylorseer_baseline.bmp` without TaylorSeer config applied + +Check the difference: + +With TaylorSeer | Without TaylorSeer +:---:|:---: +![](./taylorseer.bmp) | ![](./taylorseer_baseline.bmp) + +## Run text to image with multiple devices + +The `heterogeneous_stable_diffusion` sample demonstrates how a Text2ImagePipeline object can be created from individual subcomponents - scheduler, text encoder, unet, & vae decoder. This approach gives fine-grained control over the devices used to execute each stage of the stable diffusion pipeline. + +The usage of this sample is: + +`./heterogeneous_stable_diffusion <MODEL_DIR> '<PROMPT>' [ <TXT_ENCODE_DEVICE> <UNET_DEVICE> <VAE_DEVICE> ]` + +For example: + +`./heterogeneous_stable_diffusion ./dreamlike_anime_1_0_ov/FP16 'cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting' CPU NPU GPU` + +The sample will create a stable diffusion pipeline such that the text encoder is executed on the CPU, UNet on the NPU, and VAE decoder on the GPU. + +## Run image to image pipeline + +The `image2image.cpp` sample demonstrates basic image to image generation pipeline. The difference with text to image pipeline is that final image is denoised from initial image converted to latent space and noised with image noise according to `strength` parameter. `strength` should be in range of `[0., 1.]` where `1.` means initial image is fully noised and it is equivalent to text to image generation. +Also, `strength` parameter linearly affects the number of inference steps, because lower `strength` values mean the initial latent already has some structure and it requires fewer steps to denoise it. 
+ +To run the sample, download initial image first: + +`wget https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/cat.png` + +And then run the sample: + +`./image2image ./dreamlike_anime_1_0_ov/FP16 'cat wizard, gandalf, lord of the rings, detailed, fantasy, cute, adorable, Pixar, Disney, 8k' cat.png` + +The resulting image is: + + ![](./imageimage.bmp) + +Note, that LoRA, heterogeneous execution and other features of `Text2ImagePipeline` are applicable for `Image2ImagePipeline`. + +## Run inpainting pipeline + +The `inpainting.cpp` sample demonstrates usage of inpainting pipeline, which can inpaint initial image by a given mask. Inpainting pipeline can work on typical text to image models as well as on specialized models which are often named `space/model-inpainting`, e.g. `stabilityai/stable-diffusion-2-inpainting`. + +Such models can be converted in the same way as regular ones via `optimum-cli`: + +`optimum-cli export openvino --model stabilityai/stable-diffusion-2-inpainting --weight-format fp16 stable-diffusion-2-inpainting` + +Let's also download input data: + +`wget -O image.png https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png` + +`wget -O mask_image.png https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png` + +And run the sample: + +`./inpainting ./stable-diffusion-2-inpainting 'Face of a yellow cat, high resolution, sitting on a park bench' image.png mask_image.png` + +The resulting image is: + + ![](./inpainting.bmp) + +Note, that LoRA, heterogeneous execution and other features of `Text2ImagePipeline` are applicable for `InpaintingPipeline`. + +## benchmarking sample for image generation pipelines + +This `benchmark_image_gen.cpp` sample script demonstrates how to benchmark the text to image pipeline, image to image pipeline and inpainting pipeline. 
The script includes functionality for warm-up iterations, generating image, and calculating various performance metrics. + +The usage of this sample is: +```bash +./benchmark_image_gen [OPTIONS] +``` +Options: +- `-t, --pipeline_type` (default: `"text2image"`): Pipeline type(text2image, image2image, inpainting). +- `-m, --model`: Path to the model and tokenizers base directory. +- `-p, --prompt` (default: `"The Sky is blue because"`): The prompt to generate text. +- `--nw, --num_warmup` (default: `1`): Number of warmup iterations. +- `-n, --num_iter` (default: `3`): Number of iterations. +- `-d, --device` (default: `"CPU"`): Device(s) to run the pipeline with. +- `-w, --width` (default: `512`): The width of the output image. +- `--ht, --height` (default: `512`): The height of the output image. +- `--is, --num_inference_steps` (default: `20`): The number of inference steps. +- `--ni, --num_images_per_prompt` (default: `1`): The number of images to generate per generate() call. +- `-o, --output_dir` (default: `"."`): Path to save output image. +- `-i, --image`: Path to input image. +- `-s, --strength`: Indicates extent to transform the reference `image`. Must be between 0 and 1. +- `--mi, --mask_image`: Path to mask image. +- `-r, --reshape`: Reshape pipeline before compilation. This can improve image generation performance. 
+ +For example: + +`./benchmark_image_gen -t text2image -m dreamlike_anime_1_0_ov/FP16 -n 10 -d CPU` + +Performance output: + +``` +[warmup-0] generate time: 85008.00 ms, total infer time:84999.88 ms +[warmup-0] text encoder infer time: 98.00 ms +[warmup-0] unet iteration num:21, first iteration time:4317.94 ms, other iteration avg time:3800.91 ms +[warmup-0] unet inference num:21, first inference time:4317.71 ms, other inference avg time:3800.61 ms +[warmup-0] vae encoder infer time:0.00 ms, vae decoder infer time:4572.00 ms + +[iter-0] generate time: 84349.00 ms, total infer time:84340.97 ms +[iter-0] text encoder infer time: 76.00 ms +[iter-0] unet iteration num:21, first iteration time:3805.63 ms, other iteration avg time:3799.68 ms +[iter-0] unet inference num:21, first inference time:3805.42 ms, other inference avg time:3799.38 ms +[iter-0] vae encoder infer time:0.00 ms, vae decoder infer time:4472.00 ms + +[iter-1] generate time: 84391.00 ms, total infer time:84384.36 ms +[iter-1] text encoder infer time: 78.00 ms +[iter-1] unet iteration num:21, first iteration time:3801.15 ms, other iteration avg time:3802.17 ms +[iter-1] unet inference num:21, first inference time:3800.93 ms, other inference avg time:3801.87 ms +[iter-1] vae encoder infer time:0.00 ms, vae decoder infer time:4468.00 ms + +[iter-2] generate time: 84377.00 ms, total infer time:84366.51 ms +[iter-2] text encoder infer time: 76.00 ms +[iter-2] unet iteration num:21, first iteration time:3783.31 ms, other iteration avg time:3802.25 ms +[iter-2] unet inference num:21, first inference time:3783.09 ms, other inference avg time:3801.82 ms +[iter-2] vae encoder infer time:0.00 ms, vae decoder infer time:4471.00 ms + +Test finish, load time: 9356.00 ms +Warmup number:1, first generate warmup time:85008.00 ms, infer warmup time:84999.88 ms +Generate iteration number:3, for one iteration, generate avg time: 84372.34 ms, infer avg time:84363.95 ms, all text encoders infer avg time:76.67 ms, vae 
encoder infer avg time:0.00 ms, vae decoder infer avg time:4470.33 ms +``` + +### Run multiple generations with different prompt in parallel + +It is highly recommended to use `ov::genai::num_images_per_prompt(X)` parameter to generate multiple images in parallel. However, when the generation options differ (prompt, height, width), it is recommended to clone the pipeline. +It is possible to re-use models compiled into device for concurrent generation with different prompts in separate threads. + +Here in this example we load and compile the entire pipeline once, and then use `clone()` to create separate generation requests to be reused in separate threads: + + +```cpp +std::vector<ov::genai::Text2ImagePipeline> pipelines; + +// Prepare initial pipeline and compiled models into device +pipelines.emplace_back(models_path, device); +// Clone pipeline for concurrent usage +for (size_t i = 1; i < 4; i++) + pipelines.emplace_back(pipelines.begin()->clone()); + +std::vector<std::thread> threads; + +for (size_t i = 0; i < 4; i++) { + auto& pipe = pipelines.at(i); + threads.emplace_back([&pipe, i] { + std::string prompt = "A card with number " + std::to_string(i); + + ov::Tensor image = pipe.generate(prompt, + ov::AnyMap{ + ov::genai::width(512), + ov::genai::height(512), + ov::genai::num_inference_steps(25)}); + + // save image + }); +} + +for (auto& thread : threads) { + thread.join(); +} +``` + +### Image Generation Pipeline reuse + +To extend the pipeline's capabilities, we provide an interface that allows a specific image generation pipeline to reuse models from another pipeline that has already loaded them. The table below shows the support scope. 
+ +| Image Generation pipeline | Model can be reused from | +|:---|:---| +| `Text2ImagePipeline` | `Image2ImagePipeline` or `InpaintingPipeline` | +| `Image2ImagePipeline` | `InpaintingPipeline` | +| `InpaintingPipeline` | `Image2ImagePipeline` | + +This example shows how `Text2ImagePipeline` reuses models from `Image2ImagePipeline` and executes a different pipeline depending on whether an initial image is provided. + +```cpp +ov::genai::Image2ImagePipeline img2img_pipe(models_path, device); +ov::genai::Text2ImagePipeline text2img_pipe(img2img_pipe); + +ov::Tensor generated_image; + +if (image_path.empty()) { + generated_image = text2img_pipe.generate(prompt, + ov::genai::strength(1.f), + ov::genai::callback(progress_bar)); +} else { + ov::Tensor image = utils::load_image(image_path); + generated_image = img2img_pipe.generate(prompt, image, + ov::genai::strength(0.8f), + ov::genai::callback(progress_bar)); +} +``` + +## Export and import compiled models + +`ov::genai::Text2ImagePipeline` supports exporting and importing compiled models to and from a specified directory. This API can significantly reduce model load time, especially for large models like UNet. Only the Stable Diffusion XL model is supported. 
+ +```cpp +// export models +ov::genai::Text2ImagePipeline pipeline(models_path, device); +pipeline.export_model(models_path / "blobs"); + +// import models +ov::genai::Text2ImagePipeline imported_pipeline(models_path, device, ov::genai::blob_path(models_path / "blobs")); +``` diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/baseline.bmp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/baseline.bmp new file mode 100644 index 0000000..c8a6078 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/baseline.bmp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb8491607e8c2cce4394ac0b796350745dde04dba7d754c3fad24d86e1c4d2e1 +size 1376310 diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/benchmark_image_gen.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/benchmark_image_gen.cpp new file mode 100644 index 0000000..176e883 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/benchmark_image_gen.cpp @@ -0,0 +1,328 @@ +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "openvino/genai/image_generation/text2image_pipeline.hpp" +#include "openvino/genai/image_generation/image2image_pipeline.hpp" +#include "openvino/genai/image_generation/inpainting_pipeline.hpp" +#include +#include +#include "imwrite.hpp" +#include "load_image.hpp" +#include "progress_bar.hpp" + +inline float get_total_text_encoder_infer_duration(ov::genai::ImageGenerationPerfMetrics& metrics) { + float text_encoder_duration = 0.0f; + for(auto text_encoder : metrics.get_text_encoder_infer_duration()) { + text_encoder_duration += text_encoder.second; + } + return text_encoder_duration; +} + +inline void print_one_generate(ov::genai::ImageGenerationPerfMetrics& metrics, std::string prefix, int idx) { + std::string prefix_idx = "[" + prefix + "-" + 
std::to_string(idx) + "]"; + std::cout << "\n"; + std::cout << prefix_idx << " generate time: " << metrics.get_generate_duration() + << " ms, total infer time:" << metrics.get_inference_duration() + << " ms" << std::endl; + std::cout << prefix_idx << " text encoder infer time: " << get_total_text_encoder_infer_duration(metrics) << " ms" + << std::endl; + float first_iter_time, other_iter_avg_time; + float first_infer_time, other_infer_avg_time; + metrics.get_first_and_other_iter_duration(first_iter_time, other_iter_avg_time); + if (!metrics.raw_metrics.transformer_inference_durations.empty()) { + metrics.get_first_and_other_trans_infer_duration(first_infer_time, other_infer_avg_time); + std::cout << prefix_idx << " transformer iteration num:" << metrics.raw_metrics.iteration_durations.size() + << ", first iteration time:" << first_iter_time + << " ms, other iteration avg time:" << other_iter_avg_time << " ms" << std::endl; + std::cout << prefix_idx + << " transformer inference num:" << metrics.raw_metrics.transformer_inference_durations.size() + << ", first inference time:" << first_infer_time + << " ms, other inference avg time:" << other_infer_avg_time << " ms" << std::endl; + } else { + metrics.get_first_and_other_unet_infer_duration(first_infer_time, other_infer_avg_time); + std::cout << prefix_idx << " unet iteration num:" << metrics.raw_metrics.iteration_durations.size() + << ", first iteration time:" << first_iter_time + << " ms, other iteration avg time:" << other_iter_avg_time << " ms" << std::endl; + std::cout << prefix_idx << " unet inference num:" << metrics.raw_metrics.unet_inference_durations.size() + << ", first inference time:" << first_infer_time + << " ms, other inference avg time:" << other_infer_avg_time << " ms" << std::endl; + } + std::cout << prefix_idx << " vae encoder infer time:" << metrics.get_vae_encoder_infer_duration() + << " ms, vae decoder infer time:" << metrics.get_vae_decoder_infer_duration() << " ms" << std::endl; +} + +inline 
float calculate_average(std::vector& durations) { + float duration_mean = std::accumulate(durations.begin(), + durations.end(), + 0.0f, + [](const float& acc, const float& duration) -> float { + return acc + duration; + }); + if (!durations.empty()) { + duration_mean /= durations.size(); + } + return duration_mean; +} + +inline void print_statistic(std::vector& warmup_metrics, std::vector& iter_metrics) { + std::vector generate_durations; + std::vector total_inference_durations; + std::vector text_encoder_durations; + std::vector vae_encoder_durations; + std::vector vae_decoder_durations; + float load_time = 0.0f; + int warmup_num = warmup_metrics.size(); + int iter_num = iter_metrics.size(); + + float generate_warmup = 0.0f; + float inference_warmup = 0.0f; + if (!warmup_metrics.empty()) { + generate_warmup = warmup_metrics[0].get_generate_duration(); + inference_warmup = warmup_metrics[0].get_inference_duration(); + } + + for (auto& metrics : iter_metrics) { + generate_durations.emplace_back(metrics.get_generate_duration()); + total_inference_durations.emplace_back(metrics.get_inference_duration()); + vae_decoder_durations.emplace_back(metrics.get_vae_decoder_infer_duration()); + vae_encoder_durations.emplace_back(metrics.get_vae_encoder_infer_duration()); + text_encoder_durations.emplace_back(get_total_text_encoder_infer_duration(metrics)); + load_time = metrics.get_load_time(); + } + + float generate_mean = calculate_average(generate_durations); + float inference_mean = calculate_average(total_inference_durations); + float vae_decoder_mean = calculate_average(vae_decoder_durations); + float vae_encoder_mean = calculate_average(vae_encoder_durations); + float text_encoder_mean = calculate_average(text_encoder_durations); + + std::cout << "\nTest finish, load time: " << load_time << " ms" << std::endl; + std::cout << "Warmup number:" << warmup_num << ", first generate warmup time:" << generate_warmup + << " ms, infer warmup time:" << inference_warmup << " ms" << 
std::endl; + std::cout << "Generate iteration number:" << iter_num << ", for one iteration, generate avg time: " << generate_mean + << " ms, infer avg time:" << inference_mean + << " ms, all text encoders infer avg time:" << text_encoder_mean + << " ms, vae encoder infer avg time:" << vae_encoder_mean + << " ms, vae decoder infer avg time:" << vae_decoder_mean << " ms" << std::endl; +} + +inline std::vector device_string_to_triplet(const std::string& device_input) { + std::vector devices; + std::istringstream stream(device_input); + std::string device; + + // Split the device input string by commas + while (std::getline(stream, device, ',')) { + devices.push_back(device); + } + + // Trim whitespace from each device name + for (auto& dev : devices) { + dev.erase(0, dev.find_first_not_of(" \t")); + dev.erase(dev.find_last_not_of(" \t") + 1); + } + + // Ensure exactly three devices + if (devices.size() == 1) { + return {devices[0], devices[0], devices[0]}; + } else if (devices.size() == 3) { + return devices; + } else { + throw std::invalid_argument("The device specified by -d/--device must be a single device (e.g. -d \"GPU\"), " + "or exactly 3 comma separated device names (e.g. 
-d \"CPU,NPU,GPU\")"); + } +} + +void text2image(cxxopts::ParseResult& result) { + std::string prompt = result["prompt"].as(); + const std::string models_path = result["model"].as(); + auto devices = device_string_to_triplet(result["device"].as()); + size_t num_warmup = result["num_warmup"].as(); + size_t num_iter = result["num_iter"].as(); + const std::string output_dir = result["output_dir"].as(); + + ov::genai::Text2ImagePipeline pipe(models_path); + if (result["reshape"].as()) { + pipe.reshape(result["num_images_per_prompt"].as(), + result["height"].as(), + result["width"].as(), + pipe.get_generation_config().guidance_scale); + } + pipe.compile(devices[0], devices[1], devices[2]); + + ov::genai::ImageGenerationConfig config = pipe.get_generation_config(); + config.width = result["width"].as(); + config.height = result["height"].as(); + config.num_inference_steps = result["num_inference_steps"].as(); + config.num_images_per_prompt = result["num_images_per_prompt"].as(); + pipe.set_generation_config(config); + + std::cout << std::fixed << std::setprecision(2); + std::vector warmup_metrics; + for (size_t i = 0; i < num_warmup; i++) { + pipe.generate(prompt); + ov::genai::ImageGenerationPerfMetrics metrics = pipe.get_performance_metrics(); + warmup_metrics.emplace_back(metrics); + print_one_generate(metrics, "warmup", i); + } + + std::vector iter_metrics; + for (size_t i = 0; i < num_iter; i++) { + ov::Tensor image = pipe.generate(prompt); + ov::genai::ImageGenerationPerfMetrics metrics = pipe.get_performance_metrics(); + iter_metrics.emplace_back(metrics); + std::string image_name = output_dir + "/image_" + std::to_string(i) + ".bmp"; + imwrite(image_name, image, true); + print_one_generate(metrics, "iter", i); + } + + print_statistic(warmup_metrics, iter_metrics); +} + +void image2image(cxxopts::ParseResult& result) { + std::string prompt = result["prompt"].as(); + const std::string models_path = result["model"].as(); + std::string image_path = 
result["image"].as(); + auto devices = device_string_to_triplet(result["device"].as()); + size_t num_warmup = result["num_warmup"].as(); + size_t num_iter = result["num_iter"].as(); + const std::string output_dir = result["output_dir"].as(); + float strength = result["strength"].as(); + + ov::Tensor image_input = utils::load_image(image_path); + + ov::genai::Image2ImagePipeline pipe(models_path); + if (result["reshape"].as()) { + auto height = image_input.get_shape()[1]; + auto width = image_input.get_shape()[2]; + pipe.reshape(1, height, width, pipe.get_generation_config().guidance_scale); + } + pipe.compile(devices[0], devices[1], devices[2]); + + std::vector warmup_metrics; + std::cout << std::fixed << std::setprecision(2); + for (size_t i = 0; i < num_warmup; i++) { + pipe.generate(prompt, image_input, ov::genai::strength(strength), ov::genai::callback(progress_bar)); + ov::genai::ImageGenerationPerfMetrics metrics = pipe.get_performance_metrics(); + warmup_metrics.emplace_back(metrics); + print_one_generate(metrics, "warmup", i); + } + + std::vector iter_metrics; + for (size_t i = 0; i < num_iter; i++) { + ov::Tensor image = pipe.generate(prompt, image_input, ov::genai::strength(strength), ov::genai::callback(progress_bar)); + ov::genai::ImageGenerationPerfMetrics metrics = pipe.get_performance_metrics(); + iter_metrics.emplace_back(metrics); + std::string image_name = output_dir + "/image_" + std::to_string(i) + ".bmp"; + imwrite(image_name, image, true); + print_one_generate(metrics, "iter", i); + } + + print_statistic(warmup_metrics, iter_metrics); +} + +void inpainting(cxxopts::ParseResult& result) { + std::string prompt = result["prompt"].as(); + const std::string models_path = result["model"].as(); + std::string image_path = result["image"].as(); + std::string mask_image_path = result["mask_image"].as(); + auto devices = device_string_to_triplet(result["device"].as()); + size_t num_warmup = result["num_warmup"].as(); + size_t num_iter = 
result["num_iter"].as(); + const std::string output_dir = result["output_dir"].as(); + + ov::Tensor image_input = utils::load_image(image_path); + ov::Tensor mask_image = utils::load_image(mask_image_path); + + ov::genai::InpaintingPipeline pipe(models_path); + if (result["reshape"].as()) { + auto height = image_input.get_shape()[1]; + auto width = image_input.get_shape()[2]; + pipe.reshape(1, height, width, pipe.get_generation_config().guidance_scale); + } + pipe.compile(devices[0], devices[1], devices[2]); + + std::cout << std::fixed << std::setprecision(2); + std::vector warmup_metrics; + for (size_t i = 0; i < num_warmup; i++) { + pipe.generate(prompt, image_input, mask_image, ov::genai::callback(progress_bar)); + ov::genai::ImageGenerationPerfMetrics metrics = pipe.get_performance_metrics(); + warmup_metrics.emplace_back(metrics); + print_one_generate(metrics, "warmup", i); + } + + std::vector iter_metrics; + for (size_t i = 0; i < num_iter; i++) { + ov::Tensor image = pipe.generate(prompt, image_input, mask_image, ov::genai::callback(progress_bar)); + ov::genai::ImageGenerationPerfMetrics metrics = pipe.get_performance_metrics(); + iter_metrics.emplace_back(metrics); + std::string image_name = output_dir + "/image_" + std::to_string(i) + ".bmp"; + imwrite(image_name, image, true); + print_one_generate(metrics, "iter", i); + } + + print_statistic(warmup_metrics, iter_metrics); +} + +int main(int argc, char* argv[]) try { + cxxopts::Options options("benchmark_image_generation", "Help command"); + + options.add_options() + //common parameters + ("t,pipeline_type", "pipeline type: text2image/image2image/inpainting", cxxopts::value()->default_value("text2image")) + ("m,model", "Path to model and tokenizers base directory", cxxopts::value()) + ("p,prompt", "Prompt", cxxopts::value()->default_value("The Sky is blue because")) + ("nw,num_warmup", "Number of warmup iterations", cxxopts::value()->default_value(std::to_string(1))) + ("n,num_iter", "Number of 
iterations", cxxopts::value()->default_value(std::to_string(3))) + ("d,device", "device", cxxopts::value()->default_value("CPU")) + ("o,output_dir", "Path to save output image", cxxopts::value()->default_value(".")) + ("is,num_inference_steps", "The number of inference steps used to denoise initial noised latent to final image", cxxopts::value()->default_value(std::to_string(20))) + ("ni,num_images_per_prompt", "The number of images to generate per generate() call", cxxopts::value()->default_value(std::to_string(1))) + ("i,image", "Image path", cxxopts::value()) + //special parameters of text2image pipeline + ("w,width", "The width of the resulting image", cxxopts::value()->default_value(std::to_string(512))) + ("ht,height", "The height of the resulting image", cxxopts::value()->default_value(std::to_string(512))) + //special parameters of image2image pipeline + ("s,strength", "Indicates extent to transform the reference `image`. Must be between 0 and 1", cxxopts::value()->default_value(std::to_string(0.8))) + //special parameters of inpainting pipeline + ("mi,mask_image", "Mask image path", cxxopts::value()) + ("r,reshape", "Reshape pipeline before compilation", cxxopts::value()->default_value("false")) + ("h,help", "Print usage"); + + cxxopts::ParseResult result; + try { + result = options.parse(argc, argv); + } catch (const cxxopts::exceptions::exception& e) { + std::cout << e.what() << "\n\n"; + std::cout << options.help() << std::endl; + return EXIT_FAILURE; + } + + if (result.count("help")) { + std::cout << options.help() << std::endl; + return EXIT_SUCCESS; + } + + std::string pipeline_type = result["pipeline_type"].as(); + if (pipeline_type == "text2image") { + text2image(result); + } else if (pipeline_type == "image2image") { + image2image(result); + } else if (pipeline_type == "inpainting") { + inpainting(result); + } else { + std::cout << "not support pipeline type: " << pipeline_type << std::endl; + } + + return 0; +} catch (const std::exception& error) 
{ + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/heterogeneous_stable_diffusion.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/heterogeneous_stable_diffusion.cpp new file mode 100644 index 0000000..572b8c3 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/heterogeneous_stable_diffusion.cpp @@ -0,0 +1,87 @@ +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "openvino/genai/image_generation/text2image_pipeline.hpp" + +#include "imwrite.hpp" +#include "progress_bar.hpp" + +int32_t main(int32_t argc, char* argv[]) try { + OPENVINO_ASSERT(argc >= 3 && argc <= 6, + "Usage: ", + argv[0], + " '' [ ]"); + + const std::string models_path = argv[1], prompt = argv[2]; + + std::filesystem::path root_dir = models_path; + + const int width = 512; + const int height = 512; + const int number_of_images_to_generate = 1; + const int number_of_inference_steps_per_image = 20; + + // Set devices to command-line args if specified, otherwise default to CPU. + // Note that these can be set to CPU, GPU, or NPU. + const std::string text_encoder_device = (argc > 3) ? argv[3] : "CPU"; + const std::string unet_device = (argc > 4) ? argv[4] : "CPU"; + const std::string vae_decoder_device = (argc > 5) ? 
argv[5] : "CPU"; + + std::cout << "text_encoder_device: " << text_encoder_device << std::endl; + std::cout << "unet_device: " << unet_device << std::endl; + std::cout << "vae_decoder_device: " << vae_decoder_device << std::endl; + + // this is the path to where compiled models will get cached + // (so that the 'compile' method run much faster 2nd+ time) + std::string ov_cache_dir = "./cache"; + + // + // Step 1: Create the initial Text2ImagePipeline, given the model path + // + ov::genai::Text2ImagePipeline pipe(models_path); + + // + // Step 2: Reshape the pipeline given number of images, height, width and guidance scale. + // + pipe.reshape(1, height, width, pipe.get_generation_config().guidance_scale); + + // + // Step 3: Compile the pipeline with the specified devices, and properties (like cache dir) + // + ov::AnyMap properties = {ov::cache_dir(ov_cache_dir)}; + + // Note that if there are device-specific properties that are needed, they can + // be added using ov::device::properties groups, like this: + //ov::AnyMap properties = {ov::device::properties("CPU", ov::cache_dir("cpu_cache")), + // ov::device::properties("GPU", ov::cache_dir("gpu_cache")), + // ov::device::properties("NPU", ov::cache_dir("npu_cache"))}; + + pipe.compile(text_encoder_device, unet_device, vae_decoder_device, properties); + + // + // Step 4: Use the Text2ImagePipeline to generate 'number_of_images_to_generate' images. + // + for (int imagei = 0; imagei < number_of_images_to_generate; imagei++) { + std::cout << "Generating image " << imagei << std::endl; + + ov::Tensor image = pipe.generate(prompt, + ov::genai::num_inference_steps(number_of_inference_steps_per_image), + ov::genai::callback(progress_bar)); + + imwrite("image_" + std::to_string(imagei) + ".bmp", image, true); + } + + return EXIT_SUCCESS; +} catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) { + } + return EXIT_FAILURE; +} catch (...) 
{ + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) { + } + return EXIT_FAILURE; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/image2image.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/image2image.cpp new file mode 100644 index 0000000..f837bc1 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/image2image.cpp @@ -0,0 +1,38 @@ +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "openvino/genai/image_generation/image2image_pipeline.hpp" + +#include "imwrite.hpp" +#include "load_image.hpp" +#include "progress_bar.hpp" + +int32_t main(int32_t argc, char* argv[]) try { + OPENVINO_ASSERT(argc == 4, "Usage: ", argv[0], " '' "); + + const std::string models_path = argv[1], prompt = argv[2], image_path = argv[3]; + const std::string device = "CPU"; // GPU can be used as well + + ov::Tensor image = utils::load_image(image_path); + + ov::genai::Image2ImagePipeline pipe(models_path, device); + ov::Tensor generated_image = pipe.generate(prompt, image, + // controls how initial image is noised after being converted to latent space. `1` means initial image is fully noised + ov::genai::strength(0.8f), + ov::genai::callback(progress_bar)); + + // writes `num_images_per_prompt` images by pattern name + imwrite("image_%d.bmp", generated_image, true); + + return EXIT_SUCCESS; +} catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} catch (...) 
{ + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/image2image_concurrency.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/image2image_concurrency.cpp new file mode 100644 index 0000000..66fbb24 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/image2image_concurrency.cpp @@ -0,0 +1,85 @@ +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include +#include + +#include "openvino/genai/image_generation/image2image_pipeline.hpp" + +#include "imwrite.hpp" +#include "load_image.hpp" +#include "progress_bar.hpp" + +int32_t main(int32_t argc, char* argv[]) try { + OPENVINO_ASSERT(argc >= 4, "Usage: ", argv[0], " '' '' ... "); + + const std::string models_path = argv[1]; + const std::string device = "CPU"; // GPU and NPU can be used as well + const std::string image_path = argv[argc - 1]; + ov::Tensor image = utils::load_image(image_path); + + std::vector threads; + std::vector prompts; + std::vector pipelines; + + for (int32_t i = 2; i < argc - 1; ++i) + prompts.push_back(argv[i]); + + ov::AnyMap properties; + if (device == "NPU") { + // Define static shape and guidance scale for NPU + const int num_images_per_prompt = 1; + const int height = 512; + const int width = 512; + const float guidance_scale = 7.5f; + + pipelines.emplace_back(models_path); + pipelines.back().reshape(num_images_per_prompt, height, width, guidance_scale); + pipelines.back().compile(device); // All models are compiled for NPU + //pipelines.back().compile("NPU", "NPU", "GPU"); // Compile for NPU and GPU, if needed + + // Don't specify N, H, W, and guidance_scale in the properties map because they were made static + properties = ov::AnyMap{ov::genai::strength(0.8f), + ov::genai::num_inference_steps(4)}; + } else { + 
pipelines.emplace_back(models_path, device); + + properties = ov::AnyMap{ov::genai::strength(0.8f), // controls how initial image is noised after being converted to latent space. `1` means initial image is fully noised + ov::genai::num_inference_steps(4)}; + } + + // Clone pipeline for concurrent usage + for (size_t i = 1; i < prompts.size(); ++i) + pipelines.emplace_back(pipelines.begin()->clone()); + + for (size_t i = 0; i < prompts.size(); ++i) { + std::string prompt = prompts[i]; + auto& pipe = pipelines.at(i); + + std::cout << "Starting to generate with prompt: '" << prompt << "'..." << std::endl; + + threads.emplace_back([i, &pipe, prompt, image, &properties] () { + + ov::Tensor generated_image = pipe.generate(prompt, image, properties); + + // writes `num_images_per_prompt` images by pattern name + imwrite("image_" + std::to_string(i) + "_%d.bmp", generated_image, true); + }); + } + + for (auto& thread : threads) { + thread.join(); + } + + return EXIT_SUCCESS; +} catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} catch (...) 
{ + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/imageimage.bmp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/imageimage.bmp new file mode 100644 index 0000000..0ed5e17 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/imageimage.bmp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ecb4783a8f3a0962659ebf80eeaf0c0e48c44995c1e60001f215e0697ab9397 +size 2162742 diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/imwrite.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/imwrite.cpp new file mode 100644 index 0000000..78840a5 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/imwrite.cpp @@ -0,0 +1,153 @@ +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include +#include +#include +#include + +#include "imwrite.hpp" + +#include "openvino/core/except.hpp" + +namespace { + +unsigned char file[14] = { + 'B', + 'M', // magic + 0, + 0, + 0, + 0, // size in bytes + 0, + 0, // app data + 0, + 0, // app data + 40 + 14, + 0, + 0, + 0 // start of data offset +}; + +unsigned char info[40] = { + 40, + 0, + 0, + 0, // info hd size + 0, + 0, + 0, + 0, // width + 0, + 0, + 0, + 0, // height + 1, + 0, // number color planes + 24, + 0, // bits per pixel + 0, + 0, + 0, + 0, // compression is none + 0, + 0, + 0, + 0, // image bits size + 0x13, + 0x0B, + 0, + 0, // horz resolution in pixel / m + 0x13, + 0x0B, + 0, + 0, // vert resolution (0x03C3 = 96 dpi, 0x0B13 = 72 + // dpi) + 0, + 0, + 0, + 0, // #colors in palette + 0, + 0, + 0, + 0, // #important colors +}; + +void imwrite_single_image(const std::string& name, ov::Tensor image, bool convert_bgr2rgb) { + const ov::Shape shape = image.get_shape(); + const 
size_t width = shape[2], height = shape[1], channels = shape[3]; + OPENVINO_ASSERT(image.get_element_type() == ov::element::u8 && + shape.size() == 4 && shape[0] == 1 && channels == 3, + "Image of u8 type and [1, H, W, 3] shape is expected.", + "Given image has shape ", shape, " and element type ", image.get_element_type()); + + std::ofstream output_file(name, std::ofstream::binary); + OPENVINO_ASSERT(output_file.is_open(), "Failed to open the output BMP image path"); + + int padSize = static_cast(4 - (width * channels) % 4) % 4; + int sizeData = static_cast(width * height * channels + height * padSize); + int sizeAll = sizeData + sizeof(file) + sizeof(info); + + file[2] = (unsigned char)(sizeAll); + file[3] = (unsigned char)(sizeAll >> 8); + file[4] = (unsigned char)(sizeAll >> 16); + file[5] = (unsigned char)(sizeAll >> 24); + + info[4] = (unsigned char)(width); + info[5] = (unsigned char)(width >> 8); + info[6] = (unsigned char)(width >> 16); + info[7] = (unsigned char)(width >> 24); + + std::int32_t negativeHeight = -(int32_t)height; + info[8] = (unsigned char)(negativeHeight); + info[9] = (unsigned char)(negativeHeight >> 8); + info[10] = (unsigned char)(negativeHeight >> 16); + info[11] = (unsigned char)(negativeHeight >> 24); + + info[20] = (unsigned char)(sizeData); + info[21] = (unsigned char)(sizeData >> 8); + info[22] = (unsigned char)(sizeData >> 16); + info[23] = (unsigned char)(sizeData >> 24); + + output_file.write(reinterpret_cast(file), sizeof(file)); + output_file.write(reinterpret_cast(info), sizeof(info)); + + const std::uint8_t pad[3] = {0, 0, 0}; + const std::uint8_t* data = image.data(); + + for (size_t y = 0; y < height; y++) { + const std::uint8_t* current_row = data + y * width * channels; + if (convert_bgr2rgb) { + for (size_t x = 0; x < width; ++x) { + output_file.write(reinterpret_cast(current_row + 2), 1); + output_file.write(reinterpret_cast(current_row + 1), 1); + output_file.write(reinterpret_cast(current_row), 1); + current_row += 
channels; + } + } else { + output_file.write(reinterpret_cast(current_row), width * channels); + } + output_file.write(reinterpret_cast(pad), padSize); + } +} + +} // namespace + + +void imwrite(const std::string& name, ov::Tensor images, bool convert_bgr2rgb) { + const ov::Shape shape = images.get_shape(); + OPENVINO_ASSERT(images.get_element_type() == ov::element::u8 && shape.size() == 4, + "Image of u8 type and [1, H, W, 3] shape is expected.", + "Given image has shape ", shape, " and element type ", images.get_element_type()); + + const ov::Shape img_shape = {1, shape[1], shape[2], shape[3]}; + uint8_t* img_data = images.data(); + + for (int img_num = 0, num_images = shape[0], img_size = ov::shape_size(img_shape); img_num < num_images; ++img_num, img_data += img_size) { + char img_name[25]; + sprintf(img_name, name.c_str(), img_num); + + ov::Tensor image(images.get_element_type(), img_shape, img_data); + imwrite_single_image(img_name, image, true); + } +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/imwrite.hpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/imwrite.hpp new file mode 100644 index 0000000..b1b89aa --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/imwrite.hpp @@ -0,0 +1,16 @@ +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +#include "openvino/runtime/tensor.hpp" + +/** + * @brief Writes multiple images (depending on `image` tensor batch size) to BPM file(s) + * @param name File name or pattern to use to write images + * @param image Image(s) tensor + * @param convert_bgr2rgb Convert BGR to RGB + */ +void imwrite(const std::string& name, ov::Tensor images, bool convert_bgr2rgb); diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/inpainting.bmp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/inpainting.bmp new file mode 100644 
index 0000000..b93292e --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/inpainting.bmp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:527cee8f7d451c7e5004bc58c079d4c853443644eaeb2d84a343016cd25214c1 +size 786486 diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/inpainting.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/inpainting.cpp new file mode 100644 index 0000000..004d946 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/inpainting.cpp @@ -0,0 +1,36 @@ +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "openvino/genai/image_generation/inpainting_pipeline.hpp" + +#include "imwrite.hpp" +#include "load_image.hpp" +#include "progress_bar.hpp" + +int32_t main(int32_t argc, char* argv[]) try { + OPENVINO_ASSERT(argc == 5, "Usage: ", argv[0], " '' "); + + const std::string models_path = argv[1], prompt = argv[2], image_path = argv[3], mask_image_path = argv[4]; + const std::string device = "CPU"; // GPU can be used as well + + ov::Tensor image = utils::load_image(image_path); + ov::Tensor mask_image = utils::load_image(mask_image_path); + + ov::genai::InpaintingPipeline pipe(models_path, device); + ov::Tensor generated_image = pipe.generate(prompt, image, mask_image, ov::genai::callback(progress_bar)); + + // writes `num_images_per_prompt` images by pattern name + imwrite("image_%d.bmp", generated_image, true); + + return EXIT_SUCCESS; +} catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} catch (...) 
{ + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/load_image.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/load_image.cpp new file mode 100644 index 0000000..8e68259 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/load_image.cpp @@ -0,0 +1,45 @@ + +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include + +#define STB_IMAGE_IMPLEMENTATION + +#include "stb_image.h" +#include "load_image.hpp" + +namespace fs = std::filesystem; + +ov::Tensor utils::load_image(const std::filesystem::path& image_path) { + int x = 0, y = 0, channels_in_file = 0; + constexpr int desired_channels = 3; + unsigned char* image = stbi_load( + image_path.string().c_str(), + &x, &y, &channels_in_file, desired_channels); + if (!image) { + std::stringstream error_message; + error_message << "Failed to load the image '" << image_path << "'"; + throw std::runtime_error{error_message.str()}; + } + struct SharedImageAllocator { + unsigned char* image; + int channels, height, width; + void* allocate(size_t bytes, size_t) const { + if (image && channels * height * width == bytes) { + return image; + } + throw std::runtime_error{"Unexpected number of bytes was requested to allocate."}; + } + void deallocate(void*, size_t, size_t) noexcept { + stbi_image_free(image); + image = nullptr; + } + bool is_equal(const SharedImageAllocator& other) const noexcept {return this == &other;} + }; + return ov::Tensor( + ov::element::u8, + ov::Shape{1, size_t(y), size_t(x), size_t(desired_channels)}, + SharedImageAllocator{image, desired_channels, y, x} + ); +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/load_image.hpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/load_image.hpp new file 
mode 100644 index 0000000..004047f --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/load_image.hpp @@ -0,0 +1,12 @@ + +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include +#include + +namespace utils { +ov::Tensor load_image(const std::filesystem::path& image_path); +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/lora.bmp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/lora.bmp new file mode 100644 index 0000000..41bde31 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/lora.bmp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72760b8ae70a02cf318cfb9a08d520bd4800abb22b5eafe57eafb3cfbed7303d +size 1376310 diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/lora_text2image.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/lora_text2image.cpp new file mode 100644 index 0000000..b02a874 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/lora_text2image.cpp @@ -0,0 +1,56 @@ +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "openvino/genai/image_generation/text2image_pipeline.hpp" + +#include "imwrite.hpp" +#include "progress_bar.hpp" + +int32_t main(int32_t argc, char* argv[]) try { + OPENVINO_ASSERT(argc >= 3 && (argc - 3) % 2 == 0, "Usage: ", argv[0], " '' [ ...]]"); + + const std::string models_path = argv[1], prompt = argv[2]; + const std::string device = "CPU"; // GPU can be used as well + + ov::genai::AdapterConfig adapter_config; + // Multiple LoRA adapters applied simultaneously are supported, parse them all and corresponding alphas from cmd parameters: + for(size_t i = 0; i < (argc - 3)/2; ++i) { + ov::genai::Adapter adapter(argv[3 + 2*i]); + float alpha = std::atof(argv[3 + 2*i + 1]); + 
adapter_config.add(adapter, alpha); + } + + // LoRA adapters passed to the constructor will be activated by default in next generates + ov::genai::Text2ImagePipeline pipe(models_path, device, ov::genai::adapters(adapter_config)); + + std::cout << "Generating image with LoRA adapters applied, resulting image will be in lora.bmp\n"; + ov::Tensor image = pipe.generate(prompt, + ov::genai::width(512), + ov::genai::height(896), + ov::genai::num_inference_steps(20), + ov::genai::rng_seed(42), + ov::genai::callback(progress_bar)); + imwrite("lora.bmp", image, true); + + std::cout << "Generating image without LoRA adapters applied, resulting image will be in baseline.bmp\n"; + image = pipe.generate(prompt, + ov::genai::adapters(), // passing adapters in generate overrides adapters set in the constructor; adapters() means no adapters + ov::genai::width(512), + ov::genai::height(896), + ov::genai::num_inference_steps(20), + ov::genai::rng_seed(42), + ov::genai::callback(progress_bar)); + imwrite("baseline.bmp", image, true); + + return EXIT_SUCCESS; +} catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} catch (...) 
{ + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/progress_bar.hpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/progress_bar.hpp new file mode 100644 index 0000000..9c4a701 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/progress_bar.hpp @@ -0,0 +1,35 @@ +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include "indicators/progress_bar.hpp" +#include + +bool progress_bar(size_t step, size_t num_steps, ov::Tensor& /* latent */) { + using namespace indicators; + + static std::optional bar; + + if (!bar) { + bar.emplace( + option::BarWidth{50}, + option::ForegroundColor{Color::green}, + option::FontStyles{std::vector{FontStyle::bold}}, + option::ShowElapsedTime{true}, + option::ShowRemainingTime{true} + ); + } + + std::stringstream stream; + stream << "Generation step " << (step + 1) << " / " << num_steps; + + bar->set_option(option::PostfixText{stream.str()}); + bar->set_progress((100 * (step + 1)) / num_steps); + + if (step + 1 == num_steps) { + bar.reset(); // Required when multiple progress bars are used, without recreation of the object the second progress bar won't be displayed correctly + } + + return false; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/stable_diffusion_export_import.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/stable_diffusion_export_import.cpp new file mode 100644 index 0000000..c52397b --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/stable_diffusion_export_import.cpp @@ -0,0 +1,119 @@ +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "imwrite.hpp" +#include 
"openvino/genai/image_generation/text2image_pipeline.hpp" +#include "progress_bar.hpp" + +void pipeline_export_import(const std::filesystem::path& root_dir) { + ov::genai::Text2ImagePipeline pipe(root_dir, "CPU"); + pipe.export_model(root_dir / "exported"); + // pipeline models are exported to dedicated subfolders + // for stable diffusion xl: + // exported/ + // ├── text_encoder/ + // │ └── openvino_model.blob + // ├── text_encoder_2/ + // │ └── openvino_model.blob + // ├── unet/ + // │ └── openvino_model.blob + // └── vae_decoder/ + // └── openvino_model.blob + + // during import, specify blob_path property to point to the exported model location + ov::genai::Text2ImagePipeline imported_pipe(root_dir, "CPU", ov::genai::blob_path(root_dir / "exported")); +}; + +void dedicated_models_export_import(const std::filesystem::path& root_dir) { + const auto blob_path = root_dir / "exported"; + const auto device = "CPU"; + + // instantiate models and export them individually + auto text_encoder = ov::genai::CLIPTextModel(root_dir / "text_encoder", device); + text_encoder.export_model(blob_path / "text_encoder"); + + auto text_encoder_2 = ov::genai::CLIPTextModelWithProjection(root_dir / "text_encoder_2", device); + text_encoder_2.export_model(blob_path / "text_encoder_2"); + + auto unet = ov::genai::UNet2DConditionModel(root_dir / "unet", device); + unet.export_model(blob_path / "unet"); + + auto vae = ov::genai::AutoencoderKL(root_dir / "vae_decoder", device, ov::AnyMap{}); + vae.export_model(blob_path); + // AutoencoderKL can be composed with decoder and encoder models + // exported/ + // └── vae_decoder/ + // └── openvino_model.blob + // └── vae_encoder/ + // └── openvino_model.blob + + // create pipeline from the exported models + auto imported_pipe = ov::genai::Text2ImagePipeline::stable_diffusion_xl( + ov::genai::Scheduler::from_config(root_dir / "scheduler" / "scheduler_config.json"), + ov::genai::CLIPTextModel(root_dir / "text_encoder", device, 
ov::genai::blob_path(blob_path / "text_encoder")), + ov::genai::CLIPTextModelWithProjection(root_dir / "text_encoder_2", + device, + ov::genai::blob_path(blob_path / "text_encoder_2")), + ov::genai::UNet2DConditionModel(root_dir / "unet", device, ov::genai::blob_path(blob_path / "unet")), + ov::genai::AutoencoderKL(root_dir / "vae_decoder", device, ov::genai::blob_path(blob_path))); +}; + +void export_import_with_reshape(const std::filesystem::path& root_dir, const std::string& prompt) { + const auto device = "CPU"; + + const int width = 512; + const int height = 512; + const int number_of_images_to_generate = 1; + const int number_of_inference_steps_per_image = 20; + + // reshape before export + ov::genai::Text2ImagePipeline pipe(root_dir); + pipe.reshape(1, height, width, pipe.get_generation_config().guidance_scale); + pipe.compile(device); + pipe.export_model(root_dir / "exported"); + + ov::genai::Text2ImagePipeline imported_pipe(root_dir, device, ov::genai::blob_path(root_dir / "exported")); + + // update generation config according to the new shape parameters + auto config = imported_pipe.get_generation_config(); + config.num_images_per_prompt = number_of_images_to_generate; + config.height = height; + config.width = width; + imported_pipe.set_generation_config(config); + + for (int imagei = 0; imagei < number_of_images_to_generate; imagei++) { + std::cout << "Generating image " << imagei << std::endl; + + ov::Tensor image = imported_pipe.generate(prompt, + ov::genai::num_inference_steps(number_of_inference_steps_per_image), + ov::genai::callback(progress_bar)); + + imwrite("image_" + std::to_string(imagei) + ".bmp", image, true); + } +} + +int32_t main(int32_t argc, char* argv[]) try { + OPENVINO_ASSERT(argc == 3, "Usage: ", argv[0], " ''"); + + const std::string models_path = argv[1], prompt = argv[2]; + + std::filesystem::path root_dir = models_path; + + pipeline_export_import(root_dir); + dedicated_models_export_import(root_dir); + 
export_import_with_reshape(root_dir, prompt); + + return EXIT_SUCCESS; +} catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) { + } + return EXIT_FAILURE; +} catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) { + } + return EXIT_FAILURE; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/taylorseer.bmp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/taylorseer.bmp new file mode 100644 index 0000000..4d59787 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/taylorseer.bmp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cd817f1926fb3ff82e55b7d9165482d1b17e1239c6c4b3f6af3627675fc23b6 +size 786486 diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/taylorseer_baseline.bmp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/taylorseer_baseline.bmp new file mode 100644 index 0000000..909f1d3 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/taylorseer_baseline.bmp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8de517b2bc5fd86e8110dd2f1d43a3b974b3f3f23bce55d3a02e4ced1b1cc23 +size 786486 diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/taylorseer_text2image.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/taylorseer_text2image.cpp new file mode 100644 index 0000000..9d21c79 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/taylorseer_text2image.cpp @@ -0,0 +1,101 @@ +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "openvino/genai/image_generation/text2image_pipeline.hpp" +#include "openvino/genai/taylorseer_config.hpp" + +#include "imwrite.hpp" +#include 
"progress_bar.hpp" + +#include +#include + +int32_t main(int32_t argc, char* argv[]) try { + if (argc != 3) { + std::cout << "Usage: " << argv[0] << " ''\n"; + return EXIT_FAILURE; + } + + const std::string models_path = argv[1]; + const std::string prompt = argv[2]; + const std::string device = "CPU"; + + // TaylorSeer configuration + const size_t cache_interval = 3; + const size_t disable_before = 6; + const int disable_after = -2; + const size_t num_inference_steps = 28; + + ov::genai::Text2ImagePipeline pipe(models_path, device); + std::cout << "Generating baseline image without caching...\n"; + auto start_time = std::chrono::high_resolution_clock::now(); + + ov::Tensor baseline_image = pipe.generate(prompt, + ov::genai::width(512), + ov::genai::height(512), + ov::genai::num_inference_steps(num_inference_steps), + ov::genai::num_images_per_prompt(1), + ov::genai::rng_seed(42), + ov::genai::callback(progress_bar)); + + auto end_time = std::chrono::high_resolution_clock::now(); + auto baseline_duration = std::chrono::duration_cast(end_time - start_time); + + std::cout << "Baseline generation completed in " << baseline_duration.count() / 1000.0 << "s\n"; + + imwrite("taylorseer_baseline.bmp", baseline_image, true); + std::cout << "Baseline image saved to taylorseer_baseline.bmp\n"; + + // Configure TaylorSeer caching + std::cout << "\nGenerating image with TaylorSeer caching...\n"; + + ov::genai::TaylorSeerCacheConfig taylorseer_config{cache_interval, disable_before, disable_after}; + std::cout << taylorseer_config.to_string() << "\n"; + auto generation_config = pipe.get_generation_config(); + generation_config.taylorseer_config = taylorseer_config; + pipe.set_generation_config(generation_config); + + start_time = std::chrono::high_resolution_clock::now(); + + ov::Tensor image = pipe.generate(prompt, + ov::genai::width(512), + ov::genai::height(512), + ov::genai::num_inference_steps(num_inference_steps), + ov::genai::num_images_per_prompt(1), + 
ov::genai::rng_seed(42), + ov::genai::callback(progress_bar)); + + end_time = std::chrono::high_resolution_clock::now(); + auto taylorseer_duration = std::chrono::duration_cast(end_time - start_time); + + std::cout << "TaylorSeer generation completed in " << taylorseer_duration.count() / 1000.0 << "s\n"; + + imwrite("taylorseer.bmp", image, true); + std::cout << "Image saved to taylorseer.bmp\n"; + + // Performance comparison + double baseline_ms = static_cast(baseline_duration.count()); + double taylorseer_ms = static_cast(taylorseer_duration.count()); + + double speedup = taylorseer_ms > 0 ? baseline_ms / taylorseer_ms : 0.0; + double time_saved = baseline_ms > 0 ? (baseline_ms - taylorseer_ms) / 1000.0 : 0.0; + double percentage = baseline_ms > 0 ? (baseline_ms - taylorseer_ms) / baseline_ms * 100.0 : 0.0; + + std::cout << "\nPerformance Comparison:\n"; + std::cout << " Baseline time: " << baseline_ms / 1000.0 << "s\n"; + std::cout << " TaylorSeer time: " << taylorseer_ms / 1000.0 << "s\n"; + std::cout << " Speedup: " << speedup << "x\n"; + std::cout << " Time saved: " << time_saved << "s (" << percentage << "%)\n"; + + return EXIT_SUCCESS; +} catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} catch (...) 
{ + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/text2image.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/text2image.cpp new file mode 100644 index 0000000..e11eda9 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/text2image.cpp @@ -0,0 +1,37 @@ +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "openvino/genai/image_generation/text2image_pipeline.hpp" + +#include "imwrite.hpp" +#include "progress_bar.hpp" + +int32_t main(int32_t argc, char* argv[]) try { + OPENVINO_ASSERT(argc == 3, "Usage: ", argv[0], " ''"); + + const std::string models_path = argv[1], prompt = argv[2]; + const std::string device = "CPU"; // GPU can be used as well + + ov::genai::Text2ImagePipeline pipe(models_path, device); + ov::Tensor image = pipe.generate(prompt, + ov::genai::width(512), + ov::genai::height(512), + ov::genai::num_inference_steps(20), + ov::genai::num_images_per_prompt(1), + ov::genai::callback(progress_bar)); + + // writes `num_images_per_prompt` images by pattern name + imwrite("image_%d.bmp", image, true); + + return EXIT_SUCCESS; +} catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} catch (...) 
{ + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/text2image_concurrency.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/text2image_concurrency.cpp new file mode 100644 index 0000000..d2c93d4 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/image_generation/text2image_concurrency.cpp @@ -0,0 +1,81 @@ +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +#include + +#include "openvino/genai/image_generation/text2image_pipeline.hpp" + + +#include "imwrite.hpp" +#include "progress_bar.hpp" + +int32_t main(int32_t argc, char* argv[]) try { + OPENVINO_ASSERT(argc >= 3, "Usage: ", argv[0], " '' '' ..."); + + const std::string models_path = argv[1]; + const std::string device = "CPU"; // GPU and NPU can be used as well + + std::vector threads; + std::vector prompts; + std::vector pipelines; + + for (int i = 2; i < argc; ++i) + prompts.push_back(argv[i]); + + // Prepare initial pipeline and compiled models into device + ov::AnyMap properties; + if (device == "NPU") { + // Define static shape and guidance scale for NPU + const int num_images_per_prompt = 1; + const int height = 512; + const int width = 512; + const float guidance_scale = 7.5f; + + pipelines.emplace_back(models_path); + pipelines.back().reshape(num_images_per_prompt, height, width, guidance_scale); + pipelines.back().compile(device); // All models are compiled for NPU + // pipelines.back().compile("NPU", "NPU", "GPU"); // Compile for NPU and GPU, if needed + + // Don't specify N, H, W, and guidance_scale in the properties map because they were made static + properties = ov::AnyMap{ov::genai::num_inference_steps(2)}; + } else { + pipelines.emplace_back(models_path, device); + + properties = ov::AnyMap{ov::genai::width(512), + ov::genai::height(512), + 
ov::genai::num_inference_steps(2), + ov::genai::num_images_per_prompt(1)}; + } + + // Clone pipeline for concurrent usage + for (size_t i = 1; i < prompts.size(); ++i) + pipelines.emplace_back(pipelines.begin()->clone()); + + for (size_t i = 0; i < prompts.size(); ++i) { + std::string prompt = prompts[i]; + auto& pipe = pipelines.at(i); + + std::cout << "Starting to generate with prompt: '" << prompt << "'..." << std::endl; + + threads.emplace_back([i, &pipe, prompt, &properties] () { + ov::Tensor image = pipe.generate(prompt, properties); + + imwrite("image_" + std::to_string(i) + "_%d.bmp", image, true); + }); + } + + for (auto& thread : threads) { + thread.join(); + } + + return EXIT_SUCCESS; +} catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/rag/CMakeLists.txt b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/rag/CMakeLists.txt new file mode 100644 index 0000000..2cc3a29 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/rag/CMakeLists.txt @@ -0,0 +1,36 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +find_package(OpenVINOGenAI REQUIRED + PATHS + "${CMAKE_BINARY_DIR}" # Reuse the package from the build. + ${OpenVINO_DIR} # GenAI may be installed alogside OpenVINO. 
+ NO_CMAKE_FIND_ROOT_PATH +) + +function(add_sample_executable target_name) + add_executable(${target_name} ${target_name}.cpp) + target_link_libraries(${target_name} PRIVATE openvino::genai) + set_target_properties(${target_name} PROPERTIES + COMPILE_PDB_NAME ${target_name} + # Ensure out-of-box LC_RPATH on macOS with SIP + INSTALL_RPATH_USE_LINK_PATH ON) + install(TARGETS ${target_name} + RUNTIME DESTINATION samples_bin/ + COMPONENT samples_bin + EXCLUDE_FROM_ALL) +endfunction() + +set(SAMPLE_LIST text_embeddings text_rerank) + +foreach(sample ${SAMPLE_LIST}) + add_sample_executable(${sample}) +endforeach() + + +# benchmark_genai +include(FetchContent) + +if(POLICY CMP0135) + cmake_policy(SET CMP0135 NEW) +endif() diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/rag/README.md b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/rag/README.md new file mode 100644 index 0000000..ec2a795 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/rag/README.md @@ -0,0 +1,66 @@ +# Retrieval Augmented Generation Sample + +This example showcases inference of Text Embedding and Text Rerank Models. The application has limited configuration options to encourage the reader to explore and modify the source code. For example, change the device for inference to GPU. The sample features `ov::genai::TextEmbeddingPipeline` and `ov::genai::TextRerankPipeline` and uses text as an input source. + +## Download and Convert the Model and Tokenizers + +The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. + +Install [../../export-requirements.txt](../../export-requirements.txt) to convert a model. 
+ +```sh +pip install --upgrade-strategy eager -r ../../export-requirements.txt +``` + +To export a text embedding model, run the Optimum CLI command: + +```sh +optimum-cli export openvino --task feature-extraction --model BAAI/bge-small-en-v1.5 BAAI/bge-small-en-v1.5 +``` + +To export a text reranking model, run the Optimum CLI command: + +```sh +optimum-cli export openvino --task text-classification --model cross-encoder/ms-marco-MiniLM-L6-v2 cross-encoder/ms-marco-MiniLM-L6-v2 +``` + + +## Run + +Follow [Get Started with Samples](https://docs.openvino.ai/2026/get-started/learn-openvino/openvino-samples/get-started-demos.html) to run the sample. + +### 1. Text Embedding Sample (`text_embeddings.cpp`) +- **Description:** + Demonstrates inference of text embedding models using OpenVINO GenAI. Converts input text into vector embeddings for downstream tasks such as retrieval or semantic search. +- **Run Command:** + ```sh + text_embeddings <MODEL_DIR> "Document 1" "Document 2" + ``` +Refer to the [Supported Models](https://openvinotoolkit.github.io/openvino.genai/docs/supported-models/#text-embeddings-models) for more details. + +### 2. Text Rerank Sample (`text_rerank.cpp`) +- **Description:** + Demonstrates inference of text rerank models using OpenVINO GenAI. Reranks a list of candidate documents based on their relevance to a query using a cross-encoder or reranker model. +- **Run Command:** + ```sh + text_rerank <MODEL_DIR> '<QUERY>' '<TEXT 1>' ['<TEXT 2>' ...] 
+ ``` + + +# Text Embedding Pipeline Usage + +```c++ +#include "openvino/genai/rag/text_embedding_pipeline.hpp" + +ov::genai::TextEmbeddingPipeline pipeline(models_path, device, config); +std::vector embeddings = pipeline.embed_documents(documents); +``` + +# Text Rerank Pipeline Usage + +```c++ +#include "openvino/genai/rag/text_rerank_pipeline.hpp" + +ov::genai::TextRerankPipeline pipeline(models_path, device, config); +std::vector> rerank_result = pipeline.rerank(query, documents); +``` diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/rag/text_embeddings.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/rag/text_embeddings.cpp new file mode 100644 index 0000000..e82853e --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/rag/text_embeddings.cpp @@ -0,0 +1,34 @@ +// Copyright (C) 2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "openvino/genai/rag/text_embedding_pipeline.hpp" + +int main(int argc, char* argv[]) try { + if (argc < 3) { + throw std::runtime_error(std::string{"Usage: "} + argv[0] + " '' ['' ...]"); + } + auto documents = std::vector(argv + 2, argv + argc); + std::string models_path = argv[1]; + + std::string device = "CPU"; // GPU can be used as well + + ov::genai::TextEmbeddingPipeline::Config config; + config.pooling_type = ov::genai::TextEmbeddingPipeline::PoolingType::MEAN; + + ov::genai::TextEmbeddingPipeline pipeline(models_path, device, config); + + ov::genai::EmbeddingResults documents_embeddings = pipeline.embed_documents(documents); + ov::genai::EmbeddingResult query_embedding = pipeline.embed_query("What is the capital of France?"); +} catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) { + } + return EXIT_FAILURE; +} catch (...) 
{ + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) { + } + return EXIT_FAILURE; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/rag/text_rerank.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/rag/text_rerank.cpp new file mode 100644 index 0000000..2f6256f --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/rag/text_rerank.cpp @@ -0,0 +1,45 @@ +// Copyright (C) 2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "openvino/genai/rag/text_rerank_pipeline.hpp" + +int main(int argc, char* argv[]) try { + if (argc < 4) { + throw std::runtime_error(std::string{"Usage: "} + argv[0] + + " '' '' ['' ...]"); + } + + auto documents = std::vector(argv + 3, argv + argc); + std::string models_path = argv[1]; + std::string query = argv[2]; + + std::string device = "CPU"; // GPU can be used as well + + ov::genai::TextRerankPipeline::Config config; + config.top_n = 3; + + ov::genai::TextRerankPipeline pipeline(models_path, device, config); + + std::vector> rerank_result = pipeline.rerank(query, documents); + + // print reranked documents + std::cout << std::fixed << std::setprecision(4); + std::cout << "Reranked documents:\n"; + for (const auto& [index, score] : rerank_result) { + std::cout << "Document " << index << " (score: " << score << "): " << documents[index] << '\n'; + } + std::cout << std::defaultfloat; + +} catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) { + } + return EXIT_FAILURE; +} catch (...) 
{ + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) { + } + return EXIT_FAILURE; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/speech_generation/CMakeLists.txt b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/speech_generation/CMakeLists.txt new file mode 100644 index 0000000..75a47a6 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/speech_generation/CMakeLists.txt @@ -0,0 +1,39 @@ +# Copyright (C) 2023-2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +find_package(OpenVINOGenAI REQUIRED + PATHS + "${CMAKE_BINARY_DIR}" # Reuse the package from the build. + ${OpenVINO_DIR} # GenAI may be installed alogside OpenVINO. + NO_CMAKE_FIND_ROOT_PATH +) + +include(FetchContent) + +if(NOT TARGET dr_libs) + FetchContent_Declare(dr_libs + URL https://github.com/mackron/dr_libs/archive/da35f9d6c7374a95353fd1df1d394d44ab66cf01.tar.gz + URL_HASH SHA256=2704d347f480ca1bc92233fb01747e4550cc8031735b6ea62ca9990ebb8851ae) + FetchContent_MakeAvailable(dr_libs) +endif() + +if(POLICY CMP0135) + cmake_policy(SET CMP0135 NEW) +endif() + +# create main sample executable + +add_executable(text2speech text2speech.cpp audio_utils.cpp) + +target_include_directories(text2speech PRIVATE "$") +target_link_libraries(text2speech PRIVATE openvino::genai) + +set_target_properties(text2speech PROPERTIES + # Ensure out of box LC_RPATH on macOS with SIP + INSTALL_RPATH_USE_LINK_PATH ON) +target_compile_features(text2speech PRIVATE cxx_std_11) + +install(TARGETS text2speech + RUNTIME DESTINATION samples_bin/ + COMPONENT samples_bin + EXCLUDE_FROM_ALL) diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/speech_generation/README.md b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/speech_generation/README.md new file mode 100644 index 0000000..f6d84c4 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/speech_generation/README.md @@ -0,0 +1,60 @@ +# 
Text-to-speech pipeline sample + +This example demonstrates how to use the `ov::genai::Text2SpeechPipeline` in C++ to convert input text into speech. +You can specify a target voice using a speaker embedding vector that captures the desired voice characteristics. +Additionally, you can choose the inference device (e.g., CPU, GPU) to control where the model runs. + +## Download and convert the model and tokenizers + +The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. + +Install [../../export-requirements.txt](../../export-requirements.txt) to convert a model. + +```sh +pip install --upgrade-strategy eager -r ../../export-requirements.txt +optimum-cli export openvino --model microsoft/speecht5_tts --model-kwargs "{\"vocoder\": \"microsoft/speecht5_hifigan\"}" speecht5_tts +``` + +**Note:** Currently, text-to-speech in OpenVINO GenAI supports the `SpeechT5 TTS` model. +When exporting the model, you must specify a vocoder using the `--model-kwargs` option in JSON format. + +## Prepare speaker embedding file + +To generate speech using the SpeechT5 TTS model, you can specify a target voice by providing a speaker embedding file. +This file must contain 512 32-bit floating-point values that represent the voice characteristics of the target speaker. +The model will use these characteristics to synthesize the input text in the specified voice. + +If no speaker embedding is provided, the model will default to a built-in speaker for speech generation. + +You can generate a speaker embedding using +the [`create_speaker_embedding.py`](../../python/speech_generation/create_speaker_embedding.py) script. +This script records 5 seconds of audio from your microphone and extracts a speaker embedding vector from the recording. 
+ +To run the script: + +``` +python create_speaker_embedding.py +``` + +## Run Text-to-speech sample + +Follow [Get Started with Samples](https://docs.openvino.ai/2026/get-started/learn-openvino/openvino-samples/get-started-demos.html) +to run the sample. + +`text2speech speecht5_tts "Hello OpenVINO GenAI" speaker_embedding.bin` + +It generates an `output_audio.wav` file containing the phrase `Hello OpenVINO GenAI` spoken in the target voice. + +Refer to the [Supported Models](https://openvinotoolkit.github.io/openvino.genai/docs/supported-models/#speech-generation-models) for more details. + +# Text-to-speech pipeline usage + +```c++ +#include "openvino/genai/speech_generation/text2speech_pipeline.hpp" + +ov::genai::Text2SpeechPipeline pipe(models_path, device); +auto gen_speech = pipe.generate(prompt, speaker_embedding); + +auto speech = gen_speech.speeches[0]; +// speech tensor contains the waveform of the spoken phrase +``` diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/speech_generation/audio_utils.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/speech_generation/audio_utils.cpp new file mode 100644 index 0000000..82aadab --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/speech_generation/audio_utils.cpp @@ -0,0 +1,63 @@ +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "audio_utils.hpp" + +#include +#include +#include + +#include "openvino/core/except.hpp" + +#define DR_WAV_IMPLEMENTATION +#include + +namespace utils { +namespace audio { + +void save_to_wav(const float* waveform_ptr, + size_t waveform_size, + const std::filesystem::path& file_path, + uint32_t bits_per_sample) { + drwav_data_format format; + format.container = drwav_container_riff; + format.format = DR_WAVE_FORMAT_IEEE_FLOAT; + format.channels = 1; + format.sampleRate = 16000; // assume it is always 16 KHz + format.bitsPerSample = bits_per_sample; + + drwav wav; + 
OPENVINO_ASSERT(drwav_init_file_write(&wav, file_path.string().c_str(), &format, nullptr), + "Failed to initialize WAV writer"); + + size_t total_samples = waveform_size * format.channels; + + drwav_uint64 frames_written = drwav_write_pcm_frames(&wav, total_samples, waveform_ptr); + OPENVINO_ASSERT(frames_written == total_samples, "Failed to write not all frames"); + + drwav_uninit(&wav); +} + +ov::Tensor read_speaker_embedding(const std::filesystem::path& file_path) { + std::ifstream input(file_path, std::ios::binary); + OPENVINO_ASSERT(input, "Failed to open file: " + file_path.string()); + + // Get file size + input.seekg(0, std::ios::end); + size_t buffer_size = static_cast(input.tellg()); + input.seekg(0, std::ios::beg); + + // Check size is multiple of float + OPENVINO_ASSERT(buffer_size % sizeof(float) == 0, "File size is not a multiple of float size."); + size_t num_floats = buffer_size / sizeof(float); + OPENVINO_ASSERT(num_floats == 512, "File must contain speaker embedding including 512 32-bit floats."); + + OPENVINO_ASSERT(input, "Failed to read all data from file."); + ov::Tensor floats_tensor(ov::element::f32, ov::Shape{1, num_floats}); + input.read(reinterpret_cast(floats_tensor.data()), buffer_size); + + return floats_tensor; +} + +} // namespace audio +} // namespace utils diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/speech_generation/audio_utils.hpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/speech_generation/audio_utils.hpp new file mode 100644 index 0000000..a3364ea --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/speech_generation/audio_utils.hpp @@ -0,0 +1,36 @@ +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include +#include +#include + +#include "openvino/runtime/tensor.hpp" + +namespace utils { +namespace audio { +/** + * This function saves an audio waveform, provided as an array of floating-point samples, to a WAV file. 
+ * + * @param waveform_ptr Pointer to the array of float samples representing the audio waveform + * @param waveform_size The number of samples in the waveform array + * @param file_path The name (and path) of the WAV file to be created + * @param bits_per_sample The bit depth used to store each sample in the WAV file + */ +void save_to_wav(const float* waveform_ptr, + size_t waveform_size, + const std::filesystem::path& file_path, + uint32_t bits_per_sample); + +/** + * This function reads a binary file containing a speaker embedding of 32-bit floating-point values and returns + * it as an ov::Tensor + * + * @param file_path The path to the binary file to be read + * @returns an ov::Tensor containing all float values read from the binary file + */ +ov::Tensor read_speaker_embedding(const std::filesystem::path& file_path); +} // namespace audio +} // namespace utils diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/speech_generation/text2speech.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/speech_generation/text2speech.cpp new file mode 100644 index 0000000..993b94f --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/speech_generation/text2speech.cpp @@ -0,0 +1,57 @@ +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "audio_utils.hpp" +#include "openvino/genai/speech_generation/text2speech_pipeline.hpp" + +int main(int argc, char* argv[]) try { + OPENVINO_ASSERT(argc == 3 || argc == 4, + "Usage: ", + argv[0], + " \"\" []"); + + const std::string models_path = argv[1], prompt = argv[2]; + const std::string device = "CPU"; + + ov::genai::Text2SpeechPipeline pipe(models_path, device); + + ov::genai::Text2SpeechDecodedResults gen_speech; + if (argc == 4) { + const std::string speaker_embedding_path = argv[3]; + auto speaker_embedding = utils::audio::read_speaker_embedding(speaker_embedding_path); + gen_speech = pipe.generate(prompt, speaker_embedding); + } else { + gen_speech = 
pipe.generate(prompt); + } + + OPENVINO_ASSERT(gen_speech.speeches.size() == 1, "Expected exactly one decoded waveform"); + + std::string output_file_name = "output_audio.wav"; + auto waveform_size = gen_speech.speeches[0].get_size(); + auto waveform_ptr = gen_speech.speeches[0].data(); + auto bits_per_sample = gen_speech.speeches[0].get_element_type().bitwidth(); + utils::audio::save_to_wav(waveform_ptr, waveform_size, output_file_name, bits_per_sample); + std::cout << "[Info] Text successfully converted to audio file \"" << output_file_name << "\"." << std::endl; + + auto& perf_metrics = gen_speech.perf_metrics; + if (perf_metrics.m_evaluated) { + std::cout << "\n\n=== Performance Summary ===" << std::endl; + std::cout << "Throughput : " << perf_metrics.throughput.mean << " samples/sec." << std::endl; + std::cout << "Total Generation Time : " << perf_metrics.generate_duration.mean / 1000.0f << " sec." + << std::endl; + } + + return EXIT_SUCCESS; +} catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) { + } + return EXIT_FAILURE; +} catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) { + } + return EXIT_FAILURE; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/CMakeLists.txt b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/CMakeLists.txt new file mode 100644 index 0000000..10e7b58 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/CMakeLists.txt @@ -0,0 +1,59 @@ +# Copyright (C) 2023-2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +find_package(OpenVINOGenAI REQUIRED + PATHS + "${CMAKE_BINARY_DIR}" # Reuse the package from the build. + ${OpenVINO_DIR} # GenAI may be installed alogside OpenVINO. 
+ NO_CMAKE_FIND_ROOT_PATH +) + +function(add_sample_executable target_name) + add_executable(${target_name} ${target_name}.cpp) + target_link_libraries(${target_name} PRIVATE openvino::genai) + set_target_properties(${target_name} PROPERTIES + # Ensure out-of-box LC_RPATH on macOS with SIP + INSTALL_RPATH_USE_LINK_PATH ON) + install(TARGETS ${target_name} + RUNTIME DESTINATION samples_bin/ + COMPONENT samples_bin + EXCLUDE_FROM_ALL) +endfunction() + +set (SAMPLE_LIST + greedy_causal_lm + encrypted_model_causal_lm + beam_search_causal_lm + chat_sample + structured_output_generation + lora_greedy_causal_lm + multinomial_causal_lm + prompt_lookup_decoding_lm + speculative_decoding_lm) + +foreach(sample IN LISTS SAMPLE_LIST) + add_sample_executable(${sample}) +endforeach() + +# benchmark_genai +include(FetchContent) + +if(POLICY CMP0135) + cmake_policy(SET CMP0135 NEW) +endif() + +FetchContent_Declare(cxxopts + URL https://github.com/jarro2783/cxxopts/archive/refs/tags/v3.1.1.tar.gz + URL_HASH SHA256=523175f792eb0ff04f9e653c90746c12655f10cb70f1d5e6d6d9491420298a08) +FetchContent_MakeAvailable(cxxopts) + +add_executable(benchmark_genai benchmark_genai.cpp read_prompt_from_file.cpp) +target_link_libraries(benchmark_genai PRIVATE openvino::genai cxxopts::cxxopts) +set_target_properties(benchmark_genai PROPERTIES + # Ensure out of box LC_RPATH on macOS with SIP + INSTALL_RPATH_USE_LINK_PATH ON) + +install(TARGETS benchmark_genai + RUNTIME DESTINATION samples_bin/ + COMPONENT samples_bin + EXCLUDE_FROM_ALL) diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/README.md b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/README.md new file mode 100644 index 0000000..0a79fb1 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/README.md @@ -0,0 +1,255 @@ +# OpenVINO GenAI Text Generation Samples + +These samples showcase the use of OpenVINO's inference capabilities for text generation tasks, 
including different decoding strategies such as beam search, multinomial sampling, and speculative decoding. Each sample has a specific focus and demonstrates a unique aspect of text generation. +The applications don't have many configuration options to encourage the reader to explore and modify the source code. For example, change the device for inference to GPU. +There are also Jupyter notebooks for some samples. You can find links to them in the appropriate sample descriptions. + +## Table of Contents +1. [Download and Convert the Model and Tokenizers](#download-and-convert-the-model-and-tokenizers) +2. [Sample Descriptions](#sample-descriptions) +3. [Troubleshooting](#troubleshooting) +4. [Support and Contribution](#support-and-contribution) + +## Download and convert the model and tokenizers +The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. +Install [../../export-requirements.txt](../../export-requirements.txt) if model conversion is required. +```sh +pip install --upgrade-strategy eager -r ../../export-requirements.txt +optimum-cli export openvino --model <model> <output_folder> +``` +If a converted model in OpenVINO IR format is already available in the collection of [OpenVINO optimized LLMs](https://huggingface.co/collections/OpenVINO/llm-6687aaa2abca3bbcec71a9bd) on Hugging Face, it can be downloaded directly via huggingface-cli. +```sh +pip install huggingface-hub +huggingface-cli download <model> --local-dir <output_folder> +``` + +### Using GGUF models + +To run any sample with a GGUF model, simply provide the path to the .gguf file via the `<MODEL_DIR>` parameter. + +This capability is currently available in preview mode and supports a limited set of topologies, including SmolLM and Qwen2.5. For other models +and architectures, we still recommend converting the model to the IR format using the `optimum-intel` tool. 
+ +## Sample Descriptions +### Common information +Follow [Get Started with Samples](https://docs.openvino.ai/2026/get-started/learn-openvino/openvino-samples/get-started-demos.html) to get common information about OpenVINO samples. +Follow the [build instructions](../../../src/docs/BUILD.md) to build the GenAI samples. + +GPUs usually provide better performance compared to CPUs. Modify the source code to change the device for inference to the GPU. + +Refer to the [Supported Models](https://openvinotoolkit.github.io/openvino.genai/docs/supported-models/) for more details. + +Install [../../deployment-requirements.txt](../../deployment-requirements.txt) to run the samples: +```sh +pip install --upgrade-strategy eager -r ../../deployment-requirements.txt +``` + +### 1. Chat Sample (`chat_sample`) +- **Description:** +Interactive chat interface powered by OpenVINO. +Here is a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/llm-chatbot) that provides an example of LLM-powered text generation in Python. +Recommended models: meta-llama/Llama-2-7b-chat-hf, TinyLlama/TinyLlama-1.1B-Chat-v1.0, etc. +- **Main Feature:** Real-time chat-like text generation. +- **Run Command:** + ```bash + ./chat_sample <MODEL_DIR> + ``` +#### Missing chat template +If you encounter an exception indicating a missing "chat template" when launching the `ov::genai::LLMPipeline` in chat mode, it likely means the model was not tuned for chat functionality. To work around this, manually add the chat template to the tokenizer_config.json of your model or update it by calling `pipe.get_tokenizer().set_chat_template(new_chat_template)`. 
+The following template can be used as a default, but it may not work properly with every model: +``` +"chat_template": "{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|im_start|>user\n' + message['content'] + '<|im_end|>\n<|im_start|>assistant\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|im_end|>\n'}}{% endif %}{% endfor %}", +``` + +#### NPU support + +NPU device is supported with some limitations. See [NPU inference of +LLMs](https://docs.openvino.ai/2026/openvino-workflow-generative/inference-with-genai/inference-with-genai-on-npu.html) documentation. In particular: + +- Models must be exported with symmetric INT4 quantization (`optimum-cli export openvino --weight-format int4 --sym --model `). + For models with more than 4B parameters, channel wise quantization should be used (`--group-size -1`). +- Beam search and parallel sampling are not supported. +- Use OpenVINO 2025.0 or later (installed by deployment-requirements.txt, see "Common information" section), and the latest NPU driver. + + +### 2. Greedy Causal LM (`greedy_causal_lm`) +- **Description:** +Basic text generation using a causal language model. +Here is a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/llm-question-answering) that provides an example of LLM-powered text generation in Python. +Recommended models: meta-llama/Llama-2-7b-hf, etc +- **Main Feature:** Demonstrates simple text continuation. +- **Run Command:** + ```bash + ./greedy_causal_lm "" + ``` + +### 3. Beam Search Causal LM (`beam_search_causal_lm`) +- **Description:** +Uses beam search for more coherent text generation. +Here is a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/llm-question-answering) that provides an example of LLM-powered text generation in Python. +Recommended models: meta-llama/Llama-2-7b-hf, etc +- **Main Feature:** Improves text quality with beam search. 
+- **Run Command:** + ```bash + ./beam_search_causal_lm <MODEL_DIR> "<PROMPT 1>" ["<PROMPT 2>" ...] + ``` + +### 4. Multinomial Causal LM (`multinomial_causal_lm`) +- **Description:** Text generation with multinomial sampling for diversity. +Recommended models: meta-llama/Llama-2-7b-hf, etc +- **Main Feature:** Introduces randomness for creative outputs. +- **Run Command:** + ```bash + ./multinomial_causal_lm <MODEL_DIR> "<PROMPT>" + ``` + +### 5. Prompt Lookup Decoding LM (`prompt_lookup_decoding_lm`) +- **Description:** +[Prompt Lookup decoding](https://github.com/apoorvumang/prompt-lookup-decoding) is an [assisted-generation](https://huggingface.co/blog/assisted-generation#understanding-text-generation-latency) technique where the draft model is replaced with simple string matching against the prompt to generate candidate token sequences. This method is highly effective for input-grounded generation (summarization, document QA, multi-turn chat, code editing), where there is high n-gram overlap between LLM input (prompt) and LLM output. This could be entity names, phrases, or code chunks that the LLM directly copies from the input while generating the output. Prompt lookup exploits this pattern to speed up autoregressive decoding in LLMs. This results in significant speedups with no effect on output quality. +Recommended models: meta-llama/Llama-2-7b-hf, etc +- **Main Feature:** Specialized prompt-based inference. +- **Run Command:** + ```bash + ./prompt_lookup_decoding_lm <MODEL_DIR> "<PROMPT>" + ``` + +### 6. Speculative Decoding LM (`speculative_decoding_lm`) +- **Description:** +Speculative decoding (or [assisted-generation](https://huggingface.co/blog/assisted-generation#understanding-text-generation-latency) in HF terminology) is a recent technique that speeds up token generation when an additional smaller draft model is used alongside the main model. + +Speculative decoding works the following way. 
The draft model predicts the next K tokens one by one in an autoregressive manner, while the main model validates these predictions and corrects them if necessary. We go through each predicted token, and if a difference is detected between the draft and main model, we stop and keep the last token predicted by the main model. Then the draft model gets the latest main prediction and again tries to predict the next K tokens, repeating the cycle. + +This approach reduces the need for multiple infer requests to the main model, enhancing performance. For instance, in more predictable parts of text generation, the draft model can, in best-case scenarios, generate the next K tokens that exactly match the target. In that case they are validated in a single inference request to the main model (which is bigger, more accurate but slower) instead of running K subsequent requests. More details can be found in the original paper https://arxiv.org/pdf/2211.17192.pdf, https://arxiv.org/pdf/2302.01318.pdf + +Here is a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/speculative-sampling) that provides an example of LLM-powered text generation in Python. + +Recommended models: meta-llama/Llama-2-13b-hf as main model and TinyLlama/TinyLlama-1.1B-Chat-v1.0 as draft model. Note that GGUF models are not supported as draft models. +- **Main Feature:** Reduces latency while generating high-quality text. +- **Run Command:** + ```bash + ./speculative_decoding_lm "" + ``` + +### 7. LoRA Greedy Causal LM (`lora_greedy_causal_lm`) +- **Description:** +This sample demonstrates greedy decoding using Low-Rank Adaptation (LoRA) fine-tuned causal language models. LoRA enables efficient fine-tuning, reducing resource requirements for adapting large models to specific tasks. 
+- **Main Feature:** Lightweight fine-tuning with LoRA for efficient text generation +- **Run Command:** + ```bash + ./lora_greedy_causal_lm "" + ``` + +> [!NOTE] +> ### LoRA `alpha` interpretation in OpenVINO GenAI +> The OpenVINO GenAI implementation merges the traditional LoRA parameters into a **single effective scaling factor** used during inference. +> +> In this context, the `alpha` value already includes: +> - normalization by LoRA rank (`alpha / rank`) +> - any user-defined scaling factor (`weight`) +> +> This means `alpha` in GenAI should be treated as the **final scaling weight** applied to the LoRA update — not the raw `alpha` parameter from training. + +### 8. Encrypted Model Causal LM (`encrypted_model_causal_lm`) +- **Description:** +LLMPipeline and Tokenizer objects can be initialized directly from a memory buffer, e.g. when the user stores only encrypted files and decrypts them on-the-fly. +The following code snippet demonstrates how to load the model from the memory buffer: +```cpp +auto [model_str, weights_tensor] = decrypt_model(models_path + "/openvino_model.xml", models_path + "/openvino_model.bin"); +ov::genai::Tokenizer tokenizer(models_path); +ov::genai::LLMPipeline pipe(model_str, weights_tensor, tokenizer, device); +``` +For the sake of brevity, the code above does not include Tokenizer decryption. For more details, see the `encrypted_model_causal_lm` sample. +The sample also demonstrates how to enable user-defined encryption for the plugin cache. +- **Main Feature:** Read model directly from memory buffer +- **Run Command:** + ```bash + ./encrypted_model_causal_lm "" + ``` + +### 9. LLMs benchmarking sample (`benchmark_genai`) +- **Description:** +This sample script demonstrates how to benchmark LLMs in OpenVINO GenAI. The script includes functionality for warm-up iterations, generating text, and calculating various performance metrics. 
+ +For more information on how performance metrics are calculated, see the [performance-metrics tutorial](../../../src/README.md#performance-metrics). +- **Main Feature:** Benchmark model via GenAI +- **Run Command:** + ```bash + ./benchmark_genai [OPTIONS] + ``` + #### Options +- `-m, --model`: Path to the model and tokenizers base directory. +- `-p, --prompt` (default: ''): The prompt to generate text. If neither `-p` nor `--pf` is given, the default prompt is `"The Sky is blue because"`. +- `--pf, --prompt_file`: Read prompt from file. +- `--nw, --num_warmup` (default: `1`): Number of warmup iterations. +- `--mt, --max_new_tokens` (default: `20`): Maximal number of new tokens. +- `-n, --num_iter` (default: `3`): Number of iterations. +- `-d, --device` (default: `"CPU"`): Device to run the model on. + +### 10. Structured Output Sample (`structured_output_sample`) +- **Description:** +This sample demonstrates how to use OpenVINO GenAI to generate structured outputs, such as JSON, from text prompts. It showcases step-by-step reasoning, allowing a language model to break down tasks (e.g., solving equations) and present each step in a structured format. + +The sample uses the following JSON schema for structured output: +```json +{ + "steps": [ + ... + {"explanation": "Moving the -30 term to the right", "output": "2*x = -30"}, + {"explanation": "Finding the value of x.", "output": "x = -30/2"} + ... + ], + "final_answer": "x = -15" +} +``` +**Schema Details:** +- Each reasoning step is an object with `explanation` and `output` fields. +- The `steps` array lists all steps in order. +- The `final_answer` field provides the final solution. +- The schema is defined in the sample source code and can be customized as needed. + - The JSON schema for this format is defined in the source code of the sample and can be modified to fit your needs. 
+ +Recommended models: `meta-llama/Llama-3.2-1B-Instruct`, `meta-llama/Llama-3.2-8B-Instruct` + +- **Run Command:** + ```bash + structured_output_generation + ``` + After running the command, an interactive dialog starts. You can prompt the model to solve equations step by step. For example: + +1. **Step-by-step reasoning:** + If you prompt: + `Solve the equation 8x + 7 = -23 step by step` + The model might output: + ```json + { + "steps": [ + {"explanation": "Rearranging the equation to isolate x.", "output": "8x + 7 = -23"}, + {"explanation": "Subtracting 7 from both sides.", "output": "8x + 7 - 7 = -23 - 7"}, + {"explanation": "Simplifying the left side.", "output": "8x = -30"}, + {"explanation": "Dividing both sides by 8.", "output": "8x / 8 = -30 / 8"}, + {"explanation": "Simplifying the right side.", "output": "x = -30 / 8"}, + {"explanation": "Finding the value of x.", "output": "x = -15/4"} + ], + "final_answer": "x = -15/4" + } + ``` + +**Note:** +Structured output enforcement ensures valid JSON formatting, but does not guarantee factual accuracy or meaningfulness. The model may generate plausible-looking JSON with incorrect or nonsensical data (e.g., `{"explanation": "John", "output": 200000}` or `{"final_answer": "AbrakaKadabra9999######4242"}`). For best results, use the latest or fine-tuned models to improve output quality and relevance. + +## Troubleshooting + +### Unicode characters encoding error on Windows + +Example error: +``` +UnicodeEncodeError: 'charmap' codec can't encode character '\u25aa' in position 0: character maps to <undefined> +``` + +If you encounter the error described in the example when the sample is printing output to the Windows console, it is likely due to the default Windows encoding not supporting certain Unicode characters. To resolve this: +1. Enable Unicode characters for Windows cmd - open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. 
Reboot. +2. Enable UTF-8 mode by setting environment variable `PYTHONIOENCODING="utf8"`. + +## Support and Contribution +- For troubleshooting, consult the [OpenVINO documentation](https://docs.openvino.ai). +- To report issues or contribute, visit the [GitHub repository](https://github.com/openvinotoolkit/openvino.genai). diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/beam_search_causal_lm.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/beam_search_causal_lm.cpp new file mode 100644 index 0000000..4eb1655 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/beam_search_causal_lm.cpp @@ -0,0 +1,35 @@ +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include + +int main(int argc, char* argv[]) try { + if (argc < 3) { + throw std::runtime_error(std::string{"Usage: "} + argv[0] + " '' ['' ...]"); + } + auto prompts = std::vector(argv + 2, argv + argc); + std::string models_path = argv[1]; + + std::string device = "CPU"; // GPU can be used as well + ov::genai::LLMPipeline pipe(models_path, device); + + ov::genai::GenerationConfig config; + config.max_new_tokens = 20; + config.num_beam_groups = 3; + config.num_beams = 15; + config.diversity_penalty = 1.0f; + config.num_return_sequences = config.num_beams; + + auto beams = pipe.generate(prompts, config); + std::cout << beams << '\n'; +} catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} catch (...) 
{ + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/benchmark_genai.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/benchmark_genai.cpp new file mode 100644 index 0000000..81d2011 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/benchmark_genai.cpp @@ -0,0 +1,107 @@ +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "openvino/genai/llm_pipeline.hpp" +#include +#include "read_prompt_from_file.h" + +int main(int argc, char* argv[]) try { + cxxopts::Options options("benchmark_vanilla_genai", "Help command"); + + options.add_options() + ("m,model", "Path to model and tokenizers base directory", cxxopts::value()) + ("p,prompt", "Prompt", cxxopts::value()->default_value("")) + ("pf,prompt_file", "Read prompt from file", cxxopts::value()) + ("nw,num_warmup", "Number of warmup iterations", cxxopts::value()->default_value(std::to_string(1))) + ("n,num_iter", "Number of iterations", cxxopts::value()->default_value(std::to_string(3))) + ("mt,max_new_tokens", "Maximal number of new tokens", cxxopts::value()->default_value(std::to_string(20))) + ("d,device", "device", cxxopts::value()->default_value("CPU")) + ("h,help", "Print usage"); + + cxxopts::ParseResult result; + try { + result = options.parse(argc, argv); + } catch (const cxxopts::exceptions::exception& e) { + std::cout << e.what() << "\n\n"; + std::cout << options.help() << std::endl; + return EXIT_FAILURE; + } + + if (result.count("help")) { + std::cout << options.help() << std::endl; + return EXIT_SUCCESS; + } + + std::string prompt; + if (result.count("prompt") && result.count("prompt_file")) { + std::cout << "Prompt and prompt file should not exist together!" 
<< std::endl; + return EXIT_FAILURE; + } else { + if (result.count("prompt_file")) { + prompt = utils::read_prompt(result["prompt_file"].as()); + } else { + prompt = result["prompt"].as().empty() ? "The Sky is blue because" : result["prompt"].as(); + } + } + if (prompt.empty()) { + std::cout << "Prompt is empty!" << std::endl; + return EXIT_FAILURE; + } + + const std::string models_path = result["model"].as(); + std::string device = result["device"].as(); + size_t num_warmup = result["num_warmup"].as(); + size_t num_iter = result["num_iter"].as(); + + ov::genai::GenerationConfig config; + config.max_new_tokens = result["max_new_tokens"].as(); + config.apply_chat_template = false; + + ov::genai::SchedulerConfig scheduler_config; + scheduler_config.enable_prefix_caching = false; + scheduler_config.max_num_batched_tokens = std::numeric_limits::max(); + + std::cout << ov::get_openvino_version() << std::endl; + + std::unique_ptr pipe; + if (device == "NPU") + pipe = std::make_unique(models_path, device); + else + pipe = std::make_unique(models_path, device, ov::genai::scheduler_config(scheduler_config)); + + auto input_data = pipe->get_tokenizer().encode(prompt); + size_t prompt_token_size = input_data.input_ids.get_shape()[1]; + std::cout << "Prompt token size:" << prompt_token_size << std::endl; + + for (size_t i = 0; i < num_warmup; i++) + pipe->generate(prompt, config); + + ov::genai::DecodedResults res = pipe->generate(prompt, config); + ov::genai::PerfMetrics metrics = res.perf_metrics; + for (size_t i = 0; i < num_iter - 1; i++) { + res = pipe->generate(prompt, config); + metrics = metrics + res.perf_metrics; + } + + std::cout << std::fixed << std::setprecision(2); + std::cout << "Output token size:" << res.perf_metrics.get_num_generated_tokens() << std::endl; + std::cout << "Load time: " << metrics.get_load_time() << " ms" << std::endl; + std::cout << "Generate time: " << metrics.get_generate_duration().mean << " ± " << metrics.get_generate_duration().std << " 
ms" << std::endl; + std::cout << "Tokenization time: " << metrics.get_tokenization_duration().mean << " ± " << metrics.get_tokenization_duration().std << " ms" << std::endl; + std::cout << "Detokenization time: " << metrics.get_detokenization_duration().mean << " ± " << metrics.get_detokenization_duration().std << " ms" << std::endl; + std::cout << "TTFT: " << metrics.get_ttft().mean << " ± " << metrics.get_ttft().std << " ms" << std::endl; + std::cout << "TPOT: " << metrics.get_tpot().mean << " ± " << metrics.get_tpot().std << " ms/token " << std::endl; + std::cout << "Throughput: " << metrics.get_throughput().mean << " ± " << metrics.get_throughput().std << " tokens/s" << std::endl; + + return EXIT_SUCCESS; +} catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/chat_sample.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/chat_sample.cpp new file mode 100644 index 0000000..3b54ca9 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/chat_sample.cpp @@ -0,0 +1,46 @@ +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "openvino/genai/llm_pipeline.hpp" + +int main(int argc, char* argv[]) try { + if (argc < 2 || argc > 3) { + throw std::runtime_error(std::string{"Usage: "} + argv[0] + " "); + } + std::string prompt; + std::string models_path = argv[1]; + + // Default device is CPU; can be overridden by the second argument + std::string device = (argc == 3) ? 
argv[2] : "CPU"; // GPU, NPU can be used as well + ov::genai::LLMPipeline pipe(models_path, device); + + ov::genai::GenerationConfig config; + config.max_new_tokens = 100; + + auto streamer = [](std::string word) { + std::cout << word << std::flush; + // Return flag corresponds whether generation should be stopped. + return ov::genai::StreamingStatus::RUNNING; + }; + + ov::genai::ChatHistory chat_history; + + std::cout << "question:\n"; + while (std::getline(std::cin, prompt)) { + chat_history.push_back({{"role", "user"}, {"content", std::move(prompt)}}); + ov::genai::DecodedResults decoded_results = pipe.generate(chat_history, config, streamer); + chat_history.push_back({{"role", "assistant"}, {"content", std::move(decoded_results.texts[0])}}); + std::cout << "\n----------\n" + "question:\n"; + } +} catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/encrypted_model_causal_lm.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/encrypted_model_causal_lm.cpp new file mode 100644 index 0000000..27363e3 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/encrypted_model_causal_lm.cpp @@ -0,0 +1,97 @@ +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include "openvino/genai/llm_pipeline.hpp" + +std::pair decrypt_model(const std::filesystem::path& model_dir, const std::string& model_file_name, const std::string& weights_file_name) { + std::ifstream model_file(model_dir / model_file_name); + std::ifstream weights_file; + if (!model_file.is_open()) { + throw std::runtime_error("Cannot open model file"); + } + std::string 
model_str((std::istreambuf_iterator(model_file)), std::istreambuf_iterator()); + + // read weights file using mmap to reduce memory consumption + auto weights_tensor = ov::read_tensor_data(model_dir / weights_file_name); + + // User can add file decryption of model_file and weights_file in memory here. + + return {model_str, weights_tensor}; +} + +ov::genai::Tokenizer decrypt_tokenizer(const std::filesystem::path& models_path) { + auto [tok_model_str, tok_weights_tensor] = decrypt_model(models_path, "openvino_tokenizer.xml", "openvino_tokenizer.bin"); + auto [detok_model_str, detok_weights_tensor] = decrypt_model(models_path, "openvino_detokenizer.xml", "openvino_detokenizer.bin"); + + return ov::genai::Tokenizer(tok_model_str, tok_weights_tensor, detok_model_str, detok_weights_tensor); +} + +static const char codec_key[] = {0x30, 0x60, 0x70, 0x02, 0x04, 0x08, 0x3F, 0x6F, 0x72, 0x74, 0x78, 0x7F}; + +std::string codec_xor(const std::string& source_str) { + auto key_size = sizeof(codec_key); + int key_idx = 0; + std::string dst_str = source_str; + for (char& c : dst_str) { + c ^= codec_key[key_idx % key_size]; + key_idx++; + } + return dst_str; +} + +std::string encryption_callback(const std::string& source_str) { + return codec_xor(source_str); +} + +std::string decryption_callback(const std::string& source_str) { + return codec_xor(source_str); +} + +auto get_config_for_cache_encryption() { + ov::AnyMap config; + config.insert({ov::cache_dir("llm_cache")}); + ov::EncryptionCallbacks encryption_callbacks; + //use XOR-based encryption as an example + encryption_callbacks.encrypt = encryption_callback; + encryption_callbacks.decrypt = decryption_callback; + config.insert(ov::cache_encryption_callbacks(encryption_callbacks)); + config.insert(ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE)); + return config; +} + +int main(int argc, char* argv[]) try { + if (3 > argc) + throw std::runtime_error(std::string{"Usage: "} + argv[0] + " \"\""); + + std::string models_path = 
argv[1]; + std::string prompt = argv[2]; + + std::string device = "CPU"; // GPU, NPU can be used as well + + ov::AnyMap config; + if (device == "GPU") { + // Cache compiled models on disk for GPU to save time on the + // next run. It's not beneficial for CPU. + config = get_config_for_cache_encryption(); + } + + auto [model_str, model_weights] = decrypt_model(models_path, "openvino_model.xml", "openvino_model.bin"); + ov::genai::Tokenizer tokenizer = decrypt_tokenizer(models_path); + + ov::genai::LLMPipeline pipe(model_str, model_weights, tokenizer, device, config); + + std::string result = pipe.generate(prompt, ov::genai::max_new_tokens(100)); + std::cout << result << std::endl; +} catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/greedy_causal_lm.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/greedy_causal_lm.cpp new file mode 100644 index 0000000..4a98c6d --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/greedy_causal_lm.cpp @@ -0,0 +1,29 @@ +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "openvino/genai/llm_pipeline.hpp" + +int main(int argc, char* argv[]) try { + if (3 > argc) + throw std::runtime_error(std::string{"Usage: "} + argv[0] + " \"\""); + + std::string models_path = argv[1]; + std::string prompt = argv[2]; + std::string device = "CPU"; // GPU can be used as well + + ov::genai::LLMPipeline pipe(models_path, device); + ov::genai::GenerationConfig config; + config.max_new_tokens = 100; + std::string result = pipe.generate(prompt, config); + std::cout << result << std::endl; +} catch (const std::exception& error) { + try 
{ + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/lora_greedy_causal_lm.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/lora_greedy_causal_lm.cpp new file mode 100644 index 0000000..5ba73be --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/lora_greedy_causal_lm.cpp @@ -0,0 +1,39 @@ +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "openvino/genai/llm_pipeline.hpp" + +int main(int argc, char* argv[]) try { + if (4 > argc) + throw std::runtime_error(std::string{"Usage: "} + argv[0] + " \"\""); + + std::string models_path = argv[1]; + std::string adapter_path = argv[2]; + std::string prompt = argv[3]; + std::string device = "CPU"; // GPU can be used as well + + using namespace ov::genai; + + Adapter adapter(adapter_path); + LLMPipeline pipe(models_path, device, adapters(adapter)); // register all required adapters here + + // Resetting config to set greedy behaviour ignoring generation config from model directory. + // It helps to compare two generations with and without LoRA adapter. + ov::genai::GenerationConfig config; + config.max_new_tokens = 100; + pipe.set_generation_config(config); + + std::cout << "Generate with LoRA adapter and alpha set to 0.75:" << std::endl; + std::cout << pipe.generate(prompt, max_new_tokens(100), adapters(adapter, 0.75)) << std::endl; + + std::cout << "\n-----------------------------"; + std::cout << "\nGenerate without LoRA adapter:" << std::endl; + std::cout << pipe.generate(prompt, max_new_tokens(100), adapters()) << std::endl; + +} catch (const std::exception& error) { + std::cerr << error.what() << '\n'; + return EXIT_FAILURE; +} catch (...) 
{ + std::cerr << "Non-exception object thrown\n"; + return EXIT_FAILURE; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/multinomial_causal_lm.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/multinomial_causal_lm.cpp new file mode 100644 index 0000000..8cd7bb1 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/multinomial_causal_lm.cpp @@ -0,0 +1,40 @@ +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "openvino/genai/llm_pipeline.hpp" + +int main(int argc, char* argv[]) try { + if (3 != argc) { + throw std::runtime_error(std::string{"Usage: "} + argv[0] + " ''"); + } + + std::string models_path = argv[1]; + std::string prompt = argv[2]; + + std::string device = "CPU"; // GPU can be used as well + ov::genai::LLMPipeline pipe(models_path, device); + + ov::genai::GenerationConfig config; + config.max_new_tokens = 100; + config.do_sample = true; + config.top_p = 0.9; + config.top_k = 30; + auto streamer = [](std::string subword) { + std::cout << subword << std::flush; + return ov::genai::StreamingStatus::RUNNING; + }; + + // Since the streamer is set, the results will + // be printed each time a new token is generated. + pipe.generate(prompt, config, streamer); +} catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} catch (...) 
{ + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/prompt_lookup_decoding_lm.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/prompt_lookup_decoding_lm.cpp new file mode 100644 index 0000000..4100862 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/prompt_lookup_decoding_lm.cpp @@ -0,0 +1,49 @@ +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include "openvino/genai/llm_pipeline.hpp" + +int main(int argc, char* argv[]) try { + if (3 != argc) { + throw std::runtime_error(std::string{"Usage: "} + argv[0] + " ''"); + } + + ov::genai::GenerationConfig config; + config.max_new_tokens = 100; + // Define candidates number for candidate generation + config.num_assistant_tokens = 5; + // Define max_ngram_size + config.max_ngram_size = 3; + + std::string model_path = argv[1]; + std::string prompt = argv[2]; + + std::string device = "CPU"; + + ov::genai::LLMPipeline pipe( + model_path, + device, + ov::genai::prompt_lookup(true)); + + auto streamer = [](std::string subword) { + std::cout << subword << std::flush; + return ov::genai::StreamingStatus::RUNNING; + }; + + // Since the streamer is set, the results will + // be printed each time a new token is generated. + pipe.generate(prompt, config, streamer); + std::cout << std::endl; +} catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} catch (...) 
{ + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/read_prompt_from_file.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/read_prompt_from_file.cpp new file mode 100644 index 0000000..2818d3a --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/read_prompt_from_file.cpp @@ -0,0 +1,19 @@ +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include +#include +#include "read_prompt_from_file.h" + +std::string utils::read_prompt(const std::string& file_path) { + std::ifstream file(file_path); + if (file.is_open()) { + std::stringstream buffer; + buffer << file.rdbuf(); + return buffer.str(); + } else { + std::stringstream error_message; + error_message << "Error opening prompt file: '" << file_path << "'"; + throw std::runtime_error{error_message.str()}; + } +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/read_prompt_from_file.h b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/read_prompt_from_file.h new file mode 100644 index 0000000..3515736 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/read_prompt_from_file.h @@ -0,0 +1,11 @@ + +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +namespace utils { +std::string read_prompt(const std::string& file_path); +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/speculative_decoding_lm.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/speculative_decoding_lm.cpp new file mode 100644 index 0000000..72cd3fb --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/speculative_decoding_lm.cpp @@ -0,0 +1,85 @@ +// Copyright (C) 
2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include "openvino/genai/llm_pipeline.hpp" +#include "openvino/genai/speculative_decoding/perf_metrics.hpp" + +int main(int argc, char* argv[]) try { + if (4 != argc) { + throw std::runtime_error(std::string{"Usage: "} + argv[0] + " ''"); + } + + ov::genai::GenerationConfig config; + config.max_new_tokens = 100; + // Speculative decoding generation parameters like `num_assistant_tokens` and `assistant_confidence_threshold` are mutually excluded. + // Add parameter to enable speculative decoding to generate `num_assistant_tokens` candidates by draft_model per iteration. + // NOTE: ContinuousBatching backend uses `num_assistant_tokens` as is. Stateful backend uses `num_assistant_tokens`'s copy as initial + // value and adjusts it based on recent number of accepted tokens. If `num_assistant_tokens` is not set, it defaults to `5` for both + // backends. + config.num_assistant_tokens = 4; + // Add parameter to enable speculative decoding to generate candidates by draft_model while candidate probability is higher than + // `assistant_confidence_threshold`. + // NOTE: `assistant_confidence_threshold` is supported only by ContinuousBatching backend. + // config.assistant_confidence_threshold = 0.4; + + std::string main_model_path = argv[1]; + std::string draft_model_path = argv[2]; + std::string prompt = argv[3]; + + // User can run main and draft model on different devices. + // Please, set device for main model in `LLMPipeline` constructor and in `ov::genai::draft_model` for draft. + // CPU, GPU and NPU can be used. For NPU, the preferred configuration is when both the main and draft models + // use NPU. 
+ std::string main_device = "CPU", draft_device = "CPU"; + + ov::genai::LLMPipeline pipe( + main_model_path, + main_device, + ov::genai::draft_model(draft_model_path, draft_device)); + + auto streamer = [](std::string subword) { + std::cout << subword << std::flush; + return ov::genai::StreamingStatus::RUNNING; + }; + + // Since the streamer is set, the results will + // be printed each time a new token is generated. + auto result = pipe.generate(prompt, config, streamer); + auto sd_perf_metrics = std::dynamic_pointer_cast(result.extended_perf_metrics); + + if (sd_perf_metrics) { + auto main_model_metrics = sd_perf_metrics->main_model_metrics; + std::cout << "\nMAIN MODEL " << std::endl; + std::cout << " Generate time: " << main_model_metrics.get_generate_duration().mean << " ms" << std::endl; + std::cout << " TTFT: " << main_model_metrics.get_ttft().mean << " ± " << main_model_metrics.get_ttft().std << " ms" << std::endl; + std::cout << " TTST: " << main_model_metrics.get_ttst().mean << " ± " << main_model_metrics.get_ttst().std << " ms/token " << std::endl; + std::cout << " TPOT: " << main_model_metrics.get_tpot().mean << " ± " << main_model_metrics.get_tpot().std << " ms/iteration " << std::endl; + std::cout << " AVG Latency: " << main_model_metrics.get_latency().mean << " ± " << main_model_metrics.get_latency().std << " ms/token " << std::endl; + std::cout << " Num generated token: " << main_model_metrics.get_num_generated_tokens() << " tokens" << std::endl; + std::cout << " Total iteration number: " << main_model_metrics.raw_metrics.m_durations.size() << std::endl; + std::cout << " Num accepted token: " << sd_perf_metrics->get_num_accepted_tokens() << " tokens" << std::endl; + + auto draft_model_metrics = sd_perf_metrics->draft_model_metrics; + std::cout << "\nDRAFT MODEL " << std::endl; + std::cout << " Generate time: " << draft_model_metrics.get_generate_duration().mean << " ms" << std::endl; + std::cout << " TTFT: " << draft_model_metrics.get_ttft().mean << 
" ms" << std::endl; + std::cout << " TTST: " << draft_model_metrics.get_ttst().mean << " ms/token " << std::endl; + std::cout << " TPOT: " << draft_model_metrics.get_tpot().mean << " ± " << draft_model_metrics.get_tpot().std << " ms/token " << std::endl; + std::cout << " AVG Latency: " << draft_model_metrics.get_latency().mean << " ± " << draft_model_metrics.get_latency().std << " ms/iteration " << std::endl; + std::cout << " Num generated token: " << draft_model_metrics.get_num_generated_tokens() << " tokens" << std::endl; + std::cout << " Total iteration number: " << draft_model_metrics.raw_metrics.m_durations.size() << std::endl; + } + std::cout << std::endl; +} catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/structured_output_generation.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/structured_output_generation.cpp new file mode 100644 index 0000000..6a2a5a9 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/text_generation/structured_output_generation.cpp @@ -0,0 +1,107 @@ +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "openvino/genai/llm_pipeline.hpp" + +int main(int argc, char* argv[]) try { + if (argc < 2 || argc > 3) { + throw std::runtime_error(std::string{"Usage: "} + argv[0] + " "); + } + std::string prompt; + std::string models_path = argv[1]; + + // Default device is CPU; can be overridden by the second argument + std::string device = (argc == 3) ? 
argv[2] : "CPU"; // GPU, NPU can be used as well + ov::genai::LLMPipeline pipe(models_path, device); + + ov::genai::GenerationConfig config; + config.max_new_tokens = 1000; + config.do_sample = false; + + std::string json_schema = R"({ + "$defs": { + "Step": { + "properties": { + "explanation": { + "title": "Explanation", + "type": "string" + }, + "output": { + "title": "Output", + "type": "string" + } + }, + "required": ["explanation", "output"], + "title": "Step", + "type": "object" + } + }, + "properties": { + "steps": { + "items": { + "$ref": "#/$defs/Step" + }, + "title": "Steps", + "type": "array" + }, + "final_answer": { + "title": "Final Answer", + "type": "string" + } + }, + "required": ["steps", "final_answer"], + "title": "MathReasoning", + "type": "object" + })"; + config.structured_output_config = ov::genai::StructuredOutputConfig( + ov::AnyMap{{ov::genai::json_schema(json_schema)}} + ); + + std::string sys_message = R"( + Decompose the task and do it step by step and include it in a structured JSON. + For every mathematical equation use the adequate mathematical method. Do not try to solve linear equations + as a quadratic/cubic ones and vice versa. + For example for 2*x - x**2 + 15 = 0 the output format should be as the following: + {"steps": [ + {"explanation": "Rearranging the equation to isolate x.", "output": "2*x - x**2 + 15 = 0"}, + {"explanation": "Rearranging the equation to standard form.", "output": "-x**2 + 2*x + 15 = 0"}, + {"explanation": "Factoring the quadratic equation.", "output": "-(x - 5)(x + 3) = 0"}, + {"explanation": "Setting each factor to zero to find the roots.", "output": "x - 5 = 0 or x + 3 = 0"}, + {"explanation": "Finding the solutions for x.", "output": "x = 5 or x = -3"} + ], "final_answer": "x = 5 or x = -3"}. + "output" field should contain only mathematical notations without text. 
+ )"; + + auto streamer = [](std::string word) { + std::cout << word << std::flush; + // Return flag corresponds whether generation should be stopped. + return ov::genai::StreamingStatus::RUNNING; + }; + + ov::genai::ChatHistory chat_history; + + chat_history.push_back({{"role", "system"}, {"content", std::move(sys_message)}}); + std::cout << "This is a sample of structured output generation.\n" + << "You can enter a mathematical equation, and the model will solve it step by step.\n" + << "For example, try: 2*x -2 + 15 = 0\n" + << "To exit, press Ctrl+C or close the terminal.\n" + << "> "; + + while (std::getline(std::cin, prompt)) { + chat_history.push_back({{"role", "user"}, {"content", std::move(prompt)}}); + ov::genai::DecodedResults decoded_results = pipe.generate(chat_history, config, streamer); + chat_history.push_back({{"role", "assistant"}, {"content", std::move(decoded_results.texts[0])}}); + std::cout << "\n----------\n" + "> "; + } +} catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/video_generation/CMakeLists.txt b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/video_generation/CMakeLists.txt new file mode 100644 index 0000000..9ffa8bd --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/video_generation/CMakeLists.txt @@ -0,0 +1,76 @@ +# Copyright (C) 2025-2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +find_package(OpenVINOGenAI REQUIRED + PATHS + "${CMAKE_BINARY_DIR}" # Reuse the package from the build. + ${OpenVINO_DIR} # GenAI may be installed alongside OpenVINO. 
+ NO_CMAKE_FIND_ROOT_PATH +) + +file(DOWNLOAD https://raw.githubusercontent.com/nothings/stb/f75e8d1cad7d90d72ef7a4661f1b994ef78b4e31/stb_image_write.h ${CMAKE_BINARY_DIR}/stb_image_write.h + EXPECTED_HASH MD5=845b8b43d7d941890a57a477455558ad) + +include(FetchContent) +include(${CMAKE_CURRENT_SOURCE_DIR}/../fetch_opencv.cmake) + +if(POLICY CMP0135) + cmake_policy(SET CMP0135 NEW) +endif() + +FetchContent_Declare(indicators + URL https://github.com/p-ranav/indicators/archive/refs/tags/v2.3.tar.gz + URL_HASH SHA256=70da7a693ff7a6a283850ab6d62acf628eea17d386488af8918576d0760aef7b) +FetchContent_MakeAvailable(indicators) + +# create main sample executable +add_executable(text2video text2video.cpp imwrite_video.cpp) + +target_include_directories(text2video PRIVATE ${CMAKE_BINARY_DIR} "${CMAKE_CURRENT_SOURCE_DIR}/../image_generation/" "${CMAKE_CURRENT_SOURCE_DIR}/../../../src/cpp/src/") +ov_genai_link_opencv(text2video core imgproc videoio imgcodecs) +target_link_libraries(text2video PRIVATE openvino::genai indicators::indicators) + +if(UNIX AND NOT APPLE) + set_target_properties(text2video PROPERTIES + INSTALL_RPATH "$ORIGIN/../lib" + ) +elseif(APPLE) + set_target_properties(text2video PROPERTIES + INSTALL_RPATH "@loader_path/../lib" + ) +endif() + +set_target_properties(text2video PROPERTIES + # Ensure out of box LC_RPATH on macOS with SIP + INSTALL_RPATH_USE_LINK_PATH ON) + +install(TARGETS text2video + RUNTIME DESTINATION samples_bin/ + COMPONENT samples_bin + EXCLUDE_FROM_ALL) + +# create taylorseer text2video sample executable +add_executable(taylorseer_text2video taylorseer_text2video.cpp imwrite_video.cpp) + +target_include_directories(taylorseer_text2video PRIVATE ${CMAKE_BINARY_DIR} "${CMAKE_CURRENT_SOURCE_DIR}/../image_generation/" "${CMAKE_CURRENT_SOURCE_DIR}/../../../src/cpp/src/") +ov_genai_link_opencv(taylorseer_text2video core imgproc videoio imgcodecs) +target_link_libraries(taylorseer_text2video PRIVATE openvino::genai indicators::indicators) + +if(UNIX 
AND NOT APPLE) + set_target_properties(taylorseer_text2video PROPERTIES + INSTALL_RPATH "$ORIGIN/../lib" + ) +elseif(APPLE) + set_target_properties(taylorseer_text2video PROPERTIES + INSTALL_RPATH "@loader_path/../lib" + ) +endif() + +set_target_properties(taylorseer_text2video PROPERTIES + # Ensure out of box LC_RPATH on macOS with SIP + INSTALL_RPATH_USE_LINK_PATH ON) + +install(TARGETS taylorseer_text2video + RUNTIME DESTINATION samples_bin/ + COMPONENT samples_bin + EXCLUDE_FROM_ALL) diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/video_generation/README.md b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/video_generation/README.md new file mode 100644 index 0000000..1e36c44 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/video_generation/README.md @@ -0,0 +1,140 @@ +# OpenVINO GenAI Video Generation C++ Samples + +These samples showcase the use of OpenVINO's inference capabilities for video generation tasks. The sample features `openvino_genai.Text2VideoPipeline` for generating videos from text prompts using models like LTX-Video. +The applications don't have many configuration options to encourage the reader to explore and modify the source code. For example, change the device for inference to GPU. + + - [`text2video.cpp`](./text2video.cpp) demonstrates basic text to video generation. + - [`taylorseer_text2video.cpp`](./taylorseer_text2video.cpp) demonstrates text to video generation with TaylorSeer caching optimization for improved performance. LTX-Video model is supported only. + +## Table of Contents +1. [Download and Convert the Model](#download-and-convert-the-model) +2. [Sample Descriptions](#sample-descriptions) +3. [Troubleshooting](#troubleshooting) +4. [Support and Contribution](#support-and-contribution) + +## Download and Convert the Model + +The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. 
+Install [../../export-requirements.txt](../../export-requirements.txt) if model conversion is required. + +```sh +pip install --upgrade-strategy eager -r ../../export-requirements.txt +``` + +Then, run the export with Optimum CLI: + +```sh +optimum-cli export openvino --model Lightricks/LTX-Video --task text-to-video --weight-format int8 ltx_video_ov/INT8 +``` + +Alternatively, do it in Python code. If NNCF is installed, the model will be compressed to INT8 automatically. + +```python +from optimum.intel.openvino import OVLTXPipeline + +output_dir = "ltx_video_ov/INT8" + +pipeline = OVLTXPipeline.from_pretrained("Lightricks/LTX-Video", export=True, compile=False, load_in_8bit=True) +pipeline.save_pretrained(output_dir) +``` + +## Sample Descriptions + +### Common Information + +Follow [Get Started with Samples](https://docs.openvino.ai/2026/get-started/learn-openvino/openvino-samples/get-started-demos.html) to get common information about OpenVINO samples. +Follow [build instruction](../../../src/docs/BUILD.md) to build GenAI samples. + +GPUs usually provide better performance compared to CPUs. Modify the source code to change the device for inference to the GPU. + +### Text to Video Sample (`text2video.cpp`) + +- **Description:** + Basic video generation using a text-to-video model. This sample demonstrates how to generate videos from text prompts using the OpenVINO GenAI Text2VideoPipeline. The LTX-Video model is recommended for this sample. + + Recommended models: Lightricks/LTX-Video + +- **Main Feature:** Generate videos from text descriptions with customizable parameters. + +- **Run Command:** + ```bash + ./text2video model_dir prompt + ``` + + Example: + ```bash + ./text2video ltx_video_ov/INT8 "A woman with long brown hair and light skin smiles at another woman with long blonde hair" + ``` + +The sample will generate a video file `genai_video.avi` in the current directory. 
+ +Users can modify the source code to experiment with different generation parameters: +- Change width or height of generated video +- Change number of frames +- Generate multiple videos per prompt +- Adjust number of inference steps +- Play with guidance scale (improves quality when > 1) +- Add negative prompt when guidance scale > 1 +- Adjust frame rate + +#### Run with threaded callback + +You can also implement a callback function that runs in a separate thread. This allows for parallel processing, enabling you to interrupt generation early if intermediate results are satisfactory or to add logs. + +Please find the template of the callback usage below: + +```cpp +ov::genai::Text2VideoPipeline pipe(models_path, device); + +auto callback = [&](size_t step, size_t num_steps, ov::Tensor& latent) -> bool { + std::cout << "Generation step: " << step + 1 << " / " << num_steps << std::endl; + ov::Tensor video = pipe.decode(latent).video; // get intermediate video tensor + if (your_condition) // return true if you want to interrupt video generation + return true; + return false; +}; + +ov::Tensor video = pipe.generate(prompt, + /* other generation properties */ + ov::genai::callback(callback) +).video; +``` + +### TaylorSeer Text to Video Sample (`taylorseer_text2video.cpp`) + +- **Description:** + Generate videos with TaylorSeer caching optimization. This sample runs two generations: one baseline without caching and one with TaylorSeer caching enabled, then compares their performance. + +- **Run Command:** + ```bash + ./taylorseer_text2video model_dir prompt + ``` + + Example: + ```bash + ./taylorseer_text2video ltx_video_ov/INT8 "a robot dancing in the rain" + ``` + +The sample will generate two video files: `taylorseer_baseline.avi` (without caching) and `taylorseer.avi` (with caching), and display a performance comparison showing the speedup achieved. 
+ +The TaylorSeer configuration parameters can be adjusted in the source code: +- `cache_interval`: Number of steps between cache updates (default: 3) +- `disable_cache_before_step`: Disable caching before this step for warmup (default: 6) +- `disable_cache_after_step`: Disable caching after this step (default: -2, meaning 2 steps before the end) + +For more details about TaylorSeer, see the [diffusion caching documentation](../../../site/docs/concepts/optimization-techniques/diffusion-caching.md). + +## Troubleshooting + +### LTX-Video Model Constraints + +> [!NOTE] +> The LTX-Video model works best with: +> - Resolutions divisible by 32 (e.g., 480x704, 512x512, 720x1280) +> - A number of frames equal to a multiple of 8 plus 1, i.e. 8k + 1 (e.g., 9, 17, 25, 33, 41, 49, 57, 65, 73, 81, 89, 97, 121, 161, 257) +> - At least 2 inference steps (1 step may produce artifacts) +> - Resolutions under 720x1280 and fewer than 257 frames, which give the best quality + +## Support and Contribution +- For troubleshooting, consult the [OpenVINO documentation](https://docs.openvino.ai). +- To report issues or contribute, visit the [GitHub repository](https://github.com/openvinotoolkit/openvino.genai). 
diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/video_generation/imwrite_video.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/video_generation/imwrite_video.cpp new file mode 100644 index 0000000..7e2ae7f --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/video_generation/imwrite_video.cpp @@ -0,0 +1,54 @@ +// Copyright (C) 2025-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include <cstddef> +#include <cstdint> +#include <filesystem> +#include <stdexcept> +#include <string> +#include <opencv2/core.hpp> +#include <opencv2/imgproc.hpp> +#include <opencv2/videoio.hpp> + +#include "imwrite_video.hpp" + +void save_video(const std::string& filename, + const ov::Tensor& video_tensor, // [B, F, H, W, 3], u8, RGB channel order + float fps) { // Writes one MJPG AVI per batch item; throws std::runtime_error on invalid input or if the writer cannot be opened. + const ov::Shape shape = video_tensor.get_shape(); + + if (shape.size() != 5 || shape[4] != 3 || video_tensor.get_size() == 0) { // shape[0..4] is read below and every frame is viewed as CV_8UC3 + throw std::runtime_error("save_video(): expected a non-empty [B, F, H, W, 3] u8 tensor, cannot save: " + filename); + } + + const size_t B = shape[0], F = shape[1], H = shape[2], W = shape[3], C = shape[4]; + const uint8_t* video_data = video_tensor.data<uint8_t>(); + + for (size_t b = 0; b < B; ++b) { + std::string out = filename; + if (B != 1) { // several batch items: derive "<stem>_b<N><ext>" next to the requested path + std::filesystem::path p(filename); + std::string ext = p.has_extension() ? 
p.extension().string() : ".avi"; + out = (p.parent_path() / (p.stem().string() + "_b" + std::to_string(b) + ext)).string(); + } + + const int fourcc = cv::VideoWriter::fourcc('M', 'J', 'P', 'G'); + cv::VideoWriter writer(out, fourcc, static_cast<double>(fps), cv::Size(W, H), true); + if (!writer.isOpened()) + throw std::runtime_error("VideoWriter failed to open: " + out); + + const size_t frame_bytes = H * W * C; + const size_t batch_stride = F * frame_bytes; + const uint8_t* batch_ptr = video_data + b * batch_stride; + + for (size_t f = 0; f < F; ++f) { + const uint8_t* frame_ptr = batch_ptr + f * frame_bytes; + + cv::Mat src(H, W, CV_8UC3, const_cast<uint8_t*>(frame_ptr)); // non-owning view over the tensor memory; it is only read by cvtColor + cv::Mat bgr; + cv::cvtColor(src, bgr, cv::COLOR_RGB2BGR); // frames are handed to the writer in BGR order + + writer.write(bgr); + } + } +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/video_generation/imwrite_video.hpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/video_generation/imwrite_video.hpp new file mode 100644 index 0000000..4655163 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/video_generation/imwrite_video.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2025-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include <string> +#include "openvino/runtime/tensor.hpp" + +/** + * @brief Writes video(s) to AVI file(s). Input frames must be in RGB format; std::runtime_error is thrown on invalid input. + * @param filename Output filename. If batch size > 1, files are named with "_b{N}" suffix. + * @param video_tensor Video tensor of shape [B, F, H, W, C] with uint8 data (C must be 3). + * @param fps Frames per second. 
+ */ +void save_video(const std::string& filename, const ov::Tensor& video_tensor, float fps = 25.0f); diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/video_generation/taylorseer_text2video.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/video_generation/taylorseer_text2video.cpp new file mode 100644 index 0000000..69a481d --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/video_generation/taylorseer_text2video.cpp @@ -0,0 +1,95 @@ +// Copyright (C) 2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include +#include + +#include "progress_bar.hpp" +#include "imwrite_video.hpp" + +#include +#include + +int main(int argc, char* argv[]) try { + OPENVINO_ASSERT(argc == 3, "Usage: ", argv[0], " ''"); + + const std::string models_path = argv[1]; + const std::string prompt = argv[2]; + const std::string device = "CPU"; // GPU can be used as well + const std::string negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted"; + const size_t num_inference_steps = 25; + + ov::genai::Text2VideoPipeline pipe(models_path, device); + const size_t frame_rate = pipe.get_generation_config().frame_rate.value(); + std::cout << "Generating baseline video without caching...\n"; + auto start_time = std::chrono::high_resolution_clock::now(); + + auto baseline_output = pipe.generate( + prompt, + ov::genai::negative_prompt(negative_prompt), + ov::genai::num_inference_steps(num_inference_steps), + ov::genai::callback(progress_bar) + ); + + auto end_time = std::chrono::high_resolution_clock::now(); + auto baseline_duration = std::chrono::duration_cast(end_time - start_time); + + std::cout << "Baseline generation completed in " << baseline_duration.count() / 1000.0 << "s\n"; + + save_video("taylorseer_baseline.avi", baseline_output.video, frame_rate); + std::cout << "Baseline video saved to taylorseer_baseline.avi\n"; + + // Configure TaylorSeer caching + std::cout << "\nGenerating video with TaylorSeer 
caching...\n"; + const size_t cache_interval = 3; + const size_t disable_before = 6; + const int disable_after = -2; + ov::genai::TaylorSeerCacheConfig taylorseer_config{cache_interval, disable_before, disable_after}; + std::cout << taylorseer_config.to_string() << "\n"; + auto generation_config = pipe.get_generation_config(); + generation_config.taylorseer_config = taylorseer_config; + pipe.set_generation_config(generation_config); + + start_time = std::chrono::high_resolution_clock::now(); + + auto output = pipe.generate( + prompt, + ov::genai::negative_prompt(negative_prompt), + ov::genai::num_inference_steps(num_inference_steps), + ov::genai::callback(progress_bar) + ); + + end_time = std::chrono::high_resolution_clock::now(); + auto taylorseer_duration = std::chrono::duration_cast(end_time - start_time); + + std::cout << "TaylorSeer generation completed in " << taylorseer_duration.count() / 1000.0 << "s\n"; + + save_video("taylorseer.avi", output.video, frame_rate); + std::cout << "Video saved to taylorseer.avi\n"; + + // Performance comparison + double baseline_ms = static_cast(baseline_duration.count()); + double taylorseer_ms = static_cast(taylorseer_duration.count()); + + double speedup = taylorseer_ms > 0 ? baseline_ms / taylorseer_ms : 0.0; + double time_saved = baseline_ms > 0 ? (baseline_ms - taylorseer_ms) / 1000.0 : 0.0; + double percentage = baseline_ms > 0 ? (baseline_ms - taylorseer_ms) / baseline_ms * 100.0 : 0.0; + + std::cout << "\nPerformance Comparison:\n"; + std::cout << " Baseline time: " << baseline_ms / 1000.0 << "s\n"; + std::cout << " TaylorSeer time: " << taylorseer_ms / 1000.0 << "s\n"; + std::cout << " Speedup: " << speedup << "x\n"; + std::cout << " Time saved: " << time_saved << "s (" << percentage << "%)\n"; + + return EXIT_SUCCESS; +} catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} catch (...) 
{ + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/video_generation/text2video.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/video_generation/text2video.cpp new file mode 100644 index 0000000..7df57f3 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/video_generation/text2video.cpp @@ -0,0 +1,50 @@ +// Copyright (C) 2025-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include +#include +#include +#include + +#include "progress_bar.hpp" +#include "imwrite_video.hpp" + +#include + +int main(int32_t argc, char* argv[]) try { + OPENVINO_ASSERT(argc == 3, "Usage: ", argv[0], " ''"); + + std::filesystem::path models_dir = argv[1]; + std::string prompt = argv[2]; + + const std::string device = "CPU"; // GPU can be used as well + float frame_rate = 25.0f; + + ov::genai::Text2VideoPipeline pipe(models_dir, device); + auto output = pipe.generate( + prompt, + ov::genai::negative_prompt("worst quality, inconsistent motion, blurry, jittery, distorted"), + ov::genai::height(480), + ov::genai::width(704), + ov::genai::num_frames(161), + ov::genai::num_inference_steps(25), + ov::genai::num_videos_per_prompt(1), + ov::genai::callback(progress_bar), + ov::genai::frame_rate(frame_rate), + ov::genai::guidance_scale(3) + ); + + save_video("genai_video.avi", output.video, frame_rate); + + return EXIT_SUCCESS; +} catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} catch (...) 
{ + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/CMakeLists.txt b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/CMakeLists.txt new file mode 100644 index 0000000..83d4ed4 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/CMakeLists.txt @@ -0,0 +1,88 @@ +# Copyright (C) 2023-2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +if (MSVC) # Select the MSVC runtime: "MultiThreaded", or "MultiThreadedDebug" for Debug configurations. + set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>") +endif() + +find_package(OpenVINOGenAI REQUIRED + PATHS + "${CMAKE_BINARY_DIR}" # Reuse the package from the build. + ${OpenVINO_DIR} # GenAI may be installed alongside OpenVINO. + NO_CMAKE_FIND_ROOT_PATH +) + +file(DOWNLOAD + https://raw.githubusercontent.com/nothings/stb/f75e8d1cad7d90d72ef7a4661f1b994ef78b4e31/stb_image.h + ${CMAKE_BINARY_DIR}/stb_image.h + EXPECTED_HASH MD5=27932e6fb3a2f26aee2fc33f2cb4e696) + +include(${CMAKE_CURRENT_SOURCE_DIR}/../fetch_opencv.cmake) + +# create main sample executable + +add_executable(visual_language_chat visual_language_chat.cpp load_image.cpp) +target_include_directories(visual_language_chat PRIVATE "${CMAKE_BINARY_DIR}") +target_link_libraries(visual_language_chat PRIVATE openvino::genai) + +set_target_properties(visual_language_chat PROPERTIES + # Ensure out of box LC_RPATH on macOS with SIP + INSTALL_RPATH_USE_LINK_PATH ON) + +install(TARGETS visual_language_chat + RUNTIME DESTINATION samples_bin/ + COMPONENT samples_bin + EXCLUDE_FROM_ALL) + +# create lora sample executable + +add_executable(visual_language_lora visual_language_lora.cpp load_image.cpp) +target_include_directories(visual_language_lora PRIVATE "${CMAKE_BINARY_DIR}") +target_link_libraries(visual_language_lora PRIVATE 
openvino::genai) + +set_target_properties(visual_language_lora PROPERTIES + # Ensure out of box LC_RPATH on 
macOS with SIP + INSTALL_RPATH_USE_LINK_PATH ON) + +install(TARGETS visual_language_lora + RUNTIME DESTINATION samples_bin/ + COMPONENT samples_bin + EXCLUDE_FROM_ALL) + +# create encrypted model sample executable + +add_executable(encrypted_model_vlm encrypted_model_vlm.cpp load_image.cpp) +target_include_directories(encrypted_model_vlm PRIVATE "${CMAKE_BINARY_DIR}") +target_link_libraries(encrypted_model_vlm PRIVATE openvino::genai) + +set_target_properties(encrypted_model_vlm PROPERTIES + # Ensure out of box LC_RPATH on macOS with SIP + INSTALL_RPATH_USE_LINK_PATH ON) + +install(TARGETS encrypted_model_vlm + RUNTIME DESTINATION samples_bin/ + COMPONENT samples_bin + EXCLUDE_FROM_ALL) + +# create benchmark executable +add_executable(benchmark_vlm benchmark_vlm.cpp load_image.cpp ../text_generation/read_prompt_from_file.cpp) +target_include_directories(benchmark_vlm PRIVATE "${CMAKE_BINARY_DIR}") +target_link_libraries(benchmark_vlm PRIVATE openvino::genai cxxopts::cxxopts) +set_target_properties(benchmark_vlm PROPERTIES + # Ensure out of box LC_RPATH on macOS with SIP + INSTALL_RPATH_USE_LINK_PATH ON) + +install(TARGETS benchmark_vlm + RUNTIME DESTINATION samples_bin/ + COMPONENT samples_bin + EXCLUDE_FROM_ALL) + +add_executable(video_to_text_chat video_to_text_chat.cpp) +target_include_directories(video_to_text_chat PRIVATE "${CMAKE_BINARY_DIR}") +ov_genai_link_opencv(video_to_text_chat core imgproc videoio imgcodecs) +target_link_libraries(video_to_text_chat PRIVATE openvino::genai cxxopts::cxxopts) + +install(TARGETS video_to_text_chat + RUNTIME DESTINATION samples_bin/ + COMPONENT samples_bin + EXCLUDE_FROM_ALL) diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/README.md b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/README.md new file mode 100644 index 0000000..ee8736b --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/README.md @@ -0,0 +1,127 @@ +# C++ 
visual language chat + +This example showcases inference of Visual language models (VLMs). The application doesn't have many configuration options to encourage the reader to explore and modify the source code. For example, change the device for inference to GPU. The sample features `ov::genai::VLMPipeline` and runs the simplest deterministic greedy sampling algorithm. There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/minicpm-v-multimodal-chatbot) which provides an example of Visual-language assistant. + + +The following are sample files: + - [`visual_language_chat.cpp`](./visual_language_chat.cpp) demonstrates basic usage of the VLM pipeline which supports accelerated inference using prompt lookup decoding. + - [`video_to_text_chat.cpp`](./video_to_text_chat.cpp) demonstrates video to text usage of the VLM pipeline. + - [`benchmark_vlm.cpp`](./benchmark_vlm.cpp) shows how to benchmark a VLM in OpenVINO GenAI. The script includes functionality for warm-up iterations, generating text and calculating various performance metrics. + - [`visual_language_lora.cpp`](./visual_language_lora.cpp) demonstrates how to apply one or more LoRA adapters to a VLM at runtime. + + +## Download and convert the model and tokenizers + +The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. + +It's not required to install [../../export-requirements.txt](../../export-requirements.txt) for deployment if the model has already been exported. + +```sh +pip install --upgrade-strategy eager -r ../../requirements.txt +optimum-cli export openvino --model openbmb/MiniCPM-V-2_6 --trust-remote-code MiniCPM-V-2_6 +``` + +Follow [Get Started with Samples](https://docs.openvino.ai/2026/get-started/learn-openvino/openvino-samples/get-started-demos.html) to run samples. 
+ +## Run image-to-text chat sample: + +[This image](https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11) can be used as a sample image. + +`visual_language_chat MiniCPM-V-2_6 319483352-d5fbbd1a-d484-415c-88cb-9986625b7b11.jpg` + +Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. For example, the model `llava-hf/llava-v1.6-mistral-7b-hf` can benefit from being run on a dGPU. Modify the source code to change the device for inference to `GPU`. + +Refer to the [Supported Models](https://openvinotoolkit.github.io/openvino.genai/docs/supported-models/#visual-language-models-vlms) for more details. + +## Run image-to-text sample with LoRA adapters: + +This sample runs generation twice for the same prompt and image: first with the LoRA adapter applied, then without any adapters (base model). + +Export `Qwen/Qwen2.5-VL-7B-Instruct` to OpenVINO as [described above for MiniCPM-V](#download-and-convert-the-model-and-tokenizers), then download LoRA `Mouad2004/qwen2.5-vl-lora-diagrams`: + +```sh +wget -O adapter_model.safetensors \ + https://huggingface.co/Mouad2004/qwen2.5-vl-lora-diagrams/resolve/main/adapter_model.safetensors +``` + +This OpenVINO overview diagram can be used as a convenient image input: + +```sh +wget -O openvino-overview-diagram.jpg \ + https://docs.openvino.ai/2026/_images/openvino-overview-diagram.jpg +``` + +`visual_language_lora ./Qwen2.5-VL-7B-Instruct ./openvino-overview-diagram.jpg "What is shown in this diagram?" ./adapter_model.safetensors 4.0` + +> You can run with multiple LoRA adapters by providing multiple `<LORA_SAFETENSORS> <ALPHA>` pairs. + +> [!NOTE] +> ### LoRA `alpha` interpretation in OpenVINO GenAI +> The OpenVINO GenAI implementation merges the traditional LoRA parameters into a **single effective scaling factor** used during inference. 
+> +> In this context, the `alpha` value already includes: +> - normalization by LoRA rank (`alpha / rank`) +> - any user-defined scaling factor (`weight`) +> +> This means `alpha` in GenAI should be treated as the **final scaling weight** applied to the LoRA update — not the raw `alpha` parameter from training. + +## Run video-to-text chat sample: + +A model that supports video input is required to run this sample, for example `llava-hf/LLaVA-NeXT-Video-7B-hf`. + +[This video](https://huggingface.co/datasets/raushan-testing-hf/videos-test/resolve/main/sample_demo_1.mp4) can be used as a sample video. + +`video_to_text_chat ./LLaVA-NeXT-Video-7B-hf/ sample_demo_1.mp4` + +Supported models with video input are listed in [this section](https://openvinotoolkit.github.io/openvino.genai/docs/use-cases/image-processing/#use-image-or-video-tags-in-prompt). + +## Run benchmark: + +```sh +benchmark_vlm [OPTIONS] +``` + +### Options + +- `-m, --model`(default: `.`): Path to the model and tokenizers base directory. +- `-p, --prompt` (default: ''): The prompt to generate text. If without `-p` and `--pf`, the default prompt is `"What is on the image?"` +- `--pf, --prompt_file` Read prompt from file. +- `-i, --image` (default: `image.jpg`): Path to the image. +- `-nw, --num_warmup` (default: `1`): Number of warmup iterations. +- `-mt, --max_new_tokens` (default: `20`): Maximal number of new tokens. +- `-n, --num_iter` (default: `3`): Number of iterations. +- `-d, --device` (default: `"CPU"`): Device to run the model on. +- `-pr, --pruning_ratio`: (optional): Percentage of visual tokens to prune (valid range: 0-100); if this option is not provided, pruning is disabled. +- `-rw, --relevance_weight` (optional): Float value from 0 to 1, controls the trade-off between diversity and relevance for visual tokens pruning; a value of 0 disables relevance weighting, while higher values (up to 1.0) emphasize relevance, making pruning more conservative on borderline tokens. 
+ +### Output: + +``` +benchmark_vlm -m MiniCPM-V-2_6 -i 319483352-d5fbbd1a-d484-415c-88cb-9986625b7b11.jpg -n 3 +``` + +``` +Load time: 1982.00 ms +Generate time: 13820.99 ± 64.62 ms +Tokenization time: 1.26 ± 0.09 ms +Detokenization time: 0.33 ± 0.05 ms +Embeddings preparation time: 5733.85 ± 26.34 ms +TTFT: 11246.98 ± 80.55 ms +TPOT: 135.45 ± 4.73 ms/token +Throughput: 7.38 ± 0.26 tokens/s +``` + +For more information on how performance metrics are calculated, see the [performance-metrics tutorial](../../../src/README.md#performance-metrics). + +### Troubleshooting + +#### Unicode characters encoding error on Windows + +Example error: +``` +UnicodeEncodeError: 'charmap' codec can't encode character '\u25aa' in position 0: character maps to <undefined> +``` + +If you encounter the error described in the example when the sample is printing output to the Windows console, it is likely due to the default Windows encoding not supporting certain Unicode characters. To resolve this: +1. Enable Unicode characters for Windows cmd - open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot. +2. Enable UTF-8 mode by setting the environment variable `PYTHONIOENCODING="utf8"`. 
diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/benchmark_vlm.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/benchmark_vlm.cpp new file mode 100644 index 0000000..f620bb8 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/benchmark_vlm.cpp @@ -0,0 +1,123 @@ +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include +#include + +#include "load_image.hpp" +#include +#include "../text_generation/read_prompt_from_file.h" + +int main(int argc, char* argv[]) try { + cxxopts::Options options("benchmark_vlm", "Help command"); + + options.add_options() + ("m,model", "Path to model and tokenizers base directory", cxxopts::value()->default_value(".")) + ("p,prompt", "Prompt", cxxopts::value()->default_value("")) + ("pf,prompt_file", "Read prompt from file", cxxopts::value()) + ("i,image", "Image", cxxopts::value()->default_value("image.jpg")) + ("nw,num_warmup", "Number of warmup iterations", cxxopts::value()->default_value(std::to_string(1))) + ("n,num_iter", "Number of iterations", cxxopts::value()->default_value(std::to_string(3))) + ("mt,max_new_tokens", "Maximal number of new tokens", cxxopts::value()->default_value(std::to_string(20))) + ("d,device", "device", cxxopts::value()->default_value("CPU")) + ("pr,pruning_ratio", "(optional): Percentage of visual tokens to prune (valid range: 0-100); if this option is not provided, pruning is disabled.", cxxopts::value()) + ("rw,relevance_weight", "(optional): Float value from 0 to 1, controls the trade-off between diversity and relevance for visual tokens pruning; a value of 0 disables relevance weighting, while higher values (up to 1.0) emphasize relevance, making pruning more conservative on borderline tokens.", cxxopts::value()) + ("h,help", "Print usage"); + + cxxopts::ParseResult result; + try { + result = options.parse(argc, argv); + } catch (const 
cxxopts::exceptions::exception& e) { + std::cout << e.what() << "\n\n"; + std::cout << options.help() << std::endl; + return EXIT_FAILURE; + } + + if (result.count("help")) { + std::cout << options.help() << std::endl; + return EXIT_SUCCESS; + } + + std::string prompt; + if (result.count("prompt") && result.count("prompt_file")) { + std::cout << "Prompt and prompt file should not exist together!" << std::endl; + return EXIT_FAILURE; + } else { + if (result.count("prompt_file")) { + prompt = utils::read_prompt(result["prompt_file"].as()); + } else { + prompt = result["prompt"].as().empty() ? "What is on the image?" : result["prompt"].as(); + } + } + if (prompt.empty()) { + std::cout << "Prompt is empty!" << std::endl; + return EXIT_FAILURE; + } + + const std::string models_path = result["model"].as(); + const std::string image_path = result["image"].as(); + std::string device = result["device"].as(); + size_t num_warmup = result["num_warmup"].as(); + size_t num_iter = result["num_iter"].as(); + std::vector images = utils::load_images(image_path); + + ov::genai::GenerationConfig config; + if (result.count("pruning_ratio")) { + config.pruning_ratio = result["pruning_ratio"].as(); + } + if (result.count("relevance_weight")) { + config.relevance_weight = result["relevance_weight"].as(); + } + config.max_new_tokens = result["max_new_tokens"].as(); + config.ignore_eos = true; + + std::cout << ov::get_openvino_version() << std::endl; + + std::unique_ptr pipe; + if (device == "NPU") + pipe = std::make_unique(models_path, device); + else { + // Setting of Scheduler config will trigger usage of ContinuousBatching pipeline, which is not default for Qwen2VL, Qwen2.5VL, Gemma3 due to accuracy issues. 
+ ov::genai::SchedulerConfig scheduler_config; + scheduler_config.enable_prefix_caching = false; + scheduler_config.max_num_batched_tokens = std::numeric_limits::max(); + pipe = std::make_unique(models_path, device, ov::genai::scheduler_config(scheduler_config)); + } + + auto input_data = pipe->get_tokenizer().encode(prompt); + size_t prompt_token_size = input_data.input_ids.get_shape()[1]; + std::cout << "Number of images:" << images.size() << ", prompt token size:" << prompt_token_size << std::endl; + + for (size_t i = 0; i < num_warmup; i++) + pipe->generate(prompt, ov::genai::images(images), ov::genai::generation_config(config)); + + auto res = pipe->generate(prompt, ov::genai::images(images), ov::genai::generation_config(config)); + auto metrics = res.perf_metrics; + for (size_t i = 0; i < num_iter - 1; i++) { + res = pipe->generate(prompt, ov::genai::images(images), ov::genai::generation_config(config)); + metrics = metrics + res.perf_metrics; + } + + std::cout << std::fixed << std::setprecision(2); + std::cout << "Output token size:" << res.perf_metrics.get_num_generated_tokens() << std::endl; + std::cout << "Load time: " << metrics.get_load_time() << " ms" << std::endl; + std::cout << "Generate time: " << metrics.get_generate_duration().mean << " ± " << metrics.get_generate_duration().std << " ms" << std::endl; + std::cout << "Tokenization time: " << metrics.get_tokenization_duration().mean << " ± " << metrics.get_tokenization_duration().std << " ms" << std::endl; + std::cout << "Detokenization time: " << metrics.get_detokenization_duration().mean << " ± " << metrics.get_detokenization_duration().std << " ms" << std::endl; + std::cout << "Embeddings preparation time: " << metrics.get_prepare_embeddings_duration().mean << " ± " << metrics.get_prepare_embeddings_duration().std << " ms" << std::endl; + std::cout << "TTFT: " << metrics.get_ttft().mean << " ± " << metrics.get_ttft().std << " ms" << std::endl; + std::cout << "TPOT: " << metrics.get_tpot().mean << 
" ± " << metrics.get_tpot().std << " ms/token " << std::endl; + std::cout << "Throughput: " << metrics.get_throughput().mean << " ± " << metrics.get_throughput().std << " tokens/s" << std::endl; + + return 0; +} catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/encrypted_model_vlm.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/encrypted_model_vlm.cpp new file mode 100644 index 0000000..d3c38b7 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/encrypted_model_vlm.cpp @@ -0,0 +1,123 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include +#include + +#include "load_image.hpp" +#include "openvino/genai/visual_language/pipeline.hpp" + +std::pair decrypt_model(const std::filesystem::path& model_dir, const std::string& model_file_name, const std::string& weights_file_name) { + std::ifstream model_file(model_dir / model_file_name); + std::ifstream weights_file; + if (!model_file.is_open()) { + throw std::runtime_error("Cannot open model file"); + } + std::string model_str((std::istreambuf_iterator(model_file)), std::istreambuf_iterator()); + + // read weights file using mmap to reduce memory consumption + auto weights_tensor = ov::read_tensor_data(model_dir / weights_file_name); + + // User can add file decryption of model_file and weights_file in memory here. 
+ + return {model_str, weights_tensor}; +} + +ov::genai::Tokenizer decrypt_tokenizer(const std::filesystem::path& models_path) { + auto [tok_model_str, tok_weights_tensor] = decrypt_model(models_path, "openvino_tokenizer.xml", "openvino_tokenizer.bin"); + auto [detok_model_str, detok_weights_tensor] = decrypt_model(models_path, "openvino_detokenizer.xml", "openvino_detokenizer.bin"); + + return ov::genai::Tokenizer(tok_model_str, tok_weights_tensor, detok_model_str, detok_weights_tensor); +} + +static const char codec_key[] = {0x30, 0x60, 0x70, 0x02, 0x04, 0x08, 0x3F, 0x6F, 0x72, 0x74, 0x78, 0x7F}; + +std::string codec_xor(const std::string& source_str) { + auto key_size = sizeof(codec_key); + int key_idx = 0; + std::string dst_str = source_str; + for (char& c : dst_str) { + c ^= codec_key[key_idx % key_size]; + key_idx++; + } + return dst_str; +} + +std::string encryption_callback(const std::string& source_str) { + return codec_xor(source_str); +} + +std::string decryption_callback(const std::string& source_str) { + return codec_xor(source_str); +} + +auto get_config_for_cache_encryption() { + ov::AnyMap config; + config.insert({ov::cache_dir("llm_cache")}); + ov::EncryptionCallbacks encryption_callbacks; + //use XOR-based encryption as an example + encryption_callbacks.encrypt = encryption_callback; + encryption_callbacks.decrypt = decryption_callback; + config.insert(ov::cache_encryption_callbacks(encryption_callbacks)); + config.insert(ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE)); + return config; +} + +ov::genai::StreamingStatus print_subword(std::string&& subword) { + std::cout << subword << std::flush; + return ov::genai::StreamingStatus::RUNNING; +} + +int main(int argc, char* argv[]) try { + if (4 != argc) { + throw std::runtime_error(std::string{"Usage "} + argv[0] + " "); + } + + //read and encrypt models + std::filesystem::path models_path = argv[1]; + ov::genai::ModelsMap models_map; + + std::map model_name_to_file_map = { + {"language", 
"openvino_language_model"}, + {"resampler", "openvino_resampler_model"}, + {"text_embeddings", "openvino_text_embeddings_model"}, + {"vision_embeddings", "openvino_vision_embeddings_model"}}; + + for (const auto& [model_name, file_name] : model_name_to_file_map) { + models_map.emplace(model_name, decrypt_model(models_path, file_name + ".xml", file_name + ".bin")); + } + + ov::genai::Tokenizer tokenizer = decrypt_tokenizer(models_path); + + // GPU can be used as well. + std::string device = "CPU"; + ov::AnyMap enable_compile_cache; + if (device == "GPU") { + // Cache compiled models on disk for GPU to save time on the + // next run. It's not beneficial for CPU. + enable_compile_cache = get_config_for_cache_encryption(); + } + ov::genai::VLMPipeline pipe(models_map, tokenizer, models_path, device, enable_compile_cache); + + ov::genai::GenerationConfig generation_config; + generation_config.max_new_tokens = 100; + + std::vector rgbs = utils::load_images(argv[2]); + + std::string prompt = argv[3]; + pipe.generate(prompt, + ov::genai::images(rgbs), + ov::genai::generation_config(generation_config), + ov::genai::streamer(print_subword)); + +} catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} catch (...) 
{ + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/load_image.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/load_image.cpp new file mode 100644 index 0000000..b063ae5 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/load_image.cpp @@ -0,0 +1,59 @@ + +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include + +#define STB_IMAGE_IMPLEMENTATION +#include "stb_image.h" +#include "load_image.hpp" + +namespace fs = std::filesystem; + +std::vector utils::load_images(const std::filesystem::path& input_path) { + if (input_path.empty() || !fs::exists(input_path)) { + throw std::runtime_error{"Path to images is empty or does not exist."}; + } + if (fs::is_directory(input_path)) { + std::set sorted_images{fs::directory_iterator(input_path), fs::directory_iterator()}; + std::vector images; + for (const fs::path& dir_entry : sorted_images) { + images.push_back(utils::load_image(dir_entry)); + } + return images; + } + return {utils::load_image(input_path)}; +} + +ov::Tensor utils::load_image(const std::filesystem::path& image_path) { + int x = 0, y = 0, channels_in_file = 0; + constexpr int desired_channels = 3; + unsigned char* image = stbi_load( + image_path.string().c_str(), + &x, &y, &channels_in_file, desired_channels); + if (!image) { + std::stringstream error_message; + error_message << "Failed to load the image '" << image_path << "'"; + throw std::runtime_error{error_message.str()}; + } + struct SharedImageAllocator { + unsigned char* image; + int channels, height, width; + void* allocate(size_t bytes, size_t) const { + if (image && channels * height * width == bytes) { + return image; + } + throw std::runtime_error{"Unexpected number of bytes was requested to allocate."}; + } + void 
deallocate(void*, size_t, size_t) noexcept { + stbi_image_free(image); + image = nullptr; + } + bool is_equal(const SharedImageAllocator& other) const noexcept {return this == &other;} + }; + return ov::Tensor( + ov::element::u8, + ov::Shape{1, size_t(y), size_t(x), size_t(desired_channels)}, + SharedImageAllocator{image, desired_channels, y, x} + ); +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/load_image.hpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/load_image.hpp new file mode 100644 index 0000000..4fbdbaa --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/load_image.hpp @@ -0,0 +1,13 @@ + +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include +#include + +namespace utils { +ov::Tensor load_image(const std::filesystem::path& image_path); +std::vector load_images(const std::filesystem::path& image_path); +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/video_to_text_chat.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/video_to_text_chat.cpp new file mode 100644 index 0000000..a46faa9 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/video_to_text_chat.cpp @@ -0,0 +1,135 @@ +// Copyright (C) 2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include +#include +#include +#include +#include + +namespace fs = std::filesystem; + +std::vector make_indices(size_t total_frames, size_t num_frames) { + std::vector indices; + indices.reserve(num_frames); + + auto step = float(total_frames) / num_frames; + + for (size_t i = 0; i < num_frames; ++i) { + size_t idx = std::min(size_t(i * step), total_frames - 1); + indices.push_back(idx); + } + + return indices; +} + +ov::Tensor load_video(const std::filesystem::path& video_path, size_t num_frames = 8) { + cv::VideoCapture 
cap(video_path.string()); + + if (!cap.isOpened()) { + OPENVINO_THROW("Could not open the video file."); + } + size_t total_num_frames = cap.get(cv::CAP_PROP_FRAME_COUNT); + auto indices = make_indices(total_num_frames, num_frames); + + std::vector frames; + cv::Mat frame; + size_t width = cap.get(cv::CAP_PROP_FRAME_WIDTH); + size_t height = cap.get(cv::CAP_PROP_FRAME_HEIGHT); + ov::Tensor video_tensor(ov::element::u8, ov::Shape{num_frames, height, width, 3}); + auto video_tensor_data = video_tensor.data(); + + size_t frame_idx = 0; + while (cap.read(frame)) { + OPENVINO_ASSERT(frame.cols == width && frame.rows == height && frame.channels() == 3); + if (std::find(indices.begin(), indices.end(), frame_idx) != indices.end()) { + memcpy(video_tensor_data, frame.data, frame.total() * 3 * sizeof(uint8_t)); + video_tensor_data += frame.total() * 3; + } + frame_idx++; + } + OPENVINO_ASSERT(frame_idx == total_num_frames, "Frame count mismatch: expected " + std::to_string(total_num_frames) + ", got " + std::to_string(frame_idx)); + + return video_tensor; +} + +std::vector load_videos(const std::filesystem::path& input_path) { + if (input_path.empty() || !fs::exists(input_path)) { + OPENVINO_THROW("Path to videos is empty or does not exist."); + } + if (fs::is_directory(input_path)) { + std::set sorted_videos{fs::directory_iterator(input_path), fs::directory_iterator()}; + std::vector videos; + for (const fs::path& dir_entry : sorted_videos) { + videos.push_back(load_video(dir_entry)); + } + return videos; + } + return {load_video(input_path)}; +} + +ov::genai::StreamingStatus print_subword(std::string&& subword) { + std::cout << subword << std::flush; + return ov::genai::StreamingStatus::RUNNING; +} + +int main(int argc, char* argv[]) try { + if (argc < 3 || argc > 4) { + OPENVINO_THROW(std::string{"Usage "} + argv[0] + " "); + } + std::vector videos = load_videos(argv[2]); + + // GPU and NPU can be used as well. 
+ // Note: If NPU is selected, only language model will be run on NPU + std::string device = (argc == 4) ? argv[3] : "CPU"; + ov::AnyMap enable_compile_cache; + if (device == "GPU") { + // Cache compiled models on disk for GPU to save time on the + // next run. It's not beneficial for CPU. + enable_compile_cache.insert({ov::cache_dir("vlm_cache")}); + } + ov::genai::VLMPipeline pipe(argv[1], device, enable_compile_cache); + + ov::genai::GenerationConfig generation_config; + generation_config.max_new_tokens = 100; + + std::string prompt; + + ov::genai::ChatHistory history; + + std::cout << "question:\n"; + std::getline(std::cin, prompt); + + history.push_back({{"role", "user"}, {"content", std::move(prompt)}}); + ov::genai::VLMDecodedResults decoded_results = pipe.generate( + history, + ov::genai::videos(videos), + ov::genai::generation_config(generation_config), + ov::genai::streamer(print_subword) + ); + history.push_back({{"role", "assistant"}, {"content", std::move(decoded_results.texts[0])}}); + std::cout << "\n----------\n" + "question:\n"; + while (std::getline(std::cin, prompt)) { + history.push_back({{"role", "user"}, {"content", std::move(prompt)}}); + // New images and videos can be passed at each turn + ov::genai::VLMDecodedResults decoded_results = pipe.generate( + history, + ov::genai::generation_config(generation_config), + ov::genai::streamer(print_subword) + ); + history.push_back({{"role", "assistant"}, {"content", std::move(decoded_results.texts[0])}}); + std::cout << "\n----------\n" + "question:\n"; + } +} catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} catch (...) 
{ + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/visual_language_chat.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/visual_language_chat.cpp new file mode 100644 index 0000000..fd175d7 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/visual_language_chat.cpp @@ -0,0 +1,83 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "load_image.hpp" +#include +#include + +ov::genai::StreamingStatus print_subword(std::string&& subword) { + std::cout << subword << std::flush; + return ov::genai::StreamingStatus::RUNNING; +} + +int main(int argc, char* argv[]) try { + if (argc < 3 || argc > 5) { + throw std::runtime_error(std::string{"Usage "} + argv[0] + " [DEVICE] [PROMPT_LOOKUP]"); + } + + std::vector rgbs = utils::load_images(argv[2]); + + // GPU and NPU can be used as well. + // Note: If NPU is selected, only language model will be run on NPU + std::string device = (argc >= 4) ? argv[3] : "CPU"; + std::string lookup = (argc == 5) ? argv[4] : "false"; + bool prompt_lookup = (lookup == "true"); + // Prompt lookup decoding in VLM pipeline enforces ContinuousBatching backend + ov::AnyMap properties = {ov::genai::prompt_lookup(prompt_lookup)}; + if (device == "GPU") { + // Cache compiled models on disk for GPU to save time on the + // next run. It's not beneficial for CPU. 
+ properties.insert({ov::cache_dir("vlm_cache")}); + } + + ov::genai::VLMPipeline pipe(argv[1], device, properties); + + ov::genai::GenerationConfig generation_config; + generation_config.max_new_tokens = 100; + if (prompt_lookup) { + // Define candidates number for candidate generation + generation_config.num_assistant_tokens = 5; + // Define max_ngram_size + generation_config.max_ngram_size = 3; + } + + std::string prompt; + + ov::genai::ChatHistory history; + + std::cout << "question:\n"; + std::getline(std::cin, prompt); + + history.push_back({{"role", "user"}, {"content", std::move(prompt)}}); + ov::genai::VLMDecodedResults decoded_results = pipe.generate( + history, + ov::genai::images(rgbs), + ov::genai::generation_config(generation_config), + ov::genai::streamer(print_subword) + ); + history.push_back({{"role", "assistant"}, {"content", std::move(decoded_results.texts[0])}}); + std::cout << "\n----------\n" + "question:\n"; + while (std::getline(std::cin, prompt)) { + history.push_back({{"role", "user"}, {"content", std::move(prompt)}}); + // New images and videos can be passed at each turn + ov::genai::VLMDecodedResults decoded_results = pipe.generate( + history, + ov::genai::generation_config(generation_config), + ov::genai::streamer(print_subword) + ); + history.push_back({{"role", "assistant"}, {"content", std::move(decoded_results.texts[0])}}); + std::cout << "\n----------\n" + "question:\n"; + } +} catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} catch (...) 
{ + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/visual_language_lora.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/visual_language_lora.cpp new file mode 100644 index 0000000..aa15cb4 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/visual_language_chat/visual_language_lora.cpp @@ -0,0 +1,69 @@ +// Copyright (C) 2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "load_image.hpp" +#include +#include +#include +#include +#include +#include +#include +#include + +ov::genai::StreamingStatus print_subword(std::string&& subword) { + std::cout << subword << std::flush; + return ov::genai::StreamingStatus::RUNNING; +} +int main(int argc, char* argv[]) try { + // At least one LoRA adapter must be provided. + OPENVINO_ASSERT(argc >= 6 && ((argc - 4) % 2) == 0, + "Usage: ", argv[0], + " [ ...]"); + + std::vector rgbs = utils::load_images(argv[2]); + + const std::string device = "CPU"; // GPU can be used as well + ov::AnyMap pipeline_properties; + + const std::string prompt = argv[3]; + + // LoRA args parsed as pairs: + ov::genai::AdapterConfig adapter_config; + for (int idx = 4; idx + 1 < argc; idx += 2) { + ov::genai::Adapter adapter(argv[idx]); + float alpha = std::stof(argv[idx + 1]); + adapter_config.add(adapter, alpha); + } + pipeline_properties.insert({ov::genai::adapters(adapter_config)}); + + ov::genai::VLMPipeline pipe(argv[1], device, pipeline_properties); + + ov::genai::GenerationConfig generation_config; + generation_config.max_new_tokens = 100; + + std::cout << "Generating answer with LoRA adapters applied:\n"; + pipe.generate(prompt, + ov::genai::images(rgbs), + ov::genai::generation_config(generation_config), + ov::genai::streamer(print_subword)); + + std::cout << "\n----------\nGenerating answer without LoRA adapters 
applied:\n"; + pipe.generate(prompt, + ov::genai::images(rgbs), + ov::genai::generation_config(generation_config), + ov::genai::adapters(), + ov::genai::streamer(print_subword)); + std::cout << "\n----------\n"; + +} catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/whisper_speech_recognition/CMakeLists.txt b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/whisper_speech_recognition/CMakeLists.txt new file mode 100644 index 0000000..12e0bfb --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/whisper_speech_recognition/CMakeLists.txt @@ -0,0 +1,39 @@ +# Copyright (C) 2023-2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +find_package(OpenVINOGenAI REQUIRED + PATHS + "${CMAKE_BINARY_DIR}" # Reuse the package from the build. + ${OpenVINO_DIR} # GenAI may be installed alogside OpenVINO. 
+ NO_CMAKE_FIND_ROOT_PATH +) + +if(POLICY CMP0135) + cmake_policy(SET CMP0135 NEW) +endif() + +if(POLICY CMP0169) + cmake_policy(SET CMP0169 OLD) +endif() + +include(FetchContent) + +if(NOT TARGET dr_libs) + FetchContent_Declare(dr_libs + URL https://github.com/mackron/dr_libs/archive/da35f9d6c7374a95353fd1df1d394d44ab66cf01.tar.gz + URL_HASH SHA256=2704d347f480ca1bc92233fb01747e4550cc8031735b6ea62ca9990ebb8851ae) + FetchContent_MakeAvailable(dr_libs) +endif() + +add_executable(whisper_speech_recognition whisper_speech_recognition.cpp audio_utils.cpp) +target_link_libraries(whisper_speech_recognition PRIVATE openvino::genai) +target_include_directories(whisper_speech_recognition PRIVATE "$") +set_target_properties(whisper_speech_recognition PROPERTIES + # Ensure out of box LC_RPATH on macOS with SIP + INSTALL_RPATH_USE_LINK_PATH ON) +target_compile_features(whisper_speech_recognition PRIVATE cxx_std_11) + +install(TARGETS whisper_speech_recognition + RUNTIME DESTINATION samples_bin/ + COMPONENT samples_bin + EXCLUDE_FROM_ALL) diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/whisper_speech_recognition/README.md b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/whisper_speech_recognition/README.md new file mode 100644 index 0000000..9e23d0c --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/whisper_speech_recognition/README.md @@ -0,0 +1,130 @@ +# Whisper automatic speech recognition sample + +This example showcases inference of speech recognition Whisper Models. The application doesn't have many configuration options to encourage the reader to explore and modify the source code. For example, change the device for inference to GPU. The sample features `ov::genai::WhisperPipeline` and uses audio file in wav format as an input source. + +## Download and convert the model and tokenizers + +The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. 
+ +It's not required to install [../../export-requirements.txt](../../export-requirements.txt) for deployment if the model has already been exported. + +```sh +pip install --upgrade-strategy eager -r ../../requirements.txt +optimum-cli export openvino --trust-remote-code --model openai/whisper-base whisper-base +``` + +## Prepare audio file + +Prepare an audio file in WAV format with a 16 kHz sampling rate. + +You can download an example audio file: https://storage.openvinotoolkit.org/models_contrib/speech/2021.2/librispeech_s5/how_are_you_doing_today.wav + +## Run + +Follow [Get Started with Samples](https://docs.openvino.ai/2026/get-started/learn-openvino/openvino-samples/get-started-demos.html) to run the sample. + +`whisper_speech_recognition whisper-base how_are_you_doing_today.wav` + +Output: +``` + How are you doing today? +timestamps: [0, 2] text: How are you doing today? +``` + +Refer to the [Supported Models](https://openvinotoolkit.github.io/openvino.genai/docs/supported-models/#speech-recognition-models-whisper-based) for more details. + +# Whisper pipeline usage + +```c++ +#include "openvino/genai/whisper_pipeline.hpp" + +ov::genai::WhisperPipeline pipeline(model_dir, "CPU"); +// Pipeline expects normalized audio with Sample Rate of 16kHz +ov::genai::RawSpeechInput raw_speech = read_wav("how_are_you_doing_today.wav"); +auto result = pipeline.generate(raw_speech); +// How are you doing today? +``` + +### Transcription + +Whisper pipeline predicts the language of the source audio automatically. + +```c++ +ov::genai::RawSpeechInput raw_speech = read_wav("how_are_you_doing_today.wav"); +auto result = pipeline.generate(raw_speech); +// How are you doing today? + +raw_speech = read_wav("fr_sample.wav"); +result = pipeline.generate(raw_speech); +// Il s'agit d'une entité très complexe qui consiste... 
+``` + +If the source audio language is known in advance, it can be specified as an argument to the `generate` method: + +```c++ +ov::genai::RawSpeechInput raw_speech = read_wav("how_are_you_doing_today.wav"); +auto result = pipeline.generate(raw_speech, ov::genai::language("<|en|>")); +// How are you doing today? + +raw_speech = read_wav("fr_sample.wav"); +result = pipeline.generate(raw_speech, ov::genai::language("<|fr|>")); +// Il s'agit d'une entité très complexe qui consiste... +``` + +### Translation + +By default, Whisper performs the task of speech transcription, where the source audio language is the same as the target text language. To perform speech translation, where the target text is in English, set the task to "translate": + +```c++ +ov::genai::RawSpeechInput raw_speech = read_wav("fr_sample.wav"); +auto result = pipeline.generate(raw_speech, ov::genai::task("translate")); +// It is a very complex entity that consists... +``` + +### Timestamps prediction + +The model can predict timestamps. For sentence-level timestamps, pass the `return_timestamps` argument: + +```C++ +ov::genai::RawSpeechInput raw_speech = read_wav("how_are_you_doing_today.wav"); +auto result = pipeline.generate(raw_speech, ov::genai::return_timestamps(true)); + +std::cout << std::setprecision(2); +for (auto& chunk : *result.chunks) { + std::cout << "timestamps: [" << chunk.start_ts << ", " << chunk.end_ts << "] text: " << chunk.text << "\n"; +} +// timestamps: [0, 2] text: How are you doing today? +``` + +### Long-Form audio Transcription + +The Whisper model is designed to work on audio samples of up to 30s in duration. The Whisper pipeline uses a sequential chunking algorithm to transcribe audio samples of arbitrary length. +The sequential chunking algorithm uses a "sliding window", transcribing 30-second slices one after the other. 
+ +### Initial prompt and hotwords + +The Whisper pipeline accepts `initial_prompt` and `hotwords` as `generate` arguments: +* `initial_prompt`: initial prompt tokens passed as a previous transcription (after the `<|startofprev|>` token) to the first processing window +* `hotwords`: hotwords tokens passed as a previous transcription (after the `<|startofprev|>` token) to all processing windows + +The Whisper model can use that context to better understand the speech and maintain a consistent writing style. However, prompts do not need to be genuine transcripts from prior audio segments. Such prompts can be used to steer the model to use particular spellings or styles: + +```c++ +auto result = pipeline.generate(raw_speech); +// He has gone and gone for good answered Paul Icrom who... + +result = pipeline.generate(raw_speech, ov::genai::initial_prompt("Polychrome")); +// He has gone and gone for good answered Polychrome who... +``` + + +### Troubleshooting + +#### Empty or rubbish output + +Example output: +``` +---------------- +``` + +To resolve this, ensure that the audio data has a 16 kHz sampling rate. diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/whisper_speech_recognition/audio_utils.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/whisper_speech_recognition/audio_utils.cpp new file mode 100644 index 0000000..eaf9b4c --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/whisper_speech_recognition/audio_utils.cpp @@ -0,0 +1,118 @@ +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "audio_utils.hpp" + +#include +#include + +#include "openvino/genai/whisper_pipeline.hpp" + +#define DR_WAV_IMPLEMENTATION +#include + +#ifdef _WIN32 +# include +# include +#endif + +namespace { +bool is_wav_buffer(const std::string buf) { + // RIFF ref: https://en.wikipedia.org/wiki/Resource_Interchange_File_Format + // WAV ref: https://www.mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html + if (buf.size() < 
12 || buf.substr(0, 4) != "RIFF" || buf.substr(8, 4) != "WAVE") { + return false; + } + + uint32_t chunk_size = *reinterpret_cast<const uint32_t*>(buf.data() + 4); + if (chunk_size + 8 != buf.size()) { + return false; + } + + return true; +} +} // namespace + +namespace utils { +namespace audio { + +#define COMMON_SAMPLE_RATE 16000 + +ov::genai::RawSpeechInput read_wav(const std::string& filename) { // "-" = read stdin, RIFF/WAVE bytes = in-memory buffer, otherwise a file path; returns mono float PCM + drwav wav; + std::vector<uint8_t> wav_data; // used for pipe input from stdin or ffmpeg decoding output + + if (filename == "-") { + { +#ifdef _WIN32 + _setmode(_fileno(stdin), _O_BINARY); +#endif + + uint8_t buf[1024]; + while (true) { + const size_t n = fread(buf, 1, sizeof(buf), stdin); + if (n == 0) { + break; + } + wav_data.insert(wav_data.end(), buf, buf + n); + } + } + + OPENVINO_ASSERT(drwav_init_memory(&wav, wav_data.data(), wav_data.size(), nullptr), + "Failed to open WAV file from stdin"); + + fprintf(stderr, "%s: read %zu bytes from stdin\n", __func__, wav_data.size()); + } else if (is_wav_buffer(filename)) { + OPENVINO_ASSERT(drwav_init_memory(&wav, filename.c_str(), filename.size(), nullptr), + "Failed to open WAV file from fname buffer"); + } else if (!drwav_init_file(&wav, filename.c_str(), nullptr)) { +#if defined(WHISPER_FFMPEG) + OPENVINO_ASSERT(ffmpeg_decode_audio(filename, wav_data) == 0, "Failed to ffmpeg decode"); + + OPENVINO_ASSERT(drwav_init_memory(&wav, wav_data.data(), wav_data.size(), nullptr), + "Failed to read wav data as wav"); +#else + throw std::runtime_error("failed to open as WAV file"); +#endif + } + + if (wav.channels != 1 && wav.channels != 2) { + drwav_uninit(&wav); + throw std::runtime_error("WAV file must be mono or stereo"); + } + + if (wav.sampleRate != COMMON_SAMPLE_RATE) { + drwav_uninit(&wav); + throw std::runtime_error("WAV file must be " + std::to_string(COMMON_SAMPLE_RATE / 1000) + " kHz"); // to_string, not std::string{16}: brace-init would yield the single char with code 16 + } + + const uint64_t n = + wav_data.empty() + ? 
wav.totalPCMFrameCount + : ( + wav_data.size() / + (static_cast<uint64_t>(wav.channels) * static_cast<uint64_t>(wav.bitsPerSample) / 8ul) + ); + + std::vector<int16_t> pcm16; + pcm16.resize(n * wav.channels); + drwav_read_pcm_frames_s16(&wav, n, pcm16.data()); + drwav_uninit(&wav); + + // convert to mono, float + std::vector<float> pcmf32; + pcmf32.resize(n); + if (wav.channels == 1) { + for (uint64_t i = 0; i < n; i++) { + pcmf32[i] = float(pcm16[i]) / 32768.0f; + } + } else { + for (uint64_t i = 0; i < n; i++) { + pcmf32[i] = float(pcm16[2 * i] + pcm16[2 * i + 1]) / 65536.0f; + } + } + + return pcmf32; +} +} // namespace audio +} // namespace utils diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/whisper_speech_recognition/audio_utils.hpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/whisper_speech_recognition/audio_utils.hpp new file mode 100644 index 0000000..9b21d1d --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/whisper_speech_recognition/audio_utils.hpp @@ -0,0 +1,12 @@ +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "openvino/genai/whisper_pipeline.hpp" + +namespace utils { +namespace audio { +ov::genai::RawSpeechInput read_wav(const std::string& filename); +} // namespace audio +} // namespace utils diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/cpp/whisper_speech_recognition/whisper_speech_recognition.cpp b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/whisper_speech_recognition/whisper_speech_recognition.cpp new file mode 100644 index 0000000..94b547b --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/cpp/whisper_speech_recognition/whisper_speech_recognition.cpp @@ -0,0 +1,70 @@ +// Copyright (C) 2023-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "audio_utils.hpp" +#include "openvino/genai/whisper_pipeline.hpp" + +auto get_config_for_cache() { + ov::AnyMap config; + 
config.insert({ov::cache_dir("whisper_cache")}); + return config; +} + +int main(int argc, char* argv[]) try { // function-try-block: the handlers after the closing brace cover the whole body + if (argc < 3 || argc > 4) { // two required arguments plus one optional device argument + throw std::runtime_error(std::string{"Usage: "} + argv[0] + " \"\" "); + } + + std::filesystem::path models_path = argv[1]; + std::string wav_file_path = argv[2]; + std::string device = (argc == 4) ? argv[3] : "CPU"; // Default to CPU if no device is provided + + ov::AnyMap ov_config; + if (device == "NPU" || + device.find("GPU") != std::string::npos) { // need to handle cases like "GPU", "GPU.0" and "GPU.1" + // Cache compiled models on disk for GPU and NPU to save time on the + // next run. It's not beneficial for CPU. + ov_config = get_config_for_cache(); + } + + // Word timestamps require decomposition of cross-attention decoder SDPA layers, + // so word_timestamps must be passed to the pipeline constructor (not just in generation config) + ov_config.insert(ov::genai::word_timestamps(true)); + + ov::genai::WhisperPipeline pipeline(models_path, device, ov_config); + + ov::genai::WhisperGenerationConfig config = pipeline.get_generation_config(); + // 'task' and 'language' parameters are supported for multilingual models only + config.language = "<|en|>"; // can switch to <|zh|> for Chinese language + config.task = "transcribe"; + config.return_timestamps = true; + config.word_timestamps = true; + + // Pipeline expects normalized audio with Sample Rate of 16kHz + ov::genai::RawSpeechInput raw_speech = utils::audio::read_wav(wav_file_path); + auto result = pipeline.generate(raw_speech, config); + + std::cout << result << "\n"; + + std::cout << std::fixed << std::setprecision(2); + for (auto& chunk : *result.chunks) { // NOTE(review): dereferenced without a presence check; presumably set because return_timestamps is true - confirm + std::cout << "timestamps: [" << chunk.start_ts << ", " << chunk.end_ts << "] text: " << chunk.text << "\n"; + } + + for (auto& word : *result.words) { // NOTE(review): same unchecked dereference; presumably set because word_timestamps is true - confirm + std::cout << "[" << word.start_ts << ", " << word.end_ts << "]: " << word.word << "\n"; + } + +} catch (const std::exception& error) { + try { + std::cerr << 
error.what() << '\n'; + } catch (const std::ios_base::failure&) { // nothing is done if writing the message itself fails + } + return EXIT_FAILURE; +} catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) { // nothing is done if writing the message itself fails + } + return EXIT_FAILURE; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/deployment-requirements.txt b/src/resources/openvino.genai-2026.1.0.0/samples/deployment-requirements.txt new file mode 100644 index 0000000..e6fea4b --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/deployment-requirements.txt @@ -0,0 +1,7 @@ +--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly +openvino_genai~=2026.1.0.0.dev +librosa==0.11.0 # For Whisper +pillow==12.1.1 # Image processing for VLMs +json5==0.13.0 # For ReAct +pydantic==2.12.5 # For Structured output json schema +opencv-python==4.13.0.92 # For video-to-text VLM sample diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/export-requirements.txt b/src/resources/openvino.genai-2026.1.0.0/samples/export-requirements.txt new file mode 100644 index 0000000..48248d3 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/export-requirements.txt @@ -0,0 +1,17 @@ +--extra-index-url https://download.pytorch.org/whl/cpu +--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly +openvino-tokenizers[transformers]~=2026.1.0.0.dev +https://github.com/huggingface/optimum-intel/archive/2c48d6430c265ac259c1b264f3e2c4025cdd7b76.tar.gz#egg=optimum-intel +numpy==1.26.4; platform_system == "Darwin" and platform_machine == "x86_64" +safetensors==0.6.2; platform_system == "Darwin" and platform_machine == "x86_64" +einops==0.8.2 # For Qwen +transformers_stream_generator==0.0.5 # For Qwen +diffusers==0.37.0 # For image generation pipelines +timm==1.0.25 # For exporting InternVL2 +# torchvision for visual language models +torchvision==0.17.2; platform_system == "Darwin" and platform_machine == "x86_64" +torchvision==0.23.0; platform_system != 
"Darwin" or platform_machine != "x86_64" +transformers==4.55.4 # For Whisper +hf_transfer==0.1.9 # for faster models download, should used with env var HF_HUB_ENABLE_HF_TRANSFER=1 +backoff==2.2.1 # for microsoft/Phi-3.5-vision-instruct +peft==0.18.1 # For microsoft/Phi-4-multimodal-instruct diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/generation.gif b/src/resources/openvino.genai-2026.1.0.0/samples/generation.gif new file mode 100644 index 0000000..d6434df --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/generation.gif @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b3ea717def68df6493c629551b80e74f58d03be02d837e6a16541b3d95787df +size 5550657 diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/js/.gitignore b/src/resources/openvino.genai-2026.1.0.0/samples/js/.gitignore new file mode 100644 index 0000000..3c3629e --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/js/.gitignore @@ -0,0 +1 @@ +node_modules diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/js/package-lock.json b/src/resources/openvino.genai-2026.1.0.0/samples/js/package-lock.json new file mode 100644 index 0000000..0bb3e8b --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/js/package-lock.json @@ -0,0 +1,713 @@ +{ + "name": "openvino-genai-node-demo", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "openvino-genai-node-demo", + "version": "1.0.0", + "license": "Apache-2.0", + "devDependencies": { + "openvino-genai-node": "^2026.1.0", + "yargs": "^18.0.0", + "zod": "^4.1.13" + }, + "engines": { + "node": ">=21.0.0" + } + }, + "node_modules/agent-base": { + "version": "7.1.4", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.4.tgz", + "integrity": "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 14" + } + }, 
+ "node_modules/ansi-regex": { + "version": "6.2.2", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.2.2.tgz", + "integrity": "sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/ansi-regex?sponsor=1" + } + }, + "node_modules/ansi-styles": { + "version": "6.2.3", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.3.tgz", + "integrity": "sha512-4Dj6M28JB+oAH8kFkTLUo+a2jwOFkuqb3yucU0CANcRRUbxS0cP0nZYCGjcc3BNXwRIsUVmDGgzawme7zvJHvg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/b4a": { + "version": "1.8.0", + "resolved": "https://registry.npmjs.org/b4a/-/b4a-1.8.0.tgz", + "integrity": "sha512-qRuSmNSkGQaHwNbM7J78Wwy+ghLEYF1zNrSeMxj4Kgw6y33O3mXcQ6Ie9fRvfU/YnxWkOchPXbaLb73TkIsfdg==", + "dev": true, + "license": "Apache-2.0", + "peerDependencies": { + "react-native-b4a": "*" + }, + "peerDependenciesMeta": { + "react-native-b4a": { + "optional": true + } + } + }, + "node_modules/bare-events": { + "version": "2.8.2", + "resolved": "https://registry.npmjs.org/bare-events/-/bare-events-2.8.2.tgz", + "integrity": "sha512-riJjyv1/mHLIPX4RwiK+oW9/4c3TEUeORHKefKAKnZ5kyslbN+HXowtbaVEqt4IMUB7OXlfixcs6gsFeo/jhiQ==", + "dev": true, + "license": "Apache-2.0", + "peerDependencies": { + "bare-abort-controller": "*" + }, + "peerDependenciesMeta": { + "bare-abort-controller": { + "optional": true + } + } + }, + "node_modules/bare-fs": { + "version": "4.5.5", + "resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-4.5.5.tgz", + "integrity": "sha512-XvwYM6VZqKoqDll8BmSww5luA5eflDzY0uEFfBJtFKe4PAAtxBjU3YIxzIBzhyaEQBy1VXEQBto4cpN5RZJw+w==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "bare-events": "^2.5.4", + "bare-path": "^3.0.0", + 
"bare-stream": "^2.6.4", + "bare-url": "^2.2.2", + "fast-fifo": "^1.3.2" + }, + "engines": { + "bare": ">=1.16.0" + }, + "peerDependencies": { + "bare-buffer": "*" + }, + "peerDependenciesMeta": { + "bare-buffer": { + "optional": true + } + } + }, + "node_modules/bare-os": { + "version": "3.7.1", + "resolved": "https://registry.npmjs.org/bare-os/-/bare-os-3.7.1.tgz", + "integrity": "sha512-ebvMaS5BgZKmJlvuWh14dg9rbUI84QeV3WlWn6Ph6lFI8jJoh7ADtVTyD2c93euwbe+zgi0DVrl4YmqXeM9aIA==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "bare": ">=1.14.0" + } + }, + "node_modules/bare-path": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/bare-path/-/bare-path-3.0.0.tgz", + "integrity": "sha512-tyfW2cQcB5NN8Saijrhqn0Zh7AnFNsnczRcuWODH0eYAXBsJ5gVxAUuNr7tsHSC6IZ77cA0SitzT+s47kot8Mw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "bare-os": "^3.0.1" + } + }, + "node_modules/bare-stream": { + "version": "2.8.0", + "resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-2.8.0.tgz", + "integrity": "sha512-reUN0M2sHRqCdG4lUK3Fw8w98eeUIZHL5c3H7Mbhk2yVBL+oofgaIp0ieLfD5QXwPCypBpmEEKU2WZKzbAk8GA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "streamx": "^2.21.0", + "teex": "^1.0.1" + }, + "peerDependencies": { + "bare-buffer": "*", + "bare-events": "*" + }, + "peerDependenciesMeta": { + "bare-buffer": { + "optional": true + }, + "bare-events": { + "optional": true + } + } + }, + "node_modules/bare-url": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/bare-url/-/bare-url-2.3.2.tgz", + "integrity": "sha512-ZMq4gd9ngV5aTMa5p9+UfY0b3skwhHELaDkhEHetMdX0LRkW9kzaym4oo/Eh+Ghm0CCDuMTsRIGM/ytUc1ZYmw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "bare-path": "^3.0.0" + } + }, + "node_modules/browserify-zlib": { + "version": "0.1.4", + "resolved": "https://registry.npmjs.org/browserify-zlib/-/browserify-zlib-0.1.4.tgz", + "integrity": 
"sha512-19OEpq7vWgsH6WkvkBJQDFvJS1uPcbFOQ4v9CU839dO+ZZXUZO6XpE6hNCqvlIIj+4fZvRiJ6DsAQ382GwiyTQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "pako": "~0.2.0" + } + }, + "node_modules/buffer-from": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.2.tgz", + "integrity": "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/cliui": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/cliui/-/cliui-9.0.1.tgz", + "integrity": "sha512-k7ndgKhwoQveBL+/1tqGJYNz097I7WOvwbmmU2AR5+magtbjPWQTS1C5vzGkBC8Ym8UWRzfKUzUUqFLypY4Q+w==", + "dev": true, + "license": "ISC", + "dependencies": { + "string-width": "^7.2.0", + "strip-ansi": "^7.1.0", + "wrap-ansi": "^9.0.0" + }, + "engines": { + "node": ">=20" + } + }, + "node_modules/core-util-is": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.3.tgz", + "integrity": "sha512-ZQBvi1DcpJ4GDqanjucZ2Hj3wEO5pZDS89BWbkcrvdxksJorwUDDZamX9ldFkp9aw2lmBDLgkObEA4DWNJ9FYQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/debug": { + "version": "4.4.3", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", + "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", + "dev": true, + "license": "MIT", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/duplexify": { + "version": "3.7.1", + "resolved": "https://registry.npmjs.org/duplexify/-/duplexify-3.7.1.tgz", + "integrity": "sha512-07z8uv2wMyS51kKhD1KsdXJg5WQ6t93RneqRxUHnskXVtlYYkLqM0gqStQZ3pj073g687jPCHrqNfCzawLYh5g==", + "dev": true, + "license": "MIT", + "dependencies": { + "end-of-stream": "^1.0.0", + "inherits": "^2.0.1", + "readable-stream": "^2.0.0", + "stream-shift": 
"^1.0.0" + } + }, + "node_modules/emoji-regex": { + "version": "10.6.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-10.6.0.tgz", + "integrity": "sha512-toUI84YS5YmxW219erniWD0CIVOo46xGKColeNQRgOzDorgBi1v4D71/OFzgD9GO2UGKIv1C3Sp8DAn0+j5w7A==", + "dev": true, + "license": "MIT" + }, + "node_modules/end-of-stream": { + "version": "1.4.5", + "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.5.tgz", + "integrity": "sha512-ooEGc6HP26xXq/N+GCGOT0JKCLDGrq2bQUZrQ7gyrJiZANJ/8YDTxTpQBXGMn+WbIQXNVpyWymm7KYVICQnyOg==", + "dev": true, + "license": "MIT", + "dependencies": { + "once": "^1.4.0" + } + }, + "node_modules/escalade": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz", + "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/events-universal": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/events-universal/-/events-universal-1.0.1.tgz", + "integrity": "sha512-LUd5euvbMLpwOF8m6ivPCbhQeSiYVNb8Vs0fQ8QjXo0JTkEHpz8pxdQf0gStltaPpw0Cca8b39KxvK9cfKRiAw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "bare-events": "^2.7.0" + } + }, + "node_modules/fast-fifo": { + "version": "1.3.2", + "resolved": "https://registry.npmjs.org/fast-fifo/-/fast-fifo-1.3.2.tgz", + "integrity": "sha512-/d9sfos4yxzpwkDkuN7k2SqFKtYNmCTzgfEpz82x34IM9/zc8KGxQoXg1liNC/izpRM/MBdt44Nmx41ZWqk+FQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/get-caller-file": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", + "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==", + "dev": true, + "license": "ISC", + "engines": { + "node": "6.* || 8.* || >= 10.*" + } + }, + "node_modules/get-east-asian-width": { + 
"version": "1.4.0", + "resolved": "https://registry.npmjs.org/get-east-asian-width/-/get-east-asian-width-1.4.0.tgz", + "integrity": "sha512-QZjmEOC+IT1uk6Rx0sX22V6uHWVwbdbxf1faPqJ1QhLdGgsRGCZoyaQBm/piRdJy/D2um6hM1UP7ZEeQ4EkP+Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/gunzip-maybe": { + "version": "1.4.2", + "resolved": "https://registry.npmjs.org/gunzip-maybe/-/gunzip-maybe-1.4.2.tgz", + "integrity": "sha512-4haO1M4mLO91PW57BMsDFf75UmwoRX0GkdD+Faw+Lr+r/OZrOCS0pIBwOL1xCKQqnQzbNFGgK2V2CpBUPeFNTw==", + "dev": true, + "license": "MIT", + "dependencies": { + "browserify-zlib": "^0.1.4", + "is-deflate": "^1.0.0", + "is-gzip": "^1.0.0", + "peek-stream": "^1.1.0", + "pumpify": "^1.3.3", + "through2": "^2.0.3" + }, + "bin": { + "gunzip-maybe": "bin.js" + } + }, + "node_modules/https-proxy-agent": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz", + "integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==", + "dev": true, + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "dev": true, + "license": "ISC" + }, + "node_modules/is-deflate": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-deflate/-/is-deflate-1.0.0.tgz", + "integrity": "sha512-YDoFpuZWu1VRXlsnlYMzKyVRITXj7Ej/V9gXQ2/pAe7X1J7M/RNOqaIYi6qUn+B7nGyB9pDXrv02dsB58d2ZAQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/is-gzip": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-gzip/-/is-gzip-1.0.0.tgz", + "integrity": 
"sha512-rcfALRIb1YewtnksfRIHGcIY93QnK8BIQ/2c9yDYcG/Y6+vRoJuTWBmmSEbyLLYtXm7q35pHOHbZFQBaLrhlWQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/isarray": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz", + "integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "dev": true, + "license": "MIT" + }, + "node_modules/once": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "dev": true, + "license": "ISC", + "dependencies": { + "wrappy": "1" + } + }, + "node_modules/openvino-genai-node": { + "version": "2026.1.0", + "dev": true, + "hasInstallScript": true, + "license": "Apache-2.0", + "os": [ + "linux", + "darwin", + "win32" + ], + "dependencies": { + "openvino-node": "2026.1.0" + }, + "engines": { + "node": ">=21.0.0" + } + }, + "node_modules/openvino-node": { + "version": "2026.1.0", + "dev": true, + "hasInstallScript": true, + "license": "Apache-2.0", + "os": [ + "win32", + "darwin", + "linux" + ], + "dependencies": { + "gunzip-maybe": "^1.4.2", + "https-proxy-agent": "^7.0.2", + "tar-fs": "^3.1.1" + }, + "engines": { + "node": ">=21.0.0" + } + }, + "node_modules/pako": { + "version": "0.2.9", + "resolved": "https://registry.npmjs.org/pako/-/pako-0.2.9.tgz", + "integrity": "sha512-NUcwaKxUxWrZLpDG+z/xZaCgQITkA/Dv4V/T6bw7VON6l1Xz/VnrBqrYjZQ12TamKHzITTfOEIYUj48y2KXImA==", + "dev": true, + "license": "MIT" + }, + "node_modules/peek-stream": { + "version": "1.1.3", + "resolved": 
"https://registry.npmjs.org/peek-stream/-/peek-stream-1.1.3.tgz", + "integrity": "sha512-FhJ+YbOSBb9/rIl2ZeE/QHEsWn7PqNYt8ARAY3kIgNGOk13g9FGyIY6JIl/xB/3TFRVoTv5as0l11weORrTekA==", + "dev": true, + "license": "MIT", + "dependencies": { + "buffer-from": "^1.0.0", + "duplexify": "^3.5.0", + "through2": "^2.0.3" + } + }, + "node_modules/process-nextick-args": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz", + "integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==", + "dev": true, + "license": "MIT" + }, + "node_modules/pump": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/pump/-/pump-2.0.1.tgz", + "integrity": "sha512-ruPMNRkN3MHP1cWJc9OWr+T/xDP0jhXYCLfJcBuX54hhfIBnaQmAUMfDcG4DM5UMWByBbJY69QSphm3jtDKIkA==", + "dev": true, + "license": "MIT", + "dependencies": { + "end-of-stream": "^1.1.0", + "once": "^1.3.1" + } + }, + "node_modules/pumpify": { + "version": "1.5.1", + "resolved": "https://registry.npmjs.org/pumpify/-/pumpify-1.5.1.tgz", + "integrity": "sha512-oClZI37HvuUJJxSKKrC17bZ9Cu0ZYhEAGPsPUy9KlMUmv9dKX2o77RUmq7f3XjIxbwyGwYzbzQ1L2Ks8sIradQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "duplexify": "^3.6.0", + "inherits": "^2.0.3", + "pump": "^2.0.0" + } + }, + "node_modules/readable-stream": { + "version": "2.3.8", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.8.tgz", + "integrity": "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==", + "dev": true, + "license": "MIT", + "dependencies": { + "core-util-is": "~1.0.0", + "inherits": "~2.0.3", + "isarray": "~1.0.0", + "process-nextick-args": "~2.0.0", + "safe-buffer": "~5.1.1", + "string_decoder": "~1.1.1", + "util-deprecate": "~1.0.1" + } + }, + "node_modules/safe-buffer": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", + 
"integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==", + "dev": true, + "license": "MIT" + }, + "node_modules/stream-shift": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/stream-shift/-/stream-shift-1.0.3.tgz", + "integrity": "sha512-76ORR0DO1o1hlKwTbi/DM3EXWGf3ZJYO8cXX5RJwnul2DEg2oyoZyjLNoQM8WsvZiFKCRfC1O0J7iCvie3RZmQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/streamx": { + "version": "2.23.0", + "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.23.0.tgz", + "integrity": "sha512-kn+e44esVfn2Fa/O0CPFcex27fjIL6MkVae0Mm6q+E6f0hWv578YCERbv+4m02cjxvDsPKLnmxral/rR6lBMAg==", + "dev": true, + "license": "MIT", + "dependencies": { + "events-universal": "^1.0.0", + "fast-fifo": "^1.3.2", + "text-decoder": "^1.1.0" + } + }, + "node_modules/string_decoder": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz", + "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==", + "dev": true, + "license": "MIT", + "dependencies": { + "safe-buffer": "~5.1.0" + } + }, + "node_modules/string-width": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-7.2.0.tgz", + "integrity": "sha512-tsaTIkKW9b4N+AEj+SVA+WhJzV7/zMhcSu78mLKWSk7cXMOSHsBKFWUs0fWwq8QyK3MgJBQRX6Gbi4kYbdvGkQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "emoji-regex": "^10.3.0", + "get-east-asian-width": "^1.0.0", + "strip-ansi": "^7.1.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/strip-ansi": { + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.2.tgz", + "integrity": "sha512-gmBGslpoQJtgnMAvOVqGZpEz9dyoKTCzy2nfz/n8aIFhN/jCE/rCmcxabB6jOOHV+0WNnylOxaxBQPSvcWklhA==", + "dev": true, + "license": "MIT", + "dependencies": { + 
"ansi-regex": "^6.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/strip-ansi?sponsor=1" + } + }, + "node_modules/tar-fs": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.1.2.tgz", + "integrity": "sha512-QGxxTxxyleAdyM3kpFs14ymbYmNFrfY+pHj7Z8FgtbZ7w2//VAgLMac7sT6nRpIHjppXO2AwwEOg0bPFVRcmXw==", + "dev": true, + "license": "MIT", + "dependencies": { + "pump": "^3.0.0", + "tar-stream": "^3.1.5" + }, + "optionalDependencies": { + "bare-fs": "^4.0.1", + "bare-path": "^3.0.0" + } + }, + "node_modules/tar-fs/node_modules/pump": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.4.tgz", + "integrity": "sha512-VS7sjc6KR7e1ukRFhQSY5LM2uBWAUPiOPa/A3mkKmiMwSmRFUITt0xuj+/lesgnCv+dPIEYlkzrcyXgquIHMcA==", + "dev": true, + "license": "MIT", + "dependencies": { + "end-of-stream": "^1.1.0", + "once": "^1.3.1" + } + }, + "node_modules/tar-stream": { + "version": "3.1.8", + "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-3.1.8.tgz", + "integrity": "sha512-U6QpVRyCGHva435KoNWy9PRoi2IFYCgtEhq9nmrPPpbRacPs9IH4aJ3gbrFC8dPcXvdSZ4XXfXT5Fshbp2MtlQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "b4a": "^1.6.4", + "bare-fs": "^4.5.5", + "fast-fifo": "^1.2.0", + "streamx": "^2.15.0" + } + }, + "node_modules/teex": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/teex/-/teex-1.0.1.tgz", + "integrity": "sha512-eYE6iEI62Ni1H8oIa7KlDU6uQBtqr4Eajni3wX7rpfXD8ysFx8z0+dri+KWEPWpBsxXfxu58x/0jvTVT1ekOSg==", + "dev": true, + "license": "MIT", + "dependencies": { + "streamx": "^2.12.5" + } + }, + "node_modules/text-decoder": { + "version": "1.2.7", + "resolved": "https://registry.npmjs.org/text-decoder/-/text-decoder-1.2.7.tgz", + "integrity": "sha512-vlLytXkeP4xvEq2otHeJfSQIRyWxo/oZGEbXrtEEF9Hnmrdly59sUbzZ/QgyWuLYHctCHxFF4tRQZNQ9k60ExQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "b4a": "^1.6.4" + } + }, + 
"node_modules/through2": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/through2/-/through2-2.0.5.tgz", + "integrity": "sha512-/mrRod8xqpA+IHSLyGCQ2s8SPHiCDEeQJSep1jqLYeEUClOFG2Qsh+4FU6G9VeqpZnGW/Su8LQGc4YKni5rYSQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "readable-stream": "~2.3.6", + "xtend": "~4.0.1" + } + }, + "node_modules/util-deprecate": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", + "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", + "dev": true, + "license": "MIT" + }, + "node_modules/wrap-ansi": { + "version": "9.0.2", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-9.0.2.tgz", + "integrity": "sha512-42AtmgqjV+X1VpdOfyTGOYRi0/zsoLqtXQckTmqTeybT+BDIbM/Guxo7x3pE2vtpr1ok6xRqM9OpBe+Jyoqyww==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-styles": "^6.2.1", + "string-width": "^7.0.0", + "strip-ansi": "^7.1.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, + "node_modules/wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", + "dev": true, + "license": "ISC" + }, + "node_modules/xtend": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.2.tgz", + "integrity": "sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.4" + } + }, + "node_modules/y18n": { + "version": "5.0.8", + "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", + "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==", + "dev": true, + "license": "ISC", + 
"engines": { + "node": ">=10" + } + }, + "node_modules/yargs": { + "version": "18.0.0", + "resolved": "https://registry.npmjs.org/yargs/-/yargs-18.0.0.tgz", + "integrity": "sha512-4UEqdc2RYGHZc7Doyqkrqiln3p9X2DZVxaGbwhn2pi7MrRagKaOcIKe8L3OxYcbhXLgLFUS3zAYuQjKBQgmuNg==", + "dev": true, + "license": "MIT", + "dependencies": { + "cliui": "^9.0.1", + "escalade": "^3.1.1", + "get-caller-file": "^2.0.5", + "string-width": "^7.2.0", + "y18n": "^5.0.5", + "yargs-parser": "^22.0.0" + }, + "engines": { + "node": "^20.19.0 || ^22.12.0 || >=23" + } + }, + "node_modules/yargs-parser": { + "version": "22.0.0", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-22.0.0.tgz", + "integrity": "sha512-rwu/ClNdSMpkSrUb+d6BRsSkLUq1fmfsY6TOpYzTwvwkg1/NRG85KBy3kq++A8LKQwX6lsu+aWad+2khvuXrqw==", + "dev": true, + "license": "ISC", + "engines": { + "node": "^20.19.0 || ^22.12.0 || >=23" + } + }, + "node_modules/zod": { + "version": "4.1.13", + "resolved": "https://registry.npmjs.org/zod/-/zod-4.1.13.tgz", + "integrity": "sha512-AvvthqfqrAhNH9dnfmrfKzX5upOdjUVJYFqNSlkmGf64gRaTzlPwz99IHYnVs28qYAybvAlBV+H7pn0saFY4Ig==", + "dev": true, + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + } + } +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/js/package.json b/src/resources/openvino.genai-2026.1.0.0/samples/js/package.json new file mode 100644 index 0000000..da0a33c --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/js/package.json @@ -0,0 +1,17 @@ +{ + "name": "openvino-genai-node-demo", + "version": "1.0.0", + "license": "Apache-2.0", + "type": "module", + "devDependencies": { + "openvino-genai-node": "^2026.1.0", + "yargs": "^18.0.0", + "zod": "^4.1.13" + }, + "engines": { + "node": ">=21.0.0" + }, + "scripts": { + "test": "node tests/usage.test.js" + } +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/js/rag/README.md b/src/resources/openvino.genai-2026.1.0.0/samples/js/rag/README.md new 
file mode 100644 index 0000000..9bc750e --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/js/rag/README.md @@ -0,0 +1,68 @@ +# Retrieval Augmented Generation Sample + +This example showcases inference of Text Embedding and Text Rerank Models. The application has limited configuration options to encourage the reader to explore and modify the source code. For example, change the device for inference to GPU. The sample features `TextEmbeddingPipeline` and `TextRerankPipeline`, which use text as an input source. + +## Download and Convert the Model and Tokenizers + +The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. + +Install [../../export-requirements.txt](../../export-requirements.txt) to convert a model. + +```sh +pip install --upgrade-strategy eager -r ../../export-requirements.txt +``` + +To export text embedding model run Optimum CLI command: + +```sh +optimum-cli export openvino --task feature-extraction --model BAAI/bge-small-en-v1.5 BAAI/bge-small-en-v1.5 +``` + +To export text reranking model run Optimum CLI command: + +```sh +optimum-cli export openvino --task text-classification --model cross-encoder/ms-marco-MiniLM-L6-v2 cross-encoder/ms-marco-MiniLM-L6-v2 +``` + +## Run + +Compile GenAI JavaScript bindings archive first using [the instructions](../../../src/js/README.md#build-bindings). + +Run `npm install` and the example will be ready to run. + +### 1. Text Embedding Sample (`text_embeddings.js`) +- **Description:** + Demonstrates inference of text embedding models using OpenVINO GenAI. Converts input text into vector embeddings for downstream tasks such as retrieval or semantic search. +- **Run Command:** + ```sh + node text_embeddings.js "Document 1" "Document 2" + ``` +Refer to the [Supported Models](https://openvinotoolkit.github.io/openvino.genai/docs/supported-models/#text-embeddings-models) for more details. + +### 2. 
Text Rerank Sample (`text_rerank.js`) +- **Description:** + Demonstrates inference of text rerank models using OpenVINO GenAI. Reranks a list of candidate documents based on their relevance to a query using a cross-encoder or reranker model. +- **Run Command:** + ```sh + node text_rerank.js "" "" ["" ...] + ``` + +# Text Embedding Pipeline Usage + +```js +import { TextEmbeddingPipeline } from 'openvino-genai-node'; + +const pipeline = await TextEmbeddingPipeline(model_dir, "CPU"); + +const embeddings = await pipeline.embedDocuments(["document1", "document2"]); +``` + +# Text Rerank Pipeline Usage + +```js +import { TextRerankPipeline } from 'openvino-genai-node'; + +const pipeline = await TextRerankPipeline(modelPath, { device: "CPU" }); + +const rerankResult = await pipeline.rerank(query, documents); +``` diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/js/rag/text_embeddings.js b/src/resources/openvino.genai-2026.1.0.0/samples/js/rag/text_embeddings.js new file mode 100644 index 0000000..c8b94d9 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/js/rag/text_embeddings.js @@ -0,0 +1,30 @@ +import { TextEmbeddingPipeline, PoolingType } from 'openvino-genai-node'; +import { basename } from 'node:path'; + +main(); // function declarations are hoisted, so the call may precede the definition + +async function main() { + const modelPath = process.argv[2]; + const texts = process.argv.slice(3); // every argument after the model path is a text to embed + + const usageCommand = `Usage: node ${basename(process.argv[1])} '' ['' ...]`; + if (!modelPath) { + console.error('Please specify path to model directory'); + console.error(usageCommand); + process.exit(1); + } + if (!texts.length) { + console.error('Please specify prompt'); + console.error(usageCommand); + process.exit(1); + } + + const device = 'CPU'; // GPU can be used as well + const config = { + 'pooling_type': PoolingType.MEAN + }; + + const pipeline = await TextEmbeddingPipeline(modelPath, device, config); + + await pipeline.embedDocuments(texts); // NOTE(review): the returned embeddings are neither stored nor printed here +} diff --git 
/**
 * Entry point of the text-rerank sample (text_rerank.js).
 *
 * Command line:
 *   `node text_rerank.js *path_to_model_dir* *query* *document* [*document* ...]`
 *
 * Builds a `TextRerankPipeline` on the CPU configured with `top_n: 3`,
 * reranks the given documents against the query and prints each returned
 * `[index, score]` pair together with the matching document text.
 *
 * Exits with code 1 (after printing a usage hint to stderr) when the model
 * directory, the query, or the documents are missing.
 */
async function main() {
  const modelPath = process.argv[2];
  const query = process.argv[3];
  const documents = process.argv.slice(4);

  // The placeholders follow the `*name*` convention used by the other
  // samples so the hint actually tells the user what to pass.
  const usageCommand =
    `Usage: node ${basename(process.argv[1])} *path_to_model_dir* "*query*" "*document*" ["*document*" ...]`;

  // Report the first missing argument, then bail out.
  const failWith = (message) => {
    console.error(message);
    console.error(usageCommand);
    process.exit(1);
  };

  if (!modelPath) {
    failWith('Please specify path to model directory');
  }
  if (!query) {
    failWith('Please specify query');
  }
  if (documents.length === 0) {
    failWith('Please specify at least one document');
  }

  const device = 'CPU'; // GPU can be used as well
  const config = { top_n: 3 };

  const pipeline = await TextRerankPipeline(modelPath, { device, config });

  const rerankResult = await pipeline.rerank(query, documents);

  console.log('Reranked documents:');
  for (const [index, score] of rerankResult) {
    console.log(`Document ${index} (score: ${score.toFixed(4)}): ${documents[index]}`);
  }
}
`chatglm`, `LLaMA`, `Qwen` and other models with the same signature. The application doesn't have many configuration options to encourage the reader to explore and modify the source code. For example, change the device for inference to GPU. The sample features `Pipeline.LLMPipeline` and configures it for the chat scenario. + +## Download and convert the model and tokenizers + +To convert a model, you have to use the Python package `optimum-intel`. +The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. + +Install [../../export-requirements.txt](../../export-requirements.txt) to convert a model. + +```sh +pip install --upgrade-strategy eager -r ../../export-requirements.txt +optimum-cli export openvino --trust-remote-code --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 +``` +If a converted model in OpenVINO IR format is already available in the collection of [OpenVINO optimized LLMs](https://huggingface.co/collections/OpenVINO/llm-6687aaa2abca3bbcec71a9bd) on Hugging Face, it can be downloaded directly via huggingface-cli. +```sh +pip install huggingface-hub +huggingface-cli download MODEL_ID --local-dir OUTPUT_DIR +``` + +### Using GGUF models + +To run any samples with a GGUF model, simply provide the path to the .gguf file via the `model_dir` parameter. + +This capability is currently available in preview mode and supports a limited set of topologies, including SmolLM and Qwen2.5. For other models +and architectures, we still recommend converting the model to the IR format using the `optimum-intel` tool. + +## Sample Descriptions +### Common information + +When you use the [openvino.genai](https://github.com/openvinotoolkit/openvino.genai) **release branch**, install dependencies before running samples.
+In the current directory, run: +```bash +npm install +``` + +If you use the master branch, you may need to follow +[this instruction](../../../src/js/README.md#build-bindings) +to build the latest version of `openvino-genai-node` from source first, then install dependencies. + + +Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. For example, the model meta-llama/Llama-2-13b-chat-hf can benefit from being run on a dGPU. Modify the source code to change the device for inference to the GPU. + +See https://github.com/openvinotoolkit/openvino.genai/blob/master/src/README.md#supported-models for the list of supported models. + +### 1. Chat Sample (`chat_sample`) +- **Description:** +Interactive chat interface powered by OpenVINO. +Recommended models: meta-llama/Llama-2-7b-chat-hf, TinyLlama/TinyLlama-1.1B-Chat-v1.0, etc +- **Main Feature:** Real-time chat-like text generation. +- **Run Command:** + ```bash + node chat_sample.js model_dir + ``` +#### Missing chat template +If you encounter an exception indicating a missing "chat template" when launching the `ov::genai::LLMPipeline` in chat mode, it likely means the model was not tuned for chat functionality. To work this around, manually add the chat template to tokenizer_config.json of your model. +The following template can be used as a default, but it may not work properly with every model: +``` +"chat_template": "{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|im_start|>user\n' + message['content'] + '<|im_end|>\n<|im_start|>assistant\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|im_end|>\n'}}{% endif %}{% endfor %}", +``` + +### 2. Greedy Causal LM (`greedy_causal_lm`) +- **Description:** +Basic text generation using a causal language model. +Recommended models: meta-llama/Llama-2-7b-hf, etc +- **Main Feature:** Demonstrates simple text continuation. 
+- **Run Command:** + ```bash + node greedy_causal_lm.js model_dir prompt + ``` + +### 3. Beam Search Causal LM (`beam_search_causal_lm`) +- **Description:** +Uses beam search for more coherent text generation. +Recommended models: meta-llama/Llama-2-7b-hf, etc +- **Main Feature:** Improves text quality with beam search. +- **Run Command:** + ```bash + node beam_search_causal_lm.js model_dir prompt [prompts ...] + ``` + +### 4. Multinomial Causal LM (`multinomial_causal_lm`) +- **Description:** Text generation with multinomial sampling for diversity. +Recommended models: meta-llama/Llama-2-7b-hf, etc +- **Main Feature:** Introduces randomness for creative outputs. +- **Run Command:** + ```bash + node multinomial_causal_lm.js model_dir prompt + ``` + +### 5. LLM ReAct Agent Sample (`react_sample`) +- **Description:** +Interactive ReAct Agent powered by OpenVINO. +Recommended models: Qwen/Qwen2.5-3B-Instruct, Qwen/Qwen2.5-7B-Instruct +- **Main Feature:** Real-time reasoning-action from user's input. +- **Run Command:** + ```bash + node react_sample.js model_dir + ``` + +### 6. LLMs benchmarking sample (`benchmark_genai`) +- **Description:** + This sample script demonstrates how to benchmark LLMs in OpenVINO GenAI. The script includes functionality for warm-up iterations, generating text, and calculating various performance metrics. + + For more information on how performance metrics are calculated, please follow the [performance-metrics tutorial](../../../src/README.md#performance-metrics). +- **Main Feature:** Benchmark model via GenAI +- **Run Command:** + ```bash + node benchmark_genai.js [-m MODEL] [-p PROMPT] [--nw NUM_WARMUP] [-n NUM_ITER] [--mt MAX_NEW_TOKENS] [-d DEVICE] + ``` + +### 7. Structured Output Sample (`structured_output_sample`) +- **Description:** +This sample demonstrates how to use OpenVINO GenAI to generate structured outputs such as JSON from text prompts. 
This sample implementation is split into multiple "generate" calls to mitigate generating complex, variadic JSON structures in a single pass. This is done because not all models are able to generate a complex JSON, with a variadic number of elements in one shot, especially if the model is small and not fine-tuned for this task. By separating the task into two stages, it becomes possible to use smaller models and still achieve generated JSON good quality. + +Recommended models: meta-llama/Llama-3.2-1B-Instruct, meta-llama/Llama-3.2-8B-Instruct +- **Run Command:** + ```bash + node structured_output_generation.js model_dir + ``` + After running the command, an interactive dialog starts. You can enter a prompt and receive a structured output in response. The process is divided into two stages: + +1. **Stage One:** The model generates a JSON schema indicating the number of items of each type the user requests. For example, if you prompt: + `Generate a JSON for 2 cars and 1 person with an Irish surname` + The model might output: + `{"person": 1, "car": 2, "transaction": 0}` + This internal JSON is used to determine how many items of each type to generate in the next stage. It is not shown to the user. + +2. **Stage Two:** For each item type and count specified in the schema, the model is prompted to generate a JSON object. The original prompt is reused, but the schema guides the model to produce the correct structure. For the example above, the output might look like: + ``` + > Generate a JSON for 2 cars and 1 person with an Irish surname + output: + {"name": "John Doe", "surname": "O'Reilly", "age": 30, "city": "Dublin"} + {"model": "Toyota", "year": 2020, "engine": "hybrid"} + {"model": "Ford", "year": 2019, "color": "red"} + ``` + +**Note:** +Structured output enforcement guarantees correct JSON formatting, but does not ensure the factual correctness or sensibility of the content. 
The model may generate implausible or nonsensical data, such as `{"name": "John", "age": 200000}` or `{"model": "AbrakaKadabra9999######4242"}`. These are valid JSONs but may not make sense. For best results, use the latest or fine-tuned models for this task to improve the quality and relevance of the generated output. + + +### 8. Tool Calling with Structural Tags Sample (`structural_tags_generation`) +- **Description:** + Structural tags is a technique that allows switching from regular sampling to structured output generation and back during text generation. + If during the sampling process the model produces a trigger string, it switches to structured mode and generates output according to a JSON schema defined by the tag. After that the model switches back to regular sampling mode. + This is useful for generating function calls or other structured outputs that need to follow a specific format. + + This sample demonstrates how to use OpenVINO GenAI to generate structured tool calls from natural language prompts using structural tags. + The model is guided to output function calls in a specific format, enabling integration with external tools: + - Weather API + - Currency exchange APIs + + The system message instructs the model to call tools using a strict format: + ``` + <function="function_name"> + {"argument1": "value1", ...} + </function> + ``` + The sample includes schemas for each tool, and the model is prompted to use them for tool calling. There are two model calls - with and without structural tags. + You can compare the results to see how the model generates structured outputs when using structural tags. + If there is no prompt provided, the sample will use the default prompt: `"What is the weather in London today and in Paris yesterday, and how many pounds can I get for 100 euros?"` + +- **Main Feature:** Structured tool call generation with LLM using schema enforcement with structural tags.
+- **Run Command:** + ```bash + node structural_tags_generation.js model_dir [prompt] + ``` + After running, the script will print the generated text output with and without structural tags, and display the parsed tool calls. + +**Note:** +This approach is useful for building LLM-powered agents that interact with external APIs or services in a controlled, structured way. +For best results, use models fine-tuned for function calling and adapt structural tags according to the model function call template. +If the model does not generate trigger strings there will be no structural constraints during the generation. +The sample is verified with `meta-llama/Llama-3.2-3B-Instruct` model. Other models may not produce the expected results or might require different system prompt. + + +### 9. Compound Grammar Generation with Parsing Sample (`compound_grammar_generation`) +- **Description:** + This sample demonstrates advanced structured output generation and results parsing using compound grammars in OpenVINO GenAI. + It showcases how to combine multiple grammar types - Regex, JSONSchema and EBNF - using Union and Concat operations to strictly control LLM output and + also shows how to write parsing logic to extract structured data from the generated output. + It features multi-turn chat, switching grammar constraints between turns (e.g., "yes"/"no" answers and structured tool calls). + Union operation allows the model to choose which grammar to use during generation. + In the sample it is used to combine two regex grammars for `"yes"` or `"no"` answer. + Concat operation allows to start with one grammar and continue with another. + Also it demonstrates how to write custom parser to extract tool calls from the generated text. + In the sample it used to create a `phi-4-mini-instruct` style tool calling answer - `functools[{tool_1_json}, ...]` - by combining regex and JSON schema grammars. 
+ +- **Main Features:** + - Create grammar building blocks: Regex, JSONSchema, EBNF grammar + - Combine grammars with Concat and Union operations + - Multi-turn chat with grammar switching + - Structured tool calling using zod schemas + - Parse generated output to call tools from extracted structured data +- **Run Command:** + ```bash + node compound_grammar_generation.js model_dir + ``` +- **Notes:** + This sample is ideal for scenarios requiring strict control over LLM outputs, such as building agents that interact with APIs or require validated structured responses. It showcases how to combine regex triggers and JSON schema enforcement for robust output generation and parsing of the resulting output. + The sample is verified with `microsoft/Phi-4-mini-instruct` model. Other models may not produce the expected results or might require a different system prompt. + +#### Options for `benchmark_genai` +- `-m`, `--model`: Path to model and tokenizers base directory. [string] [required] +- `-p`, `--prompt`: The prompt to generate text. If without `-p` and `--pf`, the default prompt is `The Sky is blue because`. [string] +- `--prompt_file`, `--pf`: Read prompt from file. [string] +- `--num_warmup`, `--nw`: Number of warmup iterations. [number] [default: 1] +- `-n`, `--num_iter`: Number of iterations. [number] [default: 2] +- `--max_new_tokens`, `--mt`: Maximal number of new tokens. [number] [default: 20] +- `-d`, `--device`: Device to run the model on. [string] [default: "CPU"] + +### Troubleshooting + +#### Unicode characters encoding error on Windows + +Example error: +``` +UnicodeEncodeError: 'charmap' codec can't encode character '\u25aa' in position 0: character maps to <undefined> +``` + +If you encounter the error described in the example when the sample is printing output to the Windows console, it is likely due to the default Windows encoding not supporting certain Unicode characters. To resolve this: +1. Enable Unicode characters for Windows cmd - open `Region` settings from `Control panel`.
/**
 * Entry point of the beam-search sample (beam_search_causal_lm.js).
 *
 * Command line: `node beam_search_causal_lm.js *path_to_model_dir* *prompts*`
 *
 * Loads an `LLMPipeline` on the CPU and generates continuations for every
 * prompt with grouped beam search (15 beams in 3 groups, all beams
 * returned), then prints the decoded result.
 *
 * Exits with code 1 (after printing a usage hint to stderr) when the model
 * directory or the prompts are missing.
 */
async function main() {
  const modelPath = process.argv[2];
  const prompts = process.argv.slice(3);

  const runCommand =
    `Run command must be: 'node ${basename(process.argv[1])} *path_to_model_dir* *prompts*'`;

  if (!modelPath) {
    console.error(`Please specify path to model directory\n${runCommand}`);
    process.exit(1);
  }
  // `slice` always returns an array and an empty array is truthy, so the
  // "no prompts" case has to be detected through its length.
  if (prompts.length === 0) {
    console.error(`Please specify prompts\n${runCommand}`);
    process.exit(1);
  }

  const device = 'CPU'; // GPU can be used as well
  const pipe = await LLMPipeline(modelPath, device);

  const numBeams = 15;
  const config = {
    'max_new_tokens': 20,
    'num_beam_groups': 3,
    'num_beams': numBeams,
    'diversity_penalty': 1,
    // Return every beam, not only the best one.
    'num_return_sequences': numBeams,
    'return_decoded_results': true,
  };
  const beams = await pipe.generate(prompts, config);
  console.log(beams.toString());
}
If without `-p` and `--pf`, the default prompt is `The Sky is blue because`.", + }) + .option("prompt_file", { + alias: "pf", + type: "string", + describe: "Read prompt from file.", + }) + .option("num_warmup", { + alias: "nw", + type: "number", + default: 1, + describe: "Number of warmup iterations.", + }) + .option("num_iter", { + alias: "n", + type: "number", + default: 2, + describe: "Number of iterations.", + }) + .option("max_new_tokens", { + alias: "mt", + type: "number", + default: 20, + describe: "Maximal number of new tokens.", + }) + .option("device", { + alias: "d", + type: "string", + default: "CPU", + describe: "Device.", + }) + .parse(); + + let prompt; + if (argv.prompt !== undefined && argv.prompt_file !== undefined) { + console.error(`Cannot specify both --prompt and --prompt_file options simultaneously!`); + process.exit(1); + } else { + if (argv.prompt_file !== undefined) { + prompt = [readFileSync(argv.prompt_file, "utf-8")]; + } else { + prompt = argv.prompt === undefined ? 
["The Sky is blue because"] : [argv.prompt]; + } + } + if (prompt.length === 0 || prompt[0].trim() === "") { + throw new Error("Prompt is empty!"); + } + + const modelsPath = argv.model; + const { device } = argv; + const numWarmup = argv.num_warmup; + const numIter = argv.num_iter; + + const config = { + max_new_tokens: argv.max_new_tokens, + apply_chat_template: false, + return_decoded_results: true, + }; + + let pipe; + if (device === "NPU") { + pipe = await LLMPipeline(modelsPath, device); + } else { + const schedulerConfig = { + enable_prefix_caching: false, + max_num_batched_tokens: Number.MAX_SAFE_INTEGER, + }; + pipe = await LLMPipeline(modelsPath, device, { schedulerConfig: schedulerConfig }); + } + + const inputData = await pipe.getTokenizer().encode(prompt); + const promptTokenSize = inputData.input_ids.getShape()[1]; + console.log(`Prompt token size: ${promptTokenSize}`); + + for (let i = 0; i < numWarmup; i++) { + await pipe.generate(prompt, config); + } + + let res = await pipe.generate(prompt, config); + let { perfMetrics } = res; + for (let i = 0; i < numIter - 1; i++) { + res = await pipe.generate(prompt, config); + perfMetrics.add(res.perfMetrics); + } + + console.log(`Output token size: ${perfMetrics.getNumGeneratedTokens()}`); + console.log(`Load time: ${perfMetrics.getLoadTime()} ms`); + console.log(`Generate time: ${perfMetrics.getGenerateDuration().mean} ± ${perfMetrics.getGenerateDuration().std} ms`); + console.log(`Tokenization time: ${perfMetrics.getTokenizationDuration().mean} ± ${perfMetrics.getTokenizationDuration().std} ms`); + console.log(`Detokenization time: ${perfMetrics.getDetokenizationDuration().mean} ± ${perfMetrics.getDetokenizationDuration().std} ms`); + console.log(`TTFT: ${perfMetrics.getTTFT().mean} ± ${perfMetrics.getTTFT().std} ms`); + console.log(`TPOT: ${perfMetrics.getTPOT().mean} ± ${perfMetrics.getTPOT().std} ms`); + console.log(`Throughput : ${perfMetrics.getThroughput().mean} ± ${perfMetrics.getThroughput().std} 
/**
 * Entry point of the interactive chat sample (chat_sample.js).
 *
 * Command line: `node chat_sample.js *path_to_model_dir*`
 *
 * Validates the command line, loads an `LLMPipeline` on the CPU and then
 * keeps asking for questions on stdin. Every question is appended to a
 * `ChatHistory`, the answer is streamed through `streamer` while it is
 * generated and is appended to the history afterwards. An empty input line
 * closes the readline interface and ends the process with exit code 0.
 */
async function main() {
  const [, scriptPath, modelDir] = process.argv;
  const runCommand = `Run command must be: 'node ${basename(scriptPath)} *path_to_model_dir*'`;

  // Exactly one positional argument (the model directory) is accepted.
  if (process.argv.length > 3) {
    console.error(runCommand);
    process.exit(1);
  }
  if (!modelDir) {
    console.error(`Please specify path to model directory\n${runCommand}`);
    process.exit(1);
  }

  const device = 'CPU'; // GPU can be used as well

  // Interface used to read the user's questions from stdin.
  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
  });

  const pipe = await LLMPipeline(modelDir, device);
  const generationConfig = { 'max_new_tokens': 100 };
  const chatHistory = new ChatHistory();

  // Handles one line typed by the user, then re-arms the prompt.
  const onAnswer = async (rawInput) => {
    const question = rawInput.trim();

    // An empty line is the exit command.
    if (!question) {
      rl.close();
      process.exit(0);
    }

    chatHistory.push({ role: "user", content: question });
    const reply = await pipe.generate(chatHistory, generationConfig, streamer);
    chatHistory.push({ role: "assistant", content: reply.toString() });
    console.log('\n----------');

    if (!rl.closed) {
      ask();
    }
  };

  // Shows the prompt and waits for the next question.
  const ask = () => {
    rl.question('question:\n', onAnswer);
  };

  ask();
}
a/src/resources/openvino.genai-2026.1.0.0/samples/js/text_generation/compound_grammar_generation.js b/src/resources/openvino.genai-2026.1.0.0/samples/js/text_generation/compound_grammar_generation.js new file mode 100644 index 0000000..87a0aee --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/js/text_generation/compound_grammar_generation.js @@ -0,0 +1,160 @@ +import { z } from 'zod'; +import { ChatHistory, LLMPipeline, StructuredOutputConfig as SOC, StreamingStatus } from 'openvino-genai-node'; +import { serialize_json, toJSONSchema } from './helper.js'; + +function streamer(subword) { + process.stdout.write(subword); + return StreamingStatus.RUNNING; +} + +const bookFlightTicket = { + name: "book_flight_ticket", + schema: z.object({ + origin_airport_code: z.string().describe("The name of Departure airport code"), + destination_airport_code: z.string().describe("The name of Destination airport code"), + departure_date: z.string().describe("The date of outbound flight"), + return_date: z.string().describe("The date of return flight"), + }).describe("booking flights"), +}; + +const bookHotel = { + name: "book_hotel", + schema: z.object({ + destination: z.string().describe("The name of the city"), + check_in_date: z.string().describe("The date of check in"), + checkout_date: z.string().describe("The date of check out"), + }).describe("booking hotel"), +}; + +// Helper functions +function toolToDict(tool, withDescription = true) { + const deleteDescription = (ctx) => delete ctx.jsonSchema['description']; + const jsonSchema = toJSONSchema( + tool.schema, + withDescription + ? 
/**
 * Print every parsed tool call as `name(arg1="v1", arg2="v2")`, one call
 * per line.
 *
 * @param {object} answer Generation result; the tool calls are read from
 *   `answer.parsed[0].tool_calls`, where each entry has a `name` and an
 *   `arguments` object.
 *
 * The parser may leave `tool_calls` unset (for example when the generated
 * text is not valid JSON), so a missing or empty list is reported with a
 * message instead of failing with a TypeError.
 */
function printToolCall(answer) {
  const toolCalls = answer?.parsed?.[0]?.tool_calls;
  if (!Array.isArray(toolCalls) || toolCalls.length === 0) {
    console.log("No tool calls were parsed from the generated output.");
    return;
  }

  for (const toolCall of toolCalls) {
    // A call without arguments is printed as `name()`.
    const callArguments = toolCall["arguments"] ?? {};
    const renderedArgs = Object.entries(callArguments)
      .map(([key, value]) => `${key}="${value}"`);
    console.log(`${toolCall["name"]}(${renderedArgs.join(", ")})`);
  }
}
/**
 * Entry point of the compound-grammar sample
 * (compound_grammar_generation.js).
 *
 * Command line: `node compound_grammar_generation.js model_dir`
 *
 * Runs a two-turn chat against an `LLMPipeline` on the CPU:
 *   1. a yes/no question, constrained by a Union of two regex grammars and
 *      streamed to stdout;
 *   2. a booking request, constrained by the Concat of the constant
 *      `functools` marker and a JSON-schema grammar describing an array of
 *      tool calls; the answer is parsed by `CustomToolCallParser` and the
 *      resulting tool calls are printed.
 *
 * Exits with code 1 when the model directory argument is missing.
 */
async function main() {
  const modelDir = process.argv[2];
  if (!modelDir) {
    console.error("Please provide the path to the model directory as the first argument.");
    process.exit(1);
  }

  const pipe = await LLMPipeline(modelDir, "CPU");
  const chatHistory = new ChatHistory([{ role: "system", content: sysMessage }]);
  // Tool descriptions handed to the chat template keep their descriptions.
  const tools = [bookFlightTicket, bookHotel].map((tool) => toolToDict(tool, true));
  chatHistory.setTools(tools);

  const generationConfig = {
    return_decoded_results: true,
    max_new_tokens: 300,
    do_sample: true,
  };

  // ---- Turn 1: answer restricted to "yes" or "no" ----
  const userText1 = "Do dolphins have fingers?";
  console.log("User: ", userText1);
  chatHistory.push({ role: "user", content: userText1 });

  // the example grammar works the same as SOC.Regex("yes|no")
  // but the Union grammar is more flexible and can be extended with more options
  const yesOrNo = SOC.Union(SOC.Regex("yes"), SOC.Regex("no"));
  generationConfig.structured_output_config = new SOC({ structural_tags_config: yesOrNo });
  process.stdout.write("Assistant: ");
  const answer1 = await pipe.generate(chatHistory, generationConfig, streamer);
  chatHistory.push({ role: "assistant", content: answer1.texts[0] });
  console.log();

  // ---- Turn 2: answer restricted to functools[{...}, ...] tool calls ----
  const userText2 =
    "book flight ticket from Beijing to Paris(using airport code) in 2025-12-04 to 2025-12-10, "
    + "then book hotel from 2025-12-04 to 2025-12-10 in Paris";
  console.log("User: ", userText2);
  chatHistory.push({ role: "user", content: userText2 });

  const startToolCallTag = SOC.ConstString("functools");
  const toolsJson = SOC.JSONSchema(
    toolsToArraySchema(bookFlightTicket, bookHotel)
  );
  const toolCall = SOC.Concat(startToolCallTag, toolsJson);

  // Reuse the config object from turn 1 and only swap the grammar.
  generationConfig.structured_output_config.structural_tags_config = toolCall;
  generationConfig.parsers = [new CustomToolCallParser()];

  process.stdout.write("Assistant: ");
  const answer2 = await pipe.generate(chatHistory, generationConfig);
  console.log("\n\nThe following tool calls were generated:");
  printToolCall(answer2);
  console.log();
}
/**
 * Serialize a JavaScript value to a JSON string formatted like Python's
 * default `json.dumps` output, i.e. with `", "` between items and `": "`
 * between keys and values.
 *
 * Only structural separators get the extra space. String literals are
 * matched as whole tokens and copied through unchanged, so values such as
 * `"http://host"` or `"a,b"` are not altered.
 *
 * @param {*} object Any value accepted by `JSON.stringify`.
 * @returns {string} The JSON text with Python-style separators.
 */
export function serialize_json(object) {
  return JSON.stringify(object)
    // Alternative 1 consumes a complete JSON string literal (including
    // escaped quotes); alternative 2 is a separator outside of any string.
    .replace(/"(?:[^"\\]|\\.)*"|[:,]/g, (token) =>
      (token === ':' || token === ',' ? `${token} ` : token));
}
process.exit(1); + } + if (!prompt) { + console.error('Please specify prompt\n' + + `Run command must be: 'node ${basename(process.argv[1])} *path_to_model_dir* *prompt*'`); + process.exit(1); + } + + const device = 'CPU'; // GPU can be used as well + const pipe = await LLMPipeline(modelPath, device); + + const config = { + 'max_new_tokens': 100, + 'do_sample': true, + 'top_p': 0.9, + 'top_k': 30 + }; + + // Since the streamer is set, the results will be printed + // every time a new token is generated and put into the streamer queue. + for await (const chunk of pipe.stream(prompt, config)) { + process.stdout.write(chunk); + } +} diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/js/text_generation/react_sample.js b/src/resources/openvino.genai-2026.1.0.0/samples/js/text_generation/react_sample.js new file mode 100644 index 0000000..6f62a85 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/js/text_generation/react_sample.js @@ -0,0 +1,227 @@ +// Copyright(C) 2025 Intel Corporation +// SPDX - License - Identifier: Apache - 2.0 + +import * as https from 'https'; +import { LLMPipeline, StreamingStatus } from "openvino-genai-node"; +import { serialize_json } from './helper.js'; + +const llmConfig = { + 'max_new_tokens': 256, + 'return_decoded_results': true, +} + +const TOOL_DESC = `{name_for_model}: Call this tool to interact with the {name_for_human} API. What is the {name_for_human} API useful for? {description_for_model} Parameters: {parameters}` +"get_weather: Call this tool to interact with the get weather API. What is the get weather API useful for? Get the current weather in a given city name. Parameters: [object Object]" +const PROMPT_REACT = `Answer the following questions as best as you can. 
/**
 * Build the chat-templated planning prompt for the ReAct loop.
 *
 * Each tool description is rendered from TOOL_DESC and followed by a hint on
 * how its arguments must be formatted. When the history holds a single turn,
 * its query is wrapped in PROMPT_REACT so the model sees the tool list.
 *
 * @param tokenizer - Object exposing `applyChatTemplate(messages, addGenerationPrompt)`.
 * @param {Array<[string, string]>} chatHistory - `[query, response]` pairs.
 * @param {Array<object>} listOfToolInfo - Tool descriptors; the optional
 *     `args_format` field is "json" (default) or "code".
 * @returns The prompt produced by the tokenizer's chat template.
 * @throws {Error} If a tool declares an unsupported `args_format`.
 */
function buildInputText(tokenizer, chatHistory, listOfToolInfo) {
    const toolsTextList = [];
    for (const toolInfo of listOfToolInfo) {
        let tool = formatTemplate(TOOL_DESC, toolInfo);
        // Resolve the default first: `x ?? "json" === "json"` parses as
        // `x ?? ("json" === "json")`, which sent every tool down the JSON branch.
        const argsFormat = toolInfo["args_format"] ?? "json";
        if (argsFormat === "json") {
            tool += " Format the arguments as a JSON object.";
        } else if (argsFormat === "code") {
            tool += " Enclose the code within triple backticks (`) at the beginning and end of the code.";
        } else {
            throw Error(`This args_format: ${argsFormat} is not supported`);
        }
        toolsTextList.push(tool)
    }
    const toolsText = toolsTextList.join("\n\n");
    const toolsNameText = listOfToolInfo.map(toolInfo => toolInfo["name_for_model"]).join(", ");

    const messages = [{ "role": "system", "content": "You are a helpful assistant." }];
    for (let [query, response] of chatHistory) {
        if (listOfToolInfo) {
            // Only the very first user turn carries the ReAct instructions.
            if (chatHistory.length == 1) {
                query = formatTemplate(PROMPT_REACT, {
                    'tools_text': toolsText,
                    'tools_name_text': toolsNameText,
                    query,
                });
            }
        }
        if (query) messages.push({ "role": "user", "content": query })
        if (response) messages.push({ "role": "assistant", "content": response })
    }

    const prompt = tokenizer.applyChatTemplate(messages, true);

    return prompt;
}

/**
 * Extract the first `Action` / `Action Input` pair from ReAct-formatted text.
 *
 * @param {string} text - Raw model output.
 * @returns {[string, string, string]} `[toolName, toolArgs, resultText]`.
 *     `toolName` and `toolArgs` are empty strings when no tool call is found,
 *     in which case `resultText` is the unchanged input. Otherwise `resultText`
 *     is the text cut right before the tool call's `Observation:` marker.
 */
function parseFirstToolCall(text) {
    const ACTION = "\nAction:";
    const ACTION_INPUT = "\nAction Input:";
    const OBSERVATION = "\nObservation:";

    let resultText = text;
    let toolName = "", toolArgs = "";
    const i = resultText.indexOf(ACTION);
    const j = resultText.indexOf(ACTION_INPUT);

    // JavaScript has no chained comparisons: `0 <= i < j` evaluates as
    // `(0 <= i) < j` and accepted text without any `Action:` marker.
    if (i >= 0 && i < j) { // The text has `Action` followed by `Action Input`,
        // Only an `Observation` that follows the action input terminates it.
        let k = resultText.indexOf(OBSERVATION, j);
        if (k < 0) { // but does not contain `Observation`,
            // then it is likely that `Observation` is omitted by the LLM,
            // because the output text may have discarded the stop word.
            resultText = resultText.trimEnd() + OBSERVATION // Add it back.
            k = resultText.indexOf(OBSERVATION, j);
        }
        toolName = resultText.slice(i + ACTION.length, j).trim();
        toolArgs = resultText.slice(j + ACTION_INPUT.length, k).trim();
        resultText = resultText.slice(0, k);
    }
    return [toolName, toolArgs, resultText];
}
/**
 * ReAct sample entry point.
 *
 * Loads the model from the directory given as the first CLI argument and runs
 * one tool-augmented query; the generated text is printed by `streamer` while
 * it is produced.
 */
async function main() {
    const llmModelPath = process.argv[2];
    // Fail early with a clear message, as the other text generation samples do,
    // instead of handing `undefined` to the pipeline constructor.
    if (!llmModelPath) {
        console.error('Please specify path to model directory');
        process.exit(1);
    }

    const device = 'CPU' // GPU can be used as well
    const llmPipe = await LLMPipeline(llmModelPath, device);

    const message_history = [];
    const query = "get the weather in London, and create a picture of Big Ben based on the weather information";
    // The returned [text, history] pair is not needed here: output is streamed.
    await llmWithTool(llmPipe, query, message_history, tools);
}
z.string().regex(/2\d\d\d-[0-1]\d-[0-3]\d/).describe("Date in YYYY-MM-DD format").meta({ title: "Date" }) + }).meta({ title: "WeatherRequest" }), +}; + +const getCurrencyExchangeTool = { + name: "get_currency_exchange", + schema: z.object({ + from_currency: z.string().describe("Currency to convert from").meta({ title: "From Currency" }), + to_currency: z.string().describe("Currency to convert to").meta({ title: "To Currency" }), + amount: z.number().describe("Amount to convert").meta({ title: "Amount" }) + }).meta({ title: "CurrencyExchangeRequest" }), +}; + +const tools = [getWeatherTool, getCurrencyExchangeTool]; + +const sysMessage = "You are a helpful assistant that can provide weather information and currency exchange rates. " + + `Today is ${new Date().toISOString().split('T')[0]}. ` + + "You can respond in natural language, always start your answer with appropriate greeting, " + + "If you need additional information to respond you can request it by calling particular tool with structured JSON. " + + `You can use the following tools: +${tools.map(tool => `, arguments=${serialize_json(tool.schema.keyof().options)}`).join('\n')} +Please, only use the following format for tool calling in your responses: +{"argument1": "value1", ...} +Use the tool name and arguments as defined in the tool schema. +If you don't know the answer, just say that you don't know, but try to call the tool if it helps to answer the question. +`; + +const functionPattern = /(.*?)<\/function>/gs; + +/** Parse the tool response from the model output. 
/**
 * Center `str` in a field of `width` characters using spaces.
 *
 * When the padding cannot be split evenly the extra space goes to the right.
 * Strings that are already at least `width` long are returned unchanged.
 */
function centerString(str, width) {
    if (str.length >= width) {
        return str;
    }
    const leftPad = Math.floor((width - str.length) / 2);
    // Pad the left side first, then let padEnd fill whatever remains.
    return str.padStart(str.length + leftPad, ' ').padEnd(width, ' ');
}
"Using structural tags" : "Using no structural tags", 80)}`); + console.log("=".repeat(80)); + + const generation_config = {}; + generation_config.return_decoded_results = true; + generation_config.max_new_tokens = 300; + + if (useStructuralTags) { + generation_config.structured_output_config = { + structural_tags_config: StructuredOutputConfig.TriggeredTags({ + tags: tools.map(tool => StructuredOutputConfig.Tag({ + begin: ``, + content: StructuredOutputConfig.JSONSchema(serialize_json(z.toJSONSchema(tool.schema))), + end: "" + })), + triggers: [" ' + }); + // Queue for waiting for all requests to be processed before exiting + const promptQueue = []; + + const modelDir = process.argv[2]; + if (!modelDir) { + console.error('Please provide the path to the model directory as the first argument.'); + process.exit(1); + } + + const device = 'CPU'; // GPU can be used as well + // We keep the promise here to avoid missing prompts while the model is loading + const pipeline = LLMPipeline(modelDir, device); + + const config = {}; + config.return_decoded_results = true; + config.max_new_tokens = 300; + + console.log("This is a smart assistant that generates structured output in JSON format. " + + "You can ask to generate information about a person, car, or bank transaction. 
" + + 'For example, you can ask: "Please generate jsons for 3 persons and 1 transaction."'); + + async function handleInput(prompt) { + try { + const pipe = await pipeline; + const chatHistory = new ChatHistory(); + chatHistory.push({ role: "system", content: sysMessage }); + + config.structured_output_config = new StructuredOutputConfig({ + json_schema: JSON.stringify(z.toJSONSchema(ItemQuantitiesSchema)) + }); + config.do_sample = false; + + chatHistory.push({ role: "user", content: prompt }); + const decodedResults = await pipe.generate(chatHistory, config); + const res = JSON.parse(decodedResults.toString()); + console.log(`Generated JSON with item quantities: ${decodedResults.toString()}`); + + config.do_sample = true; + config.temperature = 0.8; + + chatHistory.clear(); + chatHistory.push({ role: "system", content: sysMessageForItems }); + chatHistory.push({ role: "user", content: prompt }); + + let generateHasRun = false; + + for (const [item, quantity] of Object.entries(res)) { + const schema = itemsMap[item]; + if (!schema) continue; + config.structured_output_config = new StructuredOutputConfig({ + json_schema: JSON.stringify(z.toJSONSchema(schema)) + }); + for (let i = 0; i < quantity; i++) { + generateHasRun = true; + const decodedResults = await pipe.generate(chatHistory, config); + // validate JSON + JSON.parse(decodedResults.toString()); + console.log(decodedResults.toString()); + } + } + + if (!generateHasRun) { + console.log("No items generated. 
/**
 * Smoke-test `chat_sample.js` by running it as a child process.
 *
 * Spawns `node chat_sample.js MODEL_PATH`, feeds it one prompt and checks that
 * the process exits with code 0 after printing something to stdout.
 *
 * @returns {Promise<string>} Resolves with 'Test passed!'; rejects with a
 *     string describing the failure (stderr text, non-zero exit code or empty
 *     output).
 */
const runTest = async () => {
  return new Promise((resolve, reject) => {
    const script = spawn('node', ['chat_sample.js', MODEL_PATH]);
    let output = '';

    // Collect output from stdout
    script.stdout.on('data', (data) => {
      output += data.toString();
    });

    // Capture errors: any data on stderr fails the test
    script.stderr.on('data', (data) => {
      reject(data.toString());
    });

    // Send input after detecting the question prompt.
    // Only the first stdout chunk is inspected (`once`); if it does not start
    // with 'question:' nothing is written to stdin.
    script.stdout.once('data', (data) => {
      if (data.toString().startsWith('question:')) {
        script.stdin.write(`${prompt}\n`); // Provide input
        script.stdin.end(); // Close stdin to signal EOF
      }
    });

    // Check results when the process exits
    script.on('close', (code) => {
      if (code !== 0) {
        return reject(`Process exited with code ${code}`);
      }

      // Log the output
      console.log(`Result output: ${output}`);

      // Validate the output: only checks that something was printed,
      // the text itself is not compared with the prompt
      if (typeof output == 'string' && output.length > 0) {
        resolve('Test passed!');
      } else {
        reject('Test failed: Output did not match expected result.');
      }
    });
  });
};
+ +You can download example audio file: https://storage.openvinotoolkit.org/models_contrib/speech/2021.2/librispeech_s5/how_are_you_doing_today.wav + +## Run + +From the `samples/js` directory, install dependencies (if not already done): + +```bash +npm install +``` + +If you use the master branch, you may need to [build openvino-genai-node from source](../../src/js/README.md#build-bindings) first. + +Run the sample: + +```bash +node whisper_speech_recognition/whisper_speech_recognition.js whisper-base how_are_you_doing_today.wav +``` + +Optional third argument is the device (default: CPU): + +```bash +node whisper_speech_recognition/whisper_speech_recognition.js whisper-base how_are_you_doing_today.wav GPU +``` + +Output: + +``` + How are you doing today? +timestamps: [0.00, 2.00] text: How are you doing today? +[0.00, 0.xx]: +[0.xx, 0.xx]: How +... +``` + +Refer to the [Supported Models](https://openvinotoolkit.github.io/openvino.genai/docs/supported-models/#speech-recognition-models-whisper-based) for more details. + +# Whisper pipeline usage + +```javascript +import { WhisperPipeline } from 'openvino-genai-node'; +import { readFileSync } from 'node:fs'; +import { decode } from 'node-wav'; + +const pipeline = await WhisperPipeline(modelDir, "CPU"); +const rawSpeechBuffer = readFileSync(audioFilePath); +const rawSpeech = decode(rawSpeechBuffer).channelData[0]; +const result = await pipeline.generate(rawSpeech); +console.log(result.texts[0]); +// How are you doing today? +``` + +### Transcription + +Whisper pipeline predicts the language of the source audio automatically. 
+ +If the source audio language is known in advance, it can be specified in generation config: + +```javascript +const generationConfig = { language: "<|en|>", task: "transcribe" }; +const result = await pipeline.generate(rawSpeech, { generationConfig }); +``` + +### Translation + +By default, Whisper performs the task of speech transcription, where the source audio language is the same as the target text language. To perform speech translation, where the target text is in English, set the task to "translate": + +```javascript +const generationConfig = { task: "translate" }; +const result = await pipeline.generate(rawSpeech, { generationConfig }); +``` + +### Timestamps prediction + +The model can predict timestamps. For sentence-level timestamps, pass the `return_timestamps` argument: + +```javascript +const generationConfig = { return_timestamps: true, language: "<|en|>", task: "transcribe" }; +const result = await pipeline.generate(rawSpeech, { generationConfig }); +for (const chunk of result.chunks ?? []) { + console.log(`timestamps: [${chunk.startTs.toFixed(2)}, ${chunk.endTs.toFixed(2)}] text: ${chunk.text}`); +} +``` + +### Word-level timestamps + +Pass `word_timestamps: true` in the pipeline constructor, then in the generation config: + +```javascript +const pipeline = await WhisperPipeline(modelDir, "CPU", { word_timestamps: true }); +const generationConfig = { return_timestamps: true, word_timestamps: true, language: "<|en|>", task: "transcribe" }; +const result = await pipeline.generate(rawSpeech, { generationConfig }); +for (const w of result.words ?? 
[]) { + console.log(`[${w.startTs.toFixed(2)}, ${w.endTs.toFixed(2)}]: ${w.word}`); +} +``` + +### Initial prompt and hotwords + +Whisper pipeline has `initial_prompt` and `hotwords` generate arguments: +* `initial_prompt`: initial prompt tokens passed as a previous transcription (after `<|startofprev|>` token) to the first processing window +* `hotwords`: hotwords tokens passed as a previous transcription (after `<|startofprev|>` token) to all processing windows + +The Whisper model can use that context to better understand the speech and maintain a consistent writing style. However, prompts do not need to be genuine transcripts from prior audio segments. Such prompts can be used to steer the model to use particular spellings or styles: + +```javascript +let result = await pipeline.generate(rawSpeech); +// He has gone and gone for good answered Paul Icrom who... + +const generationConfig = { initial_prompt: "Polychrome" }; +result = await pipeline.generate(rawSpeech, { generationConfig }); +// He has gone and gone for good answered Polychrome who... +``` + +### Troubleshooting + +#### Empty or rubbish output + +Ensure the input is a valid 16-bit PCM WAV file sampled at 16 kHz. The sample's `readAudio` helper downmixes stereo to mono before inference, but it does not resample: files with any other sampling rate are rejected. + +For non-WAV sources (MP3, M4A, FLAC), convert to WAV first with your preferred tool.
/**
 * Decode a 16 kHz, 16-bit PCM WAV payload (mono or stereo) into mono samples.
 *
 * Walks the RIFF chunk list, remembers the last `fmt ` and `data` chunks seen,
 * validates the format and scales the samples to floats; stereo frames are
 * averaged into a single channel.
 *
 * @param {Buffer} buffer - Complete contents of the WAV file.
 * @returns {Float32Array} One value per frame, scaled by 1/32768.
 * @throws {Error} If the payload is malformed or uses an unsupported format.
 */
function parseWavPcm16Mono(buffer) {
    if (buffer.length < 44) {
        throw new Error('Invalid WAV payload: file is too small.');
    }

    const fourCC = (start) => buffer.toString('ascii', start, start + 4);
    if (fourCC(0) !== 'RIFF' || fourCC(8) !== 'WAVE') {
        throw new Error('Invalid WAV payload: RIFF/WAVE header is missing.');
    }

    const view = new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength);
    const fmt = {};
    const data = {};

    // Chunks start right after the 12-byte RIFF/WAVE preamble.
    for (let cursor = 12; cursor + 8 <= buffer.length;) {
        const id = fourCC(cursor);
        const size = view.getUint32(cursor + 4, true);
        const payload = cursor + 8;

        if (payload + size > buffer.length) {
            throw new Error('Invalid WAV payload: malformed chunk size.');
        }

        switch (id) {
            case 'fmt ':
                if (size < 16) {
                    throw new Error('Invalid WAV payload: fmt chunk is too small.');
                }
                fmt.audioFormat = view.getUint16(payload, true);
                fmt.channels = view.getUint16(payload + 2, true);
                fmt.sampleRate = view.getUint32(payload + 4, true);
                fmt.bitsPerSample = view.getUint16(payload + 14, true);
                break;
            case 'data':
                data.offset = payload;
                data.size = size;
                break;
            default:
                // Other chunk types are skipped.
                break;
        }

        // Chunks are word-aligned: an odd-sized chunk is followed by a pad byte.
        cursor = payload + size + (size % 2);
    }

    const { audioFormat, channels, sampleRate, bitsPerSample } = fmt;

    if (audioFormat !== 1) {
        throw new Error('Unsupported WAV format: only PCM is supported.');
    }
    if (channels !== 1 && channels !== 2) {
        throw new Error('WAV file must be mono or stereo.');
    }
    if (sampleRate !== 16000) {
        throw new Error(`WAV file must be 16 kHz, but got ${sampleRate}.`);
    }
    if (bitsPerSample !== 16) {
        throw new Error(`Unsupported WAV bit depth: ${bitsPerSample}. Only 16-bit PCM is supported.`);
    }
    if (data.offset === undefined || data.size === undefined) {
        throw new Error('Invalid WAV payload: missing data chunk.');
    }

    const frameBytes = channels * 2;
    const frameCount = Math.floor(data.size / frameBytes);

    // Mono frames are scaled directly; stereo frames are averaged, which is the
    // same as dividing the sum of both channels by 2 * 32768.
    const decodeFrame = channels === 1
        ? (at) => view.getInt16(at, true) / 32768.0
        : (at) => (view.getInt16(at, true) + view.getInt16(at + 2, true)) / 65536.0;

    return Float32Array.from(
        { length: frameCount },
        (_, frame) => decodeFrame(data.offset + frame * frameBytes),
    );
}
/**
 * Load a WAV file from disk and decode it with `parseWavPcm16Mono`.
 * @param {string} audioPath - Path to the WAV file.
 * @returns {Promise<Float32Array>} Decoded mono samples.
 * @throws {Error} If the file is empty or is rejected by the WAV parser.
 */
export async function readAudio(audioPath) {
    const fileContents = await readFile(audioPath);

    // An empty file gets its own message instead of the parser's generic one.
    if (fileContents.length === 0) {
        throw new Error('Audio file is empty.');
    }

    const samples = parseWavPcm16Mono(fileContents);
    return samples;
}
CPU, GPU)', + default: 'CPU', + }), + ) + .strict() + .help() + .parse(); + + const modelDir = argv.model_dir; + const wavFilePath = argv.audio_file; + const device = argv.device; + + let properties = {}; + if (device === 'NPU' || device.startsWith('GPU')) { + properties["CACHE_DIR"] = 'whisper_cache'; + } + // Word timestamps require word_timestamps in the pipeline constructor + properties.word_timestamps = true; + + const pipeline = await WhisperPipeline(modelDir, device, properties); + + // Pass only the options to override; avoid spreading full getGenerationConfig() + // (it can contain values that do not round-trip correctly, e.g. max_new_tokens). + const generationConfig = { + language: '<|en|>', + task: 'transcribe', + return_timestamps: true, + word_timestamps: true, + }; + + const audioTensor = await readAudio(wavFilePath); + const result = await pipeline.generate(audioTensor, { generationConfig }); + + console.log(result.texts?.[0] ?? ''); + + if (result.chunks?.length) { + for (const chunk of result.chunks) { + console.log(`timestamps: [${chunk.startTs.toFixed(2)}, ${chunk.endTs.toFixed(2)}] text: ${chunk.text}`); + } + } + + if (result.words?.length) { + for (const word of result.words) { + console.log(`[${word.startTs.toFixed(2)}, ${word.endTs.toFixed(2)}]: ${word.word}`); + } + } +} + +main().catch((err) => { + console.error(err); + process.exit(1); +}); diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/python/image_generation/README.md b/src/resources/openvino.genai-2026.1.0.0/samples/python/image_generation/README.md new file mode 100644 index 0000000..cba0ed6 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/python/image_generation/README.md @@ -0,0 +1,298 @@ +# Text to Image Python Generation Pipeline + +Examples in this folder showcase inference of text to image models like Stable Diffusion 1.5, 2.1, LCM. 
The application doesn't have many configuration options to encourage the reader to explore and modify the source code. For example, change the device for inference to GPU. The sample features `openvino_genai.Text2ImagePipeline` and uses a text prompt as input source. + +There are several sample files: + - [`text2image.py`](./text2image.py) demonstrates basic usage of the text to image pipeline + - [`lora_text2image.py`](./lora_text2image.py) shows how to apply LoRA adapters to the pipeline + - [`taylorseer_text2image.py`](./taylorseer_text2image.py) demonstrates text to image generation with TaylorSeer caching optimization for improved performance. Flux and StableDiffusion3 models are supported. + - [`heterogeneous_stable_diffusion.py`](./heterogeneous_stable_diffusion.py) shows how to assemble a heterogeneous text2image pipeline from individual subcomponents (scheduler, text encoder, unet, vae decoder) + - [`image2image.py`](./image2image.py) demonstrates basic usage of the image to image pipeline + - [`inpainting.py`](./inpainting.py) demonstrates basic usage of the inpainting pipeline + - [`benchmark_image_gen.py`](./benchmark_image_gen.py) demonstrates how to benchmark the text to image / image to image / inpainting pipeline + - [`stable_diffusion_export_import.py`](./stable_diffusion_export_import.py) demonstrates how to export and import compiled models in the text to image pipeline. Only the Stable Diffusion XL model is supported. 
+ +Users can change the sample code and play with the following generation parameters: + +- Change width or height of generated image +- Generate multiple images per prompt +- Adjust the number of inference steps +- Play with [guidance scale](https://huggingface.co/spaces/stabilityai/stable-diffusion/discussions/9) (read [more details](https://arxiv.org/abs/2207.12598)) +- (SD 1.x, 2.x; SD3, SDXL) Add negative prompt when guidance scale > 1 +- (SDXL, SD3, FLUX) Specify other positive prompts like `prompt_2` +- Apply multiple different LoRA adapters and mix them with different blending coefficients +- (Image to image and inpainting) Play with `strength` parameter to control how initial image is noised and reduce number of inference steps + +> [!NOTE] +> OpenVINO GenAI is written in C++ and uses the `CppStdGenerator` random generator in Image Generation pipelines, while the Diffusers library uses `torch.Generator` under the hood. +> To get the same results as with HuggingFace Diffusers, pass a manually created `torch.Generator(device='cpu').manual_seed(seed)` to Diffusers generation pipelines and `openvino_genai.TorchGenerator(seed)` to OpenVINO GenAI pipelines as the value of the `generator` kwarg. + +## Download and convert the models and tokenizers + +The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. + +Install [../../export-requirements.txt](../../export-requirements.txt) to convert a model. +```sh +pip install --upgrade-strategy eager -r ../../export-requirements.txt +``` + +Then, run the export with Optimum CLI: + +```sh +optimum-cli export openvino --model dreamlike-art/dreamlike-anime-1.0 --task stable-diffusion --weight-format fp16 dreamlike_anime_1_0_ov/FP16 +``` + +Alternatively, do it in Python code (FP16 is used by default). If NNCF is installed, the model will be compressed to INT8 automatically.
+ +```python +from optimum.exporters.openvino.convert import export_tokenizer +from optimum.intel import OVPipelineForText2Image + +output_dir = "dreamlike_anime_1_0_ov/FP16" + +pipeline = OVPipelineForText2Image.from_pretrained("dreamlike-art/dreamlike-anime-1.0", export=True) +pipeline.save_pretrained(output_dir) +export_tokenizer(pipeline.tokenizer, output_dir + "/tokenizer") +``` + +## Run text to image + +Install [deployment-requirements.txt](../../deployment-requirements.txt) via `pip install -r ../../deployment-requirements.txt` and then, run a sample: + +`python text2image.py ./dreamlike_anime_1_0_ov/FP16 "cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting"` + +### Examples + +Prompt: `cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting` + + ![](./../../cpp/image_generation/512x512.bmp) + +### Run with threaded callback + +You can also implement a callback function in `text2image.py` that runs in a separate thread. This allows for parallel processing, enabling you to interrupt generation early if intermediate results are satisfactory or to add logs. + +Please find the template of the callback usage below. + +```python +pipe = openvino_genai.Text2ImagePipeline(model_dir, device) + +def callback(step, num_steps, latent): + print(f"Image generation step: {step + 1} / {num_steps}") + image_tensor = pipe.decode(latent) # get intermediate image tensor + if your_condition: # return True if you want to interrupt image generation + return True + return False + +image = pipe.generate( + ... + callback = callback +) +``` + +## Run with optional LoRA adapters + +LoRA adapters can be connected to the pipeline and modify generated images to have certain style, details or quality. Adapters are supported in Safetensors format and can be downloaded from public sources like [Civitai](https://civitai.com) or [HuggingFace](https://huggingface.co/models) or trained by the user. 
Adapters compatible with a base model should be used only. A weighted blend of multiple adapters can be applied by specifying multiple adapter files with corresponding alpha parameters in command line. Check `lora_text2image.py` source code to learn how to enable adapters and specify them in each `generate` call. + +> [!NOTE] +> ### LoRA `alpha` interpretation in OpenVINO GenAI +> The OpenVINO GenAI implementation merges the traditional LoRA parameters into a **single effective scaling factor** used during inference. +> +> In this context, the `alpha` value already includes: +> - normalization by LoRA rank (`alpha / rank`) +> - any user-defined scaling factor (`weight`) +> +> This means `alpha` in GenAI should be treated as the **final scaling weight** applied to the LoRA update — not the raw `alpha` parameter from training. + +### Example: Running with a LoRA Adapter + +Here is an example how to run the sample with a single adapter. First download adapter file from https://civitai.com/models/67927/soulcard page manually and save it as `soulcard.safetensors`. Or download it from command line: + +`wget -O soulcard.safetensors https://civitai.com/api/download/models/72591` + +Then run `lora_text2image.py`: + +`python lora_text2image.py ./dreamlike_anime_1_0_ov/FP16 "curly-haired unicorn in the forest, anime, line" soulcard.safetensors 0.7` + +The sample generates two images with and without adapters applied using the same prompt: + - `lora.bmp` with adapters applied + - `baseline.bmp` without adapters applied + +Check the difference: + +With adapter | Without adapter +:---:|:---: +![](./../../cpp/image_generation/lora.bmp) | ![](./../../cpp/image_generation/baseline.bmp) + +## Run text to image with TaylorSeer caching optimization + +The `taylorseer_text2image.py` sample demonstrates how to use TaylorSeer Lite caching to accelerate text to image generation. 
TaylorSeer is a caching optimization technique that uses Taylor series approximation to predict intermediate outputs during diffusion inference, reducing the number of computationally expensive transformer forward passes. + +Run the sample with custom parameters: + +```bash +python taylorseer_text2image.py ./flux.1-dev/FP16 "a beautiful sunset over mountains" +``` + +The sample generates two images with and without TaylorSeer config applied using the same prompt: + - `taylorseer.bmp` with TaylorSeer config applied + - `taylorseer_baseline.bmp` without TaylorSeer config applied + +Check the difference: + +With TaylorSeer | Without TaylorSeer +:---:|:---: +![](./../../cpp/image_generation/taylorseer.bmp) | ![](./../../cpp/image_generation/taylorseer_baseline.bmp) + +## Run text to image with multiple devices + +The `heterogeneous_stable_diffusion.py` sample demonstrates how a Text2ImagePipeline object can be created from individual subcomponents - scheduler, text encoder, unet, & vae decoder. This approach gives fine-grained control over the devices used to execute each stage of the stable diffusion pipeline. + +The usage of this sample is: + +`heterogeneous_stable_diffusion.py [-h] model_dir prompt [text_encoder_device] [unet_device] [vae_decoder_device]` + +For example: + +`python heterogeneous_stable_diffusion.py ./dreamlike_anime_1_0_ov/FP16 'cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting' CPU NPU GPU` + +The sample will create a stable diffusion pipeline such that the text encoder is executed on the CPU, UNet on the NPU, and VAE decoder on the GPU. + +## Run image to image pipeline + +The `image2image.py` sample demonstrates basic image to image generation pipeline. The difference with text to image pipeline is that final image is denoised from initial image converted to latent space and noised with image noise according to `strength` parameter.
`strength` should be in range of `[0., 1.]` where `1.` means initial image is fully noised and it is an equivalent to text to image generation. +Also, `strength` parameter linearly affects the number of inference steps, because lower `strength` values mean the initial latent already has some structure and requires fewer steps to denoise. + +To run the sample, download initial image first: + +`wget -O cat.png https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/cat.png` + +And then run the sample: + +`python image2image.py ./dreamlike_anime_1_0_ov/FP16 'cat wizard, gandalf, lord of the rings, detailed, fantasy, cute, adorable, Pixar, Disney, 8k' cat.png` + +The resulting image is: + + ![](./../../cpp/image_generation/imageimage.bmp) + +Note, that LoRA, heterogeneous execution and other features of `Text2ImagePipeline` are applicable for `Image2ImagePipeline`. + +## Run inpainting pipeline + +The `inpainting.py` sample demonstrates usage of inpainting pipeline, which can inpaint initial image by a given mask. Inpainting pipeline can work on typical text to image models as well as on specialized models which are often named `space/model-inpainting`, e.g. `stabilityai/stable-diffusion-2-inpainting`.
+ +Such models can be converted in the same way as regular ones via `optimum-cli`: + +`optimum-cli export openvino --model stabilityai/stable-diffusion-2-inpainting --weight-format fp16 stable-diffusion-2-inpainting` + +Let's also download input data: + +`wget -O image.png https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png` + +`wget -O mask_image.png https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png` + +And run the sample: + +`python inpainting.py ./stable-diffusion-2-inpainting 'Face of a yellow cat, high resolution, sitting on a park bench' image.png mask_image.png` + +The resulting image is: + + ![](./../../cpp/image_generation/inpainting.bmp) + +Note, that LoRA, heterogeneous execution and other features of `Text2ImagePipeline` are applicable for `InpaintingPipeline`. + +## benchmarking sample for image generation pipelines + +This `benchmark_image_gen.py` sample script demonstrates how to benchmark text to image / image to image / inpainting pipeline. The script includes functionality for warm-up iterations, generating image, and calculating various performance metrics. + +The usage of this sample is: +```bash +python benchmark_image_gen.py [OPTIONS] +``` +Options: +- `-t, --pipeline_type`: Pipeline type: text2image/image2image/inpainting. +- `-m, --model`: Path to the model and tokenizers base directory. +- `-p, --prompt` (default: `"The Sky is blue because"`): The prompt to generate text. +- `-nw, --num_warmup` (default: `1`): Number of warmup iterations. +- `-n, --num_iter` (default: `3`): Number of iterations. +- `-d, --device` (default: `"CPU"`): Device(s) to run the pipeline with. +- `-w, --width` (default: `512`): The width of the output image. +- `-ht, --height` (default: `512`): The height of the output image. +- `-is, --num_inference_steps` (default: `20`): The number of inference steps. 
+- `-ni, --num_images_per_prompt` (default: `1`): The number of images to generate per generate() call. +- `-o, --output_dir` (default: `"."`): Path to save output image. +- `-i, --image`: Path to input image. +- `-mi, --mask_image`: Path to the mask image. +- `-s, --strength`: Indicates extent to transform the reference `image`. Must be between 0 and 1. +- `-r, --reshape`: Reshape pipeline before compilation. This can improve image generation performance. + +For example: + +`python benchmark_image_gen.py -t text2image -m dreamlike_anime_1_0_ov/FP16 -n 10 -d CPU` + +Performance output: + +``` +[warmup-0] generate time: 85008.00 ms, total infer time:84999.88 ms +[warmup-0] text encoder infer time: 98.00 ms +[warmup-0] unet iteration num:21, first iteration time:4317.94 ms, other iteration avg time:3800.91 ms +[warmup-0] unet inference num:21, first inference time:4317.71 ms, other inference avg time:3800.61 ms +[warmup-0] vae encoder infer time:0.00 ms, vae decoder infer time:4572.00 ms + +[iter-0] generate time: 84349.00 ms, total infer time:84340.97 ms +[iter-0] text encoder infer time: 76.00 ms +[iter-0] unet iteration num:21, first iteration time:3805.63 ms, other iteration avg time:3799.68 ms +[iter-0] unet inference num:21, first inference time:3805.42 ms, other inference avg time:3799.38 ms +[iter-0] vae encoder infer time:0.00 ms, vae decoder infer time:4472.00 ms + +[iter-1] generate time: 84391.00 ms, total infer time:84384.36 ms +[iter-1] text encoder infer time: 78.00 ms +[iter-1] unet iteration num:21, first iteration time:3801.15 ms, other iteration avg time:3802.17 ms +[iter-1] unet inference num:21, first inference time:3800.93 ms, other inference avg time:3801.87 ms +[iter-1] vae encoder infer time:0.00 ms, vae decoder infer time:4468.00 ms + +[iter-2] generate time: 84377.00 ms, total infer time:84366.51 ms +[iter-2] text encoder infer time: 76.00 ms +[iter-2] unet iteration num:21, first iteration time:3783.31 ms, other iteration avg time:3802.25
ms +[iter-2] unet inference num:21, first inference time:3783.09 ms, other inference avg time:3801.82 ms +[iter-2] vae encoder infer time:0.00 ms, vae decoder infer time:4471.00 ms + +Test finish, load time: 9356.00 ms +Warmup number:1, first generate warmup time:85008.00 ms, infer warmup time:84999.88 ms +Generate iteration number:3, for one iteration, generate avg time: 84372.34 ms, infer avg time:84363.95 ms, all text encoders infer avg time:76.67 ms, vae encoder infer avg time:0.00 ms, vae decoder infer avg time:4470.33 ms +``` + +### Image Generation Pipeline reuse + +To extend the pipeline's capabilities, we provide an interface that allows a specific image generation pipeline to reuse models from another pipeline that has already loaded them. The table below shows the support scope. + +| Image Generation pipeline | Model can be reused from | +|:---|:---| +| `Text2ImagePipeline` | `Image2ImagePipeline` or `InpaintingPipeline` | +| `Image2ImagePipeline` | `InpaintingPipeline` | +| `InpaintingPipeline` | `Image2ImagePipeline` | + +This example shows how `Text2ImagePipeline` reuses models from `Image2ImagePipeline` and executes a different pipeline depending on whether an initial image is provided. + +```py +img2img_pipe = openvino_genai.Image2ImagePipeline(models_path, device) +text2img_pipe = openvino_genai.Text2ImagePipeline(img2img_pipe) + +if image_path: + image = read_image(image_path) + image_tensor = img2img_pipe.generate(prompt, image, strength=0.8) +else: + image_tensor = text2img_pipe.generate(prompt, strength=1.0) +``` + +## Export and import compiled models + +`openvino_genai.Image2ImagePipeline` supports exporting and importing compiled models to and from a specified directory. This API can significantly reduce model load time, especially for large models like UNet. Only the Stable Diffusion XL model is supported. 
def get_total_text_encoder_infer_duration(metrics):
    """Return the summed inference time of all text encoders.

    Args:
        metrics: Object whose ``get_text_encoder_infer_duration()`` returns a
            mapping with one duration value per text encoder.

    Returns:
        The sum of all mapped durations as a float (``0.0`` when the mapping
        is empty).
    """
    # Seed the sum with 0.0 so the result stays a float for an empty mapping.
    return sum(metrics.get_text_encoder_infer_duration().values(), 0.0)


def print_one_generate(metrics, prefix, idx):
    """Print the timing breakdown of one ``generate()`` call.

    Every printed line is tagged with ``[<prefix>-<idx>]``. The denoiser lines
    are labelled "transformer" when the metrics carry transformer inference
    durations and "unet" otherwise.

    Args:
        metrics: Performance metrics object of the finished generation.
        prefix: Label of the run kind (the callers pass "warmup" or "iter").
        idx: Index of the run within its kind.
    """
    tag = f"[{prefix}-{idx}]"
    raw = metrics.raw_metrics

    print(f"\n{tag} generate time: {metrics.get_generate_duration():.2f} ms, total infer time: {metrics.get_inference_duration():.2f} ms")
    print(f"{tag} text encoder infer time: {get_total_text_encoder_infer_duration(metrics):.2f} ms")

    first_iter_time, other_iter_avg_time = metrics.get_first_and_other_iter_duration()

    # Pick the denoiser whose durations were actually recorded; both variants
    # are reported with the same two-line layout below.
    if len(raw.transformer_inference_durations) > 0:
        denoiser = "transformer"
        inference_num = len(raw.transformer_inference_durations)
        first_infer_time, other_infer_avg_time = metrics.get_first_and_other_trans_infer_duration()
    else:
        denoiser = "unet"
        inference_num = len(raw.unet_inference_durations)
        first_infer_time, other_infer_avg_time = metrics.get_first_and_other_unet_infer_duration()

    print(f"{tag} {denoiser} iteration num: {len(raw.iteration_durations)}, first iteration time: {first_iter_time:.2f} ms, other iteration avg time: {other_iter_avg_time:.2f} ms")
    print(f"{tag} {denoiser} inference num: {inference_num}, first inference time: {first_infer_time:.2f} ms, other inference avg time: {other_infer_avg_time:.2f} ms")
    print(f"{tag} vae encoder infer time: {metrics.get_vae_encoder_infer_duration():.2f} ms, vae decoder infer time: {metrics.get_vae_decoder_infer_duration():.2f} ms")
def device_string_to_triplet(device_input):
    """Expand the ``-d/--device`` value into a list of three device names.

    Args:
        device_input: Either a single device name (``"GPU"``) or exactly
            three comma-separated names (``"CPU,NPU,GPU"``). Whitespace
            around each name is ignored.

    Returns:
        A list of three device names in the order given; a single name is
        repeated three times.

    Raises:
        ValueError: If the number of names is neither 1 nor 3, or if any
            name is empty.
    """
    devices = [device.strip() for device in device_input.split(",")]
    # An empty name ("" or "CPU,,GPU") would otherwise slip through and only
    # fail later when the pipeline is compiled, so reject it up front.
    if len(devices) not in (1, 3) or not all(devices):
        raise ValueError("The device specified by -d/--device must be a single device (e.g. -d \"GPU\"), " +
                         "or exactly 3 comma separated device names (e.g. -d \"CPU,NPU,GPU\")")
    return devices * 3 if len(devices) == 1 else devices
def inpainting(args):
    """Benchmark the inpainting pipeline and print per-run and summary timings.

    Runs ``args.num_warmup`` warm-up generations followed by ``args.num_iter``
    measured generations. Each measured result is saved as ``image_<i>.bmp``
    under ``args.output_dir``.

    Args:
        args: Parsed command-line namespace. Reads ``prompt``, ``model``,
            ``device``, ``num_warmup``, ``num_iter``, ``output_dir``,
            ``image``, ``mask_image`` and ``reshape``.

    Raises:
        ValueError: If ``args.image`` or ``args.mask_image`` is not set.
    """
    # Both paths are optional at the argparse level, so check them here and
    # fail with a clear message instead of an obscure error from the image loader.
    if not args.image or not args.mask_image:
        raise ValueError("The inpainting pipeline requires both -i/--image and -mi/--mask_image")

    prompt = args.prompt
    output_dir = args.output_dir
    devices = device_string_to_triplet(args.device)

    image_input = read_image(args.image)
    mask_image = read_image(args.mask_image)

    # NOTE(review): --strength is not forwarded to generate() in this pipeline
    # (unlike image2image); confirm whether that is intended.
    pipe = ov_genai.InpaintingPipeline(args.model)
    if args.reshape:
        # Fix the pipeline to the input image size: dimensions 1 and 2 of the
        # image tensor are used as height and width.
        height = image_input.get_shape()[1]
        width = image_input.get_shape()[2]
        pipe.reshape(1, height, width, pipe.get_generation_config().guidance_scale)
    pipe.compile(devices[0], devices[1], devices[2])

    warmup_metrics = []
    for i in range(args.num_warmup):
        pipe.generate(prompt, image_input, mask_image)
        metrics = pipe.get_performance_metrics()
        warmup_metrics.append(metrics)
        print_one_generate(metrics, "warmup", i)

    iter_metrics = []
    for i in range(args.num_iter):
        image_tensor = pipe.generate(prompt, image_input, mask_image)
        perf_metrics = pipe.get_performance_metrics()
        iter_metrics.append(perf_metrics)
        image = Image.fromarray(image_tensor.data[0])
        image.save(output_dir + "/image_" + str(i) + ".bmp")
        print_one_generate(perf_metrics, "iter", i)

    print_statistic(warmup_metrics, iter_metrics)
def main():
    """Generate an image with each pipeline stage compiled for its own device.

    Command line: ``model_dir prompt [text_encoder_device] [unet_device]
    [vae_decoder_device]``. Each device argument is optional and defaults to
    CPU. The result is written to ``image_<n>.bmp`` in the working directory.
    """
    stage_device_args = ('text_encoder_device', 'unet_device', 'vae_decoder_device')

    cli = argparse.ArgumentParser()
    cli.add_argument('model_dir')
    cli.add_argument('prompt')
    # Optional per-stage devices; each may be CPU, GPU, or NPU.
    for stage in stage_device_args:
        cli.add_argument(stage, nargs='?', default='CPU')
    args = cli.parse_args()

    image_width, image_height = 512, 512
    image_count = 1
    steps_per_image = 20

    for stage in stage_device_args:
        print(f"{stage} = {getattr(args, stage)}")

    # Step 1: create the pipeline from the model directory (not compiled yet).
    pipe = openvino_genai.Text2ImagePipeline(args.model_dir)

    # Step 2: reshape for one image of the chosen size, keeping the guidance
    # scale from the pipeline's generation config.
    guidance_scale = pipe.get_generation_config().guidance_scale
    pipe.reshape(1, image_height, image_width, guidance_scale)

    # Step 3: compile each stage for its device. CACHE_DIR is where compiled
    # models are cached so later 'compile' calls run much faster.
    # Device-specific properties can be supplied instead through a
    # "DEVICE_PROPERTIES" entry, for example:
    #   {"DEVICE_PROPERTIES": {"CPU": {"CACHE_DIR": "cpu_cache"},
    #                          "GPU": {"CACHE_DIR": "gpu_cache"},
    #                          "NPU": {"CACHE_DIR": "npu_cache"}}}
    compile_properties = {"CACHE_DIR": "./cache"}
    pipe.compile(
        args.text_encoder_device,
        args.unet_device,
        args.vae_decoder_device,
        config=compile_properties,
    )

    # Step 4: generate the requested number of images and save each one.
    for image_index in range(image_count):
        generated = pipe.generate(args.prompt, num_inference_steps=steps_per_image)
        Image.fromarray(generated.data[0]).save(f"image_{image_index}.bmp")
def main():
    """Generate the same prompt with and without LoRA adapters applied.

    Command line: ``models_path prompt [adapter_file alpha]...``. Every
    adapter file must be followed by its alpha (blending weight). Writes
    ``lora.bmp`` (adapters applied) and ``baseline.bmp`` (no adapters).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('models_path')
    parser.add_argument('prompt')
    # Everything after the two positionals is collected as adapter/alpha pairs.
    args, adapters = parser.parse_known_args()

    # An adapter given without an alpha would otherwise be dropped silently,
    # and lora.bmp would not reflect what the user asked for.
    if len(adapters) % 2:
        parser.error("LoRA adapters must be given as '<adapter_file> <alpha>' pairs; "
                     f"unpaired trailing argument: {adapters[-1]!r}")

    prompt = args.prompt

    device = "CPU"  # GPU can be used as well
    adapter_config = openvino_genai.AdapterConfig()

    # Multiple LoRA adapters applied simultaneously are supported: even
    # positions are adapter files, odd positions are their alphas.
    for adapter_path, alpha in zip(adapters[::2], adapters[1::2]):
        adapter_config.add(openvino_genai.Adapter(adapter_path), float(alpha))

    # LoRA adapters passed to the constructor will be activated by default in next generates
    pipe = openvino_genai.Text2ImagePipeline(args.models_path, device, adapters=adapter_config)

    # Both runs share the same settings (including the seed) so that the
    # adapters are the only difference between the two images.
    generation_kwargs = {
        "width": 512,
        "height": 896,
        "num_inference_steps": 20,
        "rng_seed": 42,
    }

    print("Generating image with LoRA adapters applied, resulting image will be in lora.bmp")
    image = pipe.generate(prompt, **generation_kwargs)
    image_write("lora.bmp", image)

    print("Generating image without LoRA adapters applied, resulting image will be in baseline.bmp")
    # passing adapters in generate overrides adapters set in the constructor;
    # openvino_genai.AdapterConfig() means no adapters
    image = pipe.generate(prompt, adapters=openvino_genai.AdapterConfig(), **generation_kwargs)
    image_write("baseline.bmp", image)
openvino_genai.Text2ImagePipeline(root_dir, "CPU") + pipe.export_model(root_dir / "exported") + # pipeline models are exported to dedicated subfolders + # for stable diffusion xl: + # exported/ + # ├── text_encoder/ + # │ └── openvino_model.blob + # ├── text_encoder_2/ + # │ └── openvino_model.blob + # ├── unet/ + # │ └── openvino_model.blob + # └── vae_decoder/ + # └── openvino_model.blob + + # during import, specify blob_path property to point to the exported model location + imported_pipe = openvino_genai.Text2ImagePipeline(root_dir, "CPU", blob_path=root_dir / "exported") + + +def dedicated_models_export_import(root_dir: Path): + blob_path = root_dir / "blobs" + device = "CPU" + + text_encoder = openvino_genai.CLIPTextModel(root_dir / "text_encoder", device) + text_encoder.export_model(blob_path / "text_encoder") + + text_encoder_2 = openvino_genai.CLIPTextModelWithProjection(root_dir / "text_encoder_2", device) + text_encoder_2.export_model(blob_path / "text_encoder_2") + + unet = openvino_genai.UNet2DConditionModel(root_dir / "unet", device) + unet.export_model(blob_path / "unet") + + vae = openvino_genai.AutoencoderKL(root_dir / "vae_decoder") + vae.compile(device) + vae.export_model(blob_path) + # AutoencoderKL can be composed with decoder and encoder models + # exported/ + # └── vae_decoder/ + # └── openvino_model.blob + # └── vae_encoder/ + # └── openvino_model.blob + + pipe = openvino_genai.Text2ImagePipeline.stable_diffusion_xl( + scheduler=openvino_genai.Scheduler.from_config(root_dir / "scheduler" / "scheduler_config.json"), + clip_text_model=openvino_genai.CLIPTextModel(root_dir / "text_encoder", device, blob_path=blob_path / "text_encoder"), + clip_text_model_with_projection=openvino_genai.CLIPTextModelWithProjection(root_dir / "text_encoder_2", device, blob_path=blob_path / "text_encoder_2"), + unet=openvino_genai.UNet2DConditionModel(root_dir / "unet", device, blob_path=blob_path / "unet"), + vae=openvino_genai.AutoencoderKL(root_dir / 
"vae_decoder", device, blob_path=blob_path), + ) + + +def export_import_with_reshape(root_dir: Path, prompt: str): + device = "CPU" + + width = 512 + height = 512 + number_of_images_to_generate = 1 + number_of_inference_steps_per_image = 20 + + pipe = openvino_genai.Text2ImagePipeline(root_dir) + pipe.reshape(1, height, width, pipe.get_generation_config().guidance_scale) + pipe.compile(device) + pipe.export_model(root_dir / "exported") + + imported_pipe = openvino_genai.Text2ImagePipeline(root_dir, device, blob_path=root_dir / "exported") + + # update generation config according to the new shape parameters + config = imported_pipe.get_generation_config() + config.height = height + config.width = width + config.num_images_per_prompt = number_of_images_to_generate + imported_pipe.set_generation_config(config) + + for imagei in range(0, number_of_images_to_generate): + image_tensor = imported_pipe.generate( + prompt, + num_inference_steps=number_of_inference_steps_per_image, + ) + + image = Image.fromarray(image_tensor.data[0]) + image.save("image_" + str(imagei) + ".bmp") + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("model_dir") + parser.add_argument("prompt") + + args = parser.parse_args() + + root_dir = Path(args.model_dir) + + pipeline_export_import(root_dir) + dedicated_models_export_import(root_dir) + export_import_with_reshape(root_dir, args.prompt) + + +if "__main__" == __name__: + main() diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/python/image_generation/taylorseer_text2image.py b/src/resources/openvino.genai-2026.1.0.0/samples/python/image_generation/taylorseer_text2image.py new file mode 100644 index 0000000..4a6e49b --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/python/image_generation/taylorseer_text2image.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 +# Copyright (C) 2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import argparse +import time + +import openvino_genai 
+from PIL import Image + + +def main(): + parser = argparse.ArgumentParser(description="Text-to-image generation with TaylorSeer caching optimization") + parser.add_argument("model_dir", help="Path to the converted OpenVINO model directory") + parser.add_argument("prompt", help="Text prompt for image generation") + args = parser.parse_args() + + device = "CPU" # GPU can be used as well + pipe = openvino_genai.Text2ImagePipeline(args.model_dir, device) + + # TaylorSeer configuration + cache_interval = 3 + disable_before = 6 + disable_after = -2 + num_inference_steps = 28 + + def callback(step, num_steps, latent): + print(f"Step {step + 1}/{num_steps}") + return False + + generate_kwargs = { + "width": 512, + "height": 512, + "num_inference_steps": num_inference_steps, + "rng_seed": 42, + "num_images_per_prompt": 1, + "callback": callback, + } + + # Generate baseline for comparison + print(f"\nGenerating baseline image without caching...") + start_time = time.time() + baseline_tensor = pipe.generate(args.prompt, **generate_kwargs) + baseline_time = time.time() - start_time + + print(f"Baseline generation completed in {baseline_time:.2f}s") + + baseline_filename = "taylorseer_baseline.bmp" + baseline_image = Image.fromarray(baseline_tensor.data[0]) + baseline_image.save(baseline_filename) + print(f"Baseline image saved to {baseline_filename}") + + # Configure TaylorSeer caching + print(f"\nGenerating image with TaylorSeer caching...") + + taylorseer_config = openvino_genai.TaylorSeerCacheConfig() + taylorseer_config.cache_interval = cache_interval + taylorseer_config.disable_cache_before_step = disable_before + taylorseer_config.disable_cache_after_step = disable_after + print(taylorseer_config) + generation_config = pipe.get_generation_config() + generation_config.taylorseer_config = taylorseer_config + pipe.set_generation_config(generation_config) + + start_time = time.time() + image_tensor = pipe.generate(args.prompt, **generate_kwargs) + taylorseer_time = 
time.time() - start_time + print(f"TaylorSeer generation completed in {taylorseer_time:.2f}s") + + image_filename = "taylorseer.bmp" + image = Image.fromarray(image_tensor.data[0]) + image.save(image_filename) + print(f"Image saved to {image_filename}") + + # Performance comparison + speedup = baseline_time / taylorseer_time if taylorseer_time > 0 else 0.0 + time_saved = baseline_time - taylorseer_time if baseline_time > 0 else 0.0 + percentage = (baseline_time - taylorseer_time) / baseline_time * 100 if baseline_time > 0 else 0.0 + + print(f"\nPerformance Comparison:") + print(f" Baseline time: {baseline_time:.2f}s") + print(f" TaylorSeer time: {taylorseer_time:.2f}s") + print(f" Speedup: {speedup:.2f}x") + print(f" Time saved: {time_saved:.2f}s ({percentage:.1f}%)") + + +if __name__ == "__main__": + main() diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/python/image_generation/text2image.py b/src/resources/openvino.genai-2026.1.0.0/samples/python/image_generation/text2image.py new file mode 100644 index 0000000..0fab77d --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/python/image_generation/text2image.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python3 +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import argparse + +import openvino_genai +from PIL import Image + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('model_dir') + parser.add_argument('prompt') + args = parser.parse_args() + + device = 'CPU' # GPU can be used as well + pipe = openvino_genai.Text2ImagePipeline(args.model_dir, device) + + def callback(step, num_steps, latent): + print(f"Step {step + 1}/{num_steps}") + return False + + image_tensor = pipe.generate( + args.prompt, + width=512, + height=512, + num_inference_steps=20, + num_images_per_prompt=1, + callback=callback) + + image = Image.fromarray(image_tensor.data[0]) + image.save("image.bmp") + + +if __name__ == '__main__': + main() diff --git 
a/src/resources/openvino.genai-2026.1.0.0/samples/python/rag/README.md b/src/resources/openvino.genai-2026.1.0.0/samples/python/rag/README.md new file mode 100644 index 0000000..84f08eb --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/python/rag/README.md @@ -0,0 +1,83 @@ +# Retrieval Augmented Generation Sample + +This example showcases inference of Text Embedding and Text Rerank Models. The application has limited configuration options to encourage the reader to explore and modify the source code. For example, change the device for inference to GPU. The sample features `openvino_genai.TextEmbeddingPipeline` and `openvino_genai.TextRerankPipeline` and uses text as an input source. + +## Download and Convert the Model and Tokenizers + +The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. + +Install [../../export-requirements.txt](../../export-requirements.txt) to convert a model. + +```sh +pip install --upgrade-strategy eager -r ../../export-requirements.txt +``` + +To export text embedding model run Optimum CLI command: + +```sh +optimum-cli export openvino --task feature-extraction --model BAAI/bge-small-en-v1.5 BAAI/bge-small-en-v1.5 +``` + +To export text reranking model run Optimum CLI command: + +```sh +optimum-cli export openvino --task text-classification --model cross-encoder/ms-marco-MiniLM-L6-v2 cross-encoder/ms-marco-MiniLM-L6-v2 +``` + +Alternatively, do it in Python code: + +```python +from optimum.exporters.openvino.convert import export_tokenizer +from optimum.intel import OVModelForFeatureExtraction +from transformers import AutoTokenizer + +output_dir = "embedding_model" + +model = OVModelForFeatureExtraction.from_pretrained("BAAI/bge-small-en-v1.5", export=True) +model.save_pretrained(output_dir) + +tokenizer = AutoTokenizer.from_pretrained("BAAI/bge-small-en-v1.5") +export_tokenizer(tokenizer, output_dir) +``` + +## Run + +Install 
[deployment-requirements.txt](../../deployment-requirements.txt) via `pip install -r ../../deployment-requirements.txt` and then, run a sample: + +### 1. Text Embedding Sample (`text_embeddings.py`) +- **Description:** + Demonstrates inference of text embedding models using OpenVINO GenAI. Converts input text into vector embeddings for downstream tasks such as retrieval or semantic search. +- **Run Command:** + ```sh + python text_embeddings.py <MODEL_DIR> "Document 1" "Document 2" + ``` +Refer to the [Supported Models](https://openvinotoolkit.github.io/openvino.genai/docs/supported-models/#text-embeddings-models) for more details. + +### 2. Text Rerank Sample (`text_rerank.py`) +- **Description:** + Demonstrates inference of text rerank models using OpenVINO GenAI. Reranks a list of candidate documents based on their relevance to a query using a cross-encoder or reranker model. +- **Run Command:** + ```sh + python text_rerank.py <MODEL_DIR> "<QUERY>" "<TEXT 1>" ["<TEXT 2>" ...] + ``` + + +# Text Embedding Pipeline Usage + +```python +import openvino_genai + +pipeline = openvino_genai.TextEmbeddingPipeline(model_dir, "CPU") + +embeddings = pipeline.embed_documents(["document1", "document2"]) +``` + +# Text Rerank Pipeline Usage + +```python +import openvino_genai + +pipeline = openvino_genai.TextRerankPipeline(model_dir, "CPU") + +rerank_result = pipeline.rerank(query, documents) +``` diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/python/rag/text_embeddings.py b/src/resources/openvino.genai-2026.1.0.0/samples/python/rag/text_embeddings.py new file mode 100644 index 0000000..3af4ff6 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/python/rag/text_embeddings.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python3 +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import argparse +import openvino_genai + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("model_dir") + parser.add_argument("texts", nargs="+") + args = parser.parse_args() + + 
device = "CPU" # GPU can be used as well + + config = openvino_genai.TextEmbeddingPipeline.Config() + config.pooling_type = openvino_genai.TextEmbeddingPipeline.PoolingType.MEAN + + pipeline = openvino_genai.TextEmbeddingPipeline(args.model_dir, device, config) + + text_embeddings = pipeline.embed_documents(args.texts) + query_embeddings = pipeline.embed_query("What is the capital of France?") + + +if "__main__" == __name__: + main() diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/python/rag/text_rerank.py b/src/resources/openvino.genai-2026.1.0.0/samples/python/rag/text_rerank.py new file mode 100644 index 0000000..8eb9554 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/python/rag/text_rerank.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import argparse +import openvino_genai + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("model_dir") + parser.add_argument("query") + parser.add_argument("texts", nargs="+") + args = parser.parse_args() + + device = "CPU" # GPU can be used as well + + config = openvino_genai.TextRerankPipeline.Config() + config.top_n = 3 + + pipeline = openvino_genai.TextRerankPipeline(args.model_dir, device, config) + + rerank_result = pipeline.rerank(args.query, args.texts) + + print("Reranked documents:") + for index, score in rerank_result: + print(f"Document {index} (score: {score:.4f}): {args.texts[index]}") + + +if __name__ == "__main__": + main() diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/python/speech_generation/README.md b/src/resources/openvino.genai-2026.1.0.0/samples/python/speech_generation/README.md new file mode 100644 index 0000000..68b2636 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/python/speech_generation/README.md @@ -0,0 +1,79 @@ +# Text-to-speech pipeline sample + +This example demonstrates how to use the openvino_genai.Text2SpeechPipeline in 
Python to convert input text into speech. +You can specify a target voice using a speaker embedding vector that captures the desired voice characteristics. +Additionally, you can choose the inference device (e.g., CPU, GPU) to control where the model runs. + +## Download and convert the model and tokenizers + +The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. + +Install [../../export-requirements.txt](../../export-requirements.txt) to convert a model. + +```sh +pip install --upgrade-strategy eager -r ../../export-requirements.txt +``` + +Then, run the export with Optimum CLI: + +```sh +optimum-cli export openvino --model microsoft/speecht5_tts --model-kwargs "{\"vocoder\": \"microsoft/speecht5_hifigan\"}" speecht5_tts +``` + +Alternatively, you can do it in Python code: + +```python +from optimum.exporters.openvino.convert import export_tokenizer +from optimum.intel import OVModelForTextToSpeechSeq2Seq +from transformers import AutoTokenizer + +output_dir = "speecht5_tts" + +model = OVModelForTextToSpeechSeq2Seq.from_pretrained("microsoft/speecht5_tts", vocoder="microsoft/speecht5_hifigan", export=True) +model.save_pretrained(output_dir) + +tokenizer = AutoTokenizer.from_pretrained("microsoft/speecht5_tts") +export_tokenizer(tokenizer, output_dir) +``` + +**Note:** Currently, text-to-speech in OpenVINO GenAI supports the `SpeechT5 TTS` model. +When exporting the model, you must specify a vocoder using the `--model-kwargs` option in JSON format. + +## Prepare speaker embedding file + +To generate speech using the SpeechT5 TTS model, you can specify a target voice by providing a speaker embedding file. +This file must contain 512 32-bit floating-point values that represent the voice characteristics of the target speaker. +The model will use these characteristics to synthesize the input text in the specified voice. 
+ +If no speaker embedding is provided, the model will default to a built-in speaker for speech generation. + +You can generate a speaker embedding using the [`create_speaker_embedding.py`](create_speaker_embedding.py) script. +This script records 5 seconds of audio from your microphone and extracts a speaker embedding vector from the recording. + +To run the script: + +``` +python create_speaker_embedding.py +``` + +## Run Text-to-speech sample + +Install [deployment-requirements.txt](../../deployment-requirements.txt) +via `pip install -r ../../deployment-requirements.txt` and then, run a sample: + +`python text2speech.py --speaker_embedding_file_path speaker_embedding.bin speecht5_tts "Hello OpenVINO GenAI"` + +It generates `output_audio.wav` file containing the phrase `Hello OpenVINO GenAI` spoken in the target voice. + +Refer to the [Supported Models](https://openvinotoolkit.github.io/openvino.genai/docs/supported-models/#speech-generation-models) for more details. + +# Text-to-speech pipeline usage + +```python +import openvino_genai + +pipe = openvino_genai.Text2SpeechPipeline(model_dir, device) +result = pipe.generate("Hello OpenVINO GenAI", speaker_embedding) +speech = result.speeches[0] +# speech tensor contains the waveform of the spoken phrase +``` diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/python/speech_generation/create_speaker_embedding.py b/src/resources/openvino.genai-2026.1.0.0/samples/python/speech_generation/create_speaker_embedding.py new file mode 100644 index 0000000..309a614 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/python/speech_generation/create_speaker_embedding.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python3 +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import sounddevice as sd +import torch.nn.functional as F +import torchaudio +from scipy.io.wavfile import write +from speechbrain.pretrained import SpeakerRecognition + +# Settings +duration = 5 # seconds 
+sample_rate = 16000 # Hz +output_file = "your_audio.wav" + +print(f"Recording for {duration} seconds...") + +# Record audio +recording = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1, dtype='int16') +sd.wait() + +# Save to WAV file +write(output_file, sample_rate, recording) + +print(f"Saved recording to {output_file}") + +# Load your WAV file +signal, fs = torchaudio.load("your_audio.wav") +assert fs == 16000, "Frame rate must be 16 KHz" + +# Load the pre-trained speaker embedding model (x-vector) +# based on https://huggingface.co/mechanicalsea/speecht5-vc/blob/main/manifest/utils/prep_cmu_arctic_spkemb.py +model = SpeakerRecognition.from_hparams( + source="speechbrain/spkrec-xvect-voxceleb", +) + +# Extract x-vector embedding +embedding = model.encode_batch(signal) +embedding = F.normalize(embedding, dim=2) +embedding = embedding.squeeze().cpu().numpy().astype("float32") + +embedding.tofile("speaker_embedding.bin") diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/python/speech_generation/text2speech.py b/src/resources/openvino.genai-2026.1.0.0/samples/python/speech_generation/text2speech.py new file mode 100644 index 0000000..a3edb45 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/python/speech_generation/text2speech.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python3 +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import argparse + +import numpy as np +import openvino as ov +import openvino_genai +import soundfile as sf + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("model_dir", help="Path to the model directory") + parser.add_argument("text", help="Input text for which to generate speech") + parser.add_argument("--speaker_embedding_file_path", default=None, + help="Path to the binary file with a speaker embedding") + parser.add_argument("--device", nargs="?", default="CPU", help="Device to run the model on (default: CPU)") + args = 
parser.parse_args() + + # read speaker embedding from binary file + speaker_embedding = None + if args.speaker_embedding_file_path: + speaker_embedding = np.fromfile(args.speaker_embedding_file_path, dtype=np.float32).reshape(1, 512) + speaker_embedding = ov.Tensor(speaker_embedding) + + pipe = openvino_genai.Text2SpeechPipeline(args.model_dir, args.device) + if speaker_embedding is not None: + result = pipe.generate(args.text, speaker_embedding) + else: + result = pipe.generate(args.text) + + assert len(result.speeches) == 1, "Expected only one waveform for the requested input text" + speech = result.speeches[0] + output_file_name = "output_audio.wav" + sf.write(output_file_name, speech.data[0], samplerate=16000) + + print("[Info] Text successfully converted to audio file \"", output_file_name, "\".") + + perf_metrics = result.perf_metrics; + if perf_metrics.m_evaluated: + print("\n\n=== Performance Summary ===") + print("Throughput : ", perf_metrics.throughput.mean, " samples/sec.") + print("Total Generation Time : ", perf_metrics.generate_duration.mean / 1000.0, " sec.") + + +if "__main__" == __name__: + main() diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/README.md b/src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/README.md new file mode 100644 index 0000000..ee07c20 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/README.md @@ -0,0 +1,324 @@ +# OpenVINO GenAI Text Generation Python Samples + +These samples showcase the use of OpenVINO's inference capabilities for text generation tasks, including different decoding strategies such as beam search, multinomial sampling, and speculative decoding. Each sample has a specific focus and demonstrates a unique aspect of text generation. +The applications don't have many configuration options to encourage the reader to explore and modify the source code. For example, change the device for inference to GPU. 
+There are also Jupyter notebooks for some samples. You can find links to them in the appropriate sample descriptions. + +## Table of Contents +1. [Download and Convert the Model and Tokenizers](#download-and-convert-the-model-and-tokenizers) +2. [Sample Descriptions](#sample-descriptions) +3. [Troubleshooting](#troubleshooting) +4. [Support and Contribution](#support-and-contribution) + +## Download and convert the model and tokenizers +The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. +Install [../../export-requirements.txt](../../export-requirements.txt) if model conversion is required. + +```sh +pip install --upgrade-strategy eager -r ../../export-requirements.txt +``` + +Then, run the export with Optimum CLI: + +```sh +optimum-cli export openvino --model <model> <output_folder> +``` + +Alternatively, do it in Python code (e.g. TinyLlama_v1.1). If NNCF is installed, the model will be compressed to INT8 automatically. + +```python +from optimum.exporters.openvino.convert import export_tokenizer +from optimum.intel import OVModelForCausalLM +from transformers import AutoTokenizer + +output_dir = "chat_model" + +model = OVModelForCausalLM.from_pretrained("TinyLlama/TinyLlama_v1.1", export=True) +model.save_pretrained(output_dir) + +tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama_v1.1") +tokenizer.save_pretrained(output_dir) +export_tokenizer(tokenizer, output_dir) +``` +[//]: # "tokenizer.save_pretrained(output_dir) is required above to mitigate runtime errors" + +If a converted model in OpenVINO IR format is already available in the collection of [OpenVINO optimized LLMs](https://huggingface.co/collections/OpenVINO/llm-6687aaa2abca3bbcec71a9bd) on Hugging Face, it can be downloaded directly via huggingface-cli.
+```sh +pip install huggingface-hub +huggingface-cli download <model> --local-dir <output_folder> +``` + +### Using GGUF models + +To run any samples with a GGUF model, simply provide the path to the .gguf file via the `model_dir` parameter. + +This capability is currently available in preview mode and supports a limited set of topologies, including SmolLM and Qwen2.5. For other models +and architectures, we still recommend converting the model to the IR format using the `optimum-intel` tool. + +## Sample Descriptions +### Common information +Follow [Get Started with Samples](https://docs.openvino.ai/2026/get-started/learn-openvino/openvino-samples/get-started-demos.html) to get common information about OpenVINO samples. +Follow the [build instructions](../../../src/docs/BUILD.md) to build GenAI samples. + +GPUs usually provide better performance compared to CPUs. Modify the source code to change the device for inference to the GPU. + +Refer to the [Supported Models](https://openvinotoolkit.github.io/openvino.genai/docs/supported-models/#large-language-models-llms) for more details. + +Install [../../deployment-requirements.txt](../../deployment-requirements.txt) to run the samples: +```sh +pip install --upgrade-strategy eager -r ../../deployment-requirements.txt +``` + +### 1. Chat Sample (`chat_sample`) +- **Description:** +Interactive chat interface powered by OpenVINO. +Here is a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/llm-chatbot) that provides an example of LLM-powered text generation in Python. +Recommended models: meta-llama/Llama-2-7b-chat-hf, TinyLlama/TinyLlama-1.1B-Chat-v1.0, etc. +- **Main Feature:** Real-time chat-like text generation. +- **Run Command:** + ```bash + python chat_sample.py model_dir + ``` +#### Missing chat template +If you encounter an exception indicating a missing "chat template" when launching the `ov::genai::LLMPipeline` in chat mode, it likely means the model was not tuned for chat functionality.
To work around this, manually add the chat template to tokenizer_config.json of your model or update it by calling `pipe.get_tokenizer().set_chat_template(new_chat_template)`. +The following template can be used as a default, but it may not work properly with every model: +``` +"chat_template": "{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|im_start|>user\n' + message['content'] + '<|im_end|>\n<|im_start|>assistant\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|im_end|>\n'}}{% endif %}{% endfor %}", +``` + +#### NPU support + +The NPU device is supported with some limitations. See the [NPU inference of +LLMs](https://docs.openvino.ai/2026/openvino-workflow-generative/inference-with-genai/inference-with-genai-on-npu.html) documentation. In particular: + +- Models must be exported with symmetric INT4 quantization (`optimum-cli export openvino --weight-format int4 --sym --model <model> <output_folder>`). + For models with more than 4B parameters, channel-wise quantization should be used (`--group-size -1`). +- Beam search and parallel sampling are not supported. +- Use OpenVINO 2025.0 or later (installed by deployment-requirements.txt, see "Common information" section), and the latest NPU driver. + + +### 2. Greedy Causal LM (`greedy_causal_lm`) +- **Description:** +Basic text generation using a causal language model. +Here is a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/llm-question-answering) that provides an example of LLM-powered text generation in Python. +Recommended models: meta-llama/Llama-2-7b-hf, etc. +- **Main Feature:** Demonstrates simple text continuation. +- **Run Command:** + ```bash + python greedy_causal_lm.py [-h] model_dir prompt + ``` + +### 3. Beam Search Causal LM (`beam_search_causal_lm`) +- **Description:** +Uses beam search for more coherent text generation.
+Here is a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/llm-question-answering) that provides an example of LLM-powered text generation in Python. +Recommended models: meta-llama/Llama-2-7b-hf, etc. +- **Main Feature:** Improves text quality with beam search. +- **Run Command:** + ```bash + python beam_search_causal_lm.py model_dir prompt [prompts ...] + ``` + +### 4. Multinomial Causal LM (`multinomial_causal_lm`) +- **Description:** Text generation with multinomial sampling for diversity. +Recommended models: meta-llama/Llama-2-7b-hf, etc. +- **Main Feature:** Introduces randomness for creative outputs. +- **Run Command:** + ```bash + python multinomial_causal_lm.py model_dir prompt + ``` + +### 5. Prompt Lookup Decoding LM (`prompt_lookup_decoding_lm`) +- **Description:** +[Prompt Lookup decoding](https://github.com/apoorvumang/prompt-lookup-decoding) is an [assisted-generation](https://huggingface.co/blog/assisted-generation#understanding-text-generation-latency) technique where the draft model is replaced with simple string matching against the prompt to generate candidate token sequences. This method is highly effective for input-grounded generation (summarization, document QA, multi-turn chat, code editing), where there is high n-gram overlap between LLM input (prompt) and LLM output. This could be entity names, phrases, or code chunks that the LLM directly copies from the input while generating the output. Prompt lookup exploits this pattern to speed up autoregressive decoding in LLMs. This results in significant speedups with no effect on output quality. +Recommended models: meta-llama/Llama-2-7b-hf, etc. +- **Main Feature:** Specialized prompt-based inference. +- **Run Command:** + ```bash + python prompt_lookup_decoding_lm.py model_dir prompt + ``` + +### 6. 
Speculative Decoding LM (`speculative_decoding_lm`) +- **Description:** +Speculative decoding (or [assisted-generation](https://huggingface.co/blog/assisted-generation#understanding-text-generation-latency) in HF terminology) is a recent technique that speeds up token generation when an additional, smaller draft model is used alongside the main model. + +Speculative decoding works as follows. The draft model predicts the next K tokens one by one in an autoregressive manner, while the main model validates these predictions and corrects them if necessary. We go through each predicted token, and if a difference is detected between the draft and main model, we stop and keep the last token predicted by the main model. Then the draft model gets the latest main prediction and again tries to predict the next K tokens, repeating the cycle. + +This approach reduces the need for multiple inference requests to the main model, enhancing performance. For instance, in more predictable parts of text generation, the draft model can, in best-case scenarios, generate the next K tokens that exactly match the target. In that case they are validated in a single inference request to the main model (which is bigger, more accurate but slower) instead of running K subsequent requests. More details can be found in the original papers: https://arxiv.org/pdf/2211.17192.pdf, https://arxiv.org/pdf/2302.01318.pdf + +Here is a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/speculative-sampling) that provides an example of LLM-powered text generation in Python. + +Recommended models: meta-llama/Llama-2-13b-hf as main model and TinyLlama/TinyLlama-1.1B-Chat-v1.0 as draft model. Note that GGUF models are not supported as draft models. +- **Main Feature:** Reduces latency while generating high-quality text. +- **Run Command:** + ```bash + python speculative_decoding_lm.py model_dir draft_model_dir prompt + ``` + +### 7. 
LoRA Greedy Causal LM (`lora_greedy_causal_lm`) +- **Description:** +This sample demonstrates greedy decoding using Low-Rank Adaptation (LoRA) fine-tuned causal language models. LoRA enables efficient fine-tuning, reducing resource requirements for adapting large models to specific tasks. +- **Main Feature:** Lightweight fine-tuning with LoRA for efficient text generation +- **Run Command:** + ```bash + python lora_greedy_causal_lm.py model_dir adapter_safetensors_file prompt + ``` + +> [!NOTE] +> ### LoRA `alpha` interpretation in OpenVINO GenAI +> The OpenVINO GenAI implementation merges the traditional LoRA parameters into a **single effective scaling factor** used during inference. +> +> In this context, the `alpha` value already includes: +> - normalization by LoRA rank (`alpha / rank`) +> - any user-defined scaling factor (`weight`) +> +> This means `alpha` in GenAI should be treated as the **final scaling weight** applied to the LoRA update — not the raw `alpha` parameter from training. + +### 8. Encrypted Model Causal LM (`encrypted_model_causal_lm`) +- **Description:** +LLMPipeline and Tokenizer objects can be initialized directly from a memory buffer, e.g. when the user stores only encrypted files and decrypts them on the fly. +- **Main Feature:** Read model directly from memory buffer +- **Run Command:** + ```bash + python encrypted_model_causal_lm.py model_dir prompt + ``` + +### 9. LLMs benchmarking sample (`benchmark_genai`) +- **Description:** +This sample script demonstrates how to benchmark LLMs in OpenVINO GenAI. The script includes functionality for warm-up iterations, generating text, and calculating various performance metrics. + +For more information on how performance metrics are calculated, see the [performance-metrics tutorial](../../../src/README.md#performance-metrics).
+- **Main Feature:** Benchmark model via GenAI +- **Run Command:** + ```bash + python benchmark_genai.py [-m MODEL] [-p PROMPT] [-nw NUM_WARMUP] [-n NUM_ITER] [-mt MAX_NEW_TOKENS] [-d DEVICE] + ``` + #### Options +- `-m, --model`: Path to the model and tokenizers base directory. +- `-p, --prompt` (default: `None`): The prompt to generate text. If neither `-p` nor `-pf` is given, the default prompt is `"The Sky is blue because"` +- `-pf, --prompt_file` Read prompt from file. +- `-nw, --num_warmup` (default: `1`): Number of warmup iterations. +- `-mt, --max_new_tokens` (default: `20`): Maximal number of new tokens. +- `-n, --num_iter` (default: `2`): Number of iterations. +- `-d, --device` (default: `"CPU"`): Device to run the model on. + +### 10. LLM ReAct Agent Sample (`react_sample`) +- **Description:** +Interactive ReAct Agent powered by OpenVINO. +Here is a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/llm-native-agent-react) that provides an example of LLM-powered reasoning engine to execute an action in Python. +Recommended models: Qwen/Qwen2.5-3B-Instruct, Qwen/Qwen2.5-7B-Instruct +- **Main Feature:** Real-time reasoning-action from user's input. +- **Run Command:** + ```bash + python react_sample.py model_dir + ``` + + +### 11. Structured Output Sample (`structured_output_sample`) +- **Description:** +This sample demonstrates how to use OpenVINO GenAI to generate structured outputs such as JSON from text prompts. This sample implementation is split into multiple "generate" calls to mitigate generating complex, variadic JSON structures in a single pass. This is done because not all models are able to generate a complex JSON, with a variadic number of elements in one shot, especially if the model is small and not fine-tuned for this task. By separating the task into two stages, it becomes possible to use smaller models and still achieve good-quality generated JSON.
+ +Recommended models: meta-llama/Llama-3.2-1B-Instruct, meta-llama/Llama-3.2-8B-Instruct +- **Run Command:** + ```bash + python structured_output_generation.py model_dir + ``` + After running the command, an interactive dialog starts. You can enter a prompt and receive a structured output in response. The process is divided into two stages: + +1. **Stage One:** The model generates a JSON schema indicating the number of items of each type the user requests. For example, if you prompt: + `Generate a JSON for 2 cars and 1 person with an Irish surname` + The model might output: + `{"person": 1, "car": 2, "transaction": 0}` + This internal JSON is used to determine how many items of each type to generate in the next stage. It is not shown to the user. + +2. **Stage Two:** For each item type and count specified in the schema, the model is prompted to generate a JSON object. The original prompt is reused, but the schema guides the model to produce the correct structure. For the example above, the output might look like: + ``` + > Generate a JSON for 2 cars and 1 person with an Irish surname + output: + {"name": "John Doe", "surname": "O'Reilly", "age": 30, "city": "Dublin"} + {"model": "Toyota", "year": 2020, "engine": "hybrid"} + {"model": "Ford", "year": 2019, "color": "red"} + ``` + +**Note:** +Structured output enforcement guarantees correct JSON formatting, but does not ensure the factual correctness or sensibility of the content. The model may generate implausible or nonsensical data, such as `{"name": "John", "age": 200000}` or `{"model": "AbrakaKadabra9999######4242"}`. These are valid JSONs but may not make sense. For best results, use the latest or fine-tuned models for this task to improve the quality and relevance of the generated output. + + +### 12. 
Tool Calling with Structural Tags Sample (`structural_tags_generation`) +- **Description:** + Structural tags is a technique that allows to switch from regular sampling to structural output generation and back during the text generation. + If during the sampling process the model produces a trigger string, it switches to structured mode and generates output according to a JSON schema defined by the tag. After that the model switches back to regular sampling mode. + This is useful for generating function calls or other structured outputs that need to follow a specific format. + + This sample demonstrates how to use OpenVINO GenAI to generate structured tool calls from natural language prompts using structural tags. + The model is guided to output function calls in a specific format, enabling integration with external tools: + - Weather API + - Currency exchange APIs + + The system message instructs the model to call tools using a strict format: + ``` + + {"argument1": "value1", ...} + + ``` + The sample includes schemas for each tool, and the model is prompted to use them for tool calling. There are two model calls - with and without structural tags. + You can compare the results to see how the model generates structured outputs when using structural tags. + If there is no prompt provided, the sample will use the default prompt: `"What is the weather in London today and in Paris yesterday, and how many pounds can I get for 100 euros?"` + +- **Main Feature:** Structured tool call generation with LLM using schema enforcement with structural tags. +- **Run Command:** + ```bash + python structural_tags_generation.py model_dir [--prompt "Your prompt here"] + ``` + After running, the script will print the generated text output with and without structural tags, and display the parsed tool calls. + +**Note:** +This approach is useful for building LLM-powered agents that interact with external APIs or services in a controlled, structured way. 
+For best results, use models fine-tuned for function calling and adapt structural tags according to the model function call template. +If the model does not generate trigger strings there will be no structural constraints during the generation. +The sample is verified with `meta-llama/Llama-3.2-3B-Instruct` model. Other models may not produce the expected results or might require a different system prompt. + + +### 13. Compound Grammar Generation with Parsing Sample (`compound_grammar_generation`) +- **Description:** + This sample demonstrates advanced structured output generation and results parsing using compound grammars in OpenVINO GenAI. + It showcases how to combine multiple grammar types - Regex, JSONSchema and EBNF - using Union (`|`) and Concat (`+`) operations to strictly control LLM output and + also shows how to write parsing logic to extract structured data from the generated output. + It features multi-turn chat, switching grammar constraints between turns (e.g., "yes"/"no" answers and structured tool calls). + Union (`|`) operation allows the model to choose which grammar to use during generation. + In the sample it is used to combine two regex grammars for `"yes"` or `"no"` answer. + Concat (`+`) operation allows to start with one grammar and continue with another. + Also it demonstrates how to write custom parser to extract tool calls from the generated text. + In the sample it is used to create a `phi-4-mini-instruct` style tool calling answer - `functools[{tool_1_json}, ...]` - by combining regex and JSON schema grammars.
+ +- **Main Features:** + - Create grammar building blocks: Regex, JSONSchema, EBNF grammar + - Combine grammars with Concat (`+`) and Union (`|`) operations + - Multi-turn chat with grammar switching + - Structured tool calling using Pydantic schemas + - Parse generated output to call tools from extracted structured data +- **Run Command:** + ```bash + python compound_grammar_generation.py model_dir + ``` +- **Notes:** + This sample is ideal for scenarios requiring strict control over LLM outputs, such as building agents that interact with APIs or require validated structured responses. It showcases how to combine regex triggers and JSON schema enforcement for robust output generation and parsing of the resulting output. + The sample is verified with `microsoft/Phi-4-mini-instruct` model. Other models may not produce the expected results or might require a different system prompt. + + +## Troubleshooting + +### Unicode characters encoding error on Windows + +Example error: +``` +UnicodeEncodeError: 'charmap' codec can't encode character '\u25aa' in position 0: character maps to <undefined> +``` + +If you encounter the error described in the example when a sample is printing output to the Windows console, it is likely due to the default Windows encoding not supporting certain Unicode characters. To resolve this: +1. Enable Unicode characters for Windows cmd - open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot. +2. Enable UTF-8 mode by setting environment variable `PYTHONIOENCODING="utf8"`. + +## Support and Contribution +- For troubleshooting, consult the [OpenVINO documentation](https://docs.openvino.ai). +- To report issues or contribute, visit the [GitHub repository](https://github.com/openvinotoolkit/openvino.genai).
def main():
    """Benchmark an LLM with OpenVINO GenAI and print aggregated metrics.

    Parses the command line, builds an ``LLMPipeline`` for the requested
    device, runs ``--num_warmup`` untimed generations followed by the
    measured ones, and prints load/generate/tokenization timings, TTFT,
    TPOT and throughput.

    Raises:
        RuntimeError: If both ``--prompt`` and ``--prompt_file`` are given,
            or if the selected prompt text is empty.
    """
    parser = argparse.ArgumentParser(description="Help command")
    parser.add_argument("-m", "--model", type=str, required=True, help="Path to model and tokenizers base directory")
    parser.add_argument("-p", "--prompt", type=str, default=None, help="Prompt")
    parser.add_argument("-pf", "--prompt_file", type=str, help="Read prompt from file")
    parser.add_argument("-nw", "--num_warmup", type=int, default=1, help="Number of warmup iterations")
    parser.add_argument("-n", "--num_iter", type=int, default=2, help="Number of iterations")
    parser.add_argument("-mt", "--max_new_tokens", type=int, default=20, help="Maximal number of new tokens")
    parser.add_argument("-d", "--device", type=str, default="CPU", help="Device")

    args = parser.parse_args()

    if args.prompt is not None and args.prompt_file is not None:
        raise RuntimeError("Cannot specify both --prompt and --prompt_file options simultaneously!")

    if args.prompt_file is not None:
        with open(args.prompt_file, "r", encoding="utf-8") as f:
            prompt_text = f.read()
    elif args.prompt is not None:
        prompt_text = args.prompt
    else:
        prompt_text = "The Sky is blue because"

    # The prompt is wrapped in a one-element list below, so emptiness has to
    # be checked on the text itself; the list length is always 1.
    if not prompt_text:
        raise RuntimeError("Prompt is empty!")

    # Perf metrics is stored in DecodedResults.
    # In order to get DecodedResults instead of a string input should be a list.
    prompt = [prompt_text]

    print(f"openvino runtime version: {get_version()}, genai version: {ov_genai.__version__}")

    models_path = args.model
    device = args.device
    num_warmup = args.num_warmup
    num_iter = args.num_iter

    config = ov_genai.GenerationConfig()
    config.max_new_tokens = args.max_new_tokens
    config.apply_chat_template = False

    if device == "NPU":
        # NPU pipelines are created without a scheduler configuration.
        pipe = ov_genai.LLMPipeline(models_path, device)
    else:
        scheduler_config = ov_genai.SchedulerConfig()
        scheduler_config.enable_prefix_caching = False
        scheduler_config.max_num_batched_tokens = sys.maxsize
        pipe = ov_genai.LLMPipeline(models_path, device, scheduler_config=scheduler_config)

    input_data = pipe.get_tokenizer().encode(prompt)
    prompt_token_size = input_data.input_ids.get_shape()[1]
    print(f"Prompt token size: {prompt_token_size}")

    for _ in range(num_warmup):
        pipe.generate(prompt, config)

    # The first measured run seeds the accumulator; the remaining
    # num_iter - 1 runs are added to it.
    res = pipe.generate(prompt, config)
    perf_metrics = res.perf_metrics
    for _ in range(num_iter - 1):
        res = pipe.generate(prompt, config)
        perf_metrics += res.perf_metrics

    print(f"Output token size: {res.perf_metrics.get_num_generated_tokens()}")
    print(f"Load time: {perf_metrics.get_load_time():.2f} ms")
    print(
        f"Generate time: {perf_metrics.get_generate_duration().mean:.2f} ± {perf_metrics.get_generate_duration().std:.2f} ms"
    )
    print(
        f"Tokenization time: {perf_metrics.get_tokenization_duration().mean:.2f} ± {perf_metrics.get_tokenization_duration().std:.2f} ms"
    )
    print(
        f"Detokenization time: {perf_metrics.get_detokenization_duration().mean:.2f} ± {perf_metrics.get_detokenization_duration().std:.2f} ms"
    )
    print(f"TTFT: {perf_metrics.get_ttft().mean:.2f} ± {perf_metrics.get_ttft().std:.2f} ms")
    print(f"TPOT: {perf_metrics.get_tpot().mean:.2f} ± {perf_metrics.get_tpot().std:.2f} ms")
    print(f"Throughput : {perf_metrics.get_throughput().mean:.2f} ± {perf_metrics.get_throughput().std:.2f} tokens/s")
def main():
    """Run an interactive chat loop against an OpenVINO GenAI pipeline.

    Reads the model directory and an optional device (default ``CPU``) from
    the command line, then repeatedly prompts for a question, streams the
    answer through ``streamer`` and records both turns in the chat history.
    The loop ends when standard input reaches end-of-file.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("model_dir", help="Path to the model directory")
    cli.add_argument("device", nargs="?", default="CPU", help="Device to run the model on (default: CPU)")
    options = cli.parse_args()

    pipeline = openvino_genai.LLMPipeline(options.model_dir, options.device)

    generation_config = openvino_genai.GenerationConfig()
    generation_config.max_new_tokens = 100

    history = openvino_genai.ChatHistory()
    while True:
        try:
            question = input("question:\n")
        except EOFError:
            # No more input: nothing follows the loop, so leave directly.
            return
        history.append({"role": "user", "content": question})
        reply: openvino_genai.DecodedResults = pipeline.generate(history, generation_config, streamer)
        # Keep the assistant turn so the next question sees the full dialog.
        history.append({"role": "assistant", "content": reply.texts[0]})
        print("\n----------")
+from openvino_genai import ( + GenerationConfig, + LLMPipeline, + StreamingStatus, + Parser, + DecodedResults, + ChatHistory, +) + +from openvino_genai import ( + StructuredOutputConfig as SOC, +) +from pydantic import BaseModel, Field + + +def streamer(subword): + print(subword, end="", flush=True) + return StreamingStatus.RUNNING + + +class book_flight_ticket(BaseModel): + """booking flights""" + + origin_airport_code: str = Field(description="The name of Departure airport code") + destination_airport_code: str = Field(description="The name of Destination airport code") + departure_date: str = Field(description="The date of outbound flight") + return_date: str = Field(description="The date of return flight") + + +class book_hotel(BaseModel): + """booking hotel""" + + destination: str = Field(description="The name of the city") + check_in_date: str = Field(description="The date of check in") + checkout_date: str = Field(description="The date of check out") + + +def _recursive_purge_dict_key(d: dict[str, Any], k: str) -> None: + """Remove a key from a dictionary recursively""" + if isinstance(d, dict): + for key in list(d.keys()): + if key == k and "type" in d.keys(): + del d[key] + else: + _recursive_purge_dict_key(d[key], k) + + +def tool_to_dict(tool: BaseModel, with_description: bool = True) -> dict[str, Any]: + schema = tool.model_json_schema() + _recursive_purge_dict_key(schema, "title") + if not with_description: + _recursive_purge_dict_key(schema, "description") + return { + "type": "object", + "properties": { + "name": {"type": "string", "enum": [tool.__name__]}, + "arguments": schema, + }, + "required": ["name", "arguments"], + } + + +def tools_to_array_schema(*tools: BaseModel) -> str: + return json.dumps( + { + "type": "array", + "items": {"anyOf": [tool_to_dict(tool, with_description=False) for tool in tools]}, + } + ) + + +class CustomToolCallParser(Parser): + """parser to extract tool calls from the model output. 
def print_tool_call(answer: DecodedResults):
    """Print every parsed tool call of ``answer`` as ``name(arg="value", ...)``.

    Reads ``answer.parsed[0]["tool_calls"]``. That entry is only present when
    a parser managed to extract tool calls from the generated text, so a
    missing entry is reported with a message instead of raising ``KeyError``.
    """
    try:
        tool_calls = answer.parsed[0]["tool_calls"]
    except (KeyError, IndexError):
        # The parser found no marker or could not decode the JSON payload.
        print("No tool calls were parsed from the model output.")
        return

    for tool_call in tool_calls:
        rendered_args = ", ".join(f'{key}="{value}"' for key, value in tool_call["arguments"].items())
        print(f"""{tool_call["name"]}({rendered_args})""")
E.g., if the type is number and format is float, write value 7 as 7.0 + * make sure you pick the right functions that match the user intent +""" + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "model_dir", + help="Path to the model directory. It should contain the OpenVINO model files.", + ) + args = parser.parse_args() + + pipe = LLMPipeline(args.model_dir, "CPU") + + tools = [tool_to_dict(tool) for tool in [book_flight_ticket, book_hotel]] + chat_history = ChatHistory() + chat_history.set_tools(tools) + chat_history.append({"role": "system", "content": sys_message}) + + generation_config = GenerationConfig() + generation_config.max_new_tokens = 300 + generation_config.do_sample = True + + user_text_1 = "Do dolphins have fingers?" + print("User: ", user_text_1) + chat_history.append({"role": "user", "content": user_text_1}) + + # same as SOC.Union(SOC.ConstString("yes"), SOC.ConstString("no")) + yes_or_no_grammar = SOC.ConstString("yes") | SOC.ConstString("no") + generation_config.structured_output_config = SOC(structural_tags_config=yes_or_no_grammar) + print("Assistant: ", end="") + answer = pipe.generate(chat_history, generation_config, streamer=streamer) + chat_history.append({"role": "assistant", "content": answer.texts[0]}) + print() + + user_text_2 = ( + "book flight ticket from Beijing to Paris(using airport code) in 2025-12-04 to 2025-12-10, " + "then book hotel from 2025-12-04 to 2025-12-10 in Paris" + ) + print("User: ", user_text_2) + chat_history.append({"role": "user", "content": user_text_2}) + + start_tool_call_tag = SOC.ConstString(r"functools") + tools_json = SOC.JSONSchema(tools_to_array_schema(book_flight_ticket, book_hotel)) + tool_call_grammar = start_tool_call_tag + tools_json # SOC.Concat(start_tool_call_tag, tools_json) + generation_config.structured_output_config.structural_tags_config = tool_call_grammar + + print("Assistant: ", end="") + answer = pipe.generate(chat_history, generation_config, 
def decrypt_model(model_dir, model_file_name, weights_file_name):
    """Load a model description and its weights from ``model_dir`` into memory.

    This is the hook where real decryption would happen; as written the files
    are read unchanged and the ``# decrypt ...`` comments mark the insertion
    points.

    Args:
        model_dir: Directory holding the files (``str`` or path-like).
        model_file_name: Name of the XML model file inside ``model_dir``.
        weights_file_name: Name of the binary weights file inside ``model_dir``.

    Returns:
        A ``(model, weights)`` tuple: the XML text as ``str`` and a ``Tensor``
        built from the weights as a ``uint8`` array.
    """
    # Explicit encoding keeps the read independent of the platform locale.
    # NOTE(review): assumes the XML is UTF-8 (the XML default) - confirm.
    with open(f"{model_dir}/{model_file_name}", "r", encoding="utf-8") as file:
        model = file.read()
    # decrypt model

    with open(f"{model_dir}/{weights_file_name}", "rb") as file:
        binary_data = file.read()
    # decrypt weights
    # frombuffer gives a read-only view of the bytes object; astype copies it
    # into an independent array.
    weights = np.frombuffer(binary_data, dtype=np.uint8).astype(np.uint8)

    return model, Tensor(weights)
def main():
    """Generate text with a pipeline built from in-memory model buffers.

    Command line: ``model_dir prompt [device]``. The model, weights and
    tokenizer are loaded through ``decrypt_model`` / ``read_tokenizer`` and
    handed to ``LLMPipeline`` as buffers. ``device`` is optional and defaults
    to ``CPU``, so the two-argument invocation keeps working; for a GPU
    device the encrypted compiled-model cache settings are passed as well.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('model_dir')
    parser.add_argument('prompt')
    parser.add_argument('device', nargs='?', default='CPU', help='Device to run the model on (default: CPU)')
    args = parser.parse_args()

    device = args.device

    pipeline_properties = dict()
    if device.startswith("GPU"):
        # Cache compiled models on disk for GPU to save time on the
        # next run. It's not beneficial for CPU.
        pipeline_properties = get_config_for_cache_encryption()

    model, weights = decrypt_model(args.model_dir, 'openvino_model.xml', 'openvino_model.bin')
    tokenizer = read_tokenizer(args.model_dir)

    pipe = openvino_genai.LLMPipeline(model, weights, tokenizer, device, **pipeline_properties)

    # Separate name: this is the generation config, not the pipeline properties.
    generation_config = openvino_genai.GenerationConfig()
    generation_config.max_new_tokens = 100

    print(pipe.generate(args.prompt, generation_config))
def retry_request(func, retries=5):
    """
    Call ``func`` and retry it with exponential backoff on network failures.

    Parameters:
        func (callable): Zero-argument callable that performs the request.
        retries (int): Maximum number of attempts. Default is 5. With a value
            below 1 ``func`` is never called and ``None`` is returned.

    Returns:
        Any: The return value of ``func`` from the first successful attempt.

    Raises:
        CalledProcessError: Immediately when its stderr is missing or matches
            none of the known network error patterns, or after the last
            attempt.
        RequestException, HfHubHTTPError: After the last attempt has failed.
    """
    network_error_patterns = (
        "ConnectionError",
        "Timeout",
        "Time-out",
        "ServiceUnavailable",
        "InternalServerError",
        "OSError",
        "HTTPError",
    )

    for attempt in range(retries):
        try:
            return func()
        except (CalledProcessError, RequestException, HfHubHTTPError) as e:
            if isinstance(e, CalledProcessError):
                stderr = e.stderr
                # stderr is bytes unless the subprocess ran in text mode, and
                # a str pattern cannot be searched for in bytes (TypeError).
                if isinstance(stderr, bytes):
                    stderr = stderr.decode("utf-8", errors="replace")
                if stderr is None or not any(pattern in stderr for pattern in network_error_patterns):
                    # Not a network problem: retrying would not help.
                    raise
                logger.warning(f"CalledProcessError occurred: {stderr}")
            if attempt >= retries - 1:
                raise
            timeout = 2 ** attempt
            logger.info(f"Attempt {attempt + 1} failed. Retrying in {timeout} seconds.")
            time.sleep(timeout)
def run_and_write_metrics(model, prompt, generation_config, report_file):
    """Run one generation on ``model`` and report cache and memory usage.

    Args:
        model: Pipeline exposing ``generate`` and ``get_metrics``.
        prompt: Prompt text; it is submitted as a single-element batch.
        generation_config: Generation config used for the request.
        report_file: CSV file to append a row to, or ``None`` to skip it.

    Returns:
        The ``max_cache_usage`` value reported by the pipeline metrics.

    The CSV row holds ``max_new_tokens - 1``, the length of the first
    generated sequence, average and maximum cache usage, and the resident
    set size of the current process in GB.
    """
    # Use the pipeline that was passed in rather than a module-level global.
    result = model.generate([prompt], generation_config=[generation_config])

    pipeline_metrics = model.get_metrics()
    rss_usage_gb = psutil.Process(os.getpid()).memory_info().rss / 1024 ** 3
    result_length = len(result[0].m_generation_ids[0])
    print(f"avg_cache_usage:{pipeline_metrics.avg_cache_usage:.2f}% max_cache_usage:{pipeline_metrics.max_cache_usage:.2f}% rss_usage:{rss_usage_gb:.3f} GB")
    print(f"result length: {result_length}")
    print()

    if report_file is not None:
        # newline='' lets the csv module control line endings itself.
        with open(report_file, 'a', newline='') as f:
            csv.writer(f).writerow([
                generation_config.max_new_tokens - 1,
                result_length,
                pipeline_metrics.avg_cache_usage,
                pipeline_metrics.max_cache_usage,
                rss_usage_gb,
            ])
    return pipeline_metrics.max_cache_usage
+ 'If left unspecified, will allocate dynamically to accommodate the generation length.') + parser.add_argument("--report", type=str, help="File name for CSV-formatted export of limit search data") + parser.add_argument("--mode", type=str, nargs='?', choices=['gen_length', 'gen_throughput'], required=True) + parser.add_argument("--data", type=str, help="Dataset jsonl file") + parser.add_argument("--timeout", type=int, help="Maximum time allowed for a single round of generation in the `gen_length` mode", default=120) + parser.add_argument("--device", type=str, help="Device for model inference", default="CPU") + + args = parser.parse_args() + seqs_per_request = 1 + num_kv_blocks = args.num_kv_blocks + + scheduler_config_opt = get_scheduler_config(num_kv_blocks) + if args.eviction_on: + scheduler_config_opt.use_cache_eviction = True + print("Eviction is ON") + else: + print("Eviction is OFF") + + base_model_path = Path("limit_checker_models") + converted_model = get_converted_model(base_model_path, args.model) + models_path = converted_model.models_path + model_cb_opt = ContinuousBatchingPipeline(models_path, scheduler_config_opt, args.device, {}, get_default_llm_properties()) + + tokenizer = converted_model.tokenizer + if args.mode == "gen_length": + data_dict = load_prompts_dataset('long_prompts.txt') + prompt = data_dict["prompts"][0] + + generation_length = 1 + + if args.report is not None: + with open(args.report, 'w') as f: + csv_writer = csv.writer(f) + csv_writer.writerow(['generation_length', 'result_length', 'avg_cache_usage_%', 'max_cache_usage_%', 'rss_usage_gb']) + + + while True: + gc.collect() + generation_config = GenerationConfig() # expecting default greedy sampling + generation_config.num_return_sequences = 1 + generation_config.max_new_tokens = generation_length + 1 + generation_config.apply_chat_template = False + generation_config.ignore_eos = True + print(f"generation_length:{generation_length} ", sep='') + + start = time.time() + 
max_cache_usage = run_and_write_metrics(model_cb_opt, prompt, generation_config, args.report) + end = time.time() + if (end - start) > args.timeout: + print("Maximum generation time reached") + break + elif max_cache_usage == 100: + print("Cache size exhausted") + break + + generation_length *= 2 + + del data_dict + elif args.mode == "gen_throughput": + dataset = load_samsum_dataset(args.data) + prompt_throughput = 1 + prompt_left_bound = prompt_throughput + prompt_right_bound = None + is_right_bound = False + + while True: + gc.collect() + generation_config = GenerationConfig() # expecting default greedy sampling + generation_config.num_return_sequences = 1 + generation_config.apply_chat_template = False + prompt_subset = dataset["prompts"][:prompt_throughput] + print(f"prompt_throughput {prompt_throughput}") + result: GenerationResult = model_cb_opt.generate(prompt_subset, generation_config=[generation_config] * len(prompt_subset)) + + pipeline_opt_metrics = model_cb_opt.get_metrics() + rss_usage_gb = psutil.Process(os.getpid()).memory_info().rss / 1024 ** 3 + print(f"avg_cache_usage:{pipeline_opt_metrics.avg_cache_usage:.2f}% max_cache_usage:{pipeline_opt_metrics.max_cache_usage:.2f}% rss_usage:{rss_usage_gb:.3f} GB") + print() + + max_cache_usage = pipeline_opt_metrics.max_cache_usage + + if max_cache_usage == 100.0 and not is_right_bound: + is_right_bound = True + prompt_right_bound = prompt_throughput + + if not is_right_bound: + prompt_left_bound = prompt_throughput + prompt_throughput *= 2 + else: + if max_cache_usage == 100.0: + prompt_right_bound = prompt_throughput + elif max_cache_usage < 100.0: + prompt_left_bound = prompt_throughput + prompt_throughput = (prompt_left_bound + prompt_right_bound) // 2 + + if (prompt_right_bound - prompt_left_bound <= 1): + break + + + print(f"Approximate highest throughput: {prompt_throughput} prompts") diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/lora_greedy_causal_lm.py 
b/src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/lora_greedy_causal_lm.py new file mode 100644 index 0000000..64eb542 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/lora_greedy_causal_lm.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python3 +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import argparse +import openvino_genai + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('models_path') + parser.add_argument('adapter_path') + parser.add_argument('prompt') + args = parser.parse_args() + + device = 'CPU' # GPU can be used as well + adapter = openvino_genai.Adapter(args.adapter_path) + adapter_config = openvino_genai.AdapterConfig(adapter) + pipe = openvino_genai.LLMPipeline(args.models_path, device, adapters=adapter_config) # register all required adapters here + + print("Generate with LoRA adapter and alpha set to 0.75:") + print(pipe.generate(args.prompt, max_new_tokens=100, adapters=openvino_genai.AdapterConfig(adapter, 0.75))) + + print("\n-----------------------------") + print("Generate without LoRA adapter:") + print(pipe.generate(args.prompt, max_new_tokens=100, adapters=openvino_genai.AdapterConfig())) + +if '__main__' == __name__: + main() diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/multinomial_causal_lm.py b/src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/multinomial_causal_lm.py new file mode 100644 index 0000000..e8f150c --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/multinomial_causal_lm.py @@ -0,0 +1,207 @@ +#!/usr/bin/env python3 +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import argparse +import openvino_genai +import queue +import threading +from typing import Union + + +class IterableStreamer(openvino_genai.StreamerBase): + """ + A custom streamer class for handling token streaming and 
detokenization with buffering. + + Attributes: + tokenizer (Tokenizer): The tokenizer used for encoding and decoding tokens. + tokens_cache (list): A buffer to accumulate tokens for detokenization. + text_queue (Queue): A synchronized queue for storing decoded text chunks. + print_len (int): The length of the printed text to manage incremental decoding. + """ + + def __init__(self, tokenizer): + """ + Initializes the IterableStreamer with the given tokenizer. + + Args: + tokenizer (Tokenizer): The tokenizer to use for encoding and decoding tokens. + """ + super().__init__() + self.tokenizer = tokenizer + self.tokens_cache = [] + self.text_queue = queue.Queue() + self.print_len = 0 + self.decoded_lengths = [] + + def __iter__(self): + """ + Returns the iterator object itself. + """ + return self + + def __next__(self): + """ + Returns the next value from the text queue. + + Returns: + str: The next decoded text chunk. + + Raises: + StopIteration: If there are no more elements in the queue. + """ + # get() will be blocked until a token is available. + value = self.text_queue.get() + if value is None: + raise StopIteration + return value + + def get_stop_flag(self): + """ + Checks whether the generation process should be stopped or cancelled. + + Returns: + openvino_genai.StreamingStatus: Always returns RUNNING in this implementation. + """ + return openvino_genai.StreamingStatus.RUNNING + + def write_word(self, word: str): + """ + Puts a word into the text queue. + + Args: + word (str): The word to put into the queue. + """ + self.text_queue.put(word) + + def write(self, token: Union[int, list[int]]) -> openvino_genai.StreamingStatus: + """ + Processes a token and manages the decoding buffer. Adds decoded text to the queue. + + Args: + token (Union[int, list[int]]): The token(s) to process. + + Returns: + bool: True if generation should be stopped, False otherwise. 
+ """ + if type(token) is list: + self.tokens_cache += token + self.decoded_lengths += [-2 for _ in range(len(token) - 1)] + else: + self.tokens_cache.append(token) + + text = self.tokenizer.decode(self.tokens_cache) + self.decoded_lengths.append(len(text)) + + word = "" + delay_n_tokens = 3 + if len(text) > self.print_len and "\n" == text[-1]: + # Flush the cache after the new line symbol. + word = text[self.print_len :] + self.tokens_cache = [] + self.decoded_lengths = [] + self.print_len = 0 + elif len(text) > 0 and text[-1] == chr(65533): + # Don't print incomplete text. + self.decoded_lengths[-1] = -1 + elif len(self.tokens_cache) >= delay_n_tokens: + self.compute_decoded_length_for_position( + len(self.decoded_lengths) - delay_n_tokens + ) + print_until = self.decoded_lengths[-delay_n_tokens] + if print_until != -1 and print_until > self.print_len: + # It is possible to have a shorter text after adding new token. + # Print to output only if text length is increased and text is complete (print_until != -1). + word = text[self.print_len : print_until] + self.print_len = print_until + self.write_word(word) + + stop_flag = self.get_stop_flag() + if stop_flag != openvino_genai.StreamingStatus.RUNNING: + # When generation is stopped from streamer then end is not called, need to call it here manually. 
+ self.end() + + return stop_flag + + def compute_decoded_length_for_position(self, cache_position: int): + # decode was performed for this position, skippping + if self.decoded_lengths[cache_position] != -2: + return + + cache_for_position = self.tokens_cache[: cache_position + 1] + text_for_position = self.tokenizer.decode(cache_for_position) + + if len(text_for_position) > 0 and text_for_position[-1] == chr(65533): + # Mark text as incomplete + self.decoded_lengths[cache_position] = -1 + else: + self.decoded_lengths[cache_position] = len(text_for_position) + + def end(self): + """ + Flushes residual tokens from the buffer and puts a None value in the queue to signal the end. + """ + text = self.tokenizer.decode(self.tokens_cache) + if len(text) > self.print_len: + word = text[self.print_len :] + self.write_word(word) + self.tokens_cache = [] + self.print_len = 0 + self.text_queue.put(None) + + +class ChunkStreamer(IterableStreamer): + + def __init__(self, tokenizer, tokens_len): + super().__init__(tokenizer) + self.tokens_len = tokens_len + + def write(self, token: Union[int, list[int]]) -> openvino_genai.StreamingStatus: + if (len(self.tokens_cache) + 1) % self.tokens_len == 0: + return super().write(token) + + if type(token) is list: + self.tokens_cache += token + # -2 means no decode was done for this token position + self.decoded_lengths += [-2 for _ in range(len(token))] + else: + self.tokens_cache.append(token) + self.decoded_lengths.append(-2) + + return openvino_genai.StreamingStatus.RUNNING + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("model_dir") + parser.add_argument("prompt") + args = parser.parse_args() + + device = "CPU" # GPU can be used as well + tokens_len = 10 # chunk size + pipe = openvino_genai.LLMPipeline(args.model_dir, device) + + text_print_streamer = ChunkStreamer(pipe.get_tokenizer(), tokens_len) + + def token_printer(): + # Getting next elements from iterable will be blocked until a new token is 
available. + for word in text_print_streamer: + print(word, end="", flush=True) + + printer_thread = threading.Thread(target=token_printer, daemon=True) + printer_thread.start() + + config = openvino_genai.GenerationConfig() + config.max_new_tokens = 100 + config.do_sample = True + config.top_p = 0.9 + config.top_k = 30 + + # Since the streamer is set, the results will be printed + # every time a new token is generated and put into the streamer queue. + pipe.generate(args.prompt, config, text_print_streamer) + printer_thread.join() + + +if "__main__" == __name__: + main() diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/prompt_lookup_decoding_lm.py b/src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/prompt_lookup_decoding_lm.py new file mode 100644 index 0000000..830ac52 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/prompt_lookup_decoding_lm.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import argparse +import openvino_genai + +def streamer(subword): + print(subword, end='', flush=True) + # Return flag corresponds whether generation should be stopped. + return openvino_genai.StreamingStatus.RUNNING + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('model_dir') + parser.add_argument('prompt') + args = parser.parse_args() + + device = 'CPU' + + pipe = openvino_genai.LLMPipeline(args.model_dir, device, prompt_lookup=True) + + config = openvino_genai.GenerationConfig() + config.max_new_tokens = 100 + # add parameter to enable prompt lookup decoding to generate `num_assistant_tokens` candidates per iteration + config.num_assistant_tokens = 5 + # Define max_ngram_size + config.max_ngram_size = 3 + + # Since the streamer is set, the results will be printed + # every time a new token is generated and put into the streamer queue. 
+ pipe.generate(args.prompt, config, streamer) + print() + +if '__main__' == __name__: + main() diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/react_sample.py b/src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/react_sample.py new file mode 100644 index 0000000..de270ee --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/react_sample.py @@ -0,0 +1,192 @@ +#!/usr/bin/env python3 +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import requests +import argparse +import openvino_genai +import urllib.parse +import json +import json5 + +TOOL_DESC = """{name_for_model}: Call this tool to interact with the {name_for_human} API. What is the {name_for_human} API useful for? {description_for_model} Parameters: {parameters}""" + +PROMPT_REACT = """Answer the following questions as best as you can. You have access to the following APIs: + +{tools_text} + +Use the following format: + +Question: the input question you must answer +Thought: you should always think about what to do +Action: the action to take, should be one of [{tools_name_text}] +Action Input: the input to the action +Observation: the result of the action +... (this Thought/Action/Action Input/Observation can be repeated zero or more times) +Thought: I now know the final answer +Final Answer: the final answer to the original input question + +Begin! 
+ +Question: {query}""" + + +tools = [ + { + "name_for_human": "get weather", + "name_for_model": "get_weather", + "description_for_model": "Get the current weather in a given city name.", + "parameters": [ + { + "name": "city_name", + "description": "City name", + "required": True, + "schema": {"type": "string"}, + } + ], + }, + { + "name_for_human": "generate image", + "name_for_model": "generate_image", + "description_for_model": "AI painting (image generation) service, input text description, and return the image URL drawn based on text information.", + "parameters": [ + { + "name": "prompt", + "description": "describe the image", + "required": True, + "schema": {"type": "string"}, + } + ], + }, +] + +def build_input_text(tokenizer, chat_history, list_of_tool_info) -> str: + tools_text = [] + for tool_info in list_of_tool_info: + tool = TOOL_DESC.format( + name_for_model=tool_info["name_for_model"], + name_for_human=tool_info["name_for_human"], + description_for_model=tool_info["description_for_model"], + parameters=json.dumps(tool_info["parameters"], ensure_ascii=False), + ) + if tool_info.get("args_format", "json") == "json": + tool += " Format the arguments as a JSON object." + elif tool_info["args_format"] == "code": + tool += " Enclose the code within triple backticks (`) at the beginning and end of the code." 
+ else: + raise NotImplementedError + tools_text.append(tool) + + tools_text = "\n\n".join(tools_text) + tools_name_text = ", ".join([tool_info["name_for_model"] for tool_info in list_of_tool_info]) + + messages = [{"role": "system", "content": "You are a helpful assistant."}] + for i, (query, response) in enumerate(chat_history): + if list_of_tool_info: + if (len(chat_history) == 1): + query = PROMPT_REACT.format( + tools_text=tools_text, + tools_name_text=tools_name_text, + query=query, + ) + if query: + messages.append({"role": "user", "content": query}) + if response: + messages.append({"role": "assistant", "content": response}) + + prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True) + + return prompt + +def parse_first_tool_call(text): + tool_name, tool_args = "", "" + i = text.find("\nAction:") + j = text.find("\nAction Input:") + k = text.find("\nObservation:") + if 0 <= i < j: # If the text has `Action` and `Action input`, + if k < j: # but does not contain `Observation`, + # then it is likely that `Observation` is omitted by the LLM, + # because the output text may have discarded the stop word. + text = text.rstrip() + "\nObservation:" # Add it back. 
+ k = text.find("\nObservation:") + tool_name = text[i + len("\nAction:") : j].strip() + tool_args = text[j + len("\nAction Input:") : k].strip() + text = text[:k] + return tool_name, tool_args, text + +def call_tool(tool_name: str, tool_args: str) -> str: + if tool_name == "get_weather": + city_name = json5.loads(tool_args)["city_name"] + key_selection = { + "current_condition": [ + "temp_C", + "FeelsLikeC", + "humidity", + "weatherDesc", + "observation_time", + ], + } + resp = requests.get(f"https://wttr.in/{city_name}?format=j1") + resp.raise_for_status() + resp = resp.json() + ret = {k: {_v: resp[k][0][_v] for _v in v} for k, v in key_selection.items()} + return json.dumps(ret, ensure_ascii=False) + elif tool_name == "generate_image": + tool_args = tool_args.replace("(", "").replace(")", "") + prompt = json5.loads(tool_args)["prompt"] + prompt = urllib.parse.quote(prompt) + return json.dumps( + {"image_url": f"https://image.pollinations.ai/prompt/{prompt}"}, + ensure_ascii=False, + ) + else: + raise NotImplementedError + +def llm_with_tool(llm_pipe, prompt, history, list_of_tool_info): + chat_history = [(x["user"], x["bot"]) for x in history] + [(prompt, "")] + planning_prompt = build_input_text(llm_pipe.get_tokenizer(), chat_history, list_of_tool_info) + + text = "" + while True: + # llm pipe output based planning_prompt and the text (previous output) + llm_config = llm_pipe.get_generation_config() + output = llm_pipe.generate(planning_prompt + text, llm_config, streamer) + # parse the output to get action + action, action_input, output = parse_first_tool_call(output) + if action: + observation = call_tool(action, action_input) + observation_txt = f"\nObservation: = {observation}\nThought:" + print("\n\n- Getting information from the tool API -", observation_txt, "\n") + output += observation_txt + text += output + else: + text += output + break + + history.append({"user": prompt, "bot": text}) + return text, history + +def streamer(subword): + print(subword, 
end='', flush=True) + # Return flag corresponds whether generation should be stopped. + return openvino_genai.StreamingStatus.RUNNING + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('model_dir') + args = parser.parse_args() + + device = 'CPU' # GPU can be used as well + llm_model_path = args.model_dir + + llm_pipe = openvino_genai.LLMPipeline(llm_model_path, device) + llm_config = openvino_genai.GenerationConfig() + llm_config.max_new_tokens = 256 + llm_pipe.set_generation_config(llm_config) + + history = [] + query = "get the weather in London, and create a picture of Big Ben based on the weather information" + response, history = llm_with_tool(llm_pipe, prompt=query, history=history, list_of_tool_info=tools) + +if '__main__' == __name__: + main() diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/speculative_decoding_lm.py b/src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/speculative_decoding_lm.py new file mode 100644 index 0000000..7f84527 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/speculative_decoding_lm.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 +# Copyright (C) 2024-2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import argparse +import openvino_genai +import queue + +def streamer(subword): + print(subword, end='', flush=True) + # Return flag corresponds whether generation should be stopped. + return openvino_genai.StreamingStatus.RUNNING + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('model_dir') + parser.add_argument('draft_model_dir') + parser.add_argument('prompt') + args = parser.parse_args() + + # User can run main and draft model on different devices. + # Please, set device for main model in `openvino_genai.LLMPipeline` constructor and in `openvino_genai.draft_model` for draft. + # CPU, GPU and NPU can be used. 
For NPU, the preferred configuration is when both the main and draft models use NPU. + main_device = 'CPU' + draft_device = 'CPU' + + draft_model = openvino_genai.draft_model(args.draft_model_dir, draft_device) + + pipe = openvino_genai.LLMPipeline(args.model_dir, main_device, draft_model=draft_model) + + config = openvino_genai.GenerationConfig() + config.max_new_tokens = 100 + # Speculative decoding generation parameters like `num_assistant_tokens` and `assistant_confidence_threshold` are mutually excluded. + # Add parameter to enable speculative decoding to generate `num_assistant_tokens` candidates by draft_model per iteration. + # NOTE: ContinuousBatching backend uses `num_assistant_tokens` as is. Stateful backend uses `num_assistant_tokens`'s copy as initial + # value and adjusts it based on recent number of accepted tokens. If `num_assistant_tokens` is not set, it defaults to `5` for both + # backends. + config.num_assistant_tokens = 4 + # Add parameter to enable speculative decoding to generate candidates by draft_model while candidate probability is higher than + # `assistant_confidence_threshold`. + # NOTE: `assistant_confidence_threshold` is supported only by ContinuousBatching backend. + # config.assistant_confidence_threshold = 0.4 + + # Since the streamer is set, the results will be printed + # every time a new token is generated and put into the streamer queue. 
+ res = pipe.generate([args.prompt], config, streamer) + print() + if (res.extended_perf_metrics): + main_model_metrics = res.extended_perf_metrics.main_model_metrics + print(f"MAIN MODEL") + print(f" Generate time: {main_model_metrics.get_generate_duration().mean:.2f} ms" ) + print(f" TTFT: {main_model_metrics.get_ttft().mean:.2f} ± {main_model_metrics.get_ttft().std:.2f} ms" ) + print(f" TTST: {main_model_metrics.get_ttst().mean:.2f} ± {main_model_metrics.get_ttst().std:.2f} ms/token") + print(f" TPOT: {main_model_metrics.get_tpot().mean:.2f} ± {main_model_metrics.get_tpot().std:.2f} ms/iteration") + print(f" AVG Latency: {main_model_metrics.get_latency().mean:.2f} ± {main_model_metrics.get_latency().std:.2f} ms/token") + print(f" Num generated token: {main_model_metrics.get_num_generated_tokens()} tokens") + print(f" Total iteration number: {len(main_model_metrics.raw_metrics.m_durations)}") + print(f" Num accepted token: {res.extended_perf_metrics.get_num_accepted_tokens()} tokens") + + draft_model_metrics = res.extended_perf_metrics.draft_model_metrics + print(f"DRAFT MODEL" ) + print(f" Generate time: {draft_model_metrics.get_generate_duration().mean:.2f} ms" ) + print(f" TTFT: {draft_model_metrics.get_ttft().mean:.2f} ms") + print(f" TTST: {draft_model_metrics.get_ttst().mean:.2f} ms/token") + print(f" TPOT: {draft_model_metrics.get_tpot().mean:.2f} ± {draft_model_metrics.get_tpot().std:.2f} ms/token") + print(f" AVG Latency: {draft_model_metrics.get_latency().mean:.2f} ± {draft_model_metrics.get_latency().std:.2f} ms/iteration") + print(f" Num generated token: {draft_model_metrics.get_num_generated_tokens()} tokens") + print(f" Total iteration number: {len(draft_model_metrics.raw_metrics.m_durations)}") + print() + +if '__main__' == __name__: + main() diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/structural_tags_generation.py 
b/src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/structural_tags_generation.py new file mode 100644 index 0000000..6481618 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/structural_tags_generation.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python3 +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import argparse +import json +import re +from datetime import datetime +from pprint import pprint +from typing import ClassVar + +from openvino_genai import ( + GenerationConfig, + LLMPipeline, + StreamingStatus, + ChatHistory, +) +from openvino_genai import ( + StructuredOutputConfig as SOC, +) +from pydantic import BaseModel, Field + + +class ToolRequest(BaseModel): + @classmethod + def string_representation(cls) -> str: + return f', arguments={json.dumps(list(cls.model_fields))}' + + @classmethod + def get_name(cls) -> str: + return cls._name + + +class WeatherRequest(ToolRequest): + _name: ClassVar[str] = "get_weather" + + city: str = Field(description="City name") + country: str = Field(description="Country name") + date: str = Field(pattern=r"2\d\d\d-[0-1]\d-[0-3]\d", description="Date in YYYY-MM-DD format") + + +class CurrencyExchangeRequest(ToolRequest): + _name: ClassVar[str] = "get_currency_exchange" + + from_currency: str = Field(description="Currency to convert from") + to_currency: str = Field(description="Currency to convert to") + amount: float = Field(description="Amount to convert") + + +tools = {tool.get_name(): tool for tool in [WeatherRequest, CurrencyExchangeRequest]} + +new_line = "\n" # to use inside f-string +sys_message = ( + "You are a helpful assistant that can provide weather information and currency exchange rates. " + f"Today is {datetime.today().strftime('%Y-%m-%d')}. 
" # Use the current date in the system message in YYYY-MM-DD format + "You can respond in natural language, always start your answer with appropriate greeting, " + "If you need additional information to respond you can request it by calling particular tool with structured JSON. " + "You can use the following tools:\n" + f"{new_line.join([tool.string_representation() for tool in tools.values()])}\n" + "Please, only use the following format for tool calling in your responses:\n" + '' + '{"argument1": "value1", ...}' + "\n" + "Use the tool name and arguments as defined in the tool schema.\n" + "If you don't know the answer, just say that you don't know, but try to call the tool if it helps to answer the question.\n" +) + +function_pattern = r'({.*?})' +function_pattern = re.compile(function_pattern, re.DOTALL) + + +def parse_tools_from_response(response: str) -> list[ToolRequest]: + """ + Parse the tool response from the model output. + The response should be in the format: + {"argument1": "value1", ...} + """ + matches = re.finditer(function_pattern, response) + return [tools.get(match.group(1)).model_validate_json(match.group(2)) for match in matches] + + +def streamer(subword): + print(subword, end="", flush=True) + return StreamingStatus.RUNNING + + +def main(): + default_prompt = ( + "What is the weather in London today and in Paris yesterday, and how many pounds can I get for 100 euros?" + ) + + description = ( + "This script demonstrates how to use OpenVINO GenAI with structured tags to generate responses " + "that include tool calls. It uses a simple LLM pipeline to generate a response based on the provided prompt, " + "and it parses the tool calls from the response. Available tools are weather and currency exchange." + ) + parser = argparse.ArgumentParser(description=description) + parser.add_argument( + "model_dir", + help="Path to the model directory. 
It should contain the OpenVINO model files.", + ) + parser.add_argument( + "--prompt", + type=str, + default=default_prompt, + help="Prompt to generate the response.", + ) + args = parser.parse_args() + + device = "CPU" # GPU can be used as well + pipe = LLMPipeline(args.model_dir, device) + + print(f"User prompt: {args.prompt}") + + history = ChatHistory() + history.append({"role": "system", "content": sys_message}) + history.append({"role": "user", "content": args.prompt}) + + for use_structural_tags in [False, True]: + print("=" * 80) + print(f"{'Using structural tags' if use_structural_tags else 'Using no structural tags':^80}") + print("=" * 80) + config = GenerationConfig() + config.max_new_tokens = 300 + + if use_structural_tags: + config.structured_output_config = SOC( + structural_tags_config=SOC.TriggeredTags( + triggers=["', + content=SOC.JSONSchema(json.dumps(tool.model_json_schema())), + end="", + ) + for name, tool in tools.items() + ], + ) + ) + config.do_sample = True + + decoded_results = pipe.generate(history, config, streamer=streamer) + + print("\n" + "-" * 80) + print("Correct tool calls by the model:") + pprint(parse_tools_from_response(decoded_results.texts[0])) + + +if "__main__" == __name__: + main() diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/structured_output_generation.py b/src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/structured_output_generation.py new file mode 100644 index 0000000..55ad712 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/python/text_generation/structured_output_generation.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python3 +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import argparse +import json +from typing import Literal + +from openvino_genai import GenerationConfig, LLMPipeline, StructuredOutputConfig, ChatHistory +from pydantic import BaseModel, Field + + +class Person(BaseModel): + name: str = 
Field(pattern=r"^[A-Z][a-z]{1,20}$") + surname: str = Field(pattern=r"^[A-Z][a-z]{1,20}$") + age: int + city: Literal["Dublin", "Dubai", "Munich"] + + +class Car(BaseModel): + model: str = Field(pattern=r"^[A-Z][a-z]{1,20} ?[A-Z][a-z]{0,20} ?.?$") + year: int + engine_type: Literal["diesel", "petrol", "electric", "hybrid"] + + +class Transaction(BaseModel): + id: int = Field(ge=1000, le=10_000_000) + amount: float + currency: Literal["EUR", "PLN", "RUB", "AED", "CHF", "GBP", "USD"] + + +class ItemQuantities(BaseModel): + person: int = Field(ge=0, le=100) + car: int = Field(ge=0, le=100) + transaction: int = Field(ge=0, le=100) + + +items_map = {"person": Person, "car": Car, "transaction": Transaction} + +sys_message = ( + "You generate JSON objects based on the user's request. You can generate JSON objects with different types of objects: person, car, transaction. " + "If the user requested a different type, the JSON fields should remain zero. " + "Please note that the words 'individual', 'person', 'people', 'man', 'human', 'woman', 'inhabitant', 'citizen' are synonyms and can be used interchangeably. " + 'E.g. if the user wants 5 houses, then the JSON must be {"person": 0, "car": 0, "transaction": 0}. ' + 'If the user wants 3 people and 1 house, then the JSON must be {"person": 3, "car": 0, "transaction": 0}. ' + "Make sure that the JSON contains the numbers that the user requested. If the user asks for specific attributes, like 'surname', 'model', etc., " + "ignore this information and generate JSON objects with the same fields as in the schema. " + "Please use double quotes for JSON keys and values. " +) + +sys_message_for_items = "Please try to avoid generating the same JSON objects multiple times." + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("model_dir", help="Path to the model directory. 
It should contain the OpenVINO model files.") + args = parser.parse_args() + + device = "CPU" # GPU can be used as well + pipe = LLMPipeline(args.model_dir, device) + + config = GenerationConfig() + config.max_new_tokens = 300 + + print( + "This is a smart assistant that generates structured output in JSON format. " + "You can ask to generate information about a person, car, or bank transaction. " + 'For example, you can ask: "Please generate jsons for 3 persons and 1 transaction."' + ) + + while True: + try: + prompt = input("> ") + except EOFError: + break + + history = ChatHistory() + history.append({"role": "system", "content": sys_message}) + config.structured_output_config = StructuredOutputConfig( + json_schema=json.dumps(ItemQuantities.model_json_schema()) + ) + config.do_sample = False + history.append({"role": "user", "content": prompt}) + decoded_results = pipe.generate(history, config) + json_response = decoded_results.texts[0] + res = json.loads(json_response) + print(f"Generated JSON with item quantities: {json_response}") + + config.do_sample = True + config.temperature = 0.8 + + history.clear() + history.append({"role": "system", "content": sys_message_for_items}) + history.append({"role": "user", "content": prompt}) + + generate_has_run = False + for item, quantity in res.items(): + config.structured_output_config = StructuredOutputConfig( + json_schema=json.dumps(items_map[item].model_json_schema()) + ) + for _ in range(quantity): + generate_has_run = True + decoded_results = pipe.generate(history, config) + json_strs = decoded_results.texts[0] + # Validate generated JSON + json.loads(json_strs) + print(json_strs) + + if not generate_has_run: + print("No items generated. 
Please try again with a different request.") + + +if "__main__" == __name__: + main() diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/python/video_generation/README.md b/src/resources/openvino.genai-2026.1.0.0/samples/python/video_generation/README.md new file mode 100644 index 0000000..9fb5cfc --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/python/video_generation/README.md @@ -0,0 +1,164 @@ +# OpenVINO GenAI Video Generation Python Samples + +These samples showcase the use of OpenVINO's inference capabilities for video generation tasks. The samples feature `openvino_genai.Text2VideoPipeline` for generating videos from text prompts using models like LTX-Video. +The applications deliberately have few configuration options, to encourage the reader to explore and modify the source code. For example, change the device for inference to GPU. + + - [`text2video.py`](./text2video.py) demonstrates basic text to video generation. + - [`taylorseer_text2video.py`](./taylorseer_text2video.py) demonstrates text to video generation with TaylorSeer caching optimization for improved performance. Only the LTX-Video model is supported. + +## Table of Contents +1. [Download and Convert the Model](#download-and-convert-the-model) +2. [Sample Descriptions](#sample-descriptions) +3. [Troubleshooting](#troubleshooting) +4. [Support and Contribution](#support-and-contribution) + +## Download and Convert the Model + +The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. +Install [../../export-requirements.txt](../../export-requirements.txt) if model conversion is required. + +```sh +pip install --upgrade-strategy eager -r ../../export-requirements.txt +``` + +Then, run the export with Optimum CLI: + +```sh +optimum-cli export openvino --model Lightricks/LTX-Video --task text-to-video --weight-format int8 ltx_video_ov/INT8 +``` + +Alternatively, do it in Python code.
If NNCF is installed, the model will be compressed to INT8 automatically. + +```python +from optimum.intel.openvino import OVLTXPipeline + +output_dir = "ltx_video_ov/INT8" + +pipeline = OVLTXPipeline.from_pretrained("Lightricks/LTX-Video", export=True, compile=False, load_in_8bit=True) +pipeline.save_pretrained(output_dir) +``` + +## Sample Descriptions + +### Common Information + +Follow [Get Started with Samples](https://docs.openvino.ai/2026/get-started/learn-openvino/openvino-samples/get-started-demos.html) to get common information about OpenVINO samples. +Follow [build instruction](../../../src/docs/BUILD.md) to build GenAI samples. + +GPUs usually provide better performance compared to CPUs. Modify the source code to change the device for inference to the GPU. + +Install [../../deployment-requirements.txt](../../deployment-requirements.txt) to run samples: +```sh +pip install --upgrade-strategy eager -r ../../deployment-requirements.txt +``` + +### Text to Video Sample (`text2video.py`) + +- **Description:** + Basic video generation using a text-to-video model. This sample demonstrates how to generate videos from text prompts using the OpenVINO GenAI Text2VideoPipeline. The LTX-Video model is recommended for this sample. + + Recommended models: Lightricks/LTX-Video + +- **Main Feature:** Generate videos from text descriptions with customizable parameters. + +- **Run Command:** + ```bash + python text2video.py model_dir prompt [--device DEVICE] [--output OUTPUT] + ``` + + Example: + ```bash + python text2video.py ./ltx_video_ov/INT8 "A woman with long brown hair and light skin smiles at another woman with long blonde hair" + ``` + +The sample will generate a video file `genai_video.avi` in the current directory. 
+ +Users can modify the source code to experiment with different generation parameters: +- Change width or height of generated video +- Change number of frames +- Generate multiple videos per prompt +- Adjust number of inference steps +- Play with guidance scale (improves quality when > 1) +- Add negative prompt when guidance scale > 1 +- Adjust frame rate + +#### Run with threaded callback + +You can also implement a callback function that runs in a separate thread. This allows for parallel processing, enabling you to interrupt generation early if intermediate results are satisfactory or to add logs. + +Please find the template of the callback usage below: + +```python +pipe = openvino_genai.Text2VideoPipeline(model_dir, device) + +def callback(step, num_steps, latent): + print(f"Video generation step: {step + 1} / {num_steps}") + if your_condition: # return True if you want to interrupt video generation + return True + return False + +video = pipe.generate( + prompt, + callback=callback +).video +``` + +### TaylorSeer Text to Video Sample (`taylorseer_text2video.py`) + +- **Description:** + Generate videos with TaylorSeer caching optimization. This sample runs two generations: one baseline without caching and one with TaylorSeer caching enabled, then compares their performance. + +- **Run Command:** + ```bash + python taylorseer_text2video.py model_dir prompt + ``` + + Example: + ```bash + python taylorseer_text2video.py ./ltx_video_ov/INT8 "a robot dancing in the rain" + ``` + +The sample will generate two video files: `taylorseer_baseline.avi` (without caching) and `taylorseer.avi` (with caching), and display a performance comparison showing the speedup achieved. 
 + +The TaylorSeer configuration parameters can be adjusted in the source code: +- `cache_interval`: Number of steps between cache updates (default: 3) +- `disable_cache_before_step`: Disable caching before this step for warmup (default: 6) +- `disable_cache_after_step`: Disable caching after this step (default: -2, meaning 2 steps before the end) + +For more details about TaylorSeer, see the [diffusion caching documentation](../../../site/docs/concepts/optimization-techniques/diffusion-caching.md). + +## Troubleshooting + +### LTX-Video Model Constraints + +> [!NOTE] +> The LTX-Video model works best on: +> - Resolutions divisible by 32 (e.g., 480x704, 512x512, 720x1280) +> - Number of frames equal to a multiple of 8 plus 1, i.e. 8k + 1 (e.g., 9, 17, 25, 33, 41, 49, 57, 65, 73, 81, 89, 97, 121, 161, 257) +> - At least 2 inference steps (1 step may produce artifacts) +> - Best quality achieved with resolutions under 720x1280 and number of frames below 257 + +### OpenCV Installation + +If you encounter issues with OpenCV when running the samples, ensure it's properly installed: + +```sh +pip install opencv-python==4.12.0.88 +``` + +This dependency is included in [../../deployment-requirements.txt](../../deployment-requirements.txt). + +### Unicode characters encoding error on Windows + +Example error: +``` +UnicodeEncodeError: 'charmap' codec can't encode character '\u25aa' in position 0: character maps to <undefined> +``` + +If you encounter the error described in the example when a sample is printing output to the Windows console, it is likely due to the default Windows encoding not supporting certain Unicode characters. To resolve this: +1. Enable Unicode characters for Windows cmd - open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot. +2. Enable UTF-8 mode by setting environment variable `PYTHONIOENCODING="utf8"`. 
def main():
    """Compare text-to-video generation with and without TaylorSeer caching.

    Parses ``model_dir`` and ``prompt`` from the command line, then runs the
    same prompt twice through ``openvino_genai.Text2VideoPipeline``: once as a
    baseline and once with a ``TaylorSeerCacheConfig`` passed to ``generate``.
    Both results are written with ``save_video`` (``taylorseer_baseline.avi``
    and ``taylorseer.avi``) and the elapsed times are printed side by side.
    """
    parser = argparse.ArgumentParser(description="Text-to-video generation with TaylorSeer caching optimization")
    parser.add_argument("model_dir", help="Path to the converted OpenVINO model directory")
    parser.add_argument("prompt", help="Text prompt for video generation")
    args = parser.parse_args()

    device = "CPU"  # GPU can be used as well
    pipe = openvino_genai.Text2VideoPipeline(args.model_dir, device)
    frame_rate = pipe.get_generation_config().frame_rate

    # TaylorSeer configuration
    cache_interval = 3
    disable_before = 6
    disable_after = -2
    negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted"
    num_inference_steps = 25

    def callback(step, num_steps, latent):
        # Report progress only; returning False never interrupts generation.
        print(f"Step {step + 1}/{num_steps}")
        return False

    generate_kwargs = {
        "negative_prompt": negative_prompt,
        "num_inference_steps": num_inference_steps,
        "callback": callback,
    }

    # Generate baseline for comparison.
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # (or turn negative) by a system clock adjustment during generation.
    print("\nGenerating baseline video without caching...")
    start_time = time.perf_counter()
    baseline_output = pipe.generate(args.prompt, **generate_kwargs)
    baseline_time = time.perf_counter() - start_time

    print(f"Baseline generation completed in {baseline_time:.2f}s")

    baseline_filename = "taylorseer_baseline.avi"
    save_video(baseline_filename, baseline_output.video, frame_rate)
    print(f"Baseline video saved to {baseline_filename}")

    # Configure TaylorSeer caching
    print("\nGenerating video with TaylorSeer caching...")

    taylorseer_config = openvino_genai.TaylorSeerCacheConfig()
    taylorseer_config.cache_interval = cache_interval
    taylorseer_config.disable_cache_before_step = disable_before
    taylorseer_config.disable_cache_after_step = disable_after
    print(taylorseer_config)

    start_time = time.perf_counter()
    output = pipe.generate(args.prompt, taylorseer_config=taylorseer_config, **generate_kwargs)
    taylorseer_time = time.perf_counter() - start_time
    print(f"TaylorSeer generation completed in {taylorseer_time:.2f}s")

    video_filename = "taylorseer.avi"
    save_video(video_filename, output.video, frame_rate)
    print(f"Video saved to {video_filename}")

    # Performance comparison (guards avoid division by zero on a zero duration)
    speedup = baseline_time / taylorseer_time if taylorseer_time > 0 else 0.0
    time_saved = baseline_time - taylorseer_time if baseline_time > 0 else 0.0
    percentage = (baseline_time - taylorseer_time) / baseline_time * 100 if baseline_time > 0 else 0.0

    print("\nPerformance Comparison:")
    print(f"  Baseline time: {baseline_time:.2f}s")
    print(f"  TaylorSeer time: {taylorseer_time:.2f}s")
    print(f"  Speedup: {speedup:.2f}x")
    print(f"  Time saved: {time_saved:.2f}s ({percentage:.1f}%)")
argparse +import openvino_genai +from video_utils import save_video + + +def main(): + parser = argparse.ArgumentParser(description="Generate video from text prompt using OpenVINO GenAI") + parser.add_argument("model_dir", help="Path to the model directory") + parser.add_argument("prompt", help="Text prompt for video generation") + args = parser.parse_args() + + pipe = openvino_genai.Text2VideoPipeline(args.model_dir, "CPU") # GPU can be used as well + + frame_rate = 25 + + def callback(step, num_steps, latent): + print(f"Generation step {step + 1} / {num_steps}") + return False + + output = pipe.generate( + args.prompt, + negative_prompt="worst quality, inconsistent motion, blurry, jittery, distorted", + height=480, + width=704, + num_frames=161, + num_inference_steps=25, + num_videos_per_prompt=1, + callback=callback, + frame_rate=frame_rate, + guidance_scale=3, + ) + + save_video("genai_video.avi", output.video, frame_rate) + + print(f"\nPerformance metrics:") + print(f" Load time: {output.perf_metrics.get_load_time():.2f} ms") + print(f" Generate duration: {output.perf_metrics.get_generate_duration():.2f} ms") + print(f" Transformer duration: {output.perf_metrics.get_transformer_infer_duration().mean:.2f} ms") + print(f" VAE decoder duration: {output.perf_metrics.get_vae_decoder_infer_duration():.2f} ms") + + +if __name__ == "__main__": + main() diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/python/video_generation/video_utils.py b/src/resources/openvino.genai-2026.1.0.0/samples/python/video_generation/video_utils.py new file mode 100644 index 0000000..d010804 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/python/video_generation/video_utils.py @@ -0,0 +1,26 @@ +# Copyright (C) 2025-2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import cv2 + + +def save_video(filename: str, video_tensor, fps: int = 25): + batch_size, num_frames, height, width, _ = video_tensor.shape + video_data = video_tensor.data + + for b 
in range(batch_size): + if batch_size == 1: + output_path = filename + else: + base, ext = filename.rsplit(".", 1) if "." in filename else (filename, "avi") + output_path = f"{base}_b{b}.{ext}" + + fourcc = cv2.VideoWriter_fourcc(*"MJPG") + writer = cv2.VideoWriter(output_path, fourcc, fps, (width, height)) + + for f in range(num_frames): + frame_bgr = cv2.cvtColor(video_data[b, f], cv2.COLOR_RGB2BGR) + writer.write(frame_bgr) + + writer.release() + print(f"Wrote {output_path} ({num_frames} frames, {width}x{height} @ {fps} fps)") diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/python/visual_language_chat/README.md b/src/resources/openvino.genai-2026.1.0.0/samples/python/visual_language_chat/README.md new file mode 100644 index 0000000..b23f6b4 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/python/visual_language_chat/README.md @@ -0,0 +1,148 @@ +# Python vlm_chat_sample that supports VLM models + +This example showcases inference of text-generation Vision Language Models (VLMs): `miniCPM-V-2_6` and other models with the same signature. The application doesn't have many configuration options to encourage the reader to explore and modify the source code. For example, change the device for inference to GPU. The sample features `openvino_genai.VLMPipeline` and configures it for the chat scenario. There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/minicpm-v-multimodal-chatbot) which provides an example of Visual-language assistant. + +The following are sample files: + - [`visual_language_chat.py`](./visual_language_chat.py) demonstrates basic usage of the VLM pipeline which supports accelerated inference using prompt lookup decoding. + - [`video_to_text_chat.py`](./video_to_text_chat.py) demonstrates video to text usage of the VLM pipeline. + - [`benchmark_vlm.py`](./benchmark_vlm.py) shows how to benchmark a VLM in OpenVINO GenAI. 
The script includes functionality for warm-up iterations, generating text and calculating various performance metrics. + - [`visual_language_lora.py`](./visual_language_lora.py) demonstrates how to apply one or more LoRA adapters to a VLM at runtime. + - [`milebench_eval_vlm.py`](./milebench_eval_vlm.py) provides MileBench validation for VLMs, enabling evaluation of image–text reasoning and visual QA tasks across multiple subsets designed to assess the MultImodal Long-contExt capabilities of MLLMs. + +## Download and convert the model and tokenizers + +The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. + +Install [../../export-requirements.txt](../../export-requirements.txt) to convert a model. + +```sh +pip install --upgrade-strategy eager -r ../../export-requirements.txt +``` + +Then, run the export with Optimum CLI: + +```sh +optimum-cli export openvino --model openbmb/MiniCPM-V-2_6 --trust-remote-code MiniCPM-V-2_6 +``` + +Alternatively, you can do it in Python code: + +```python +from optimum.exporters.openvino.convert import export_tokenizer +from optimum.intel import OVModelForVisualCausalLM +from transformers import AutoTokenizer + +output_dir = "MiniCPM-V-2_6" + +model = OVModelForVisualCausalLM.from_pretrained("openbmb/MiniCPM-V-2_6", export=True, trust_remote_code=True) +model.save_pretrained(output_dir) + +tokenizer = AutoTokenizer.from_pretrained("openbmb/MiniCPM-V-2_6") +export_tokenizer(tokenizer, output_dir) +``` + +Install [deployment-requirements.txt](../../deployment-requirements.txt) via `pip install -r ../../deployment-requirements.txt` to run VLM samples. + +## Run image-to-text chat sample: + +[This image](https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11) can be used as a sample image. 
 + +`python visual_language_chat.py ./MiniCPM-V-2_6/ 319483352-d5fbbd1a-d484-415c-88cb-9986625b7b11.jpg` + +See https://github.com/openvinotoolkit/openvino.genai/blob/master/src/README.md#supported-models for the list of supported models. + +## Run image-to-text sample with LoRA adapters: + +This sample runs generation twice for the same prompt and image: first with LoRA adapter(s) applied, then without any adapters (base model). + +Export `Qwen/Qwen2.5-VL-7B-Instruct` to OpenVINO as [described above for MiniCPM-V](#download-and-convert-the-model-and-tokenizers), then download LoRA `Mouad2004/qwen2.5-vl-lora-diagrams`: + +```sh +wget -O adapter_model.safetensors \ + https://huggingface.co/Mouad2004/qwen2.5-vl-lora-diagrams/resolve/main/adapter_model.safetensors +``` + +This OpenVINO overview diagram can be used as a convenient image input: + +```sh +wget -O openvino-overview-diagram.jpg \ + https://docs.openvino.ai/2026/_images/openvino-overview-diagram.jpg +``` + +`python visual_language_lora.py ./Qwen2.5-VL-7B-Instruct ./openvino-overview-diagram.jpg "What is shown in this diagram?" ./adapter_model.safetensors 4.0` + +> You can run with multiple LoRA adapters by providing multiple `<LORA_SAFETENSORS> <ALPHA>` pairs. + +> [!NOTE] +> ### LoRA `alpha` interpretation in OpenVINO GenAI +> The OpenVINO GenAI implementation merges the traditional LoRA parameters into a **single effective scaling factor** used during inference. +> +> In this context, the `alpha` value already includes: +> - normalization by LoRA rank (`alpha / rank`) +> - any user-defined scaling factor (`weight`) +> +> This means `alpha` in GenAI should be treated as the **final scaling weight** applied to the LoRA update — not the raw `alpha` parameter from training. + +## Run video-to-text chat sample: + +A model that supports video input is required to run this sample, for example `llava-hf/LLaVA-NeXT-Video-7B-hf`. 
+ +[This video](https://huggingface.co/datasets/raushan-testing-hf/videos-test/resolve/main/sample_demo_1.mp4) can be used as a sample video. + +`python video_to_text_chat.py ./LLaVA-NeXT-Video-7B-hf/ sample_demo_1.mp4` + +Supported models with video input are listed in [this section](https://openvinotoolkit.github.io/openvino.genai/docs/use-cases/image-processing/#use-image-or-video-tags-in-prompt). + +Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. # TODO: examples of larger models +Modify the source code to change the device for inference to the GPU. + +## Run benchmark: + +```sh +python benchmark_vlm.py [OPTIONS] +``` + +### Options + +- `-m, --model`(default: `.`): Path to the model and tokenizers base directory. +- `-p, --prompt` (default: `None`): The prompt to generate text. If without `-p` and `-pf`, the default prompt is `"What is on the image?"` +- `-pf, --prompt_file` Read prompt from file. +- `-i, --image` (default: `image.jpg`): Path to the image. +- `-nw, --num_warmup` (default: `1`): Number of warmup iterations. +- `-mt, --max_new_tokens` (default: `20`): Maximal number of new tokens. +- `-n, --num_iter` (default: `3`): Number of iterations. +- `-d, --device` (default: `"CPU"`): Device to run the model on. +- `-pr, --pruning_ratio`: (optional): Percentage of visual tokens to prune (valid range: 0-100). If this option is not provided, pruning is disabled. +- `-rw, --relevance_weight` (optional): Float value from 0 to 1, control the trade-off between diversity and relevance for visual tokens pruning, a value of 0 disables relevance weighting, while higher values (up to 1.0) emphasize relevance, making pruning more conservative on borderline tokens. 
 + +### Output: + +``` +python benchmark_vlm.py -m MiniCPM-V-2_6 -i 319483352-d5fbbd1a-d484-415c-88cb-9986625b7b11.jpg -n 3 +``` + +``` +Load time: 1982.00 ms +Generate time: 13820.99 ± 64.62 ms +Tokenization time: 1.26 ± 0.09 ms +Detokenization time: 0.33 ± 0.05 ms +Embeddings preparation time: 5733.85 ± 26.34 ms +TTFT: 11246.98 ± 80.55 ms +TPOT: 135.45 ± 4.73 ms/token +Throughput: 7.38 ± 0.26 tokens/s +``` + +For more information on how performance metrics are calculated, see the [performance-metrics tutorial](../../../src/README.md#performance-metrics). + +### Troubleshooting + +#### Unicode characters encoding error on Windows + +Example error: +``` +UnicodeEncodeError: 'charmap' codec can't encode character '\u25aa' in position 0: character maps to <undefined> +``` + +If you encounter the error described in the example when a sample is printing output to the Windows console, it is likely due to the default Windows encoding not supporting certain Unicode characters. To resolve this: +1. Enable Unicode characters for Windows cmd - open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot. +2. Enable UTF-8 mode by setting environment variable `PYTHONIOENCODING="utf8"`. 
def main():
    """Benchmark a VLM pipeline and print aggregated performance metrics.

    The prompt comes from ``--prompt`` or ``--prompt_file`` (mutually
    exclusive) and falls back to a built-in question. After ``--num_warmup``
    untimed generations, the prompt is generated ``--num_iter`` times (at
    least once) and the per-run ``perf_metrics`` are accumulated and printed.

    Raises:
        RuntimeError: if both a prompt and a prompt file are given, or if the
            resulting prompt is empty.
    """
    parser = argparse.ArgumentParser(description="Help command")
    # "." default and the -pr/-rw short flags match the options documented in
    # the sample README; the long option names are unchanged.
    parser.add_argument("-m", "--model", type=str, default=".", help="Path to model and tokenizers base directory")
    parser.add_argument("-p", "--prompt", type=str, default=None, help="Prompt")
    parser.add_argument("-pf", "--prompt_file", type=str, help="Read prompt from file")
    parser.add_argument("-i", "--image", type=str, default="image.jpg", help="Image")
    parser.add_argument("-nw", "--num_warmup", type=int, default=1, help="Number of warmup iterations")
    parser.add_argument("-n", "--num_iter", type=int, default=2, help="Number of iterations")
    parser.add_argument("-mt", "--max_new_tokens", type=int, default=20, help="Maximal number of new tokens")
    parser.add_argument("-d", "--device", type=str, default="CPU", help="Device")
    parser.add_argument(
        "-pr",
        "--pruning_ratio",
        type=ratio_type,
        default=0,
        help="(optional): Percentage of visual tokens to prune (valid range: 0-100). If this option is not provided, pruning is disabled.",
    )
    parser.add_argument(
        "-rw",
        "--relevance_weight",
        type=weight_0_1,
        help="(optional): Float value from 0 to 1, control the trade-off between diversity and relevance for visual tokens pruning, "
        "a value of 0 disables relevance weighting, while higher values (up to 1.0) emphasize relevance, making pruning more conservative on borderline tokens.",
    )

    args = parser.parse_args()

    if args.prompt is not None and args.prompt_file is not None:
        raise RuntimeError('Prompt and prompt file should not exist together!')
    if args.prompt_file is not None:
        with open(args.prompt_file, 'r', encoding='utf-8') as f:
            prompt = f.read()
    else:
        prompt = 'What is on the image?' if args.prompt is None else args.prompt
    if len(prompt) == 0:
        raise RuntimeError('Prompt is empty!')

    print(f'openvino runtime version: {get_version()}, genai version: {ov_genai.__version__}')

    # Perf metrics are stored in VLMDecodedResults.
    # To get VLMDecodedResults instead of a string, the input should be a list.
    models_path = args.model
    images = read_images(args.image)
    device = args.device
    num_warmup = args.num_warmup
    num_iter = args.num_iter

    config = ov_genai.GenerationConfig()
    config.max_new_tokens = args.max_new_tokens
    if args.pruning_ratio is not None:
        config.pruning_ratio = args.pruning_ratio
    if args.relevance_weight is not None:
        config.relevance_weight = args.relevance_weight

    if device == "NPU":
        pipe = ov_genai.VLMPipeline(models_path, device)
    else:
        # Setting of Scheduler config will trigger usage of ContinuousBatching pipeline, which is not default for Qwen2VL, Qwen2.5VL, Gemma3 due to accuracy issues.
        scheduler_config = ov_genai.SchedulerConfig()
        scheduler_config.enable_prefix_caching = False
        scheduler_config.max_num_batched_tokens = sys.maxsize
        pipe = ov_genai.VLMPipeline(models_path, device, scheduler_config=scheduler_config)

    input_data = pipe.get_tokenizer().encode(prompt)
    prompt_token_size = input_data.input_ids.get_shape()[1]
    print(f"Number of images:{len(images)}, Prompt token size: {prompt_token_size}")

    for _ in range(num_warmup):
        pipe.generate(prompt, images=images, generation_config=config)

    # The first timed run seeds the accumulator; the remaining runs are added to it.
    res = pipe.generate(prompt, images=images, generation_config=config)
    perf_metrics = res.perf_metrics
    for _ in range(num_iter - 1):
        res = pipe.generate(prompt, images=images, generation_config=config)
        perf_metrics += res.perf_metrics

    generate_duration = perf_metrics.get_generate_duration()
    tokenization_duration = perf_metrics.get_tokenization_duration()
    detokenization_duration = perf_metrics.get_detokenization_duration()
    embeddings_duration = perf_metrics.get_prepare_embeddings_duration()
    ttft = perf_metrics.get_ttft()
    tpot = perf_metrics.get_tpot()
    throughput = perf_metrics.get_throughput()

    print(f"Output token size: {res.perf_metrics.get_num_generated_tokens()}")
    print(f"Load time: {perf_metrics.get_load_time():.2f} ms")
    print(f"Generate time: {generate_duration.mean:.2f} ± {generate_duration.std:.2f} ms")
    print(f"Tokenization time: {tokenization_duration.mean:.2f} ± {tokenization_duration.std:.2f} ms")
    print(f"Detokenization time: {detokenization_duration.mean:.2f} ± {detokenization_duration.std:.2f} ms")
    print(f"Embeddings preparation time: {embeddings_duration.mean:.2f} ± {embeddings_duration.std:.2f} ms")
    print(f"TTFT: {ttft.mean:.2f} ± {ttft.std:.2f} ms")
    print(f"TPOT: {tpot.mean:.2f} ± {tpot.std:.2f} ms")
    print(f"Throughput : {throughput.mean:.2f} ± {throughput.std:.2f} tokens/s")
def streamer(subword: str) -> None:
    '''
    Print each generated sub-word to stdout as soon as it arrives.

    Args:
        subword: sub-word of the generated text.

    Returns: None, so this callback never asks the pipeline to stop.

    '''
    print(subword, end='', flush=True)

    # No value is returned as in this example we don't want to stop the generation in this method.
    # "return None" will be treated the same as "return openvino_genai.StreamingStatus.RUNNING".
class MileBenchDataset:
    """Index-addressable view over one MileBench subset.

    On construction the archive containing the requested subset is downloaded
    and unpacked under ``data_dir`` (unless the subset directory already
    exists) and the subset's JSON annotation file is loaded. Indexing returns
    a dict with the prompt text, the image tensors, the ground-truth answer
    and the optional choice list of one sample.
    """

    # Archive file name -> download URL.
    _LINKS = {
        "MileBench_part0.tar.gz": "https://huggingface.co/datasets/FreedomIntelligence/MileBench/resolve/main/MileBench_part0.tar.gz",
        "MileBench_part1.tar.gz": "https://huggingface.co/datasets/FreedomIntelligence/MileBench/resolve/main/MileBench_part1.tar.gz",
        "MileBench_part2.tar.gz": "https://huggingface.co/datasets/FreedomIntelligence/MileBench/resolve/main/MileBench_part2.tar.gz",
        "MileBench_part3.tar.gz": "https://huggingface.co/datasets/FreedomIntelligence/MileBench/resolve/main/MileBench_part3.tar.gz",
        "MileBench_part4.tar.gz": "https://huggingface.co/datasets/FreedomIntelligence/MileBench/resolve/main/MileBench_part4.tar.gz",
        "MileBench_part5.tar.gz": "https://huggingface.co/datasets/FreedomIntelligence/MileBench/resolve/main/MileBench_part5.tar.gz",
    }

    # Subset name -> archive that contains it.
    _SUBSET2ARCHIVE = {
        # Realistic Temporal
        "ActionLocalization": "MileBench_part0.tar.gz",
        "ActionPrediction": "MileBench_part0.tar.gz",
        "ActionSequence": "MileBench_part0.tar.gz",
        "CharacterOrder": "MileBench_part0.tar.gz",
        "CounterfactualInference": "MileBench_part1.tar.gz",
        "EgocentricNavigation": "MileBench_part1.tar.gz",
        "MovingAttribute": "MileBench_part2.tar.gz",
        "MovingDirection": "MileBench_part2.tar.gz",
        "ObjectExistence": "MileBench_part3.tar.gz",
        "ObjectInteraction": "MileBench_part3.tar.gz",
        "ObjectShuffle": "MileBench_part3.tar.gz",
        "SceneTransition": "MileBench_part3.tar.gz",
        "StateChange": "MileBench_part3.tar.gz",
        # Realistic Semantic
        "ALFRED": "MileBench_part0.tar.gz",
        "CLEVR-Change": "MileBench_part1.tar.gz",
        "DocVQA": "MileBench_part1.tar.gz",
        "IEdit": "MileBench_part2.tar.gz",
        "MMCoQA": "MileBench_part2.tar.gz",
        "MultiModalQA": "MileBench_part2.tar.gz",
        "nuscenes": "MileBench_part3.tar.gz",
        "OCR-VQA": "MileBench_part4.tar.gz",
        "SlideVQA": "MileBench_part4.tar.gz",
        "Spot-the-Diff": "MileBench_part4.tar.gz",
        "TQA": "MileBench_part5.tar.gz",
        "WebQA": "MileBench_part5.tar.gz",
        "WikiVQA": "MileBench_part5.tar.gz",
        # Diagnostic
        "TextNeedleInAHaystack": "MileBench_part5.tar.gz",
        "ImageNeedleInAHaystack": "MileBench_part2.tar.gz",
        "GPR1200": "MileBench_part1.tar.gz",
    }

    def __init__(self, data_dir, subset, subset_size=200):
        """Download the subset if needed and load its annotations.

        Args:
            data_dir: Directory holding the archives and extracted subsets.
            subset: MileBench subset name (a key of ``_SUBSET2ARCHIVE``).
            subset_size: Upper bound on the number of samples exposed.
        """
        self.data_dir = data_dir
        self.subset = subset
        self.subset_size = subset_size

        self._download_data()
        annotation_path = os.path.join(self.data_dir, self.subset, f"{self.subset}.json")
        # JSON is UTF-8 by specification; do not depend on the locale encoding.
        with open(annotation_path, encoding="utf-8") as f:
            self.annotation = json.load(f)

        self.image_dir = os.path.join(self.data_dir, self.subset, "images")

    def _download_data(self):
        """Fetch and unpack the archive for ``self.subset`` when it is missing.

        Raises:
            ValueError: if ``self.subset`` is not a known MileBench subset.
        """
        archive_name = self._SUBSET2ARCHIVE.get(self.subset)
        if archive_name is None:
            supported = ", ".join(sorted(self._SUBSET2ARCHIVE))
            raise ValueError(f"Unknown MileBench subset {self.subset!r}; supported subsets: {supported}")
        archive_url = self._LINKS[archive_name]
        archive_path = os.path.join(self.data_dir, archive_name)
        dir_name = os.path.join(self.data_dir, self.subset)

        if os.path.exists(dir_name):
            logger.info(f"Already extracted to {self.data_dir}")
            return

        if os.path.exists(archive_path):
            logger.info(f"Archive already exists at {archive_path}")
        else:
            logger.info(f"Downloading {archive_name} from {archive_url}...")
            os.makedirs(self.data_dir, exist_ok=True)
            # Stream into a side file and rename on success, so an interrupted
            # download is never mistaken for a complete archive on the next run.
            partial_path = archive_path + ".part"
            response = requests.get(archive_url, stream=True)
            response.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
            os.replace(partial_path, archive_path)
            logger.info(f"Downloaded archive to {archive_path}")

        logger.info(f"Extracting {archive_path}...")
        shutil.unpack_archive(archive_path, self.data_dir)
        logger.info(f"Extracted to {self.data_dir}")

    def __len__(self):
        """Return the number of exposed samples, capped by ``subset_size``."""
        return min(self.annotation["meta_data"]["num_sample"], self.subset_size)

    @staticmethod
    def _transform_string(s: str) -> str:
        """Replace each ``{i}`` in *s* with 1, 2, 3, ... in order of appearance."""
        counter = iter(range(1, s.count("{i}") + 1))
        return re.sub(r"\{i\}", lambda _: str(next(counter)), s)

    @staticmethod
    def _image_placeholder(idx: int) -> str:
        """Return the OpenVINO GenAI prompt tag referring to the image at *idx*."""
        return f"<ov_genai_image_{idx}>"

    @staticmethod
    def _insert_image_placeholders(context: str, img_num: int) -> str:
        """Swap ``{image#N}`` / ``{table#N}`` markers (1-based) for image tags (0-based)."""
        for i in range(img_num):
            placeholder = MileBenchDataset._image_placeholder(i)
            context = context.replace("{image#%d}" % (i + 1), placeholder)
            context = context.replace("{table#%d}" % (i + 1), placeholder)
        return context

    @staticmethod
    def _preprocess_image(image_path, max_size=512, min_size=32):
        """Load an image as RGB and rescale it into the ``[min_size, max_size]`` range.

        The longer side is shrunk to ``max_size`` when it exceeds it; otherwise
        the shorter side is enlarged to ``min_size`` when it is below it.
        """
        image = Image.open(image_path).convert("RGB")
        w, h = image.size
        if max(w, h) > max_size:
            scale_factor = max_size / max(w, h)
        elif min(w, h) < min_size:
            scale_factor = min_size / min(w, h)
        else:
            return image  # No scaling needed

        # Clamp to one pixel: truncation can reach 0 for extreme aspect ratios,
        # which PIL rejects.
        new_size = (max(1, int(w * scale_factor)), max(1, int(h * scale_factor)))
        return image.resize(new_size, Image.Resampling.LANCZOS)

    def __getitem__(self, idx):
        """Return the prompt, image tensors and reference answer of sample *idx*.

        Raises:
            IndexError: if *idx* is negative or not smaller than ``len(self)``.
        """
        if idx >= len(self) or idx < 0:
            raise IndexError("Index out of range for the dataset.")

        ann = self.annotation["data"][idx]
        task_instance = ann["task_instance"]
        task_instructions = self.annotation["meta_data"]["task_instruction"]

        context = task_instance["context"]
        if "choice_list" in task_instance:
            # Options are labelled A, B, C, ... in list order.
            options = "\n".join(
                f"{chr(65 + n)}. {item}" for n, item in enumerate(task_instance["choice_list"])
            )
            context += "\nChoice list: \n" + options + "\nYour answer is: "

        image_paths = task_instance["images_path"]
        context = self._insert_image_placeholders(context, len(image_paths))

        context_str = task_instructions[ann["task_instruction_id"]] + "\n" + context
        prompt = MileBenchDataset._transform_string(context_str)

        images = []
        for p in image_paths:
            image = MileBenchDataset._preprocess_image(os.path.join(self.image_dir, p))
            images.append(openvino.Tensor(np.array(image)))

        return {
            "prompt": prompt,
            "images": images,
            "gt_answer": ann["response"],
            "choice_list": task_instance.get("choice_list", None),
        }
chr(index + 65) + elif index < 52: + return "A" + chr(index + 65 - 26) + else: + return "B" + chr(index + 65 - 26 - 26) + + def processPunctuation(self, inText): + outText = inText + for p in self.punct: + if (p + " " in inText or " " + p in inText) or ( + re.search(self.commaStrip, inText) is not None + ): + outText = outText.replace(p, "") + else: + outText = outText.replace(p, " ") + outText = self.periodStrip.sub("", outText, re.UNICODE) + return outText + + def process(self, answer): + answer = answer.replace("\n", " ") + answer = answer.replace("\t", " ") + answer = answer.strip() + answer = self.processPunctuation(answer) + answer = answer.strip('"') + answer = answer.strip().lower() + return answer + + def evaluate_rouge(self, predictions): + rouge = Rouge() + acc = [] + for res in predictions: + gt_ans = self.process(res["gt_answer"]) + pred_ans = self.process(res["pred"]) + assert gt_ans != "" + if pred_ans == "": + score = 0 + else: + score = rouge.get_scores(pred_ans, gt_ans)[0]["rouge-l"]["f"] + acc.append(score) + return np.mean(acc) + + def match_choice(self, text, option): + """Return: A B C D...""" + + def preprocess_option_string(option_string): + # First, preprocess the option text to normalize it + processed_option = self.process(option_string) + + # Then, escape any special regex characters in the processed option text + # List of regex special characters that need to be escaped + special_chars = [ + "\\", + ".", + "^", + "$", + "*", + "+", + "?", + "{", + "}", + "[", + "]", + "|", + "(", + ")", + ] + # Escape the special characters by prefixing them with a backslash + for char in special_chars: + if char in processed_option: + processed_option = processed_option.replace(char, "\\" + char) + # escaped_option = escape_special_chars(processed_option) + return processed_option + + if text == "": + return "C" + try: + # Maybe start from the head + # 1. Char+Choice: `A. 
Blastomycosis` + option_str = "|".join( + [preprocess_option_string(f"{k} {v}") for k, v in option.items()] + ) + option_pattern = rf"({option_str})" + option_res = re.search( + option_pattern, text, re.S + ) # NOTE we dont use match_all + if option_res: + return (option_res.group(0)[0]).upper() + + # 2. Choice: `Blastomycosis` + option_str = "|".join( + [ + preprocess_option_string(v).replace(" ", "") + for k, v in option.items() + ] + ) + option_pattern = rf"({option_str})" + option_res = re.search( + option_pattern, text.replace(" ", ""), re.S + ) # NOTE we dont use match_all + if option_res: + for k, v in option.items(): + if option_res[0].strip() == preprocess_option_string(v).replace( + " ", "" + ): + return k.upper() + + # 3. Char: `A` `AB` + if len(text) in [1, 2] and text.upper() in option.keys(): + return text.upper() + + # use gpt extract + + except Exception as e: + print(f"something wrong during match_choice {text}: {e}") + return text + return "".join([i.upper() for i in text if i.upper() in option]) + + def judge_multi_choice(self, sample): + gt_ans = sample["gt_answer"] + pred_ans = sample["pred"] + choice_list = sample["choice_list"] + assert gt_ans in choice_list + # Convert choice_list to a dictionary format expected by match_choice + option_dict = {self.char(i): choice for i, choice in enumerate(choice_list)} + + # Use match_choice to determine the selected answer from pred_ans + selected_answer = self.match_choice(pred_ans, option_dict) + + # Check if the selected answer matches the ground truth + gt_ans_chr = self.char(choice_list.index(sample["gt_answer"])) + if selected_answer == gt_ans_chr: + return 1, selected_answer + else: + return 0, selected_answer + + def process_sample(self, sample): + sample["gt_answer"] = self.process(sample["gt_answer"]) + sample["pred"] = self.process(sample["pred"]) + for i in range(len(sample["choice_list"])): + sample["choice_list"][i] = self.process(sample["choice_list"][i]) + + def evaluate_multichoice(self, 
predictions): + correct = 0 + for sample in predictions: + self.process_sample(sample) + score, extracted_answer = self.judge_multi_choice(sample) + sample["extracted"] = extracted_answer + sample["result"] = score + correct += score + return correct / len(predictions) + + def evaluate_needle(self, predictions, needle=True): + correct = 0 + for sample in predictions: + gt_ans = self.process(sample["gt_answer"]) + pred_ans = self.process(sample["pred"]) + + if needle: + score = 1 if gt_ans in pred_ans.split() else 0 + else: + score = 1 if gt_ans in pred_ans else 0 + + sample["result"] = score + correct += score + return correct / len(predictions) + + def evaluate(self, predictions, dataset_name, question_type): + if "NeedleInAHaystack" in dataset_name or "MMCoQA" in dataset_name: + return self.evaluate_needle( + predictions, needle="NeedleInAHaystack" in dataset_name + ) + elif question_type == "open-ended": + return self.evaluate_rouge(predictions) + elif question_type == "multi-choice": + return self.evaluate_multichoice(predictions) + else: + raise ValueError("Dataset not supported") + + +def get_scheduler_config(num_kv_blocks: Optional[int]) -> SchedulerConfig: + scheduler_config = SchedulerConfig() + if num_kv_blocks is not None: + scheduler_config.num_kv_blocks = num_kv_blocks + scheduler_config.max_num_batched_tokens = 32 * num_kv_blocks + scheduler_config.dynamic_split_fuse = True + scheduler_config.max_num_seqs = 256 + scheduler_config.use_cache_eviction = False + return scheduler_config + + +def main(): + parser = ArgumentParser(description="Help command") + parser.add_argument("-m", "--model_dir", type=str, help="Path to the model directory") + parser.add_argument("-mt", "--max_new_tokens", type=int, default=512, help="Maximal number of new tokens") + parser.add_argument("-d", "--device", type=str, default="CPU", help="Device") + parser.add_argument("-s", "--subset", type=str, help="MileBench subset to use") + parser.add_argument( + "--data_dir", + 
type=str, + default=None, + help="Path to MileBench data directory. If not provided, data will be downloaded to ./milebench_data" + ) + parser.add_argument("--enable_cache_eviction", action='store_true', help="Whether to apply cache eviction") + parser.add_argument( + "--num_kv_blocks", + type=int, + default=500, + help=( + "Number of blocks to statically pre-allocate in the KV cache. " + "If unspecified, blocks are allocated dynamically based on generation length." + ) + ) + parser.add_argument("--seqs_per_request", type=int, default=1, help="Number of sequences per request") + + args = parser.parse_args() + + generation_config = GenerationConfig() + generation_config.num_return_sequences = 1 + generation_config.max_new_tokens = args.max_new_tokens + generation_config.do_sample = False + generation_config.apply_chat_template = True + + scheduler_config = get_scheduler_config(args.num_kv_blocks) + if args.enable_cache_eviction: + scheduler_config.use_cache_eviction = True + eviction_config = CacheEvictionConfig( + start_size=32, + recent_size=64, + max_cache_size=512, + aggregation_mode=AggregationMode.SUM, + snapkv_window_size=8, + ) + scheduler_config.cache_eviction_config = eviction_config + print("Eviction is ON") + else: + print("Eviction is OFF") + + model_cb = ContinuousBatchingPipeline(args.model_dir, scheduler_config, args.device) + + data = MileBenchDataset( + data_dir=args.data_dir if args.data_dir is not None else "milebench_data", + subset=args.subset, + subset_size=100, + ) + + with tqdm(total=len(data)) as progress_bar: + prompts, images = [], [] + answers = [] + ref_answers = [] + for p_idx, data_sample in enumerate(data): + prompt = data_sample["prompt"] + image = data_sample["images"] + + progress_bar.update(1) + prompts.append(prompt) + images.append(image) + answers.append({"gt_answer": data_sample["gt_answer"], "choice_list": data_sample["choice_list"]}) + ref_answers.append({"gt_answer": data_sample["gt_answer"], "choice_list": 
data_sample["choice_list"]}) + + if len(prompts) == args.seqs_per_request or p_idx == len(data) - 1: + ans_batch = model_cb.generate( + prompts, images=images, generation_config=[generation_config] * len(prompts) + ) + + batch_start_idx = p_idx - len(prompts) + 1 + for i, output in enumerate(ans_batch, start=batch_start_idx): + answers[i]["pred"] = output.texts[0] + prompts.clear() + images.clear() + + question_type = data.annotation['meta_data']['question_type'] + scorer = Eval() + score = scorer.evaluate(answers, args.subset, question_type) + print(f"Score: {score}") + + pipeline_metrics = model_cb.get_metrics() + print(f"Cache usage: max {pipeline_metrics.max_cache_usage:.3f}, avg {pipeline_metrics.avg_cache_usage:.3f}") + + +if __name__ == '__main__': + main() diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/python/visual_language_chat/video_to_text_chat.py b/src/resources/openvino.genai-2026.1.0.0/samples/python/visual_language_chat/video_to_text_chat.py new file mode 100644 index 0000000..eee4477 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/python/visual_language_chat/video_to_text_chat.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python3 +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +import argparse +import numpy as np +import cv2 +import openvino_genai +from openvino import Tensor +from pathlib import Path + + +def streamer(subword: str) -> bool: + """ + + Args: + subword: sub-word of the generated text. + + Returns: Return flag corresponds whether generation should be stopped. + + """ + print(subword, end="", flush=True) + + # No value is returned as in this example we don't want to stop the generation in this method. + # "return None" will be treated the same as "return openvino_genai.StreamingStatus.RUNNING". + + +def read_video(path: str, num_frames: int = 8) -> Tensor: + """ + + Args: + path: The path to the video. + num_frames: Number of frames sampled from the video. 
+ + Returns: the ov.Tensor containing the video. + + """ + cap = cv2.VideoCapture(path) + + frames = [] + total_num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) + indices = np.arange(0, total_num_frames, total_num_frames / num_frames).astype(int) + + idx = 0 + while cap.isOpened(): + ret, frame = cap.read() + if not ret: + break + if idx in indices: + frames.append(np.array(frame)) + idx += 1 + + cap.release() + assert idx == total_num_frames, "Frame count mismatch: expected {}, got {}".format(total_num_frames, idx) + + return Tensor(frames) + + +def read_videos(path: str) -> list[Tensor]: + entry = Path(path) + if entry.is_dir(): + return [read_video(str(file)) for file in sorted(entry.iterdir())] + return [read_video(path)] + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("model_dir", help="Path to the model directory") + parser.add_argument("video_dir", help="Path to a video file.") + parser.add_argument("device", nargs="?", default="CPU", help="Device to run the model on (default: CPU)") + args = parser.parse_args() + + videos = read_videos(args.video_dir) + + # GPU and NPU can be used as well. + # Note: If NPU is selected, only the language model will be run on the NPU. + enable_compile_cache = dict() + if args.device == "GPU": + # Cache compiled models on disk for GPU to save time on the next run. + # It's not beneficial for CPU. 
+ enable_compile_cache["CACHE_DIR"] = "vlm_cache" + + pipe = openvino_genai.VLMPipeline(args.model_dir, args.device, **enable_compile_cache) + + config = openvino_genai.GenerationConfig() + config.max_new_tokens = 100 + + history = openvino_genai.ChatHistory() + prompt = input("question:\n") + history.append({"role": "user", "content": prompt}) + decoded_results = pipe.generate(history, videos=videos, generation_config=config, streamer=streamer) + history.append({"role": "assistant", "content": decoded_results.texts[0]}) + + while True: + try: + prompt = input("\n----------\nquestion:\n") + except EOFError: + break + + history.append({"role": "user", "content": prompt}) + # New images and videos can be passed at each turn + decoded_results = pipe.generate(history, generation_config=config, streamer=streamer) + history.append({"role": "assistant", "content": decoded_results.texts[0]}) + + +if __name__ == "__main__": + main() diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/python/visual_language_chat/visual_language_chat.py b/src/resources/openvino.genai-2026.1.0.0/samples/python/visual_language_chat/visual_language_chat.py new file mode 100644 index 0000000..a835096 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/python/visual_language_chat/visual_language_chat.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python3 +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import argparse + +import numpy as np +import openvino_genai +from PIL import Image +from openvino import Tensor +from pathlib import Path + + +def streamer(subword: str) -> bool: + ''' + + Args: + subword: sub-word of the generated text. + + Returns: Return flag corresponds whether generation should be stopped. + + ''' + print(subword, end='', flush=True) + + # No value is returned as in this example we don't want to stop the generation in this method. + # "return None" will be treated the same as "return openvino_genai.StreamingStatus.RUNNING". 
+ + +def read_image(path: str) -> Tensor: + ''' + + Args: + path: The path to the image. + + Returns: the ov.Tensor containing the image. + + ''' + pic = Image.open(path).convert("RGB") + image_data = np.array(pic) + return Tensor(image_data) + + +def read_images(path: str) -> list[Tensor]: + entry = Path(path) + if entry.is_dir(): + return [read_image(str(file)) for file in sorted(entry.iterdir())] + return [read_image(path)] + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("model_dir", help="Path to the model directory") + parser.add_argument("image_dir", help="Image file or dir with images") + parser.add_argument("device", nargs="?", default="CPU", help="Device to run the model on (default: CPU)") + parser.add_argument( + "prompt_lookup", nargs="?", default="false", help="Enable prompt lookup decoding (default: false)" + ) + args = parser.parse_args() + + rgbs = read_images(args.image_dir) + + # GPU and NPU can be used as well. + # Note: If NPU is selected, only the language model will be run on the NPU. + # Prompt lookup decoding in VLM pipeline enforces ContinuousBatching backend + prompt_lookup = args.prompt_lookup == "true" + properties = {"prompt_lookup": prompt_lookup} + if args.device == "GPU": + # Cache compiled models on disk for GPU to save time on the next run. + # It's not beneficial for CPU. 
+ properties["CACHE_DIR"] = "vlm_cache" + + pipe = openvino_genai.VLMPipeline(args.model_dir, args.device, **properties) + + config = openvino_genai.GenerationConfig() + config.max_new_tokens = 100 + if prompt_lookup: + # add parameter to enable prompt lookup decoding to generate `num_assistant_tokens` candidates per iteration + config.num_assistant_tokens = 5 + # Define max_ngram_size + config.max_ngram_size = 3 + + history = openvino_genai.ChatHistory() + prompt = input('question:\n') + history.append({"role": "user", "content": prompt}) + decoded_results = pipe.generate(history, images=rgbs, generation_config=config, streamer=streamer) + history.append({"role": "assistant", "content": decoded_results.texts[0]}) + + while True: + try: + prompt = input("\n----------\n" + "question:\n") + except EOFError: + break + + history.append({"role": "user", "content": prompt}) + # New images and videos can be passed at each turn + decoded_results = pipe.generate(history, generation_config=config, streamer=streamer) + history.append({"role": "assistant", "content": decoded_results.texts[0]}) + + +if '__main__' == __name__: + main() diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/python/visual_language_chat/visual_language_lora.py b/src/resources/openvino.genai-2026.1.0.0/samples/python/visual_language_chat/visual_language_lora.py new file mode 100644 index 0000000..1678d35 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/python/visual_language_chat/visual_language_lora.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python3 +# Copyright (C) 2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import argparse +import numpy as np +import openvino_genai as ov_genai + +from pathlib import Path +from PIL import Image +from openvino import Tensor + + +def streamer(subword: str) -> bool: + """ + + Args: + subword: sub-word of the generated text. + + Returns: Return flag corresponds whether generation should be stopped. 
+ + """ + print(subword, end="", flush=True) + + # No value is returned as in this example we don't want to stop the generation in this method. + # "return None" will be treated the same as "return openvino_genai.StreamingStatus.RUNNING". + + +def read_image(path: str) -> Tensor: + """ + + Args: + path: The path to the image. + + Returns: the ov.Tensor containing the image. + + """ + pic = Image.open(path).convert("RGB") + image_data = np.array(pic) + return Tensor(image_data) + + +def read_images(path: str) -> list[Tensor]: + entry = Path(path) + if entry.is_dir(): + return [read_image(str(file)) for file in sorted(entry.iterdir())] + return [read_image(path)] + + +def parse_lora_pairs(raw): + if len(raw) < 2: + raise argparse.ArgumentTypeError( + "At least one LoRA adapter pair is required: [ ...]" + ) + if len(raw) % 2 != 0: + raise argparse.ArgumentTypeError("LoRA args must come in pairs: ...") + + pairs = [] + for i in range(0, len(raw), 2): + path = raw[i] + try: + alpha = float(raw[i + 1]) + except ValueError as e: + raise argparse.ArgumentTypeError(f"Invalid alpha '{raw[i + 1]}' for LoRA '{path}'") from e + pairs.append((path, alpha)) + return pairs + + +def main() -> int: + p = argparse.ArgumentParser( + description="OpenVINO GenAI VLM sample: run with and without LoRA adapters.", + formatter_class=argparse.RawTextHelpFormatter, + ) + p.add_argument("model_dir", help="Path to model directory") + p.add_argument("images_path", help="Image file OR directory with images") + p.add_argument("prompt", help="Prompt/question to ask") + p.add_argument( + "lora_pairs", + nargs="+", + metavar="LORA_ALPHA", + help="Pairs: ...", + ) + + args = p.parse_args() + prompt = args.prompt + loras = parse_lora_pairs(args.lora_pairs) + + rgbs = read_images(args.images_path) + + device = "CPU" # GPU can be used as well + + pipe_kwargs = {} + + # Configure LoRA adapters with weights (alphas) + if loras: + adapter_config = ov_genai.AdapterConfig() + for lora_path, alpha in loras: + 
adapter_config.add(ov_genai.Adapter(lora_path), alpha) + pipe_kwargs["adapters"] = adapter_config + + pipe = ov_genai.VLMPipeline(args.model_dir, device, **pipe_kwargs) + + gen_cfg = ov_genai.GenerationConfig() + gen_cfg.max_new_tokens = 100 + + print("Generating answer with LoRA adapters applied:") + pipe.generate( + prompt, + images=rgbs, + generation_config=gen_cfg, + streamer=streamer, + ) + + print("\n----------\nGenerating answer without LoRA adapters applied:") + pipe.generate( + prompt, + images=rgbs, + generation_config=gen_cfg, + adapters=ov_genai.AdapterConfig(), + streamer=streamer, + ) + + print("\n----------") + return 0 + + +if __name__ == "__main__": + main() diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/python/whisper_speech_recognition/README.md b/src/resources/openvino.genai-2026.1.0.0/samples/python/whisper_speech_recognition/README.md new file mode 100644 index 0000000..3bd7698 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/python/whisper_speech_recognition/README.md @@ -0,0 +1,160 @@ +# Whisper automatic speech recognition sample + +This example showcases inference of speech recognition Whisper Models. The application doesn't have many configuration options to encourage the reader to explore and modify the source code. For example, change the device for inference to GPU. The sample features `openvino_genai.WhisperPipeline` and uses audio file in wav format as an input source. + +## Download and convert the model and tokenizers + +The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. + +Install [../../export-requirements.txt](../../export-requirements.txt) to convert a model. 
+ +```sh +pip install --upgrade-strategy eager -r ../../export-requirements.txt +``` + +Then, run the export with Optimum CLI: + +```sh +optimum-cli export openvino --trust-remote-code --model openai/whisper-base whisper-base +``` + +Alternatively, you can do it in Python code: + +```python +from optimum.exporters.openvino.convert import export_tokenizer +from optimum.intel import OVModelForSpeechSeq2Seq +from transformers import AutoTokenizer + +output_dir = "whisper-base" + +model = OVModelForSpeechSeq2Seq.from_pretrained("openai/whisper-base", export=True, trust_remote_code=True) +model.save_pretrained(output_dir) + +tokenizer = AutoTokenizer.from_pretrained("openai/whisper-base") +export_tokenizer(tokenizer, output_dir) +``` + +## Prepare audio file + +Download the example audio file: https://storage.openvinotoolkit.org/models_contrib/speech/2021.2/librispeech_s5/how_are_you_doing_today.wav + +Or you can use the [`recorder.py`](recorder.py) script. The script records 5 seconds of audio from the microphone. + +To install the `PyAudio` dependency, follow the [installation instructions](https://pypi.org/project/PyAudio/). + +To run the script: +``` +python recorder.py +``` + +## Run the Whisper model + +Install [deployment-requirements.txt](../../deployment-requirements.txt) via `pip install -r ../../deployment-requirements.txt` and then run the sample: + +`python whisper_speech_recognition.py whisper-base how_are_you_doing_today.wav` + +Output: +``` + How are you doing today? +timestamps: [0.00, 2.00] text: How are you doing today? +``` + +Refer to the [Supported Models](https://openvinotoolkit.github.io/openvino.genai/docs/supported-models/#speech-recognition-models-whisper-based) for more details.
+ +# Whisper pipeline usage + +```python +import openvino_genai +import librosa + +def read_wav(filepath): + raw_speech, samplerate = librosa.load(filepath, sr=16000) + return raw_speech.tolist() + +pipe = openvino_genai.WhisperPipeline(model_dir, "CPU") +# Pipeline expects normalized audio with Sample Rate of 16kHz +raw_speech = read_wav('how_are_you_doing_today.wav') +result = pipe.generate(raw_speech) +# How are you doing today? +``` + +### Transcription + +Whisper pipeline predicts the language of the source audio automatically. + +```python +raw_speech = read_wav('how_are_you_doing_today.wav') +result = pipe.generate(raw_speech) +# How are you doing today? + +raw_speech = read_wav('fr_sample.wav') +result = pipe.generate(raw_speech) +# Il s'agit d'une entité très complexe qui consiste... +``` + +If the source audio language is known in advance, it can be specified as an argument to `generate` method: + +```python +raw_speech = read_wav("how_are_you_doing_today.wav") +result = pipe.generate(raw_speech, language="<|en|>") +# How are you doing today? + +raw_speech = read_wav("fr_sample.wav") +result = pipe.generate(raw_speech, language="<|fr|>") +# Il s'agit d'une entité très complexe qui consiste... +``` + +### Translation + +By default, Whisper performs the task of speech transcription, where the source audio language is the same as the target text language. To perform speech translation, where the target text is in English, set the task to "translate": + +```python +raw_speech = read_wav("fr_sample.wav") +result = pipe.generate(raw_speech, task="translate") +# It is a very complex entity that consists... +``` + +### Timestamps prediction + +The model can predict timestamps. 
For sentence-level timestamps, pass the `return_timestamps` argument: + +```python +raw_speech = read_wav("how_are_you_doing_today.wav") +result = pipe.generate(raw_speech, return_timestamps=True) + +for chunk in result.chunks: + print(f"timestamps: [{chunk.start_ts:.2f}, {chunk.end_ts:.2f}] text: {chunk.text}") +# timestamps: [0.00, 2.00] text: How are you doing today? +``` + +### Long-Form audio Transcription + +The Whisper model is designed to work on audio samples of up to 30s in duration. The Whisper pipeline uses a sequential chunking algorithm to transcribe audio samples of arbitrary length. +The sequential chunking algorithm uses a "sliding window", transcribing 30-second slices one after the other. + +### Initial prompt and hotwords + +The Whisper pipeline accepts `initial_prompt` and `hotwords` as `generate` arguments: +* `initial_prompt`: initial prompt tokens passed as a previous transcription (after the `<|startofprev|>` token) to the first processing window +* `hotwords`: hotwords tokens passed as a previous transcription (after the `<|startofprev|>` token) to all processing windows + +The Whisper model can use that context to better understand the speech and maintain a consistent writing style. However, prompts do not need to be genuine transcripts from prior audio segments. Such prompts can be used to steer the model to use particular spellings or styles: + +```python +result = pipe.generate(raw_speech) +# He has gone and gone for good answered Paul Icrom who... + +result = pipe.generate(raw_speech, initial_prompt="Polychrome") +# He has gone and gone for good answered Polychrome who... +``` + +### Troubleshooting + +#### Empty or rubbish output + +Example output: +``` +---------------- +``` + +To resolve this, ensure that the audio data has a 16 kHz sampling rate. You can record audio with the provided `recorder.py` script or use FFmpeg to convert the audio to the required format.
diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/python/whisper_speech_recognition/recorder.py b/src/resources/openvino.genai-2026.1.0.0/samples/python/whisper_speech_recognition/recorder.py new file mode 100644 index 0000000..e79f1f9 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/python/whisper_speech_recognition/recorder.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Setup Instruction +#1. Install Dependencies with `pip install pyaudio` +#2. Run python recorder.py +#3. Record for 5 seconds and wait for the process to complete +#4. Check directory to see the recording file (output.wav) + +import pyaudio +import wave + +chunk = 1024 # Record in chunks of 1024 samples +sample_format = pyaudio.paInt16 # 16 bits per sample +channels = 1 +fs = 16000 # Record at 16k samples per second +seconds = 5 +filename = "output.wav" + +p = pyaudio.PyAudio() # Create an interface to PortAudio + +print('Recording') +stream = p.open(format=sample_format, + channels=channels, + rate=fs, + frames_per_buffer=chunk, + input=True) + +frames = [] # Initialize array to store frames + +# Store data in chunks for `seconds` seconds (5 by default) +for i in range(0, int(fs / chunk * seconds)): + data = stream.read(chunk) + frames.append(data) + +# Stop and close the stream +stream.stop_stream() +stream.close() +# Terminate the PortAudio interface +p.terminate() +print('Finished recording') + +# Save the recorded data as a WAV file +wf = wave.open(filename, 'wb') +wf.setnchannels(channels) +wf.setsampwidth(p.get_sample_size(sample_format)) +wf.setframerate(fs) +wf.writeframes(b''.join(frames)) +wf.close() diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/python/whisper_speech_recognition/whisper_speech_recognition.py b/src/resources/openvino.genai-2026.1.0.0/samples/python/whisper_speech_recognition/whisper_speech_recognition.py new file mode 100644 index 0000000..3c091b8 --- /dev/null +++
b/src/resources/openvino.genai-2026.1.0.0/samples/python/whisper_speech_recognition/whisper_speech_recognition.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import argparse +import openvino_genai +import librosa + + +def read_wav(filepath): + raw_speech, samplerate = librosa.load(filepath, sr=16000) + return raw_speech.tolist() + + +def get_config_for_cache(): + config_cache = dict() + config_cache["CACHE_DIR"] = "whisper_cache" + return config_cache + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("model_dir", help="Path to the model directory") + parser.add_argument("wav_file_path", help="Path to the WAV file") + parser.add_argument("device", nargs="?", default="CPU", help="Device to run the model on (default: CPU)") + args = parser.parse_args() + + ov_config = dict() + if args.device == "NPU" or "GPU" in args.device: # need to handle cases like "GPU", "GPU.0" and "GPU.1" + # Cache compiled models on disk for GPU and NPU to save time on the + # next run. It's not beneficial for CPU. 
+ ov_config = get_config_for_cache() + + # Word timestamps require decomposition of cross-attention decoder SDPA layers, + # so word_timestamps must be passed to the pipeline constructor (not just in generation config) + ov_config["word_timestamps"] = True + + pipe = openvino_genai.WhisperPipeline(args.model_dir, args.device, **ov_config) + + config = pipe.get_generation_config() + # 'task' and 'language' parameters are supported for multilingual models only + config.language = "<|en|>" # can switch to <|zh|> for Chinese language + config.task = "transcribe" + config.return_timestamps = True + config.word_timestamps = True + + # Pipeline expects normalized audio with Sample Rate of 16kHz + raw_speech = read_wav(args.wav_file_path) + result = pipe.generate(raw_speech, config) + + print(result) + + if result.chunks: + for chunk in result.chunks: + print(f"timestamps: [{chunk.start_ts:.2f}, {chunk.end_ts:.2f}] text: {chunk.text}") + + if result.words: + for word in result.words: + print(f"[{word.start_ts:.2f}, {word.end_ts:.2f}]: {word.word}") + + +if "__main__" == __name__: + main() diff --git a/src/resources/openvino.genai-2026.1.0.0/samples/requirements.txt b/src/resources/openvino.genai-2026.1.0.0/samples/requirements.txt new file mode 100644 index 0000000..377b6c8 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/samples/requirements.txt @@ -0,0 +1,3 @@ +-r ./deployment-requirements.txt +-r ./export-requirements.txt +pydantic diff --git a/src/resources/openvino.genai-2026.1.0.0/site/.editorconfig b/src/resources/openvino.genai-2026.1.0.0/site/.editorconfig new file mode 100644 index 0000000..22a1ec1 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/.editorconfig @@ -0,0 +1,16 @@ +# http://editorconfig.org + +root = false + +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +indent_style = space +indent_size = 2 +max_line_length = 100 +trim_trailing_whitespace = true + +[*.md] +insert_final_newline = true 
+trim_trailing_whitespace = false diff --git a/src/resources/openvino.genai-2026.1.0.0/site/.gitignore b/src/resources/openvino.genai-2026.1.0.0/site/.gitignore new file mode 100644 index 0000000..e3f4a44 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/.gitignore @@ -0,0 +1,26 @@ +# Dependencies +/node_modules + +# Production +/build + +# Generated files +.docusaurus +.cache-loader + +# Misc +.DS_Store +.env.local +.env.development.local +.env.test.local +.env.production.local + +npm-debug.log* +yarn-debug.log* +yarn-error.log* + +# Generated docs files for samples +/docs/samples/*/ +!/docs/samples/_*/ +# !/docs/samples/index.mdx +# !/docs/samples/_category_.json diff --git a/src/resources/openvino.genai-2026.1.0.0/site/.prettierignore b/src/resources/openvino.genai-2026.1.0.0/site/.prettierignore new file mode 100644 index 0000000..e25125f --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/.prettierignore @@ -0,0 +1,12 @@ +dist +node_modules +.yarn +build +coverage +.docusaurus +.idea + +.svg +*.svg + +*.mdx diff --git a/src/resources/openvino.genai-2026.1.0.0/site/.prettierrc b/src/resources/openvino.genai-2026.1.0.0/site/.prettierrc new file mode 100644 index 0000000..f25cf20 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/.prettierrc @@ -0,0 +1,10 @@ +{ + "printWidth": 100, + "trailingComma": "es5", + "useTabs": false, + "tabWidth": 2, + "semi": true, + "bracketSpacing": true, + "singleQuote": true, + "arrowParens": "always" +} diff --git a/src/resources/openvino.genai-2026.1.0.0/site/README.md b/src/resources/openvino.genai-2026.1.0.0/site/README.md new file mode 100644 index 0000000..8b967ce --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/README.md @@ -0,0 +1,41 @@ +# Website + +This website is built using [Docusaurus](https://docusaurus.io/), a modern static website generator. 
+ +### Installation + +``` +$ npm i +``` + +### Local Development + +``` +$ npm run start +``` + +This command starts a local development server and opens up a browser window. Most changes are reflected live without having to restart the server. + +### Build + +``` +$ npm run build +``` + +This command generates static content into the `build` directory and can be served using any static contents hosting service. + +### Deployment + +Using SSH: + +``` +$ USE_SSH=true npm run deploy +``` + +Not using SSH: + +``` +$ GIT_USER= npm run deploy +``` + +If you are using GitHub pages for hosting, this command is a convenient way to build the website and push to the `gh-pages` branch. diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/bindings/_category_.json b/src/resources/openvino.genai-2026.1.0.0/site/docs/bindings/_category_.json new file mode 100644 index 0000000..8c1a743 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/bindings/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "Bindings", + "position": 7, + "link": { + "type": "generated-index", + "description": "Bindings for OpenVINO GenAI." + } +} diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/bindings/node-js.md b/src/resources/openvino.genai-2026.1.0.0/site/docs/bindings/node-js.md new file mode 100644 index 0000000..b0d4131 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/bindings/node-js.md @@ -0,0 +1,98 @@ +--- +sidebar_position: 1 +sidebar_label: Node.js +description: Node.js bindings provide JavaScript/TypeScript API. +--- + +# Node.js Bindings for OpenVINO™ GenAI + +OpenVINO GenAI provides Node.js bindings that enable you to use generative AI pipelines in JavaScript and TypeScript applications. + +:::warning API Coverage +Node.js bindings currently provide a subset of the full OpenVINO GenAI API available in C++ and Python. 
The focus is on core text generation (`LLMPipeline`), vision language models (`VLMPipeline`), text embedding (`TextEmbeddingPipeline`), and text reranking (`TextRerankPipeline`) functionality. +::: + +## Supported Pipelines and Features + +Node.js bindings currently support: + +- `LLMPipeline`: Text generation with Large Language Models + - Chat mode with conversation history + - Streaming support + - Batch generation + - Multiple sampling strategies (greedy, beam search) + - Structured output + - ReAct agent support +- `VLMPipeline`: Vision Language Model inference for multimodal tasks + - Process images and videos with text prompts + - Chat mode with conversation history + - Streaming support +- `TextEmbeddingPipeline`: Generate text embeddings for semantic search and RAG applications +- `TextRerankPipeline`: Rerank documents by semantic relevance for RAG applications + - Configurable top-n results +- `Tokenizer`: Fast tokenization / detokenization and chat prompt formatting + - Encode strings into token id and attention mask tensors + - Decode token sequences + - Apply chat template + - Access special tokens (BOS/EOS/PAD) + - Supports paired input + +## Installation + +To install OpenVINO GenAI for Node.js, refer to the [Install Guide](https://docs.openvino.ai/2026/get-started/install-openvino.html). + +## Quick Start + +:::tip Model Preparation +Before using LLMPipeline, you need to convert your model to OpenVINO IR format. +See [Model Preparation](/docs/category/model-preparation) for details. 
+::: + +After installation, you can start using OpenVINO GenAI in your Node.js projects: + +```js +import { LLMPipeline } from "openvino-genai-node"; + +async function main() { + const modelPath = "/path/to/ov/model"; + const device = "CPU"; + const pipe = await LLMPipeline(modelPath, device); + + const input = "What is OpenVINO?"; + const config = { max_new_tokens: 100 }; + + for await (const chunk of pipe.stream(input, config)) { + process.stdout.write(chunk); + } +} + +main(); +``` + +## Next Steps + +- Check out [Code Samples](/docs/samples) +- Review [Supported Models](/docs/supported-models) +- Explore [Use Cases](/docs/category/use-cases) +- Browse the [Node.js bindings source](https://github.com/openvinotoolkit/openvino.genai/tree/master/src/js) +- View the [NPM package](https://www.npmjs.com/package/openvino-genai-node) + +## Troubleshooting + +### Module Not Found Errors + +If you encounter errors like `Cannot find module 'openvino-genai-node'`: + +1. Verify installation: `npm list openvino-genai-node` +2. Check Node.js version: `node --version` +3. Ensure ES modules are enabled: add `"type": "module"` in your `package.json` + +### Version Compatibility Issues + +If you encounter errors related to shared libraries or ABI compatibility: + +1. Ensure both `openvino-node` and `openvino-genai-node` are the same version +2. If building from source, rebuild both OpenVINO and OpenVINO GenAI bindings +3. Check that your system meets the requirements for your platform + +For more help, refer to the [OpenVINO GenAI GitHub repository](https://github.com/openvinotoolkit/openvino.genai) or open an issue. 
diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/_category_.json b/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/_category_.json new file mode 100644 index 0000000..1cb778d --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "Concepts", + "position": 6, + "link": { + "type": "generated-index", + "description": "Concepts to OpenVINO GenAI." + } +} diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/beam-search.md b/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/beam-search.md new file mode 100644 index 0000000..7e99f7e --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/beam-search.md @@ -0,0 +1,7 @@ +--- +sidebar_position: 3 +--- + +# Beam Search + +> **Note:** This page is a work in progress. diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/how-it-works.md b/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/how-it-works.md new file mode 100644 index 0000000..65a4fb4 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/how-it-works.md @@ -0,0 +1,70 @@ +--- +sidebar_position: 1 +sidebar_label: How It Works +description: Understanding the internal workings of OpenVINO GenAI +--- + +# How OpenVINO GenAI Works + +## Stateful LLM + +A common optimization for LLM inference is using a past KV (key/value)-cache. This cache is represented by the corresponding inputs and outputs in a model originally implemented in a DL framework (e.g. PyTorch models from Hugging Face). For further optimization and easier use, the model is transformed to a stateful form. This transformation improves inference performance and decreases the allocated runtime memory in long-running text generation scenarios. It is achieved by hiding inputs and outputs of the model that represent past KV-cache tensors and handling them inside the model in a more efficient way. 
The cache is still accessible through the state API, though. This is in contrast to the stateless model approach, which requires manipulating these inputs and outputs explicitly. An introduction to the stateful models can be found in the [Stateful Models article](https://docs.openvino.ai/2026/openvino-workflow/running-inference/inference-request/stateful-models.html). + +### Beam Search and KV-Cache + +Hiding KV-cache introduces a peculiarity for beam search algorithm. Beam search suggests batched inference of multiple beams. The design described here so far would result in generating multiple independent sequences of tokens. Beam search algorithm, on the other hand, requires removing some of the ongoing beams and splitting other beams to multiple branches. Beam removal requires deleting corresponding KV-cache entry and beam splitting requires copying corresponding KV-cache values. + +To provide the possibility to implement beam search without accessing model's internal state, a stateful LLM converted with `optimum-intel` or [llm_bench](https://github.com/openvinotoolkit/openvino.genai/tree/master/tools/llm_bench) introduces an additional 1-dimensional `beam_idx` input. `beam_idx` must contain indexes of elements in a batch which are intended to be selected and will evolve during the next beam search iteration. There's only one beam when the generation starts. That beam corresponds to the initial prompt. `beam_idx` must have values: `[0, 0]` to keep the initial beam and introduce its copy. The dynamic batch size makes it possible to change the number of beams dynamically. `beam_idx` must have `[1]` as the value to remove zeroth sequence and keep the second beam only. + +Assume there are two running beams. To proceed with generating both beams at the next iteration, `beam_idx` values must be `[0, 1]`, pointing to batch elements `0` and `1`. To drop the last beam and split the other beam in two, `beam_idx` must be set to `[0, 0]`. 
This results in utilizing only the part of KV cache corresponding to the zeroth element in the batch. The process of selecting proper entries in cache is called Cache Reorder. + +#### Forking Beam Example + +In this diagram, setting `beam_idx = [0, 0]` creates two identical copies of Beam 0, which can then diverge in future iterations: + +![Beam forking](/img/beam_idx-fork.gif) + +#### Selecting Specific Beam Example + +The diagram below shows how setting `beam_idx = [1]` selects only Beam 1 from the KV-cache, effectively removing Beam 0 from consideration in the next iteration: + +![Beam selection](/img/beam_idx-drop.gif) + +## Stateless vs Stateful Models + +The images below represent stateless and stateful LLM pipelines. +The model has the following inputs: + +1. `input_ids` contains the next selected token +2. `attention_mask` is filled with `1` +3. `position_ids` encodes a position of currently generating token in the sequence +4. `beam_idx` selects beams + +The model has 1 output `logits` describing the predicted distribution over the next tokens. And there's KV cache state. 
+ +### Stateless Pipeline + +In the stateless model approach, the developer needs to manage all KV-cache inputs and outputs explicitly: + +![Stateless Model](/img/stateless.jpg) + +With a stateless model: + +- Normal parameters (`input_ids`, `attention_mask`, `position_ids`) must be managed by the developer +- KV-cache from previous iterations (`past_key_values`) must be passed as inputs +- Updated KV-cache (`present_key_values`) must be handled as outputs +- The cache can be modified based on token selection strategy or due to cache shrinking techniques + +### Stateful Pipeline + +In the stateful model with Cache Reorder, much of the KV-cache management is handled internally: + +![Stateful Model with Cache Reorder](/img/stateful.jpg) + +With a stateful model: + +- A new `beam_idx` parameter is introduced to manage beam selection +- KV-cache is stored in the Model State within the OpenVINO InferRequest +- ReadValue operations retrieve the KV-cache from previous iterations +- Assign operations update the KV-cache for the next iteration +- Next token selection and beam scoring are simplified diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/lora.md b/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/lora.md new file mode 100644 index 0000000..87c1832 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/lora.md @@ -0,0 +1,33 @@ +--- +sidebar_position: 2 +--- + +# Low-Rank Adaptation (LoRA) + +LoRA, or [Low-Rank Adaptation](https://arxiv.org/abs/2106.09685), is a popular and lightweight training technique used for fine-tuning Large Language and Stable Diffusion Models without needing full model training. +Full fine-tuning of larger models (consisting of billions of parameters) is inherently expensive and time-consuming. +LoRA works by adding a smaller number of new weights to the model for training, rather than retraining the entire parameter space of the model. 
+This makes training with LoRA much faster, memory-efficient, and produces smaller model weights (a few hundred MBs), which are easier to store and share. + +At its core, LoRA leverages the concept of low-rank matrix factorization. +Instead of updating all the parameters in a neural network, LoRA decomposes the parameter space into two low-rank matrices. +This decomposition allows the model to capture essential information with fewer parameters, significantly reducing the amount of data and computation required for fine-tuning. +This vastly reduces the storage requirement for large language models adapted to specific tasks and enables efficient task-switching during deployment all without introducing inference latency. + +![LoRA](/img/lora.png) + +Some more advantages of using LoRA: + +- LoRA makes fine-tuning more efficient by drastically reducing the number of trainable parameters. +- The original pre-trained weights are kept frozen, which means you can have multiple lightweight and portable LoRA models for various downstream tasks built on top of them. +- LoRA is orthogonal to many other parameter-efficient methods and can be combined with many of them. +- Performance of models fine-tuned using LoRA is comparable to the performance of fully fine-tuned models. +- LoRA does not add any inference latency because adapter weights can be merged with the base model. + +:::info +More details about LoRA can be found in HuggingFace [conceptual guide](https://huggingface.co/docs/peft/conceptual_guides/adapter) and [blog post](https://huggingface.co/blog/peft). +::: + +:::tip +See [LoRA Adapters Guide](/docs/guides/lora-adapters) for an example of using LoRA with OpenVINO GenAI. 
+::: diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/optimization-techniques/_category_.json b/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/optimization-techniques/_category_.json new file mode 100644 index 0000000..e42bf68 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/optimization-techniques/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "Optimization techniques", + "position": 4, + "link": { + "type": "generated-index", + "description": "Optimization techniques to OpenVINO GenAI." + } +} diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/optimization-techniques/continuous-batching.md b/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/optimization-techniques/continuous-batching.md new file mode 100644 index 0000000..3920205 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/optimization-techniques/continuous-batching.md @@ -0,0 +1,7 @@ +--- +sidebar_position: 3 +--- + +# Continuous Batching + +> **Note:** This page is a work in progress. diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/optimization-techniques/diffusion-caching.md b/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/optimization-techniques/diffusion-caching.md new file mode 100644 index 0000000..18fce21 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/optimization-techniques/diffusion-caching.md @@ -0,0 +1,74 @@ +--- +sidebar_position: 5 +--- + +# Diffusion Caching (TaylorSeer Lite) + +## Overview +Diffusion Caching is an optimization technique that accelerates diffusion transformers by identifying and reusing computational redundancies in the iterative denoising process. The technique exploits the observation that intermediate activations often change minimally between closely spaced timesteps, making it possible to cache and reuse previous computations instead of performing full forward passes at every step. 
+ +OpenVINO GenAI implements **TaylorSeer Lite**, a memory-efficient variant of the TaylorSeer algorithm introduced in [From Reusing to Forecasting: Accelerating Diffusion Models with TaylorSeers](https://arxiv.org/pdf/2503.06923). This approach achieves significant inference speedups while maintaining visual quality, without requiring model retraining or architectural modifications. + +## Conceptual Model +TaylorSeer Lite uses Taylor series approximation to predict transformer outputs during denoising steps, eliminating the need for full forward passes. Instead of caching features from all transformer layers (which would consume substantial memory), TaylorSeer Lite caches only the output of the final linear layer along with its first derivative. + +During cached steps, the transformer computation is completely skipped. The output is extrapolated using the Taylor series approximation based on the cached values. + +At regular intervals (controlled by `cache_interval`), a full forward pass is executed to refresh the cache and update derivatives, ensuring prediction accuracy. + +## Configuration Interface +TaylorSeer Lite is configured through `ov::genai::TaylorSeerCacheConfig` and exposed in `ov::genai::ImageGenerationConfig` and `ov::genai::VideoGenerationConfig`. + +### Parameters +* **`cache_interval`** (`size_t`, defaults to `3`) - Controls how often a full forward pass is performed after warm-up. Once warm-up is finished, TaylorSeer performs a full transformer computation every `cache_interval` steps and uses Taylor-series predictions for the intermediate steps, resulting in up to `cache_interval - 1` predicted (cached) denoising steps between two full computations. + +* **`disable_cache_before_step`** (`size_t`, defaults to `6`) - Number of initial denoising steps during which caching is disabled. 
In practice, the implementation always performs full computations for steps `0..max(disable_cache_before_step, 2) - 1`, ensuring at least two warm-up steps with no caching to stabilize the derivatives before prediction begins. + +* **`disable_cache_after_step`** (`int`, defaults to `-2`) - Step index from which caching is disabled (inclusive) to ensure quality in the final denoising stages. Negative values are interpreted relative to the end of the schedule: `num_inference_steps + disable_cache_after_step`. + +## Sample Usage (Python) +[samples/python/image_generation/taylorseer_text2image.py](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/python/image_generation/taylorseer_text2image.py) demonstrates TaylorSeer Lite usage with performance comparison. + +Basic usage: +```bash +python taylorseer_text2image.py ./flux.1-dev/FP16 "a beautiful sunset over mountains" +``` + +Configuration in code: +```python +taylorseer_config = openvino_genai.TaylorSeerCacheConfig() +taylorseer_config.cache_interval = 5 +taylorseer_config.disable_cache_before_step = 2 +taylorseer_config.disable_cache_after_step = -1 +``` + +### Image Generation (Flux / StableDiffusion3) +```python +pipe = openvino_genai.Text2ImagePipeline(models_path, device) +# Apply TaylorSeerCacheConfig to generation config +generation_config = pipe.get_generation_config() +generation_config.taylorseer_config = taylorseer_config +pipe.set_generation_config(generation_config) + +res = pipe.generate(prompt, num_inference_steps=28) +``` + +### Video Generation (LTX-Video) +```python +pipe = openvino_genai.Text2VideoPipeline(models_path, device) +# Pass TaylorSeerCacheConfig directly as a keyword argument +result = pipe.generate( + prompt, + num_inference_steps=50, + taylorseer_config=taylorseer_config, +) +``` + +## Benefits +* By skipping full transformer computations for multiple consecutive steps, inference time is significantly reduced. 
+* Only the final layer output and derivative are cached, avoiding the memory explosion that would occur with full-layer caching. +* The Taylor approximation maintains high visual similarity to full computation results. +* Speedup scales with transformer computation intensity, input resolution and number of inference steps. + +## Current Limitations +* TaylorSeer Lite currently supports Flux and StableDiffusion3 Text2Image pipelines, and LTX-Video Text2Video pipeline. diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/optimization-techniques/kvcache-eviction-algorithm.md b/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/optimization-techniques/kvcache-eviction-algorithm.md new file mode 100644 index 0000000..619c8ee --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/optimization-techniques/kvcache-eviction-algorithm.md @@ -0,0 +1,152 @@ +--- +sidebar_position: 2 +--- + +# KVCache Token Eviction Algorithm + + +## Overview +The cache eviction algorithm is designed to manage KV (Key-Value) cache memory for large language models (LLMs) during text generation. It determines which blocks of tokens should be evicted from the KV cache based on importance scores calculated from attention scores across different attention layers. + +The cache eviction algorithm allows for average and maximum KV cache consumption savings since it effectively imposes a configurable hard limit on the amount of KV cache blocks that each sequence can occupy. +A fixed, relatively small value of KV cache block limit means that there is less compute spent on generating next token, when compared to the no-eviction case where the entire KV cache history of the sequence, including prompt tokens, would have to be processed; token latency would also remain stable. +This effect only comes into play during the generation stage and is most noticeable for longer generation lengths. 
+ +No eviction is done during prefill stage, therefore no speedups would be achieved during the prefill using cache eviction alone and the reduction in maximum KV cache consumption over the entire generation process is limited from below by the amount of KV cache blocks occupied by the full prompt. To achieve prefill stage speedup, the [sparse attention prefill algorithms](./sparse-attention-prefill.md) can be used, either separately or along with cache eviction. + +## Conceptual Model +The KV cache for each sequence is divided into three logical areas: + +![KV cache layout with cache eviction](./../../../static/img/kv-cache-areas-diagram.svg) + +* Start Area: Initial tokens that are never evicted +* Evictable Area: Tokens that can be evicted based on importance scores +* Recent Area: Most recent tokens that are preserved (not evicted while in this area, but naturally migrating toward the evictable area as the text generation goes on) + +The sizes of all three areas can be configured by modifying corresponding fields in a `CacheEvictionConfig` struct, which itself is a part of the pipeline-wide `SchedulerConfig`. +As the generation starts, the blocks in respective logical areas are filled token-by-token, and once at least one block past the "recent" area is filled, eviction may take place. +The tokens are evicted based on accumulated importance scores following the [H2O](https://arxiv.org/abs/2306.14048) approach. +The scores are accumulated throughout the entire generation process and their weighting may be changed by adjusting the `CacheEvictionConfig.aggregation_mode` parameter. +Eviction occurs with a block-wise granularity, and only the completely filled blocks from the "evictable" area are evicted. +By default the start area is 32 tokens, evictable area is 512 tokens and recent area is 128 tokens, which amounts to a total maximum cache usage by sequence during the generation phase of 672 tokens. 
+ +This approach allows LLMs to handle long sequences efficiently by keeping the most contextually important tokens in the cache while evicting those of lesser importance. +The downside of the eviction procedure is potential loss of generation accuracy, since the cache no longer contains the entire context for the generation, but only the most "important" token blocks. +The user can adjust the individual sizes of the eviction sub-areas to hit the optimal point of accuracy/memory usage tradeoff in their particular case. + +Note that currently the eviction only starts after the full prompt has been processed, i.e. no eviction takes place during the prefill phase. +This means that for longer prompt sizes the maximum cache usage may exceed the limit defined by the `CacheEvictionConfig` parameters. + +After the prefill phase, however, the maximum cache occupancy for each sequence currently being processed is strictly limited by the combined sizes of the 3 areas described above. +`CacheEvictionConfig.get_max_cache_size_after_eviction()` can be queried to get this cache size limit in tokens. + +### (Optional) Adaptive R-KV score aggregation +Along with the more straightforward `AggregationMode.SUM` and `AggregationMode.NORM_SUM`, there is an option to set `CacheEvictionConfig.aggregation_mode = AggregationMode.ADAPTIVE_RKV` to enable the [R-KV](https://arxiv.org/pdf/2505.24133v3)-based scoring for the blocks to be evicted. + +Whenever the eviction should occur, the R-KV approach uses the same per-token, block-accumulated attention scores to determine the subset of KV cache blocks that comprises a configurable portion of the total attention scores across currently retained blocks (i.e. "attention mass", as controlled by `CacheEvictionConfig.adaptive_rkv_config.attention_mass`). +This block subset (up to the limits imposed by the higher-level config's `start_size`, `recent_size` and `max_cache_size`) will be retained after eviction. 
+The rest of the blocks that would need to be retained during current eviction step will be taken from the most "diverse" remaining blocks in the evictable area. +The "diversity" is calculated as the negative of the block-aggregated cross-token cosine similarity; the aim is to find the most dissimilar blocks among the non-attention-mass subset and keep only those as most representative ones. + +In addition to the above, the attention scores are aggregated across generation steps by a running average with a window size controlled by the `CacheEvictionConfig.adaptive_rkv_config.window_size`. + +The Adaptive R-KV aggregation mode is generally recommended for LLM usage scenarios with reasoning enabled, as promoting diversity helps reduce repetitive or redundant reasoning patterns. + +## Sample - impact of cache eviction on possible generation length and prompt throughput +[limit_checker.py](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/python/text_generation/limit_checker.py) can be used to visualize the impact of the cache eviction algorithm on the end performance of the generation pipeline. +The script is parameterized to allow specifying own model (by its `huggingface_hub` ID) and the base cache size. + +With `--mode gen_length`, the script will run the generation pipeline with increasing requested length of generation until it either hits 100% maximum cache usage or times out. +With cache eviction disabled, the pipeline will eventually exhaust the cache size, and the generation length will be capped at the output token count determined by the base cache size. +With eviction enabled, however, the pipeline is able to generate sequences of arbitrary length (as long as the cache size is at least `max(prompt_size, max_cache_size_after_eviction)`), and the script will instead finish with a timeout. 
+ +With `--mode gen_throughput`, the script will run a binary search to determine the minimum number of concurrently processed sequences to hit the 100% cache utilization. + + +## (Optional) Cache Rotation +By default, no additional cache modification is performed during eviction. +Most LLMs employ some kind of positional embedding at some point in the inferencing, which effectively becomes associated with each per-token KV cache vector as well. +The popular RoPE positional embedding is more or less continuous in the linear space of the token positions, but when token eviction takes place, the continuity of the remaining blocks is disrupted. +This may impact the ability of the model to correctly recognize the relative positions of the remaining blocks and degrade the generation accuracy. + +Cache rotation seeks to alleviate this by "re-rotating" corresponding blocks so that the blocks that remain after each eviction are once again "continuous" in terms of the effective RoPE embedding. +It can be enabled by setting the `CacheEvictionConfig.apply_rotation` field to `true` (default is `false`). + +## Current limitations + +* Cache rotation is only targeted for the regular, linear LLaMa-like RoPE application and may degrade accuracy on models that use other RoPE schemes. + +* Cache rotation is currently only supported for the models with uniform V embedding sizes across the layers. + +## (Optional) KVCrush + +KVCrush enhances the standard H2O/SnapKV eviction by selecting the most representative blocks from the evictable area using clustering analysis, rather than simply evicting the low score blocks. + +### Algorithm Overview + +1. **Indicator Creation**: Generate binary indicators for tokens based on importance scores +2. **Anchor Point Generation**: Create reference patterns using configurable modes +3. **Distance Calculation**: Measure Hamming distance between block patterns and the anchor point +4. 
**Representative Selection**: Select blocks to best represent context diversity + +### Configuration +Set up the KVCrush config parameters and pass them to ```CacheEvictionConfig```. Sample code that allocates KVCrush a budget of 2 blocks and uses the MEAN anchor mode is shown below. +```cpp +const ov::genai::CacheEvictionConfig EXAMPLE_CACHE_EVICTION_CONFIG = + {32, 32, 192, ov::genai::AggregationMode::NORM_SUM, false, 8, KVCrushConfig(2, KVCrushAnchorPointMode::MEAN)}; +``` +```python +CacheEvictionConfig( + start_size=32, + recent_size=128, + max_cache_size=448, + aggregation_mode=AggregationMode.NORM_SUM, + apply_rotation=False, + snapkv_window_size=8, + kvcrush_config=KVCrushConfig(budget=2, anchor_point_mode=KVCrushAnchorPointMode.MEAN) + ) +``` + +**Anchor Point Modes:** +- `RANDOM`: Random binary pattern +- `ZEROS`: All zeros pattern +- `ONES`: All ones pattern +- `MEAN`: Mean of indicators across blocks +- `ALTERNATING`: Alternating 0-1 pattern + +### Performance Comparison on LongBench + +**Note:** Values in **`this style`** indicate performance equal to or better than the respective baseline configurations. + +#### SnapKV +The following tables show accuracy results (using 200 samples) comparing standard SnapKV eviction with KVCrush. 
+ +Configuration format: SnapKV budget (tokens), KVCrush budget (blocks), Anchor Point + +| Configuration | qasper | samsum | trec | +|---------------|--------|--------|------| +| **1024, 0** | 19.77 | 37.72 | 62.50 | +| 768, 8, ALTERNATING | 18.79 | **`37.78`** | **`62.50`** | +| 768, 8, MEAN | 19.29 | 37.67 | **`62.50`** | +| 768, 8, RANDOM | 18.95 | **`37.75`** | **`62.50`** | +| 960, 2, ALTERNATING | **`19.83`** | **`37.77`** | **`62.50`** | +| 960, 2, MEAN | **`19.82`** | **`37.95`** | **`62.50`** | +| 960, 2, RANDOM | **`20.56`** | 37.33 | **`62.50`** | +| 992, 1, ALTERNATING | **`20.05`** | 37.42 | **`62.50`** | +| 992, 1, MEAN | **`19.83`** | **`37.80`** | **`62.50`** | +| 992, 1, RANDOM | **`19.92`** | 37.56 | **`62.50`** | +| **KVCrush - Best** | **`20.56`** | **`37.95`** | **`62.50`** | + +| Configuration | qasper | samsum | trec | +|---------------|--------|--------|------| +| **512, 0** | 16.97 | 36.60 | 62.50 | +| 384, 4, ALTERNATING | 16.69 | 36.18 | **`62.50`** | +| 384, 4, MEAN | 16.73 | **`36.91`** | **`62.50`** | +| 384, 4, RANDOM | **`17.34`** | 36.24 | **`62.50`** | +| 448, 2, ALTERNATING | **`17.14`** | 36.34 | **`62.50`** | +| 448, 2, MEAN | **`17.09`** | 35.99 | **`62.50`** | +| 448, 2, RANDOM | 16.94 | 36.26 | **`62.50`** | +| 480, 1, ALTERNATING | **`17.40`** | **`36.61`** | **`62.50`** | +| 480, 1, MEAN | 16.77 | 36.39 | **`62.50`** | +| 480, 1, RANDOM | **`17.20`** | 36.54 | **`62.50`** | +| **KVCrush - Best** | **`17.40`** | **`36.91`** | **`62.50`** | diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/optimization-techniques/prefix-caching.md b/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/optimization-techniques/prefix-caching.md new file mode 100644 index 0000000..a37a7ca --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/optimization-techniques/prefix-caching.md @@ -0,0 +1,7 @@ +--- +sidebar_position: 4 +--- + +# Prefix Caching + +> **Note:** This page is a work in 
progress. diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/optimization-techniques/sparse-attention-prefill.md b/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/optimization-techniques/sparse-attention-prefill.md new file mode 100644 index 0000000..c51145f --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/optimization-techniques/sparse-attention-prefill.md @@ -0,0 +1,69 @@ +--- +sidebar_position: 2 +--- + +# Sparse Attention prefill algorithms + +## Overview +The sparse attention prefill algorithms enable speedups during the prompt processing (prefill) stage of the generation process by reducing the amount of computation taken up by the attention operation. +During the prefill stage, the attention operation can be applied only to the subset of blocks determined to be most "important" for the current generation context, with importance estimation method determined by the algorithm. + +The KV-cache blocks that are deemed "unimportant" at a certain stage of prefill are not discarded entirely, but are preserved as-is so that they may still be considered for usage in the attention computation in later stages of prompt processing. +Moreover, the sparse prefill algorithms do not apply during the generation stage. +The sparse prefill algorithms therefore do not lead to decreased maximum and average memory consumption throughout the generation process, but they do lead to decreased total generation time and first-token latency due to enabling faster prefills. + +To achieve overall memory savings and generation stage memory and compute optimizations, the [cache eviction algorithm](./kvcache-eviction-algorithm.md) can be used along with sparse prefill algorithms, or separately. 
+ + +## Usage with openvino.genai +The sparse attention prefill can be enabled by setting `.use_sparse_attention` field to `True` in the `openvino_genai.SchedulerConfig` structure that serves as input to most of the `openvino_genai` model pipeline objects. +Further configuration of the sparse prefill algorithm is done using the `.sparse_attention_config` field of `openvino_genai.SchedulerConfig`, which accepts objects of `openvino_genai.SparseAttentionConfig`. See the [in-code documentation](../../../../src/cpp/include/openvino/genai/sparse_attention.hpp) for the description of individual fields; in particular, the `.mode` field of `SparseAttentionConfig` selects the type of the sparse prefill algorithm to be applied. + +Currently two sparse prefill algorithms are supported - the tri-shape algorithm (https://arxiv.org/pdf/2412.10319) and the XAttention algorithm (https://arxiv.org/pdf/2503.16428). + +### Refresher on the openvino.genai paged attention implementation +In a vLLM-like approach, which underlies `openvino.genai` inference, the KV cache of a sequence is divided into blocks of fixed size. +The size of the blocks is hardware-defined (i.e. 32 tokens for CPU, 16 for GPU, etc.) and, depending on the algorithm, imposes limitations on the minimum granularity of sparsity that can be introduced into the attention operation. + +The `openvino.genai` API allows for generation of multiple sequences at once, which are divided into chunks of arbitrary size by the internal scheduler and submitted to the paged attention kernels in one go to achieve better throughput. In practice this means that the full prefill stage for each sequence may take several time steps, between which the adjustments of KV cache blocks assigned to each sequence are possible. This impacts the operation of the sparse prefill algorithms as implemented in `openvino`/`openvino.genai` as described below. 
+ +### Tri-shape +For the tri-shape algorithm, the majority of the prefill occurs with as little as 2-3 KV cache blocks (depending on the configuration) being utilized as previous KV cache data to process each new prompt chunk. +These retained blocks are the blocks in the chronological beginning of the prompt, the last-processed full blocks of the same prompt and the last KV cache block not currently completely filled (if it exists). +The sizes of the retained areas can be adjusted using the `SparseAttentionConfig.num_retained_start_tokens_in_cache` and `SparseAttentionConfig.num_retained_recent_tokens_in_cache`. + + +![Tri-shape sparse prefill illustrated](./../../../static/img/trishape.svg) + +The picture above illustrates the tri-shape algorithm in more detail. For simplicity, it is presumed that the prompt takes up 8 full KV cache blocks and is filled within 5 chunks. The `.num_retained_start_tokens_in_cache` and `.num_retained_recent_tokens_in_cache` are both set to 1 HW-dependent block size in tokens. + + +The prompt processing occurs as usual until at least two KV cache blocks have been completely filled (`t = 0, 1`). +After that, for the next prompt chunks only the first and the last/second-last blocks processed will be visible as KV cache contents, effectively introducing sparsity in the attention computation for the rest of the KV cache "body" (`t = 2-4`). + +Upon reaching the tail of the prompt the KV cache for the entire prompt is used in attention again, effectively switching back from the sparse attention mode to "dense" attention (`t = 5`). +Apart from improving the generation accuracy, this also makes it possible to effectively combine the tri-shape sparse prefill algorithm with the cache eviction algorithm, which relies on the model having "seen" the entire prompt KV cache when processing the last tokens of the prompt. 
The "dense attention" portion of the prompt can be configured using the `SparseAttentionConfig.num_last_dense_tokens_in_prefill` field. + + +### XAttention +For the XAttention algorithm, the prefill computation is accelerated by selectively attending only to the most important regions of the attention matrix, determined dynamically through antidiagonal-based importance estimation. During the prefill stage, each query block attends only to the subset of key blocks whose cumulative estimated attention mass exceeds a predefined threshold, while the rest of the KV cache blocks are excluded from the attention computation. + +To enable XAttention with default settings, select `SparseAttentionMode.XATTENTION` in `SparseAttentionConfig.mode` within the `SchedulerConfig` and set `use_sparse_attention=True`: +```python +cb_config = SchedulerConfig( + use_sparse_attention=True, + sparse_attention_config=SparseAttentionConfig( + mode=SparseAttentionMode.XATTENTION + ) +) +``` + +The importance estimation procedure consists of two stages. In the first stage, using stride-based reshaping, the query and key tensors are permuted along antidiagonal patterns, with the stride value determined by the `SparseAttentionConfig.xattention_stride` parameter. The reshaped tensors are then used to compute a coarse estimate of the attention mass per block, with the block size defined by `SparseAttentionConfig.xattention_block_size`. The attention values within each block are summed to produce an importance score that represents the approximate total attention mass associated with that block. In the second stage, for each query block, the corresponding key blocks are sorted in descending order of their estimated attention mass. The algorithm then identifies the minimal subset of blocks whose cumulative antidiagonal attention exceeds the predefined threshold `SparseAttentionConfig.xattention_threshold`. 
The block selection process always retains the diagonal blocks - corresponding to the most recently processed query positions, as well as the least recent KV cache block. + +![XAttention sparse prefill illustrated](./../../../static/img/xattention.svg) + +The picture above illustrates the XAttention algorithm in more detail. For simplicity, it is presumed that the prompt occupies 8 full KV cache blocks and is processed within 5 chunks. The `xattention_block_size` corresponds to one HW-dependent block of tokens. + +The prompt processing occurs as usual until at least two KV cache blocks have been completely filled (`t = 0, 1`). Once the block-level importance scores have been computed (`t = 2-4`), only the subset of KV blocks with cumulative attention mass exceeding the `xattention_threshold` is retained for attention computation, effectively introducing sparsity in the attention computation. + +Upon reaching the tail of the prompt, the KV cache corresponding to the entire prompt becomes visible again, reverting to dense attention mode (`t = 5`). This transition ensures that the model attends to the complete prompt context before entering the generation stage. Similar to the tri-shape algorithm, the final dense portion of the prefill can be configured using the `SparseAttentionConfig.num_last_dense_tokens_in_prefill` field. Due to the block-wise cache organization and scheduler chunking, the actual number of prompt tokens processed with dense attention may slightly exceed the specified value, potentially extending across a full block or subsequence chunk depending on the hardware configuration. 
diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/optimization-techniques/speculative-decoding.md b/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/optimization-techniques/speculative-decoding.md new file mode 100644 index 0000000..983444d --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/optimization-techniques/speculative-decoding.md @@ -0,0 +1,7 @@ +--- +sidebar_position: 1 +--- + +# Speculative Decoding + +> **Note:** This page is a work in progress. diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/optimization-techniques/visual-token-pruning.md b/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/optimization-techniques/visual-token-pruning.md new file mode 100644 index 0000000..94fbcde --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/concepts/optimization-techniques/visual-token-pruning.md @@ -0,0 +1,78 @@ +--- +sidebar_position: 5 +--- + +# Visual Token Pruning (CDPruner) + +## Overview +Visual Token Pruning is a context compression technique for Multimodal / Visual Language Models (VLMs) that aims to enhance inference efficiency without significant performance degradation by identifying and removing redundant or less informative tokens. A representative approach is CDPruner, introduced in the paper [Beyond Attention or Similarity: Maximizing Conditional Diversity for Token Pruning in MLLMs](https://arxiv.org/pdf/2506.10967). Its main goal is to lower inference latency and memory footprint while retaining the visual information most relevant to the user's query. + +Unlike traditional attention-based or similarity-based pruning techniques, which can either retain redundant tokens or neglect instruction relevance, CDPruner focuses on maximizing the conditional diversity of the retained visual tokens. 
Pruned tokens are removed from further attention computations, shrinking KV cache footprint, reducing Time To First Token (TTFT) and improving throughput. A relevance weighting factor controls the influence of instruction relevance during pruning, helping balance token reduction against the preservation of important visual details. + +## Conceptual Model +CDPruner operates on the sequence of visual token embeddings produced by the vision encoder before they are passed to the language model. Instead of forwarding all tokens, it selects a subset based on conditional diversity, combining token similarity and instruction relevance. + +### Token Partitioning + +The visual tokens are conceptually divided into: +* Retained Tokens: A selected subset that provides diverse and instruction-relevant visual information. +* Pruned Tokens: Tokens excluded from further processing because they contribute redundant or low-relevance information. + +High-level flow: +1. Encode image producing N visual tokens (embeddings). +2. Compute pairwise token similarity and per-token relevance scores. +3. Relevance and similarity are combined into a conditional kernel. A greedy DPP-based MAP algorithm identifies the least important tokens to discard according to `pruning_ratio`, adjusting scores using `relevance_weight` to control the trade-off between diversity and relevance. +4. Build reduced token set; subsequent generation attends only to retained tokens. + +Improvement beyond the paper's approach: +1. In step 3, when applying the DPP-based token selection algorithm, this implementation provides a splitting strategy option in addition to the original CDPruner approach. While the original approach processes the entire kernel matrix at once, the splitting strategy divides the kernel matrix into two separate blocks for parallel processing when the visual token count exceeds a threshold (default : 1, can be set via environment variable `CDPRUNER_SPLIT_THRESHOLD`). +2. 
*Note:* The split variant is not semantically equivalent to running DPP on the full kernel. In the split approach, an equal number of tokens are selected from each half and then merged, whereas a single full-kernel DPP call may select all the top-K tokens from one half if those tokens are most diverse/relevant. This constraint in the split variant can change the token selection set and may affect accuracy differently depending on the model and input. In practice, this splitting strategy has shown: (a) improved accuracy when evaluated on Qwen2.5-VL models, and (b) significantly faster GPU execution with OpenCL kernels due to better parallelization (2-3x speedup with large token counts). By default, the splitting strategy is enabled. Advanced users can disable it by setting the environment variable CDPRUNER_SPLIT_THRESHOLD=0 to use the original approach. + +**Effect:** Pruning less important visual tokens reduces memory usage and can speed up generation; extremely high pruning may degrade answer quality for complex visual queries. + +## Configuration Interface +Visual Token Pruning is exposed through fields of `ov::genai::GenerationConfig`: + +* `pruning_ratio` (integer, 0–99): Portion of visual tokens to prune, specified as an integer percentage. A value of 0 disables pruning. For example, `25` means prune 25% of the visual tokens (keep 75%). Out-of-range values (negative or >=100) are treated as 0 (disabled) to avoid eliminating the entire visual context. +* `relevance_weight` (float): Weighting factor applied when aggregating or scaling dominance scores. **Recommended range:** 0.0–1.0. A value of 0 disables relevance weighting (pruning is based solely on raw dominance scores), while higher values (up to 1.0) emphasize relevance, making pruning more conservative on borderline tokens. Values above 1.0 are allowed but may have diminishing or unpredictable effects; negative values are not recommended. Default value is `0.5f`. 
+ +### Sample Usage (Python Benchmark Script) +[samples/python/visual_language_chat/benchmark_vlm.py](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/python/visual_language_chat/benchmark_vlm.py) provides a convenient way to measure the performance impact of pruning. + +Minimal example (prune 70% of visual tokens on GPU): +```bash +python benchmark_vlm.py \ + -m ./models/vlm \ + -i ./data/example.jpg \ + -p "What is on the image?" \ + -d GPU \ + --pruning_ratio 70 \ + --relevance_weight 0.6 +``` + +Relevant configuration excerpt: +```python +config = ov_genai.GenerationConfig() +config.max_new_tokens = args.max_new_tokens +config.pruning_ratio = args.pruning_ratio +config.relevance_weight = args.relevance_weight +``` + +Pipeline creation and generation: +```python +pipe = ov_genai.VLMPipeline(models_path, device, scheduler_config=scheduler_config) +res = pipe.generate(prompt, images=images, generation_config=config) +``` + +The script prints performance metrics (including TTFT and embeddings preparation time). Compare runs with different `--pruning_ratio` values to quantify latency improvements and memory savings. + +## Performance & Benefits +* Reduced KV cache memory for visual tokens -> enables larger batch sizes or longer text generation within the same memory budget. +* Lower per-step attention computations involving image tokens -> improved latency. +* Helpful for edge or GPU memory-constrained deployments (e.g., running VLM on integrated GPU with limited VRAM). + +## Current Limitations +* Current implementation assumes a standard image encoder output; exotic hierarchical or sparse encoders might require adjusted scoring strategies. +* Pruning is applied only after the initial image encoding; it does not dynamically re-introduce pruned tokens later. +* Score computation details are internal; no per-token debug API is exposed yet. 
+* The current implementation supports Qwen2-VL and Qwen2.5-VL models only; support for other models will be added in a subsequent release. diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/getting-started/_category_.json b/src/resources/openvino.genai-2026.1.0.0/site/docs/getting-started/_category_.json new file mode 100644 index 0000000..e1b9ff2 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/getting-started/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "Getting Started", + "position": 1, + "link": { + "type": "generated-index", + "description": "Getting started guide for OpenVINO GenAI" + } +} diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/getting-started/installation.mdx b/src/resources/openvino.genai-2026.1.0.0/site/docs/getting-started/installation.mdx new file mode 100644 index 0000000..0431e54 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/getting-started/installation.mdx @@ -0,0 +1,57 @@ +--- +sidebar_position: 2 +sidebar_label: Installation +--- + +# Install OpenVINO GenAI + +OpenVINO GenAI is available for installation via [PyPI](https://pypi.org/project/openvino-genai/), [Archive](https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/) +and [npm](https://www.npmjs.com/package/openvino-genai-node) distributions and supports Linux, Windows and macOS platforms. + + + + ```bash + pip install openvino-genai + ``` + :::info Note + Some models may require additional dependencies. + See the [Model Preparation Guide](/docs/guides/model-preparation/convert-to-openvino) for details. + ::: + + + 1. Download [OpenVINO GenAI archive](https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/). + 2. Follow the [Archive Installation instructions](https://docs.openvino.ai/2026/get-started/install-openvino/install-openvino-genai.html#archive-installation). 
+ + + ```bash + npm install openvino-genai-node + ``` + See the [Node.js Bindings](/docs/bindings/node-js) for more information. + + + +Refer to the [Install Guide](https://docs.openvino.ai/2026/get-started/install-openvino.html) for detailed instructions. + +To build OpenVINO GenAI library from source, refer to the [Build Instructions](https://github.com/openvinotoolkit/openvino.genai/blob/master/src/docs/BUILD.md). + +:::info +Please make sure that you follow the version compatibility rules; refer to the [OpenVINO GenAI Dependencies](#openvino-genai-dependencies) for more information. +::: + +## System Requirements + +OpenVINO GenAI is built on top of OpenVINO Runtime and shares the same system requirements. + +Refer to the [OpenVINO System Requirements](https://docs.openvino.ai/2026/about-openvino/release-notes-openvino/system-requirements.html) for more details. + +## OpenVINO GenAI Dependencies + +OpenVINO GenAI depends on [OpenVINO](https://github.com/openvinotoolkit/openvino) and [OpenVINO Tokenizers](https://github.com/openvinotoolkit/openvino_tokenizers). + +When installing OpenVINO GenAI from PyPI, the same versions of OpenVINO and OpenVINO Tokenizers are used (e.g. `openvino==2024.3.0` and `openvino-tokenizers==2024.3.0.0` are installed for `openvino-genai==2024.3.0`). +If you update one of the dependency packages (e.g. `pip install openvino --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly`), versions might be incompatible due to different ABI and running OpenVINO GenAI can result in errors (e.g. `ImportError: libopenvino.so.2430: cannot open shared object file: No such file or directory`). +With package versions in the format `<MAJOR>.<MINOR>.<PATCH>.<REVISION>`, only the `<REVISION>` part of the full version can be varied to ensure ABI compatibility, while changing the `<MAJOR>`, `<MINOR>` or `<PATCH>` parts of the version might break ABI. + +GenAI, Tokenizers, and OpenVINO wheels for Linux on PyPI are compiled with `_GLIBCXX_USE_CXX11_ABI=0` to cover a wider range of platforms. 
In contrast, C++ archive distributions for Ubuntu are compiled with `_GLIBCXX_USE_CXX11_ABI=1`. It is not possible to mix different Application Binary Interfaces (ABIs) because doing so results in a link error. This incompatibility prevents the use of, for example, OpenVINO from C++ archive distributions alongside GenAI from PyPI. + +If you want to try OpenVINO GenAI with different dependencies versions (**not** prebuilt packages as archives or python wheels), build OpenVINO GenAI library from source. diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/getting-started/introduction.mdx b/src/resources/openvino.genai-2026.1.0.0/site/docs/getting-started/introduction.mdx new file mode 100644 index 0000000..f22fceb --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/getting-started/introduction.mdx @@ -0,0 +1,69 @@ +--- +sidebar_position: 1 +sidebar_label: Introduction +--- + +# Introduction to OpenVINO GenAI + +## What is OpenVINO GenAI? + +OpenVINO™ GenAI is a library of the most popular Generative AI model pipelines, optimized execution methods, and samples that run on top of highly performant [OpenVINO Runtime](https://docs.openvino.ai/). +It provides simplified APIs for running generative models, hiding the complexity of the generation process and enabling developers to easily integrate state-of-the-art generative models into their applications with minimal code. + +As a lightweight solution designed for efficient inference, OpenVINO GenAI includes all the core functionality needed for generative model execution (e.g. tokenization via `openvino-tokenizers`) with no external dependencies required. +This library is friendly to PC and laptop execution, and optimized for resource consumption. + +## Key Features and Benefits + +- **📦 Pre-built Generative AI Pipelines:** Ready-to-use pipelines for text generation (LLMs), image generation (Diffuser-based), speech recognition (Whisper), and visual language models (VLMs). 
See all [supported use cases](/docs/category/use-cases). +- **👣 Minimal Footprint:** Smaller binary size and reduced memory footprint compared to other frameworks. +- **🚀 Performance Optimization:** Hardware-specific optimizations for CPU, GPU, and NPU devices. +- **👨‍💻 Programming Language Support:** Comprehensive APIs in Python, C++, and Node.js. +- **🗜️ Model Compression:** Support for 8-bit and 4-bit weight compression, including embedding layers. +- **🎓 Advanced Inference Capabilities:** In-place KV-cache, dynamic quantization, speculative sampling, and more. +- **🎨 Wide Model Compatibility:** Support for popular models including Llama, Mistral, Phi, Qwen, Stable Diffusion, Flux, Whisper, and others. Refer to the [Supported Models](/docs/supported-models) for more details. + +## Workflow Overview + +Using OpenVINO GenAI typically involves three main steps: + +1. **Model Preparation:** + - Download pre-converted model in OpenVINO IR format (e.g. from [OpenVINO Toolkit](https://huggingface.co/OpenVINO) organization on Hugging Face). + - Convert model from other frameworks to the OpenVINO IR format (e.g. using `optimum-intel`), optionally applying weights compression. + :::info + + You can use models from [Hugging Face](https://huggingface.co/) and [ModelScope](https://modelscope.cn/home) + + Refer to [Model Preparation](/docs/category/model-preparation) for more details. + + ::: +2. **Pipeline Setup:** Initialize the appropriate pipeline for your task (`LLMPipeline`, `Text2ImagePipeline`, `WhisperPipeline`, `VLMPipeline`, etc.) with the converted model. +3. **Inference:** Run the model with your inputs using the pipeline's simple API. + +![OpenVINO GenAI Workflow](/img/openvino-genai-workflow.svg) + +## Comparison with Alternatives + +Unlike base OpenVINO, which requires manual implementation of generation loops, tokenization, scheduling etc., OpenVINO GenAI provides these components in a ready-to-use package. 
+ +Compared to Hugging Face Optimum Intel, OpenVINO GenAI offers a smaller footprint, fewer dependencies, and better performance optimization options, particularly for C++ applications. + +| Feature | OpenVINO GenAI | Base OpenVINO | Hugging Face Optimum Intel | +|-|-|-|-| +| Easy-to-use APIs | ✅ | ❌ | ✅ | +| Low footprint | ✅ | ✅ | ❌ | +| C++ support | ✅ | ✅ | ❌ | +| Node.js bindings | ✅ | ✅ | ❌ | +| Pre-built pipelines | ✅ | ❌ | ✅ | +| Model variety | Medium | High | High | + +## Additional Resources + +- [OpenVINO Generative AI workflow](https://docs.openvino.ai/2026/openvino-workflow-generative.html) +- [Optimum Intel and OpenVINO](https://huggingface.co/docs/optimum/intel/openvino/export) +- [OpenVINO Notebooks with GenAI](https://openvinotoolkit.github.io/openvino_notebooks/?libraries=OpenVINO+GenAI) + +Explore blogs to setup your first hands-on experience with OpenVINO GenAI: + +- [How to Build OpenVINO™ GenAI APP in C++](https://medium.com/openvino-toolkit/how-to-build-openvino-genai-app-in-c-32dcbe42fa67) +- [How to run Llama 3.2 locally with OpenVINO™](https://medium.com/openvino-toolkit/how-to-run-llama-3-2-locally-with-openvino-60a0f3674549) diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/_category_.json b/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/_category_.json new file mode 100644 index 0000000..ee9e561 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/_category_.json @@ -0,0 +1,5 @@ +{ + "label": "Guides", + "position": 3, + "link": null +} diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/chat-scenario.mdx b/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/chat-scenario.mdx new file mode 100644 index 0000000..48f27f9 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/chat-scenario.mdx @@ -0,0 +1,373 @@ +--- +sidebar_position: 2 +title: Chat Scenario +--- + +# Use OpenVINO GenAI in Chat Scenario + +For chat applications, OpenVINO 
GenAI provides special optimizations to maintain conversation context and improve performance using KV-cache. + +Refer to the [How It Works](/docs/concepts/how-it-works) for more information about KV-cache. + +:::info +Chat mode is supported for both `LLMPipeline` and `VLMPipeline`. +::: + +## `ChatHistory` + +[`ChatHistory`](https://docs.openvino.ai/2026/api/genai_api/_autosummary/openvino_genai.ChatHistory.html) stores conversation messages and optional metadata for chat templates. +Messages are stored as JSON-like objects, so it supports various nested message structures with any field names your model or chat template requires (not just simple `"role"` and `"content"` fields). + +A simple chat example (with grouped beam search decoding): + + + + ```python showLineNumbers + import openvino_genai as ov_genai + + pipe = ov_genai.LLMPipeline(model_path, 'CPU') + + config = {'max_new_tokens': 100, 'num_beam_groups': 3, 'num_beams': 15, 'diversity_penalty': 1.5} + pipe.set_generation_config(config) + + # highlight-next-line + chat_history = ov_genai.ChatHistory() + + while True: + try: + prompt = input('question:\n') + except EOFError: + break + + # highlight-next-line + chat_history.append({"role": "user", "content": prompt}) + # highlight-next-line + decoded_results = pipe.generate(chat_history) + # Add assistant's response to chat history + # highlight-next-line + chat_history.append({"role": "assistant", "content": decoded_results.texts[0]}) + + print('answer:\n') + print(decoded_results.texts[0]) + print('\n----------\n') + ``` + + + ```cpp showLineNumbers + #include "openvino/genai/llm_pipeline.hpp" + #include <iostream> + + int main(int argc, char* argv[]) { + std::string prompt; + + std::string model_path = argv[1]; + ov::genai::LLMPipeline pipe(model_path, "CPU"); + + ov::genai::GenerationConfig config; + config.max_new_tokens = 100; + config.num_beam_groups = 3; + config.num_beams = 15; + config.diversity_penalty = 1.0f; + + // highlight-next-line + 
ov::genai::ChatHistory chat_history; + + std::cout << "question:\n"; + while (std::getline(std::cin, prompt)) { + // highlight-next-line + chat_history.push_back({{"role", "user"}, {"content", std::move(prompt)}}); + // highlight-next-line + auto decoded_results = pipe.generate(chat_history, config); + // Add assistant's response to chat history + // highlight-next-line + chat_history.push_back({{"role", "assistant"}, {"content", std::move(decoded_results.texts[0])}}); + + std::cout << "answer:\n"; + std::cout << decoded_results.texts[0] << std::endl; + std::cout << "\n----------\n" + "question:\n"; + } + } + ``` + + + ```js showLineNumbers + import { LLMPipeline, ChatHistory } from "openvino-genai-node"; + import readline from 'readline'; + + const pipe = await LLMPipeline(model_path, 'CPU'); + + const config = { + max_new_tokens: 100, + num_beam_groups: 3, + num_beams: 15, + diversity_penalty: 1.5 + }; + + // highlight-next-line + const chatHistory = new ChatHistory(); + + const rl = readline.createInterface({ + input: process.stdin, + output: process.stdout, + }); + + console.log('question:'); + rl.on('line', async (prompt) => { + // highlight-next-line + chatHistory.push({ role: 'user', content: prompt }); + // highlight-next-line + const decodedResults = await pipe.generate(chatHistory, config); + // Add assistant's response to chat history + // highlight-next-line + chatHistory.push({ role: 'assistant', content: decodedResults.toString() }); + + console.log('answer:'); + console.log(decodedResults.toString()); + console.log('\n----------\nquestion:'); + }); + + rl.on('close', async () => { + process.exit(0); + }); + ``` + + + +:::info +`ChatHistory` messages are not updated automatically when using `pipe.generate()`. +You need to manually append user prompts and model responses to the `ChatHistory` instance as shown in the examples above. 
+::: + +### System Prompt + +Add a system message at the beginning to set the assistant's behavior: + + + + ```python showLineNumbers + import openvino_genai as ov_genai + + chat_history = ov_genai.ChatHistory() + chat_history.append({"role": "system", "content": "You are a helpful assistant."}) + + # Or using constructor + chat_history = ov_genai.ChatHistory([ + {"role": "system", "content": "You are a helpful assistant."} + ]) + ``` + + + ```cpp showLineNumbers + #include "openvino/genai/chat_history.hpp" + + ov::genai::ChatHistory chat_history; + chat_history.push_back({{"role", "system"}, {"content", "You are a helpful assistant."}}); + + // Or using constructor + ov::genai::ChatHistory chat_history({ + {{"role", "system"}, {"content", "You are a helpful assistant."}} + }); + ``` + + + ```js showLineNumbers + import { ChatHistory } from "openvino-genai-node"; + + const chatHistory = new ChatHistory(); + chatHistory.push({ role: 'system', content: 'You are a helpful assistant.' }); + + // Or using constructor + const chatHistory = new ChatHistory([ + { role: 'system', content: 'You are a helpful assistant.' } + ]); + ``` + + + +### Chat History Metadata + +Additionally, `ChatHistory` manages optional metadata for consistent chat template application: + - Tools definitions for function calling and agentic scenarios + - Custom chat template variables (e.g. 
`enable_thinking` for models with extended reasoning like Qwen3) + + + + ```python showLineNumbers + import openvino_genai as ov_genai + import json + + chat_history = ov_genai.ChatHistory() + chat_history.append({"role": "system", "content": system_prompt}) + + # Load tools from JSON string + tools: list[dict] = json.loads("...") + + # Set tools definitions + # highlight-next-line + chat_history.set_tools(tools) + # Set custom chat template variables + # highlight-next-line + chat_history.set_extra_context({ "enable_thinking": True }) + + chat_history.append({"role": "user", "content": user_prompt}) + decoded_results = pipe.generate(chat_history, config) + # Add assistant's response to chat history + chat_history.append({"role": "assistant", "content": decoded_results.texts[0]}) + ``` + + + ```cpp showLineNumbers + #include "openvino/genai/chat_history.hpp" + + ov::genai::ChatHistory chat_history; + chat_history.push_back({{"role", "system"}, {"content", std::move(system_prompt)}}); + + // Load tools from JSON string + ov::genai::JsonContainer tools = ov::genai::JsonContainer::from_json_string("..."); + + // Set tools definitions + // highlight-next-line + chat_history.set_tools(tools); + // Set custom chat template variables + // highlight-next-line + chat_history.set_extra_context({{"enable_thinking", true}}); + + chat_history.push_back({{"role", "user"}, {"content", std::move(user_prompt)}}); + auto decoded_results = pipe.generate(chat_history, config); + // Add assistant's response to chat history + chat_history.push_back({{"role", "assistant"}, {"content", std::move(decoded_results.texts[0])}}); + ``` + + + ```js showLineNumbers + import { ChatHistory } from "openvino-genai-node"; + + const chatHistory = new ChatHistory(); + chatHistory.push({ role: 'system', content: systemPrompt }); + + // Load tools from JSON string + const tools = JSON.parse("..."); + + // Set tools definitions + // highlight-next-line + chatHistory.setTools(tools); + // Set custom chat 
template variables + // highlight-next-line + chatHistory.setExtraContext({ enable_thinking: true }); + + chatHistory.push({ role: 'user', content: userPrompt }); + const decodedResults = await pipe.generate(chatHistory, config); + // Add assistant's response to chat history + chatHistory.push({ role: 'assistant', content: decodedResults.toString() }); + ``` + + + +## `start_chat()` / `finish_chat()` API + +:::warning Deprecation Notice +`start_chat()` / `finish_chat()` API is deprecated and will be removed in the next major release. It is recommended to use `ChatHistory` for managing chat conversations. +::: + +A simple chat example (with grouped beam search decoding): + + + + ```python showLineNumbers + import openvino_genai as ov_genai + + pipe = ov_genai.LLMPipeline(model_path, 'CPU') + + config = {'max_new_tokens': 100, 'num_beam_groups': 3, 'num_beams': 15, 'diversity_penalty': 1.5} + pipe.set_generation_config(config) + + # highlight-next-line + pipe.start_chat() + while True: + try: + prompt = input('question:\n') + except EOFError: + break + answer = pipe.generate(prompt) + print('answer:\n') + print(answer) + print('\n----------\n') + # highlight-next-line + pipe.finish_chat() + ``` + + + ```cpp showLineNumbers + #include "openvino/genai/llm_pipeline.hpp" + #include + + int main(int argc, char* argv[]) { + std::string prompt; + + std::string model_path = argv[1]; + ov::genai::LLMPipeline pipe(model_path, "CPU"); + + ov::genai::GenerationConfig config; + config.max_new_tokens = 100; + config.num_beam_groups = 3; + config.num_beams = 15; + config.diversity_penalty = 1.0f; + + // highlight-next-line + pipe.start_chat(); + std::cout << "question:\n"; + while (std::getline(std::cin, prompt)) { + std::cout << "answer:\n"; + auto answer = pipe.generate(prompt, config); + std::cout << answer << std::endl; + std::cout << "\n----------\n" + "question:\n"; + } + // highlight-next-line + pipe.finish_chat(); + } + ``` + + + ```js showLineNumbers + import { LLMPipeline 
} from "openvino-genai-node"; + import readline from 'readline'; + + const pipe = await LLMPipeline(model_path, 'CPU'); + + const config = { + max_new_tokens: 100, + num_beam_groups: 3, + num_beams: 15, + diversity_penalty: 1.5 + }; + + // highlight-next-line + await pipe.startChat(); + + const rl = readline.createInterface({ + input: process.stdin, + output: process.stdout, + }); + + console.log('question:'); + rl.on('line', async (prompt) => { + console.log('answer:'); + const answer = await pipe.generate(prompt, config); + console.log(answer); + console.log('\n----------\nquestion:'); + }); + + rl.on('close', async () => { + // highlight-next-line + await pipe.finishChat(); + process.exit(0); + }); + ``` + + + +:::info +For more information, refer to the [Python](https://github.com/openvinotoolkit/openvino.genai/blob/master/samples/python/text_generation/chat_sample.py), [C++](https://github.com/openvinotoolkit/openvino.genai/blob/master/samples/cpp/text_generation/chat_sample.cpp), and [JavaScript](https://github.com/openvinotoolkit/openvino.genai/blob/master/samples/js/text_generation/chat_sample.js) chat samples. 
+::: diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/debug-logging.mdx b/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/debug-logging.mdx new file mode 100644 index 0000000..3e8a32a --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/debug-logging.mdx @@ -0,0 +1,88 @@ +--- +sidebar_position: 8 +--- + +# Debug Logging + +There are six log levels, which can be called explicitly or set via the `OPENVINO_LOG_LEVEL` environment variable: https://github.com/openvinotoolkit/openvino/blob/f35beb7f1355cb3f7f73b0d431933afbb6a8d3c6/src/inference/include/openvino/runtime/properties.hpp#L640 + +- -1 - `ov::log::Level::NO` +- 0 - `ov::log::Level::ERR` +- 1 - `ov::log::Level::WARNING` +- 2 - `ov::log::Level::INFO` +- 3 - `ov::log::Level::DEBUG` +- 4 - `ov::log::Level::TRACE` + +When setting the environment variable `OPENVINO_LOG_LEVEL` > `ov::log::Level::DEBUG`, the properties of the compiled model will be printed. + + + + ```sh + export OPENVINO_LOG_LEVEL=3 + ``` + + + ```sh + set OPENVINO_LOG_LEVEL=3 + ``` + + + +After pipeline initialization and reading the model, the properties of the compiled model will be printed to the console. + +```sh title="Output:" +NETWORK_NAME: Model0 +OPTIMAL_NUMBER_OF_INFER_REQUESTS: 1 +NUM_STREAMS: 1 +INFERENCE_NUM_THREADS: 48 +PERF_COUNT: NO +INFERENCE_PRECISION_HINT: bf16 +PERFORMANCE_HINT: LATENCY +EXECUTION_MODE_HINT: PERFORMANCE +PERFORMANCE_HINT_NUM_REQUESTS: 0 +ENABLE_CPU_PINNING: YES +SCHEDULING_CORE_TYPE: ANY_CORE +MODEL_DISTRIBUTION_POLICY: +ENABLE_HYPER_THREADING: NO +EXECUTION_DEVICES: CPU +CPU_DENORMALS_OPTIMIZATION: NO +LOG_LEVEL: LOG_NONE +CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1 +DYNAMIC_QUANTIZATION_GROUP_SIZE: 32 +KV_CACHE_PRECISION: f16 +AFFINITY: CORE +EXECUTION_DEVICES: +CPU: Intel(R) Xeon(R) Platinum 8468 +``` + +When a Speculative Decoding or Prompt Lookup pipeline is executed, performance metrics will also be printed.
+ +```sh title="Output:" +=============================== +Total duration, sec: 26.6217 +Draft model duration, sec: 1.60329 +Main model duration, sec: 25.0184 +Draft model duration, %: 6.02248 +Main model duration, %: 93.9775 +AVG acceptance rate, %: 21.6809 +=============================== +REQUEST_ID: 0 +Main model iterations: 47 +Token per sec: 3.75633 +AVG acceptance rate, %: 21.6809 +Accepted tokens by draft model: 51 +Generated tokens: 100 +Accepted token rate, %: 51 +=============================== +Request_id: 0 ||| 40 0 40 20 0 0 40 40 0 20 20 20 0 40 0 0 20 80 0 80 20 0 0 0 40 80 0 40 60 40 80 0 0 0 0 40 20 20 0 40 20 40 0 20 0 0 0 +``` + +When a GGUF model is passed to the pipeline, the detailed debug info will also be printed. + +```sh title="Output:" +[GGUF Reader]: Loading and unpacking model from: gguf_models/qwen2.5-0.5b-instruct-q4_0.gguf +[GGUF Reader]: Loading and unpacking model done. Time: 196ms +[GGUF Reader]: Start generating OpenVINO model... +[GGUF Reader]: Save generated OpenVINO model to: gguf_models/openvino_model.xml done. Time: 466 ms +[GGUF Reader]: Model generation done. Time: 757ms +``` diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/lora-adapters.mdx b/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/lora-adapters.mdx new file mode 100644 index 0000000..56d9a86 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/lora-adapters.mdx @@ -0,0 +1,105 @@ +--- +sidebar_position: 4 +--- + +# LoRA Adapters + +## Overview + +Low-Rank Adaptation (LoRA) is a technique for efficiently fine-tuning large models without changing the base model's weights. +LoRA adapters enable customization of model outputs for specific tasks, styles, or domains while requiring significantly fewer computational resources than full fine-tuning. + +:::info +For more details about LoRA, see [Low-Rank Adaptation (LoRA)](/docs/concepts/lora). 
+::: + +OpenVINO GenAI provides built-in support for LoRA adapters in [text generation](/docs/use-cases/text-generation/), [image generation](/docs/use-cases/image-generation/), and [image processing (VLM)](/docs/use-cases/image-processing) pipelines. +This capability allows you to dynamically switch between or combine multiple adapters without recompiling the model. + +:::info +See [Supported Models](/docs/supported-models/) for the list of models that support LoRA adapters. +::: + +## Key Features + +- **Dynamic Adapter Application:** Apply LoRA adapters at runtime without model recompilation. +- **Multiple Adapter Support:** Blend effects from multiple adapters with different weights. +- **Adapter Switching:** Change adapters between generation calls without pipeline reconstruction. +- **Safetensors Format:** Support for industry-standard `safetensors` format for adapter files. + +## Using LoRA Adapters + + + + ```python + import openvino_genai as ov_genai + + # Initialize pipeline with adapters + adapter_config = ov_genai.AdapterConfig() + + # Add multiple adapters with different weights + adapter1 = ov_genai.Adapter("path/to/lora1.safetensors") + adapter2 = ov_genai.Adapter("path/to/lora2.safetensors") + + adapter_config.add(adapter1, alpha=0.5) + adapter_config.add(adapter2, alpha=0.5) + + pipe = ov_genai.LLMPipeline( + model_path, + "CPU", + adapters=adapter_config + ) + + # Generate with current adapters + output1 = pipe.generate("Generate story about", max_new_tokens=100) + + # Switch to different adapter configuration + new_config = ov_genai.AdapterConfig() + new_config.add(adapter1, alpha=1.0) + output2 = pipe.generate( + "Generate story about", + max_new_tokens=100, + adapters=new_config + ) + ``` + + + ```cpp + #include "openvino/genai/llm_pipeline.hpp" + + int main() { + ov::genai::AdapterConfig adapter_config; + + // Add multiple adapters with different weights + ov::genai::Adapter adapter1("path/to/lora1.safetensors"); + ov::genai::Adapter 
adapter2("path/to/lora2.safetensors"); + + adapter_config.add(adapter1, 0.5f); + adapter_config.add(adapter2, 0.5f); + + ov::genai::LLMPipeline pipe( + model_path, + "CPU", + ov::genai::adapters(adapter_config) + ); + + // Generate with current adapters + auto output1 = pipe.generate("Generate story about", ov::genai::max_new_tokens(100)); + + // Switch to different adapter configuration + ov::genai::AdapterConfig new_config; + new_config.add(adapter1, 1.0f); + auto output2 = pipe.generate( + "Generate story about", + ov::genai::adapters(new_config), + ov::genai::max_new_tokens(100) + ); + } + ``` + + + +## LoRA Adapters Sources + +1. **Hugging Face:** Browse adapters for various models at [huggingface.co/models](https://huggingface.co/models?other=lora) using "LoRA" filter. +2. **Civitai:** For stable diffusion models, [Civitai](https://civitai.com/) offers a wide range of LoRA adapters for various styles and subjects. diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/model-preparation/_category_.json b/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/model-preparation/_category_.json new file mode 100644 index 0000000..7b65526 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/model-preparation/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "Model Preparation", + "position": 1, + "link": { + "type": "generated-index", + "description": "Prepare generative models for inference with OpenVINO GenAI." + } +} diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/model-preparation/_use_cases_note.mdx b/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/model-preparation/_use_cases_note.mdx new file mode 100644 index 0000000..70ecf45 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/model-preparation/_use_cases_note.mdx @@ -0,0 +1,5 @@ +:::info + +Refer to the [Use Cases](/docs/category/use-cases) for detailed instructions on using models with OpenVINO GenAI. 
+ +::: diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/model-preparation/convert-to-openvino.mdx b/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/model-preparation/convert-to-openvino.mdx new file mode 100644 index 0000000..dd2b5bf --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/model-preparation/convert-to-openvino.mdx @@ -0,0 +1,73 @@ +--- +sidebar_position: 2 +description: How to convert models to OpenVINO format +--- + +import OptimumCLI from '@site/src/components/OptimumCLI'; +import UseCasesNote from './_use_cases_note.mdx'; + +# Convert Models to OpenVINO Format + +This page explains how to convert various generative AI models from [Hugging Face](https://huggingface.co/) and [ModelScope](https://modelscope.cn/) to OpenVINO IR format. +Refer to the [Supported Models](../../supported-models/index.mdx) for a list of available models. + +For downloading pre-converted models, see [Download Pre-Converted OpenVINO Models](./download-openvino-models.mdx). + +## Converting Models from Hugging Face + +1. Install `optimum-intel` package to download, convert and optimize models: + + :::info Note + Some models may require additional dependencies. + To convert models with `optimum-cli` and to run the examples, install the dependencies from [`./samples/export-requirements.txt`](https://github.com/openvinotoolkit/openvino.genai/blob/master/samples/export-requirements.txt). + ::: + + ```bash + pip install --requirement ./samples/export-requirements.txt + ``` + +2. Download and convert a model to the OpenVINO IR format using `optimum-cli` tool from Hugging Face: + + + :::tip + + For better performance with minimal accuracy impact, convert the model to lower precision by using `--weight-format` argument: + + + + + + + + + + + + + + ::: + + :::info + + The `--trust-remote-code` flag is required for some models that use custom code. 
+ + Check a full list of conversion options [here](https://huggingface.co/docs/optimum/en/intel/openvino/export). + + ::: + +## Converting Models from ModelScope + +ModelScope models need to be downloaded first, then converted to OpenVINO IR format. + +1. Install `modelscope` and `optimum-intel` packages to download, convert and optimize models: + ```bash + pip install modelscope --requirement ./samples/export-requirements.txt + ``` +2. Download the required model (e.g. `Qwen/Qwen2-7b`) to a local directory using `modelscope` tool: + ```bash + modelscope download --model 'Qwen/Qwen2-7b' --local_dir + ``` +3. Convert the model (and optionally compress weights) using `optimum-cli` tool: + + + diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/model-preparation/download-openvino-models.mdx b/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/model-preparation/download-openvino-models.mdx new file mode 100644 index 0000000..d3cf2c9 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/model-preparation/download-openvino-models.mdx @@ -0,0 +1,53 @@ +--- +sidebar_position: 1 +description: How to get pre-converted OpenVINO models +--- + +import UseCasesNote from './_use_cases_note.mdx'; + +# Download Pre-Converted OpenVINO Models + +OpenVINO GenAI allows to run different generative AI models (see [Supported Models](../../supported-models/index.mdx)). +While you can convert models from other frameworks (see [Convert Models to OpenVINO Format](./convert-to-openvino.mdx)), using pre-converted models from [Hugging Face](https://huggingface.co/) and [ModelScope](https://modelscope.cn/) can save time and effort. + +## Download from Hugging Face + +The simplest way to download models is using the `huggingface_hub` package: +1. Install the package: + ```bash + pip install huggingface_hub + ``` +2. Download the model, specifying model id (e.g. 
[`OpenVINO/phi-2-fp16-ov`](https://huggingface.co/OpenVINO/phi-2-fp16-ov)) and output directory `model_path`: + ```bash + huggingface-cli download "OpenVINO/phi-2-fp16-ov" --local-dir model_path + ``` + :::info + The `-ov` suffix in model id usually defines OpenVINO pre-converted model. + ::: + +:::tip Available Model Collections +OpenVINO offers collections of pre-converted and pre-optimized models available on Hugging Face under the [OpenVINO Toolkit](https://huggingface.co/OpenVINO) organization: + +- [Large Language Models](https://huggingface.co/collections/OpenVINO/llm-6687aaa2abca3bbcec71a9bd) +- [Image Generation](https://huggingface.co/collections/OpenVINO/image-generation-67697d9952fb1eee4a252aa8) +- [Speech-to-Text](https://huggingface.co/collections/OpenVINO/speech-to-text-672321d5c070537a178a8aeb) +- [Visual Language Models](https://huggingface.co/collections/OpenVINO/visual-language-models-6792248a0eed57085d2b094b) +- [Speculative Decoding Draft Models Collection](https://huggingface.co/collections/OpenVINO/speculative-decoding-draft-models-673f5d944d58b29ba6e94161) +- and others. + +These models are ready to use with OpenVINO GenAI. +::: + + +## Download from ModelScope + +1. Install the package: + ```bash + pip install modelscope + ``` +2. Download the model, specifying model id (e.g. 
[`OpenVINO/phi-2-fp16-ov`](https://modelscope.cn/models/OpenVINO/phi-2-fp16-ov)) and output directory `model_path`: + ```bash + modelscope download --model "OpenVINO/phi-2-fp16-ov" --local_dir model_path + ``` + + diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/performance-metrics.mdx b/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/performance-metrics.mdx new file mode 100644 index 0000000..a5cdcd0 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/performance-metrics.mdx @@ -0,0 +1,226 @@ +--- +sidebar_position: 5 +--- + +# Performance Metrics + +## Overview + +[`openvino_genai.PerfMetrics`](https://docs.openvino.ai/2026/api/genai_api/_autosummary/openvino_genai.PerfMetrics.html) (referred to as `PerfMetrics` for simplicity) is a structure that holds performance metrics for each generate call. +`PerfMetrics` holds fields with mean and standard deviations for the following metrics: +- Time To the First Token (TTFT), ms +- Time per Output Token (TPOT), ms/token +- Generate total duration, ms +- Tokenization duration, ms +- Detokenization duration, ms +- Throughput, tokens/s + +and: + +- Load time, ms +- Number of generated tokens +- Number of tokens in the input prompt + +Performance metrics are stored either in the [`DecodedResults`](https://docs.openvino.ai/2026/api/genai_api/_autosummary/openvino_genai.DecodedResults.html) or [`EncodedResults`](https://docs.openvino.ai/2026/api/genai_api/_autosummary/openvino_genai.EncodedResults.html) in the `perf_metrics` field. +In addition to the fields mentioned above, `PerfMetrics` has a member `raw_metrics` of type [`openvino_genai.RawPerfMetrics`](https://docs.openvino.ai/2026/api/genai_api/_autosummary/openvino_genai.RawPerfMetrics.html) that contains raw values for the durations of each batch of new token generation, tokenization durations, detokenization durations, and more.
+These raw metrics are accessible if you wish to calculate your own statistical values such as median or percentiles. +However, since mean and standard deviation values are usually sufficient, we will focus on `PerfMetrics`. + + + + ```python + import openvino_genai as ov_genai + pipe = ov_genai.LLMPipeline(models_path, "CPU") + result = pipe.generate(["The Sun is yellow because"], max_new_tokens=20) + perf_metrics = result.perf_metrics + + print(f'Generate duration: {perf_metrics.get_generate_duration().mean:.2f}') + print(f'TTFT: {perf_metrics.get_ttft().mean:.2f} ms') + print(f'TPOT: {perf_metrics.get_tpot().mean:.2f} ms/token') + print(f'Throughput: {perf_metrics.get_throughput().mean:.2f} tokens/s') + ``` + + + ```cpp + #include "openvino/genai/llm_pipeline.hpp" + #include + + int main(int argc, char* argv[]) { + std::string models_path = argv[1]; + ov::genai::LLMPipeline pipe(models_path, "CPU"); + auto result = pipe.generate("The Sun is yellow because", ov::genai::max_new_tokens(20)); + auto perf_metrics = result.perf_metrics; + + std::cout << std::fixed << std::setprecision(2); + std::cout << "Generate duration: " << perf_metrics.get_generate_duration().mean << " ms" << std::endl; + std::cout << "TTFT: " << perf_metrics.get_ttft().mean << " ms" << std::endl; + std::cout << "TPOT: " << perf_metrics.get_tpot().mean << " ms/token " << std::endl; + std::cout << "Throughput: " << perf_metrics.get_throughput().mean << " tokens/s" << std::endl; + } + ``` + + + ```js + import { LLMPipeline } from "openvino-genai-node"; + + const pipe = await LLMPipeline(models_path, "CPU"); + const result = await pipe.generate("The Sun is yellow because", { + max_new_tokens: 20, + return_decoded_results: true + }); + const perf_metrics = result.perfMetrics; + + console.log(`Generate duration: ${perf_metrics.getGenerateDuration().mean.toFixed(2)} ms`); + console.log(`TTFT: ${perf_metrics.getTTFT().mean.toFixed(2)} ms`); + console.log(`TPOT: ${perf_metrics.getTPOT().mean.toFixed(2)} ms/token`); +
console.log(`Throughput: ${perf_metrics.getThroughput().mean.toFixed(2)} tokens/s`); + ``` + + + +```sh title="Output:" +Generate duration: 702.85 +TTFT: 137.58 ms +TPOT: 29.74 ms/token +Throughput: 33.62 tokens/s +``` + +:::info Note +If the input prompt is just a string, the generate function returns only a string without perf_metrics. +To obtain perf_metrics, provide the prompt as a list with at least one element or call generate with encoded inputs. +::: + +:::info Note +TPOT (Time Per Output Token) represents the average time required to generate each output token in the final result. +For beam search scenario, TPOT is calculated based on the effective output tokens delivered to users, not the tokens generated by individual beams during internal processing. +::: + +## Accumulating Metrics + +Several `perf_metrics` can be added to each other. +In that case `raw_metrics` are concatenated and mean/std values are recalculated. +This accumulates statistics from several `generate()` calls. 
+ + + + ```python + import openvino_genai as ov_genai + pipe = ov_genai.LLMPipeline(models_path, "CPU") + res_1 = pipe.generate(["The Sun is yellow because"], max_new_tokens=20) + res_2 = pipe.generate(["Why Sky is blue because"], max_new_tokens=20) + perf_metrics = res_1.perf_metrics + res_2.perf_metrics + + print(f'Generate duration: {perf_metrics.get_generate_duration().mean:.2f}') + print(f'TTFT: {perf_metrics.get_ttft().mean:.2f} ms') + print(f'TPOT: {perf_metrics.get_tpot().mean:.2f} ms/token') + print(f'Throughput: {perf_metrics.get_throughput().mean:.2f} tokens/s') + ``` + + + ```cpp + #include "openvino/genai/llm_pipeline.hpp" + #include + + int main(int argc, char* argv[]) { + std::string models_path = argv[1]; + ov::genai::LLMPipeline pipe(models_path, "CPU"); + auto result_1 = pipe.generate("The Sun is yellow because", ov::genai::max_new_tokens(20)); + auto result_2 = pipe.generate("The Sun is yellow because", ov::genai::max_new_tokens(20)); + auto perf_metrics = result_1.perf_metrics + result_2.perf_metrics; + + std::cout << std::fixed << std::setprecision(2); + std::cout << "Generate duration: " << perf_metrics.get_generate_duration().mean << " ms" << std::endl; + std::cout << "TTFT: " << perf_metrics.get_ttft().mean << " ms" << std::endl; + std::cout << "TPOT: " << perf_metrics.get_tpot().mean << " ms/token " << std::endl; + std::cout << "Throughput: " << perf_metrics.get_throughput().mean << " tokens/s" << std::endl; + } + ``` + + + ```js + import { LLMPipeline } from "openvino-genai-node"; + + const pipe = await LLMPipeline(models_path, "CPU"); + const res_1 = await pipe.generate("The Sun is yellow because", { + max_new_tokens: 20, + return_decoded_results: true + }); + const res_2 = await pipe.generate("Why Sky is blue because", { + max_new_tokens: 20, + return_decoded_results: true + }); + const perf_metrics = res_1.perfMetrics.add(res_2.perfMetrics); + + console.log(`Generate duration: ${perf_metrics.getGenerateDuration().mean.toFixed(2)} ms`); +
console.log(`TTFT: ${perf_metrics.getTTFT().mean.toFixed(2)} ms`); + console.log(`TPOT: ${perf_metrics.getTPOT().mean.toFixed(2)} ms/token`); + console.log(`Throughput: ${perf_metrics.getThroughput().mean.toFixed(2)} tokens/s`); + ``` + + + +## Using Raw Performance Metrics + +In addition to mean and standard deviation values, the `perf_metrics` object has a `raw_metrics` field. +This field stores raw data, including: + +- Timestamps for each batch of generated tokens +- Batch sizes for each timestamp +- Tokenization durations +- Detokenization durations +- Other relevant metrics + +These metrics can be used for more fine-grained analysis, such as calculating exact median values, percentiles, etc. + +Below are a few examples of how to use raw metrics. + +#### Getting timestamps for each generated token: + +```python +import openvino_genai as ov_genai +pipe = ov_genai.LLMPipeline(models_path, "CPU") +result = pipe.generate(["The Sun is yellow because"], max_new_tokens=20) +perf_metrics = result.perf_metrics +raw_metrics = perf_metrics.raw_metrics + +print(f'Generate duration: {perf_metrics.get_generate_duration().mean:.2f}') +print(f'Throughput: {perf_metrics.get_throughput().mean:.2f} tokens/s') +print(f'Timestamps: {" ms, ".join(f"{i:.2f}" for i in raw_metrics.m_new_token_times)}') +``` + +#### Getting pure inference time without tokenization and detokenization duration: + +```python +import openvino_genai as ov_genai +import numpy as np +pipe = ov_genai.LLMPipeline(models_path, "CPU") +result = pipe.generate(["The Sun is yellow because"], max_new_tokens=20) +perf_metrics = result.perf_metrics +print(f'Generate duration: {perf_metrics.get_generate_duration().mean:.2f} ms') + +raw_metrics = perf_metrics.raw_metrics +generate_duration = np.array(raw_metrics.generate_durations) +tok_detok_duration = np.array(raw_metrics.tokenization_durations) + np.array(raw_metrics.detokenization_durations) +pure_inference_duration = np.sum(generate_duration - tok_detok_duration) /
1000 # in milliseconds +print(f'Pure Inference duration: {pure_inference_duration:.2f} ms') +``` + +#### Using raw metrics to calculate median value of generate duration: + +```python +import openvino_genai as ov_genai +import numpy as np +pipe = ov_genai.LLMPipeline(models_path, "CPU") +result = pipe.generate(["The Sun is yellow because"], max_new_tokens=20) +perf_metrics = result.perf_metrics +raw_metrics = perf_metrics.raw_metrics + +print(f'Generate duration: {perf_metrics.get_generate_duration().mean:.2f}') +print(f'Throughput: {perf_metrics.get_throughput().mean:.2f} tokens/s') +durations = np.array(raw_metrics.m_new_token_times[1:]) - np.array(raw_metrics.m_new_token_times[:-1]) +print(f'Median from token to token duration: {np.median(durations):.2f} ms') +``` + +:::tip +For more examples of how metrics are used, please refer to the Python [benchmark_genai.py](https://github.com/openvinotoolkit/openvino.genai/blob/master/samples/python/text_generation/README.md#9-llms-benchmarking-sample-benchmark_genai) and C++ [benchmark_genai](https://github.com/openvinotoolkit/openvino.genai/blob/master/samples/cpp/text_generation/README.md#9-llms-benchmarking-sample-benchmark_genai) samples. +::: diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/streaming.mdx b/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/streaming.mdx new file mode 100644 index 0000000..b6d2df7 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/streaming.mdx @@ -0,0 +1,217 @@ +--- +sidebar_position: 3 +--- + +# Streaming the Output + +For more interactive UIs during generation, you can stream output tokens. + +:::info +Streaming is supported for `LLMPipeline`, `VLMPipeline` and `WhisperPipeline`. 
+::: + +## Streaming Function + +In this example, a function outputs words to the console immediately upon generation: + + + + ```python showLineNumbers + import openvino_genai as ov_genai + + pipe = ov_genai.LLMPipeline(model_path, "CPU") + + # highlight-start + # Create a streamer function + def streamer(subword): + print(subword, end='', flush=True) + # Return flag corresponds whether generation should be stopped. + return ov_genai.StreamingStatus.RUNNING + # highlight-end + + # highlight-next-line + pipe.start_chat() + while True: + try: + prompt = input('question:\n') + except EOFError: + break + # highlight-next-line + pipe.generate(prompt, streamer=streamer, max_new_tokens=100) + print('\n----------\n') + # highlight-next-line + pipe.finish_chat() + ``` + + + ```cpp showLineNumbers + #include "openvino/genai/llm_pipeline.hpp" + #include + + int main(int argc, char* argv[]) { + std::string prompt; + + std::string model_path = argv[1]; + ov::genai::LLMPipeline pipe(model_path, "CPU"); + + // highlight-start + // Create a streamer function + auto streamer = [](std::string word) { + std::cout << word << std::flush; + // Return flag corresponds whether generation should be stopped. 
+ return ov::genai::StreamingStatus::RUNNING; + }; + // highlight-end + + // highlight-next-line + pipe.start_chat(); + std::cout << "question:\n"; + while (std::getline(std::cin, prompt)) { + // highlight-next-line + pipe.generate(prompt, ov::genai::streamer(streamer), ov::genai::max_new_tokens(100)); + std::cout << "\n----------\n" + "question:\n"; + } + // highlight-next-line + pipe.finish_chat(); + } + ``` + + + ```js showLineNumbers + import { LLMPipeline } from "openvino-genai-node"; + import readline from 'readline'; + + const pipe = await LLMPipeline(model_path, "CPU"); + + // highlight-start + // Create a streamer function + function streamer(subword) { + process.stdout.write(subword); + } + // highlight-end + + // highlight-next-line + await pipe.startChat(); + + const rl = readline.createInterface({ + input: process.stdin, + output: process.stdout, + }); + + console.log('question:'); + rl.on('line', async (prompt) => { + // highlight-next-line + await pipe.generate(prompt, { max_new_tokens: 100 }, streamer); + console.log('\n----------\nquestion:'); + }); + + rl.on('close', async () => { + // highlight-next-line + await pipe.finishChat(); + process.exit(0); + }); + ``` + + + +## Custom Streamer Class + +You can also create your custom streamer for more sophisticated processing: + + + + ```python showLineNumbers + import openvino_genai as ov_genai + + pipe = ov_genai.LLMPipeline(model_path, "CPU") + + # highlight-start + # Create custom streamer class + class CustomStreamer(ov_genai.StreamerBase): + def __init__(self): + super().__init__() + # Initialization logic. + + def write(self, token: int | list[int]) -> ov_genai.StreamingStatus: + # Custom processing logic for new decoded token(s). + + # Return flag corresponds whether generation should be stopped. + return ov_genai.StreamingStatus.RUNNING + + def end(self): + # Custom finalization logic. 
+ pass + # highlight-end + + # highlight-next-line + pipe.start_chat() + while True: + try: + prompt = input('question:\n') + except EOFError: + break + # highlight-next-line + pipe.generate(prompt, streamer=CustomStreamer(), max_new_tokens=100) + print('\n----------\n') + # highlight-next-line + pipe.finish_chat() + ``` + + + ```cpp showLineNumbers + #include "openvino/genai/streamer_base.hpp" + #include "openvino/genai/llm_pipeline.hpp" + #include + + // highlight-start + // Create custom streamer class + class CustomStreamer: public ov::genai::StreamerBase { + public: + ov::genai::StreamingStatus write(int64_t token) { + // Custom processing logic for new decoded token. + + // Return flag corresponds whether generation should be stopped. + return ov::genai::StreamingStatus::RUNNING; + }; + + ov::genai::StreamingStatus write(const std::vector& tokens) { + // Custom processing logic for new vector of decoded tokens. + + // Return flag corresponds whether generation should be stopped. + return ov::genai::StreamingStatus::RUNNING; + }; + + void end() { + // Custom finalization logic. + }; + }; + // highlight-end + + int main(int argc, char* argv[]) { + std::string prompt; + // highlight-next-line + std::shared_ptr custom_streamer; + + std::string model_path = argv[1]; + ov::genai::LLMPipeline pipe(model_path, "CPU"); + + // highlight-next-line + pipe.start_chat(); + std::cout << "question:\n"; + while (std::getline(std::cin, prompt)) { + // highlight-next-line + pipe.generate(prompt, ov::genai::streamer(custom_streamer), ov::genai::max_new_tokens(100)); + std::cout << "\n----------\n" + "question:\n"; + } + // highlight-next-line + pipe.finish_chat(); + } + ``` + + + +:::info +For fully implemented iterable `CustomStreamer` refer to [multinomial_causal_lm](https://github.com/openvinotoolkit/openvino.genai/blob/master/samples/python/text_generation/multinomial_causal_lm.py) sample. 
+::: diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/structured-output.mdx b/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/structured-output.mdx new file mode 100644 index 0000000..aeb25c3 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/structured-output.mdx @@ -0,0 +1,130 @@ +--- +sidebar_position: 7 +--- + +# Structured Output + +Structured output generation forces a model to produce responses that follow a defined format - for example JSON, a regular expression, or an EBNF grammar. OpenVINO GenAI provides a flexible structured output enforcement so the model can generate well‑formed output. + +## How It Works + +![Example from XGrammar paper](/img/structured_output_work_example.png) + +Structured output generation consists of several steps: +1. Compile the structured-output configuration (JSON Schema, regex, or EBNF grammar) into an internal grammar representation. +2. After the model computes the probability distribution for the next token, a mask is applied that prevents tokens incompatible with the current grammar state from being sampled. +3. When a token is sampled, advance the grammar state and recompute the mask for the next sampling round. + +Refer to [XGrammar paper](https://arxiv.org/pdf/2411.15100) for more details. + +:::info +Structured enforcement guarantees the syntactic format but not the semantic correctness of generated data. Always validate and sanitize outputs. +::: + +:::info +Structured constraints can affect model behavior and may reduce accuracy on some downstream tasks. +::: + +## What OpenVINO GenAI Provides + +- Base structured output formats: JSON, regex, and grammar (EBNF) based structured generation. +- Structural tags: + - Compound grammars: combine several grammars to handle more complex tasks or enforce different function calling styles. + - Tags: blend regular free-form generation with structural tags during single generate call. 
+ +## Structured Output Configuration + +Structured output is configured through a `StructuredOutputConfig` (exposed in the Python and C++ APIs). The main options are: + +- `json_schema` - a JSON Schema that the generator will try to satisfy; useful when you need typed fields and nested structures. +- `regex` - a regular expression that the output must match. +- `grammar` - an EBNF grammar that describes the structure of the output. +- `structural_tags_config` and `compound_grammar` (deprecated) - advanced options to combine multiple grammars. + +:::info +You should set only one primary structured constraint (for example `json_schema`, `regex`, or `grammar`) on a `StructuredOutputConfig` instance. The library validates the configuration and will raise an error if conflicting constraints are provided (for example both `json_schema` and `regex`). +::: + +:::warning Deprecation Note +The `StructuralTagsConfig` class and the `compound_grammar` field are deprecated. Use the `structural_tags_config` field on `StructuredOutputConfig` instead; it provides the same functionality and is the recommended API going forward. 
+::: + +## Using Structured Output + +### Examples + + + +```python +import json +import openvino_genai as ov_genai + + +pipe = ov_genai.LLMPipeline(model_path, "CPU") + +# Structured output configuration: simple JSON schema +so_config = ov_genai.StructuredOutputConfig( + json_schema=json.dumps({ + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"} + }, + "required": ["name"] + }), +) +gen_config = ov_genai.GenerationConfig(max_new_tokens=100, structured_output_config=so_config) + +prompt = "Extract a person's name and age from the text and return only a JSON object.\nText: 'Alice is 29 years old.'" +result = pipe.generate(prompt, generation_config=gen_config) +print(json.loads(result)) +``` + + +```cpp +#include + +using namespace ov::genai; + +int main() { + std::string models_path = "/path/to/model/dir"; + auto pipe = LLMPipeline(models_path, "CPU"); + + // Build a JSON schema object (represented using the SDK types) and configure structured output + StructuredOutputConfig so_config; + so_config.json_schema = R"({ + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"} + }, + "required": ["name"] + })"; + + std::string prompt = "Extract a person's name and age and return only a JSON object.\nText: 'Bob is 34 years old.'"; + + // Attach structured output config to a GenerationConfig and call generate + GenerationConfig gen_config; + gen_config.max_new_tokens = 100; + gen_config.structured_output_config = so_config; + + auto result = pipe.generate(prompt, gen_config); + + std::cout << "Model output: " << result.texts[0] << std::endl; + return 0; +} +``` + + + +### Samples and Further Reading + +You can find complete sample programs that demonstrate structured output generation here: + +- [Python sample](https://github.com/openvinotoolkit/openvino.genai/blob/master/samples/python/text_generation/structured_output_generation.py) +- [C++ 
sample](https://github.com/openvinotoolkit/openvino.genai/blob/master/samples/cpp/text_generation/structured_output_generation.cpp) +- [Combining multiple grammars for structured output](https://github.com/openvinotoolkit/openvino.genai/blob/master/samples/python/text_generation/compound_grammar_generation.py) +- [Trigger structured output during regular generation run](https://github.com/openvinotoolkit/openvino.genai/blob/master/samples/python/text_generation/structural_tags_generation.py) +- [Test-driven examples](https://github.com/openvinotoolkit/openvino.genai/blob/master/tests/python_tests/test_structured_output.py) (comprehensive examples covering JSON, regex, EBNF and structural tags) + +OpenVINO GenAI JavaScript bindings also support the Structured Output feature. Check [JS samples](/docs/samples/js/text_generation#7-structured-output-sample-structured_output_sample) for usage examples. diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/tokenization.mdx b/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/tokenization.mdx new file mode 100644 index 0000000..644feda --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/guides/tokenization.mdx @@ -0,0 +1,179 @@ +--- +sidebar_position: 6 +--- + +# Tokenization + +OpenVINO GenAI provides a way to tokenize and detokenize text using the [`ov::genai::Tokenizer`](https://docs.openvino.ai/2026/api/genai_api/_autosummary/openvino_genai.Tokenizer.html) class. +The `Tokenizer` is a high level abstraction over the [OpenVINO Tokenizers](https://github.com/openvinotoolkit/openvino_tokenizers) library. + +It can be initialized from the path, in-memory IR representation or obtained from the `ov::genai::LLMPipeline` object. 
+ + + ```python + import openvino_genai as ov_genai + + # Initialize from the path + tokenizer = ov_genai.Tokenizer(models_path) + + # Or get tokenizer instance from LLMPipeline + pipe = ov_genai.LLMPipeline(models_path, "CPU") + tokenizer = pipe.get_tokenizer() + ``` + + + ```cpp + #include "openvino/genai/llm_pipeline.hpp" + + // Initialize from the path + auto tokenizer = ov::genai::Tokenizer(models_path); + + // Or get tokenizer instance from LLMPipeline + auto pipe = ov::genai::LLMPipeline(models_path, "CPU"); + tokenizer = pipe.get_tokenizer(); + ``` + + + ```js + import { LLMPipeline, Tokenizer } from 'openvino-genai-node'; + + let tokenizer; + + // Initialize from the path + tokenizer = new Tokenizer(models_path); + + // Or get tokenizer instance from LLMPipeline + const pipe = await LLMPipeline(models_path, "CPU"); + tokenizer = pipe.getTokenizer(); + ``` + + + +`Tokenizer` has `encode()` and `decode()` methods which support the following arguments: `add_special_tokens`, `skip_special_tokens`, `pad_to_max_length`, `max_length`. + +#### Example - disable adding special tokens: + + + + ```python + tokens = tokenizer.encode("The Sun is yellow because", add_special_tokens=False) + ``` + + + ```cpp + auto tokens = tokenizer.encode("The Sun is yellow because", ov::genai::add_special_tokens(false)); + ``` + + + ```js + const tokens = tokenizer.encode("The Sun is yellow because", { add_special_tokens: false }); + ``` + + + +The `encode()` method returns a [`TokenizedInputs`](https://docs.openvino.ai/2026/api/genai_api/_autosummary/openvino_genai.TokenizedInputs.html) object containing `input_ids` and `attention_mask`, both stored as `ov::Tensor`. +Since `ov::Tensor` requires fixed-length sequences, padding is applied to match the longest sequence in a batch, ensuring a uniform shape. +The resulting sequence is also truncated to `max_length`. +If this value is not defined by the user, it is taken from the IR. 
+ +Both padding and `max_length` can be controlled by the user. +If `pad_to_max_length` is set to true, then instead of padding to the longest sequence, the output is padded to `max_length`. + +#### Example - control padding: + + + + ```python + import openvino_genai as ov_genai + + tokenizer = ov_genai.Tokenizer(models_path) + prompts = ["The Sun is yellow because", "The"] + + # Since the prompts are definitely shorter than the maximal length (which is taken from the IR), it will not affect the shape. + # The resulting shape is defined by the length of the longest token sequence. + # Equivalent of HuggingFace hf_tokenizer.encode(prompt, padding="longest", truncation=True) + tokens = tokenizer.encode(["The Sun is yellow because", "The"]) + # or is equivalent to + tokens = tokenizer.encode(["The Sun is yellow because", "The"], pad_to_max_length=False) + print(tokens.input_ids.shape) + # out_shape: [2, 6] + + # Resulting tokens tensor will be padded to 1024, sequences which exceed this length will be truncated. + # Equivalent of HuggingFace hf_tokenizer.encode(prompt, padding="max_length", truncation=True, max_length=1024) + tokens = tokenizer.encode(["The Sun is yellow because", + "The", + "The longest string ever" * 2000], pad_to_max_length=True, max_length=1024) + print(tokens.input_ids.shape) + # out_shape: [3, 1024] + + # For single string prompts truncation and padding are also applied. + tokens = tokenizer.encode("The Sun is yellow because", pad_to_max_length=True, max_length=128) + print(tokens.input_ids.shape) + # out_shape: [1, 128] + ``` + + + ```cpp + #include "openvino/genai/llm_pipeline.hpp" + + auto tokenizer = ov::genai::Tokenizer(models_path); + std::vector<std::string> prompts = {"The Sun is yellow because", "The"}; + + // Since the prompts are definitely shorter than the maximal length (which is taken from the IR), it will not affect the shape. + // The resulting shape is defined by the length of the longest token sequence. 
+ // Equivalent of HuggingFace hf_tokenizer.encode(prompt, padding="longest", truncation=True) + auto tokens = tokenizer.encode({"The Sun is yellow because", "The"}); + // or is equivalent to + tokens = tokenizer.encode({"The Sun is yellow because", "The"}, ov::genai::pad_to_max_length(false)); + // out_shape: [2, 6] + + // Resulting tokens tensor will be padded to 1024. + // Equivalent of HuggingFace hf_tokenizer.encode(prompt, padding="max_length", truncation=True, max_length=1024) + tokens = tokenizer.encode({"The Sun is yellow because", + "The", + std::string(2000, 'n')}, ov::genai::pad_to_max_length(true), ov::genai::max_length(1024)); + // out_shape: [3, 1024] + + // For single string prompts truncation and padding are also applied. + tokens = tokenizer.encode({"The Sun is yellow because"}, ov::genai::pad_to_max_length(true), ov::genai::max_length(128)); + // out_shape: [1, 128] + ``` + + + ```js + import { Tokenizer } from 'openvino-genai-node'; + + const tokenizer = new Tokenizer(models_path); + const prompts = ["The Sun is yellow because", "The"]; + let tokens; + + // Since the prompts are definitely shorter than the maximal length (which is taken from the IR), it will not affect the shape. + // The resulting shape is defined by the length of the longest token sequence. + // Equivalent of HuggingFace hf_tokenizer.encode(prompt, padding="longest", truncation=True) + tokens = tokenizer.encode(["The Sun is yellow because", "The"]); + // or is equivalent to + tokens = tokenizer.encode(["The Sun is yellow because", "The"], { pad_to_max_length: false }); + console.log(tokens.input_ids.getShape()); + // out_shape: [2, 6] + + // Resulting tokens tensor will be padded to 1024, sequences which exceed this length will be truncated. 
+ // Equivalent of HuggingFace hf_tokenizer.encode(prompt, padding="max_length", truncation=True, max_length=1024) + tokens = tokenizer.encode([ + "The Sun is yellow because", + "The", + "The longest string ever".repeat(2000), + ], { + pad_to_max_length: true, + max_length: 1024, + }); + console.log(tokens.input_ids.getShape()); + // out_shape: [3, 1024] + + // For single string prompts truncation and padding are also applied. + tokens = tokenizer.encode("The Sun is yellow because", { pad_to_max_length: true, max_length: 128 }); + console.log(tokens.input_ids.getShape()); + // out_shape: [1, 128] + ``` + + diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/samples/_category_.json b/src/resources/openvino.genai-2026.1.0.0/site/docs/samples/_category_.json new file mode 100644 index 0000000..02cbcdb --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/samples/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "Samples", + "position": 5, + "link": { + "type": "doc", + "id": "samples/index" + } +} diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/samples/_components/samples-list/index.tsx b/src/resources/openvino.genai-2026.1.0.0/site/docs/samples/_components/samples-list/index.tsx new file mode 100644 index 0000000..6c360e5 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/samples/_components/samples-list/index.tsx @@ -0,0 +1,40 @@ +import Link from '@docusaurus/Link'; +import { usePluginData } from '@docusaurus/useGlobalData'; +import { type GenAISamples } from '@site/src/plugins/genai-samples-docs-plugin'; +import Heading from '@theme/Heading'; +import React from 'react'; + +function SamplesListItem({ + item: { language, name, githubLink }, +}: { + item: GenAISamples[string][number]; +}): React.JSX.Element { + return ( +
  • + {name} (GitHub) +
  • + ); +} + +export default function SamplesList(): React.JSX.Element { + const samplesMap = usePluginData('genai-samples-docs-plugin') as GenAISamples; + + return ( + <> + {Object.entries(samplesMap) + .sort(([a], [b]) => a.localeCompare(b)) + .map(([language, samples]) => ( +
    + {samples[0]?.languageTitle} +
      + {samples + .sort((a, b) => a.name.localeCompare(b.name)) + .map((sample) => ( + + ))} +
    +
    + ))} + + ); +} diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/samples/index.mdx b/src/resources/openvino.genai-2026.1.0.0/site/docs/samples/index.mdx new file mode 100644 index 0000000..4cc85bc --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/samples/index.mdx @@ -0,0 +1,9 @@ +--- +sidebar_position: 1 +--- + +import SamplesList from './_components/samples-list'; + +# OpenVINO GenAI Samples + + diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_category_.json b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_category_.json new file mode 100644 index 0000000..8c0ec25 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "Supported Models", + "position": 4, + "link": { + "type": "doc", + "id": "supported-models/index" + } +} diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/base-models-table/index.tsx b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/base-models-table/index.tsx new file mode 100644 index 0000000..e800c46 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/base-models-table/index.tsx @@ -0,0 +1,38 @@ +import Link from '@docusaurus/Link'; +import { Children } from 'react'; + +type BaseModelsTableProps = { + headers: string[]; + rows: React.JSX.Element[]; +}; + +export function BaseModelsTable({ headers, rows }: BaseModelsTableProps): React.JSX.Element { + return ( + + + + {headers.map((v) => ( + + ))} + + + {Children.map(rows, (row) => row)} +
    {v}
    + ); +} + +export const LinksCell = ({ links }: { links: string[] }) => ( + +
      + {links.map((link) => ( +
    • + {new URL(link).pathname.slice(1)} +
    • + ))} +
    + +); + +export const StatusCell = ({ value }: { value: boolean }) => ( + {value ? '✅' : '❌'} +); diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/image-generation-models-table/index.tsx b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/image-generation-models-table/index.tsx new file mode 100644 index 0000000..f0345c8 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/image-generation-models-table/index.tsx @@ -0,0 +1,31 @@ +import React from 'react'; +import { BaseModelsTable, LinksCell, StatusCell } from '../base-models-table'; +import { IMAGE_GENERATION_MODELS } from './models'; + +export default function ImageGenerationModelsTable(): React.JSX.Element { + const headers = [ + 'Architecture', + 'Text to Image', + 'Image to Image', + 'Inpainting', + 'LoRA Support', + 'Example HuggingFace Models', + ]; + + const rows = IMAGE_GENERATION_MODELS.map( + ({ architecture, textToImage, imageToImage, inpainting, loraSupport, links }) => ( + + + {architecture} + + + + + + + + ) + ); + + return ; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/image-generation-models-table/models.ts b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/image-generation-models-table/models.ts new file mode 100644 index 0000000..65182dd --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/image-generation-models-table/models.ts @@ -0,0 +1,113 @@ +type ImageGenerationModelType = { + architecture: string; + textToImage: boolean; + imageToImage: boolean; + inpainting: boolean; + loraSupport: boolean; + links: string[]; +}; + +export const IMAGE_GENERATION_MODELS: ImageGenerationModelType[] = [ + { + architecture: 'Latent Consistency Model', + textToImage: true, + imageToImage: true, + inpainting: true, + loraSupport: true, + links: 
['https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7'], + }, + { + architecture: 'Stable Diffusion', + textToImage: true, + imageToImage: true, + inpainting: true, + loraSupport: true, + links: [ + 'https://huggingface.co/CompVis/stable-diffusion-v1-1', + 'https://huggingface.co/CompVis/stable-diffusion-v1-2', + 'https://huggingface.co/CompVis/stable-diffusion-v1-3', + 'https://huggingface.co/CompVis/stable-diffusion-v1-4', + 'https://huggingface.co/junnyu/stable-diffusion-v1-4-paddle', + 'https://huggingface.co/jcplus/stable-diffusion-v1-5', + 'https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5', + 'https://huggingface.co/botp/stable-diffusion-v1-5', + 'https://huggingface.co/dreamlike-art/dreamlike-anime-1.0', + 'https://huggingface.co/stabilityai/stable-diffusion-2', + 'https://huggingface.co/stabilityai/stable-diffusion-2-base', + 'https://huggingface.co/stabilityai/stable-diffusion-2-1', + 'https://huggingface.co/bguisard/stable-diffusion-nano-2-1', + 'https://huggingface.co/justinpinkney/pokemon-stable-diffusion', + 'https://huggingface.co/stablediffusionapi/architecture-tuned-model', + 'https://huggingface.co/IDEA-CCNL/Taiyi-Stable-Diffusion-1B-Chinese-EN-v0.1', + 'https://huggingface.co/ZeroCool94/stable-diffusion-v1-5', + 'https://huggingface.co/pcuenq/stable-diffusion-v1-4', + 'https://huggingface.co/rinna/japanese-stable-diffusion', + 'https://huggingface.co/benjamin-paine/stable-diffusion-v1-5', + 'https://huggingface.co/philschmid/stable-diffusion-v1-4-endpoints', + 'https://huggingface.co/naclbit/trinart_stable_diffusion_v2', + 'https://huggingface.co/Fictiverse/Stable_Diffusion_PaperCut_Model', + ], + }, + { + architecture: 'Stable Diffusion Inpainting', + textToImage: false, + imageToImage: false, + inpainting: true, + loraSupport: true, + links: [ + 'https://huggingface.co/stabilityai/stable-diffusion-2-inpainting', + 'https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-inpainting', + 
'https://huggingface.co/botp/stable-diffusion-v1-5-inpainting', + 'https://huggingface.co/parlance/dreamlike-diffusion-1.0-inpainting', + ], + }, + { + architecture: 'Stable Diffusion XL', + textToImage: true, + imageToImage: true, + inpainting: true, + loraSupport: true, + links: [ + 'https://huggingface.co/stabilityai/stable-diffusion-xl-base-0.9', + 'https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0', + 'https://huggingface.co/stabilityai/sdxl-turbo', + 'https://huggingface.co/cagliostrolab/animagine-xl-4.0', + ], + }, + { + architecture: 'Stable Diffusion XL Inpainting', + textToImage: false, + imageToImage: false, + inpainting: true, + loraSupport: true, + links: ['https://huggingface.co/diffusers/stable-diffusion-xl-1.0-inpainting-0.1'], + }, + { + architecture: 'Stable Diffusion 3', + textToImage: true, + imageToImage: true, + inpainting: true, + loraSupport: false, + links: [ + 'https://huggingface.co/stabilityai/stable-diffusion-3-medium-diffusers', + 'https://huggingface.co/stabilityai/stable-diffusion-3.5-medium', + 'https://huggingface.co/stabilityai/stable-diffusion-3.5-large', + 'https://huggingface.co/stabilityai/stable-diffusion-3.5-large-turbo', + 'https://huggingface.co/tensorart/stable-diffusion-3.5-medium-turbo', + 'https://huggingface.co/tensorart/stable-diffusion-3.5-large-TurboX' + ], + }, + { + architecture: 'Flux', + textToImage: true, + imageToImage: true, + inpainting: true, + loraSupport: true, + links: [ + 'https://huggingface.co/black-forest-labs/FLUX.1-schnell', + 'https://huggingface.co/shuttleai/shuttle-3-diffusion', + 'https://huggingface.co/shuttleai/shuttle-3.1-aesthetic', + 'https://huggingface.co/shuttleai/shuttle-jaguar', + ], + }, +]; diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/llm-models-table/index.tsx b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/llm-models-table/index.tsx new file mode 100644 index 0000000..2397936 --- 
/dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/llm-models-table/index.tsx @@ -0,0 +1,27 @@ +import React from 'react'; +import { BaseModelsTable, LinksCell } from '../base-models-table'; +import { LLM_MODELS } from './models'; + +export default function LLMModelsTable(): React.JSX.Element { + const headers = ['Architecture', 'Models', 'Example HuggingFace Models']; + + const rows = LLM_MODELS.map(({ architecture, models }) => ( + <> + + + {architecture} + + {models[0].name} + + + {models.slice(1).map(({ name, links }) => ( + + {name} + + + ))} + + )); + + return ; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/llm-models-table/models.ts b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/llm-models-table/models.ts new file mode 100644 index 0000000..cef7cca --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/llm-models-table/models.ts @@ -0,0 +1,886 @@ +type LLMModelType = { + architecture: string; + models: Array<{ + name: string; + links: string[]; + }>; +}; + +export const LLM_MODELS: LLMModelType[] = [ + { + architecture: 'AquilaModel', + models: [ + { + name: 'Aquila', + links: [ + 'https://huggingface.co/BAAI/Aquila-7B', + 'https://huggingface.co/BAAI/AquilaChat-7B', + 'https://huggingface.co/BAAI/Aquila2-7B', + 'https://huggingface.co/BAAI/AquilaChat2-7B', + ], + }, + ], + }, + { + architecture: 'ArcticForCausalLM', + models: [ + { + name: 'Snowflake', + links: [ + 'https://huggingface.co/Snowflake/snowflake-arctic-instruct', + 'https://huggingface.co/Snowflake/snowflake-arctic-base', + ], + }, + ], + }, + { + architecture: 'BaichuanForCausalLM', + models: [ + { + name: 'Baichuan2', + links: [ + 'https://huggingface.co/baichuan-inc/Baichuan2-7B-Chat', + 'https://huggingface.co/baichuan-inc/Baichuan2-13B-Chat', + ], + }, + ], + }, + { + architecture: 'BloomForCausalLM', + models: [ + { + 
name: 'Bloom', + links: [ + 'https://huggingface.co/bigscience/bloom-560m', + 'https://huggingface.co/bigscience/bloom-1b1', + 'https://huggingface.co/bigscience/bloom-1b7', + 'https://huggingface.co/bigscience/bloom-3b', + 'https://huggingface.co/bigscience/bloom-7b1', + ], + }, + { + name: 'Bloomz', + links: [ + 'https://huggingface.co/bigscience/bloomz-560m', + 'https://huggingface.co/bigscience/bloomz-1b1', + 'https://huggingface.co/bigscience/bloomz-1b7', + 'https://huggingface.co/bigscience/bloomz-3b', + 'https://huggingface.co/bigscience/bloomz-7b1', + ], + }, + ], + }, + { + architecture: 'ChatGLMModel', + models: [ + { + name: 'ChatGLM', + links: [ + 'https://huggingface.co/THUDM/chatglm2-6b', + 'https://huggingface.co/THUDM/chatglm3-6b', + 'https://huggingface.co/THUDM/glm-4-9b', + 'https://huggingface.co/THUDM/glm-4-9b-chat', + ], + }, + ], + }, + { + architecture: 'CodeGenForCausalLM', + models: [ + { + name: 'CodeGen', + links: [ + 'https://huggingface.co/Salesforce/codegen-350m-multi', + 'https://huggingface.co/Salesforce/codegen-2B-multi', + 'https://huggingface.co/Salesforce/codegen-6B-multi', + 'https://huggingface.co/Salesforce/codegen-16B-multi', + 'https://huggingface.co/Salesforce/codegen-350m-mono', + 'https://huggingface.co/Salesforce/codegen-2B-mono', + 'https://huggingface.co/Salesforce/codegen-6B-mono', + 'https://huggingface.co/Salesforce/codegen-16B-mono', + 'https://huggingface.co/Salesforce/codegen2-1B_P', + 'https://huggingface.co/Salesforce/codegen2-3_7B_P', + 'https://huggingface.co/Salesforce/codegen2-7B_P', + 'https://huggingface.co/Salesforce/codegen2-16B_P', + ], + }, + ], + }, + { + architecture: 'CohereForCausalLM', + models: [ + { + name: 'Aya', + links: [ + 'https://huggingface.co/CohereLabs/aya-23-8B', + 'https://huggingface.co/CohereLabs/aya-expanse-8b', + 'https://huggingface.co/CohereLabs/aya-23-35B', + ], + }, + { + name: 'C4AI Command R', + links: [ + 'https://huggingface.co/CohereLabs/c4ai-command-r7b-12-2024', + 
'https://huggingface.co/CohereLabs/c4ai-command-r-v01', + ], + }, + ], + }, + { + architecture: 'DbrxForCausalLM', + models: [ + { + name: 'DBRX', + links: [ + 'https://huggingface.co/databricks/dbrx-instruct', + 'https://huggingface.co/databricks/dbrx-base', + ], + }, + ], + }, + { + architecture: 'DeciLMForCausalLM', + models: [ + { + name: 'DeciLM', + links: [ + 'https://huggingface.co/Deci/DeciLM-7B', + 'https://huggingface.co/Deci/DeciLM-7B-instruct', + ], + }, + ], + }, + { + architecture: 'DeepseekForCausalLM', + models: [ + { + name: 'DeepSeek-MoE', + links: [ + 'https://huggingface.co/deepseek-ai/deepseek-moe-16b-base', + 'https://huggingface.co/deepseek-ai/deepseek-moe-16b-chat', + ], + }, + ], + }, + { + architecture: 'DeepseekV2ForCausalLM', + models: [ + { + name: 'DeepSeekV2', + links: [ + 'https://huggingface.co/deepseek-ai/DeepSeek-V2-Lite', + 'https://huggingface.co/deepseek-ai/DeepSeek-V2-Lite-Chat', + 'https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct', + ], + }, + ], + }, + { + architecture: 'DeepseekV3ForCausalLM', + models: [ + { + name: 'DeepSeekV3', + links: [ + 'https://huggingface.co/deepseek-ai/DeepSeek-V3', + 'https://huggingface.co/deepseek-ai/DeepSeek-V3-Base', + 'https://huggingface.co/deepseek-ai/DeepSeek-R1', + 'https://huggingface.co/deepseek-ai/DeepSeek-R1-Zero', + ], + }, + ], + }, + { + architecture: 'ExaoneForCausalLM', + models: [ + { + name: 'Exaone', + links: [ + 'https://huggingface.co/LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct', + 'https://huggingface.co/LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct', + 'https://huggingface.co/LGAI-EXAONE/EXAONE-3.5-32B-Instruct', + 'https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct', + ], + }, + ], + }, + { + architecture: 'Exaone4ForCausalLM', + models: [ + { + name: 'Exaone 4.0', + links: [ + 'https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-1.2B', + ], + }, + ], + }, + { + architecture: 'FalconForCausalLM', + models: [ + { + name: 'Falcon', + links: [ + 
'https://huggingface.co/tiiuae/falcon-11B', + 'https://huggingface.co/tiiuae/falcon-7b', + 'https://huggingface.co/tiiuae/falcon-7b-instruct', + 'https://huggingface.co/tiiuae/falcon-40b', + 'https://huggingface.co/tiiuae/falcon-40b-instruct', + ], + }, + ], + }, + { + architecture: 'GemmaForCausalLM', + models: [ + { + name: 'Gemma', + links: [ + 'https://huggingface.co/google/gemma-2b', + 'https://huggingface.co/google/gemma-2b-it', + 'https://huggingface.co/google/gemma-1.1-2b-it', + 'https://huggingface.co/google/codegemma-2b', + 'https://huggingface.co/google/codegemma-1.1-2b', + 'https://huggingface.co/google/gemma-7b', + 'https://huggingface.co/google/gemma-7b-it', + 'https://huggingface.co/google/gemma-1.1-7b-it', + 'https://huggingface.co/google/codegemma-7b', + 'https://huggingface.co/google/codegemma-7b-it', + 'https://huggingface.co/google/codegemma-1.1-7b-it', + ], + }, + ], + }, + { + architecture: 'Gemma2ForCausalLM', + models: [ + { + name: 'Gemma2', + links: [ + 'https://huggingface.co/google/gemma-2-2b', + 'https://huggingface.co/google/gemma-2-2b-it', + 'https://huggingface.co/google/gemma-2-9b', + 'https://huggingface.co/google/gemma-2-9b-it', + 'https://huggingface.co/google/gemma-2-27b', + 'https://huggingface.co/google/gemma-2-27b-it', + ], + }, + ], + }, + { + architecture: 'Gemma3ForCausalLM', + models: [ + { + name: 'Gemma3', + links: [ + 'https://huggingface.co/google/gemma-3-270m', + 'https://huggingface.co/google/gemma-3-270m-it', + 'https://huggingface.co/google/gemma-3-1b-it', + 'https://huggingface.co/google/gemma-3-1b-pt', + ], + }, + ], + }, + { + architecture: 'GlmForCausalLM', + models: [ + { + name: 'GLM', + links: [ + 'https://huggingface.co/THUDM/glm-edge-1.5b-chat', + 'https://huggingface.co/THUDM/glm-edge-4b-chat', + 'https://huggingface.co/THUDM/glm-4-9b-hf', + 'https://huggingface.co/THUDM/glm-4-9b-chat-hf', + 'https://huggingface.co/THUDM/glm-4-9b-chat-1m-hf', + ], + }, + ], + }, + { + architecture: 'GPT2LMHeadModel', + 
models: [ + { + name: 'GPT2', + links: [ + 'https://huggingface.co/openai-community/gpt2', + 'https://huggingface.co/openai-community/gpt2-medium', + 'https://huggingface.co/openai-community/gpt2-large', + 'https://huggingface.co/openai-community/gpt2-xl', + 'https://huggingface.co/distilbert/distilgpt2', + ], + }, + { + name: 'CodeParrot', + links: [ + 'https://huggingface.co/codeparrot/codeparrot-small', + 'https://huggingface.co/codeparrot/codeparrot-small-code-to-text', + 'https://huggingface.co/codeparrot/codeparrot-small-text-to-code', + 'https://huggingface.co/codeparrot/codeparrot-small-multi', + 'https://huggingface.co/codeparrot/codeparrot', + ], + }, + ], + }, + { + architecture: 'GPTBigCodeForCausalLM', + models: [ + { + name: 'StarCoder', + links: [ + 'https://huggingface.co/bigcode/starcoderbase-1b', + 'https://huggingface.co/bigcode/starcoderbase-3b', + 'https://huggingface.co/bigcode/starcoderbase-7b', + 'https://huggingface.co/bigcode/starcoderbase', + 'https://huggingface.co/bigcode/starcoder', + 'https://huggingface.co/bigcode/octocoder', + 'https://huggingface.co/HuggingFaceH4/starchat-alpha', + 'https://huggingface.co/HuggingFaceH4/starchat-beta', + ], + }, + ], + }, + { + architecture: 'GPTJForCausalLM', + models: [ + { + name: 'GPT-J', + links: [ + 'https://huggingface.co/EleutherAI/gpt-j-6b', + 'https://huggingface.co/crumb/Instruct-GPT-J', + ], + }, + ], + }, + { + architecture: 'GPTNeoForCausalLM', + models: [ + { + name: 'GPT Neo', + links: [ + 'https://huggingface.co/EleutherAI/gpt-neo-1.3B', + 'https://huggingface.co/EleutherAI/gpt-neo-2.7B', + ], + }, + ], + }, + { + architecture: 'GPTNeoXForCausalLM', + models: [ + { + name: 'GPT NeoX', + links: ['https://huggingface.co/EleutherAI/gpt-neox-20b'], + }, + { + name: 'Dolly', + links: [ + 'https://huggingface.co/databricks/dolly-v2-3b', + 'https://huggingface.co/databricks/dolly-v2-7b', + 'https://huggingface.co/databricks/dolly-v2-12b', + ], + }, + { + name: 'RedPajama', + links: [ + 
'https://huggingface.co/ikala/redpajama-3b-chat', + 'https://huggingface.co/togethercomputer/RedPajama-INCITE-Chat-3B-v1', + 'https://huggingface.co/togethercomputer/RedPajama-INCITE-Instruct-3B-v1', + 'https://huggingface.co/togethercomputer/RedPajama-INCITE-7B-Chat', + 'https://huggingface.co/togethercomputer/RedPajama-INCITE-7B-Instruct', + ], + }, + ], + }, + { + architecture: 'GPTNeoXJapaneseForCausalLM', + models: [ + { + name: 'GPT NeoX Japanese', + links: ['https://huggingface.co/abeja/gpt-neox-japanese-2.7b'], + }, + ], + }, + { + architecture: 'GptOssForCausalLM', + models: [ + { + name: 'GPT-OSS', + links: [ + 'https://huggingface.co/openai/gpt-oss-20b', + ], + }, + ], + }, + { + architecture: 'GraniteForCausalLM', + models: [ + { + name: 'Granite', + links: [ + 'https://huggingface.co/ibm-granite/granite-3.2-2b-instruct', + 'https://huggingface.co/ibm-granite/granite-3.2-8b-instruct', + 'https://huggingface.co/ibm-granite/granite-3.1-2b-instruct', + 'https://huggingface.co/ibm-granite/granite-3.1-8b-instruct', + 'https://huggingface.co/ibm-granite/granite-3.0-2b-instruct', + 'https://huggingface.co/ibm-granite/granite-3.0-8b-instruct', + ], + }, + ], + }, + { + architecture: 'GraniteMoeForCausalLM', + models: [ + { + name: 'GraniteMoE', + links: [ + 'https://huggingface.co/ibm-granite/granite-3.1-1b-a400m-instruct', + 'https://huggingface.co/ibm-granite/granite-3.1-3b-a800m-instruct', + 'https://huggingface.co/ibm-granite/granite-3.0-1b-a400m-instruct', + 'https://huggingface.co/ibm-granite/granite-3.0-3b-a800m-instruct', + ], + }, + ], + }, + { + architecture: 'InternLMForCausalLM', + models: [ + { + name: 'InternLM', + links: [ + 'https://huggingface.co/internlm/internlm-chat-7b', + 'https://huggingface.co/internlm/internlm-7b', + ], + }, + ], + }, + { + architecture: 'InternLM2ForCausalLM', + models: [ + { + name: 'InternLM2', + links: [ + 'https://huggingface.co/internlm/internlm2-chat-1_8b', + 'https://huggingface.co/internlm/internlm2-1_8b', + 
'https://huggingface.co/internlm/internlm2-chat-7b', + 'https://huggingface.co/internlm/internlm2-7b', + 'https://huggingface.co/internlm/internlm2-chat-20b', + 'https://huggingface.co/internlm/internlm2-20b', + 'https://huggingface.co/internlm/internlm2_5-1_8b-chat', + 'https://huggingface.co/internlm/internlm2_5-1_8b', + 'https://huggingface.co/internlm/internlm2_5-7b-chat', + 'https://huggingface.co/internlm/internlm2_5-7b', + 'https://huggingface.co/internlm/internlm2_5-20b-chat', + 'https://huggingface.co/internlm/internlm2_5-20b', + ], + }, + ], + }, + { + architecture: 'JAISLMHeadModel', + models: [ + { + name: 'Jais', + links: [ + 'https://huggingface.co/inceptionai/jais-13b-chat', + 'https://huggingface.co/inceptionai/jais-13b', + ], + }, + ], + }, + { + architecture: 'LlamaForCausalLM', + models: [ + { + name: 'Llama 3', + links: [ + 'https://huggingface.co/meta-llama/Llama-3.2-1B', + 'https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct', + 'https://huggingface.co/meta-llama/Llama-3.2-3B', + 'https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct', + 'https://huggingface.co/meta-llama/Llama-3.1-8B', + 'https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct', + 'https://huggingface.co/meta-llama/Meta-Llama-3-8B', + 'https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct', + 'https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct', + 'https://huggingface.co/meta-llama/Llama-3.1-70B', + 'https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct', + 'https://huggingface.co/meta-llama/Meta-Llama-3-70B', + 'https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct', + 'https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B', + 'https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B', + ], + }, + { + name: 'Llama 2', + links: [ + 'https://huggingface.co/meta-llama/Llama-2-13b-chat-hf', + 'https://huggingface.co/meta-llama/Llama-2-13b-hf', + 'https://huggingface.co/meta-llama/Llama-2-7b-chat-hf', + 
'https://huggingface.co/meta-llama/Llama-2-7b-hf', + 'https://huggingface.co/meta-llama/Llama-2-70b-chat-hf', + 'https://huggingface.co/meta-llama/Llama-2-70b-hf', + 'https://huggingface.co/microsoft/Llama2-7b-WhoIsHarryPotter', + ], + }, + { + name: 'Falcon3', + links: [ + 'https://huggingface.co/tiiuae/Falcon3-1B-Instruct', + 'https://huggingface.co/tiiuae/Falcon3-1B-Base', + 'https://huggingface.co/tiiuae/Falcon3-3B-Instruct', + 'https://huggingface.co/tiiuae/Falcon3-3B-Base', + 'https://huggingface.co/tiiuae/Falcon3-7B-Instruct', + 'https://huggingface.co/tiiuae/Falcon3-7B-Base', + 'https://huggingface.co/tiiuae/Falcon3-10B-Instruct', + 'https://huggingface.co/tiiuae/Falcon3-10B-Base', + ], + }, + { + name: 'OpenLLaMA', + links: [ + 'https://huggingface.co/openlm-research/open_llama_13b', + 'https://huggingface.co/openlm-research/open_llama_3b', + 'https://huggingface.co/openlm-research/open_llama_3b_v2', + 'https://huggingface.co/openlm-research/open_llama_7b', + 'https://huggingface.co/openlm-research/open_llama_7b_v2', + ], + }, + { + name: 'TinyLlama', + links: ['https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0'], + }, + ], + }, + { + architecture: 'MPTForCausalLM', + models: [ + { + name: 'MPT', + links: [ + 'https://huggingface.co/mosaicml/mpt-7b', + 'https://huggingface.co/mosaicml/mpt-7b-instruct', + 'https://huggingface.co/mosaicml/mpt-7b-chat', + 'https://huggingface.co/mosaicml/mpt-30b', + 'https://huggingface.co/mosaicml/mpt-30b-instruct', + 'https://huggingface.co/mosaicml/mpt-30b-chat', + ], + }, + ], + }, + { + architecture: 'MiniCPMForCausalLM', + models: [ + { + name: 'MiniCPM', + links: [ + 'https://huggingface.co/openbmb/MiniCPM-1B-sft-bf16', + 'https://huggingface.co/openbmb/MiniCPM-2B-dpo-fp16', + 'https://huggingface.co/openbmb/MiniCPM-2B-sft-fp32', + 'https://huggingface.co/openbmb/MiniCPM-2B-dpo-fp32', + 'https://huggingface.co/openbmb/MiniCPM-2B-sft-bf16', + 'https://huggingface.co/openbmb/MiniCPM-2B-dpo-bf16', + 
'https://huggingface.co/openbmb/MiniCPM4-0.5B', + 'https://huggingface.co/openbmb/MiniCPM4-8B', + ], + }, + ], + }, + { + architecture: 'MiniCPM3ForCausalLM', + models: [ + { + name: 'MiniCPM3', + links: ['https://huggingface.co/openbmb/MiniCPM3-4B'], + }, + ], + }, + { + architecture: 'MistralForCausalLM', + models: [ + { + name: 'Mistral', + links: [ + 'https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1', + 'https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2', + 'https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3', + 'https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407', + 'https://huggingface.co/mistralai/Mistral-Nemo-Base-2407', + 'https://huggingface.co/mistralai/Mistral-7B-v0.1', + 'https://huggingface.co/mistralai/Mistral-7B-v0.3', + ], + }, + { + name: 'Notus', + links: ['https://huggingface.co/argilla/notus-7b-v1'], + }, + { + name: 'Zephyr', + links: ['https://huggingface.co/HuggingFaceH4/zephyr-7b-beta'], + }, + { + name: 'Neural Chat', + links: [ + 'https://huggingface.co/Intel/neural-chat-7b-v3-3', + 'https://huggingface.co/Intel/neural-chat-7b-v3-2', + 'https://huggingface.co/Intel/neural-chat-7b-v3-1', + 'https://huggingface.co/Intel/neural-chat-7b-v3', + ], + }, + ], + }, + { + architecture: 'MixtralForCausalLM', + models: [ + { + name: 'Mixtral', + links: [ + 'https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1', + 'https://huggingface.co/mistralai/Mixtral-8x7B-v0.1', + ], + }, + ], + }, + { + architecture: 'OlmoForCausalLM', + models: [ + { + name: 'OLMo', + links: [ + 'https://huggingface.co/allenai/OLMo-1B-hf', + 'https://huggingface.co/allenai/OLMo-7B-hf', + 'https://huggingface.co/allenai/OLMo-7B-Twin-2T-hf', + 'https://huggingface.co/allenai/OLMo-7B-Instruct-hf', + 'https://huggingface.co/allenai/OLMo-7B-0724-Instruct-hf', + 'https://huggingface.co/allenai/OLMo-7B-0724-SFT-hf', + ], + }, + ], + }, + { + architecture: 'OPTForCausalLM', + models: [ + { + name: 'OPT', + links: [ + 
'https://huggingface.co/facebook/opt-125m', + 'https://huggingface.co/facebook/opt-350m', + 'https://huggingface.co/facebook/opt-1.3b', + 'https://huggingface.co/facebook/opt-2.7b', + 'https://huggingface.co/facebook/opt-6.7b', + 'https://huggingface.co/facebook/opt-13b', + ], + }, + ], + }, + { + architecture: 'OrionForCausalLM', + models: [ + { + name: 'Orion', + links: [ + 'https://huggingface.co/OrionStarAI/Orion-14B-Chat', + 'https://huggingface.co/OrionStarAI/Orion-14B-LongChat', + 'https://huggingface.co/OrionStarAI/Orion-14B-Base', + ], + }, + ], + }, + { + architecture: 'PhiForCausalLM', + models: [ + { + name: 'Phi', + links: [ + 'https://huggingface.co/microsoft/phi-2', + 'https://huggingface.co/microsoft/phi-1_5', + ], + }, + ], + }, + { + architecture: 'Phi3ForCausalLM', + models: [ + { + name: 'Phi3', + links: [ + 'https://huggingface.co/microsoft/Phi-3-mini-4k-instruct', + 'https://huggingface.co/microsoft/Phi-3-mini-128k-instruct', + 'https://huggingface.co/microsoft/Phi-3-medium-4k-instruct', + 'https://huggingface.co/microsoft/Phi-3-medium-128k-instruct', + 'https://huggingface.co/microsoft/Phi-3.5-mini-instruct', + 'https://huggingface.co/microsoft/Phi-4-mini-instruct', + 'https://huggingface.co/microsoft/phi-4', + 'https://huggingface.co/microsoft/Phi-4-reasoning', + ], + }, + ], + }, + { + architecture: 'PhimoeForCausalLM', + models: [ + { + name: 'Phi-3.5-MoE', + links: ['https://huggingface.co/microsoft/Phi-3.5-MoE-instruct'], + }, + ], + }, + { + architecture: 'QWenLMHeadModel', + models: [ + { + name: 'Qwen', + links: [ + 'https://huggingface.co/Qwen/Qwen-1_8B-Chat', + 'https://huggingface.co/Qwen/Qwen-1_8B-Chat-Int4', + 'https://huggingface.co/Qwen/Qwen-1_8B', + 'https://huggingface.co/Qwen/Qwen-7B-Chat', + 'https://huggingface.co/Qwen/Qwen-7B-Chat-Int4', + 'https://huggingface.co/Qwen/Qwen-7B', + 'https://huggingface.co/Qwen/Qwen-14B-Chat', + 'https://huggingface.co/Qwen/Qwen-14B-Chat-Int4', + 'https://huggingface.co/Qwen/Qwen-14B', + 
'https://huggingface.co/Qwen/Qwen-72B-Chat', + 'https://huggingface.co/Qwen/Qwen-72B-Chat-Int4', + 'https://huggingface.co/Qwen/Qwen-72B', + ], + }, + ], + }, + { + architecture: 'Qwen2ForCausalLM', + models: [ + { + name: 'Qwen2', + links: [ + 'https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct', + 'https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct', + 'https://huggingface.co/Qwen/Qwen2.5-3B-Instruct', + 'https://huggingface.co/Qwen/Qwen2.5-7B-Instruct', + 'https://huggingface.co/Qwen/Qwen2.5-14B-Instruct', + 'https://huggingface.co/Qwen/Qwen2.5-32B-Instruct', + 'https://huggingface.co/Qwen/Qwen2.5-72B-Instruct', + 'https://huggingface.co/Qwen/Qwen2-0.5B-Instruct', + 'https://huggingface.co/Qwen/Qwen2-1.5B-Instruct', + 'https://huggingface.co/Qwen/Qwen2-7B-Instruct', + 'https://huggingface.co/Qwen/Qwen2-72B-Instruct', + 'https://huggingface.co/Qwen/Qwen1.5-0.5B-Chat', + 'https://huggingface.co/Qwen/Qwen1.5-1.8B-Chat', + 'https://huggingface.co/Qwen/Qwen1.5-4B-Chat', + 'https://huggingface.co/Qwen/Qwen1.5-7B-Chat', + 'https://huggingface.co/Qwen/Qwen1.5-14B-Chat', + 'https://huggingface.co/Qwen/Qwen1.5-32B-Chat', + 'https://huggingface.co/Qwen/Qwen1.5-7B-Chat-GPTQ-Int4', + 'https://huggingface.co/Qwen/QwQ-32B', + 'https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', + 'https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', + 'https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', + 'https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', + ], + }, + ], + }, + { + architecture: 'Qwen2MoeForCausalLM', + models: [ + { + name: 'Qwen2MoE', + links: [ + 'https://huggingface.co/Qwen/Qwen2-57B-A14B-Instruct', + 'https://huggingface.co/Qwen/Qwen2-57B-A14B', + 'https://huggingface.co/Qwen/Qwen1.5-MoE-A2.7B-Chat', + 'https://huggingface.co/Qwen/Qwen1.5-MoE-A2.7B', + ], + }, + ], + }, + { + architecture: 'Qwen3ForCausalLM', + models: [ + { + name: 'Qwen3', + links: [ + 'https://huggingface.co/Qwen/Qwen3-0.6B', + 
'https://huggingface.co/Qwen/Qwen3-1.7B', + 'https://huggingface.co/Qwen/Qwen3-4B', + 'https://huggingface.co/Qwen/Qwen3-8B', + 'https://huggingface.co/Qwen/Qwen3-14B', + 'https://huggingface.co/Qwen/Qwen3-32B', + 'https://huggingface.co/Qwen/Qwen3-0.6B-Base', + 'https://huggingface.co/Qwen/Qwen3-1.7B-Base', + 'https://huggingface.co/Qwen/Qwen3-4B-Base', + 'https://huggingface.co/Qwen/Qwen3-8B-Base', + 'https://huggingface.co/Qwen/Qwen3-14B-Base', + ], + }, + ], + }, + { + architecture: 'Qwen3MoeForCausalLM', + models: [ + { + name: 'Qwen3MoE', + links: [ + 'https://huggingface.co/Qwen/Qwen3-30B-A3B', + 'https://huggingface.co/Qwen/Qwen3-30B-A3B-Base', + ], + }, + ], + }, + { + architecture: 'StableLmForCausalLM', + models: [ + { + name: 'StableLM', + links: [ + 'https://huggingface.co/stabilityai/stablelm-zephyr-3b', + 'https://huggingface.co/stabilityai/stablelm-2-1_6b', + 'https://huggingface.co/stabilityai/stablelm-2-12b', + 'https://huggingface.co/stabilityai/stablelm-2-zephyr-1_6b', + 'https://huggingface.co/stabilityai/stablelm-3b-4e1t', + ], + }, + ], + }, + { + architecture: 'Starcoder2ForCausalLM', + models: [ + { + name: 'Startcoder2', + links: [ + 'https://huggingface.co/bigcode/starcoder2-3b', + 'https://huggingface.co/bigcode/starcoder2-7b', + 'https://huggingface.co/bigcode/starcoder2-15b', + ], + }, + ], + }, + { + architecture: 'XGLMForCausalLM', + models: [ + { + name: 'XGLM', + links: [ + 'https://huggingface.co/facebook/xglm-564M', + 'https://huggingface.co/facebook/xglm-1.7B', + 'https://huggingface.co/facebook/xglm-2.9B', + 'https://huggingface.co/facebook/xglm-4.5B', + 'https://huggingface.co/facebook/xglm-7.5B', + ], + }, + ], + }, + { + architecture: 'XverseForCausalLM', + models: [ + { + name: 'Xverse', + links: [ + 'https://huggingface.co/xverse/XVERSE-7B', + 'https://huggingface.co/xverse/XVERSE-7B-Chat', + 'https://huggingface.co/xverse/XVERSE-13B', + 'https://huggingface.co/xverse/XVERSE-13B-Chat', + 
'https://huggingface.co/xverse/XVERSE-65B', + 'https://huggingface.co/xverse/XVERSE-65B-Chat', + ], + }, + ], + }, +]; diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/speech-generation-models-table/index.tsx b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/speech-generation-models-table/index.tsx new file mode 100644 index 0000000..a9e6a0b --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/speech-generation-models-table/index.tsx @@ -0,0 +1,27 @@ +import React from 'react'; +import { BaseModelsTable, LinksCell } from '../base-models-table'; +import { SPEECH_GENERATION_MODELS } from './models'; + +export default function SpeechGenerationModelsTable(): React.JSX.Element { + const headers = ['Architecture', 'Models', 'Example HuggingFace Models']; + + const rows = SPEECH_GENERATION_MODELS.map(({ architecture, models }) => ( + <> + + + {architecture} + + {models[0].name} + + + {models.slice(1).map(({ name, links }) => ( + + {name} + + + ))} + + )); + + return ; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/speech-generation-models-table/models.ts b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/speech-generation-models-table/models.ts new file mode 100644 index 0000000..285f8c6 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/speech-generation-models-table/models.ts @@ -0,0 +1,19 @@ +type SpeechGenerationModelType = { + architecture: string; + models: Array<{ + name: string; + links: string[]; + }>; +}; + +export const SPEECH_GENERATION_MODELS: SpeechGenerationModelType[] = [ + { + architecture: 'SpeechT5ForTextToSpeech', + models: [ + { + name: 'SpeechT5 TTS', + links: ['https://huggingface.co/microsoft/speecht5_tts'], + }, + ], + }, +]; diff --git 
a/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/text-embeddings-models-table/index.tsx b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/text-embeddings-models-table/index.tsx new file mode 100644 index 0000000..457fafd --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/text-embeddings-models-table/index.tsx @@ -0,0 +1,18 @@ +import React from 'react'; +import { BaseModelsTable, LinksCell } from '../base-models-table'; +import { TEXT_EMBEDDINGS_MODELS } from './models'; + +export default function TextEmbeddingsModelsTable(): React.JSX.Element { + const headers = ['Architecture', 'Example HuggingFace Models']; + + const rows = TEXT_EMBEDDINGS_MODELS.map(({ architecture, models }) => ( + + + {architecture} + + + + )); + + return ; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/text-embeddings-models-table/models.ts b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/text-embeddings-models-table/models.ts new file mode 100644 index 0000000..02f5dfa --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/text-embeddings-models-table/models.ts @@ -0,0 +1,68 @@ +type TextEmbeddingsModelType = { + architecture: string; + models: Array<{ + links: string[]; + }>; +}; + +export const TEXT_EMBEDDINGS_MODELS: TextEmbeddingsModelType[] = [ + { + architecture: 'BertModel', + models: [ + { + links: [ + 'https://huggingface.co/BAAI/bge-small-en-v1.5', + 'https://huggingface.co/BAAI/bge-base-en-v1.5', + 'https://huggingface.co/BAAI/bge-large-en-v1.5', + 'https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2', + 'https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1', + 'https://huggingface.co/mixedbread-ai/mxbai-embed-xsmall-v1', + 'https://huggingface.co/WhereIsAI/UAE-Large-V1', + ], + }, + ], + }, + { + architecture: 'MPNetForMaskedLM', + 
models: [ + { + links: [ + 'https://huggingface.co/sentence-transformers/all-mpnet-base-v2', + 'https://huggingface.co/sentence-transformers/multi-qa-mpnet-base-dot-v1', + ], + }, + ], + }, + { + architecture: 'RobertaForMaskedLM', + models: [ + { + links: ['https://huggingface.co/sentence-transformers/all-distilroberta-v1'], + }, + ], + }, + { + architecture: 'XLMRobertaModel', + models: [ + { + links: [ + 'https://huggingface.co/mixedbread-ai/deepset-mxbai-embed-de-large-v1', + 'https://huggingface.co/intfloat/multilingual-e5-large-instruct', + 'https://huggingface.co/intfloat/multilingual-e5-large', + ], + }, + ], + }, + { + architecture: 'Qwen3ForCausalLM', + models: [ + { + links: [ + 'https://huggingface.co/Qwen/Qwen3-Embedding-0.6B', + 'https://huggingface.co/Qwen/Qwen3-Embedding-4B', + 'https://huggingface.co/Qwen/Qwen3-Embedding-8B', + ], + }, + ], + }, +]; diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/text-rerank-models-table/index.tsx b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/text-rerank-models-table/index.tsx new file mode 100644 index 0000000..2244c3b --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/text-rerank-models-table/index.tsx @@ -0,0 +1,23 @@ +import React from 'react'; +import { TEXT_RERANK_MODELS } from './models'; +import { BaseModelsTable, LinksCell } from '../base-models-table'; + +export default function TextRerankModelsTable(): React.JSX.Element { + const headers = ['Architecture', '`optimum-cli` task', 'Example HuggingFace Models']; + + const rows = TEXT_RERANK_MODELS.map(({ architecture, optimumIntelTask, models }) => ( + <> + + + {architecture} + + + {optimumIntelTask} + + + + + )); + + return ; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/text-rerank-models-table/models.ts 
b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/text-rerank-models-table/models.ts new file mode 100644 index 0000000..01f2c81 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/text-rerank-models-table/models.ts @@ -0,0 +1,63 @@ +type TextRerankModelType = { + architecture: string; + optimumIntelTask: string; + models: Array<{ + links: string[]; + }>; +}; + +export const TEXT_RERANK_MODELS: TextRerankModelType[] = [ + { + architecture: 'BertForSequenceClassification', + optimumIntelTask: 'text-classification', + models: [ + { + links: [ + 'https://huggingface.co/cross-encoder/ms-marco-MiniLM-L2-v2', + 'https://huggingface.co/cross-encoder/ms-marco-MiniLM-L4-v2', + 'https://huggingface.co/cross-encoder/ms-marco-MiniLM-L6-v2', + 'https://huggingface.co/cross-encoder/ms-marco-MiniLM-L12-v2', + 'https://huggingface.co/cross-encoder/ms-marco-TinyBERT-L2-v2', + 'https://huggingface.co/tomaarsen/reranker-MiniLM-L12-gooaq-bce', + ], + }, + ], + }, + { + architecture: 'XLMRobertaForSequenceClassification', + optimumIntelTask: 'text-classification', + models: [ + { + links: [ + 'https://huggingface.co/BAAI/bge-reranker-v2-m3', + ], + }, + ], + }, + { + architecture: 'ModernBertForSequenceClassification', + optimumIntelTask: 'text-classification', + models: [ + { + links: [ + 'https://huggingface.co/tomaarsen/reranker-ModernBERT-base-gooaq-bce', + 'https://huggingface.co/tomaarsen/reranker-ModernBERT-large-gooaq-bce', + 'https://huggingface.co/Alibaba-NLP/gte-reranker-modernbert-base', + ], + }, + ], + }, + { + architecture: 'Qwen3ForCausalLM', + optimumIntelTask: 'text-generation-with-past', + models: [ + { + links: [ + 'https://huggingface.co/Qwen/Qwen3-Reranker-0.6B', + 'https://huggingface.co/Qwen/Qwen3-Reranker-4B', + 'https://huggingface.co/Qwen/Qwen3-Reranker-8B' + ], + }, + ], + }, +]; diff --git 
a/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/video-generation-models-table/index.tsx b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/video-generation-models-table/index.tsx new file mode 100644 index 0000000..bf6fdad --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/video-generation-models-table/index.tsx @@ -0,0 +1,29 @@ +import React from 'react'; +import { BaseModelsTable, LinksCell, StatusCell } from '../base-models-table'; +import { VIDEO_GENERATION_MODELS } from './models'; + +export default function VideoGenerationModelsTable(): React.JSX.Element { + const headers = [ + 'Architecture', + 'Text to Video', + 'Image to Video', + 'LoRA Support', + 'Example HuggingFace Models', + ]; + + const rows = VIDEO_GENERATION_MODELS.map( + ({ architecture, textToVideo, imageToVideo, loraSupport, links }) => ( + + + {architecture} + + + + + + + ) + ); + + return ; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/video-generation-models-table/models.ts b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/video-generation-models-table/models.ts new file mode 100644 index 0000000..b304595 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/video-generation-models-table/models.ts @@ -0,0 +1,17 @@ +type VideoGenerationModelType = { + architecture: string; + textToVideo: boolean; + imageToVideo: boolean; + loraSupport: boolean; + links: string[]; +}; + +export const VIDEO_GENERATION_MODELS: VideoGenerationModelType[] = [ + { + architecture: 'LTX-Video', + textToVideo: true, + imageToVideo: false, + loraSupport: false, + links: ['https://huggingface.co/Lightricks/LTX-Video'], + }, +]; diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/vlm-models-table/index.tsx 
b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/vlm-models-table/index.tsx new file mode 100644 index 0000000..210d4af --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/vlm-models-table/index.tsx @@ -0,0 +1,35 @@ +import Link from '@docusaurus/Link'; +import React from 'react'; +import { BaseModelsTable, LinksCell } from '../base-models-table'; +import { VLM_MODELS } from './models'; + +export default function VLMModelsTable(): React.JSX.Element { + const headers = ['Architecture', 'Models', 'Example HuggingFace Models']; + + const rows = VLM_MODELS.map(({ architecture, models }) => ( + <> + + + {architecture} + + + {models[0].name} + {models[0].notesLink && ( + <> +  (Notes) + + )} + + + + {models.slice(1).map(({ name, links }) => ( + + {name} + + + ))} + + )); + + return ; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/vlm-models-table/models.ts b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/vlm-models-table/models.ts new file mode 100644 index 0000000..f9c437c --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/vlm-models-table/models.ts @@ -0,0 +1,184 @@ +type VLMModelType = { + architecture: string; + models: Array<{ + name: string; + links: string[]; + notesLink?: string; + }>; +}; + +export const VLM_MODELS: VLMModelType[] = [ + { + architecture: 'InternVLChat', + models: [ + { + name: 'InternVLChatModel', + links: [ + 'https://huggingface.co/OpenGVLab/InternVL2-1B', + 'https://huggingface.co/OpenGVLab/InternVL2-2B', + 'https://huggingface.co/OpenGVLab/InternVL2-4B', + 'https://huggingface.co/OpenGVLab/InternVL2-8B', + 'https://huggingface.co/OpenGVLab/InternVL2_5-1B', + 'https://huggingface.co/OpenGVLab/InternVL2_5-2B', + 'https://huggingface.co/OpenGVLab/InternVL2_5-4B', + 'https://huggingface.co/OpenGVLab/InternVL2_5-8B', + 
'https://huggingface.co/OpenGVLab/InternVL3-1B', + 'https://huggingface.co/OpenGVLab/InternVL3-2B', + 'https://huggingface.co/OpenGVLab/InternVL3-8B', + 'https://huggingface.co/OpenGVLab/InternVL3-9B', + 'https://huggingface.co/OpenGVLab/InternVL3-14B' + ], + notesLink: '#internvl2-notes', + }, + ], + }, + { + architecture: 'LLaVA', + models: [ + { + name: 'LLaVA-v1.5', + links: ['https://huggingface.co/llava-hf/llava-1.5-7b-hf'], + }, + ], + }, + { + architecture: 'nanoLLaVA', + models: [ + { + name: 'nanoLLaVA', + links: ['https://huggingface.co/qnguyen3/nanoLLaVA'], + notesLink: '#nanollava-notes', + }, + { + name: 'nanoLLaVA-1.5', + links: ['https://huggingface.co/qnguyen3/nanoLLaVA-1.5'], + }, + ], + }, + { + architecture: 'LLaVA-NeXT', + models: [ + { + name: 'LLaVA-v1.6', + links: [ + 'https://huggingface.co/llava-hf/llava-v1.6-mistral-7b-hf', + 'https://huggingface.co/llava-hf/llava-v1.6-vicuna-7b-hf', + 'https://huggingface.co/llava-hf/llama3-llava-next-8b-hf', + ], + }, + ], + }, + { + architecture: 'LLaVA-NeXT-Video', + models: [ + { + name: 'LLaVA-Next-Video', + links: [ + 'https://huggingface.co/llava-hf/LLaVA-NeXT-Video-7B-hf', + ], + }, + ], + }, + { + architecture: 'MiniCPMO', + models: [ + { + name: 'MiniCPM-o-2_6', + links: ['https://huggingface.co/openbmb/MiniCPM-o-2_6'], + notesLink: '#minicpm-o-notes', + }, + ], + }, + { + architecture: 'MiniCPMV', + models: [ + { + name: 'MiniCPM-V-2_6', + links: ['https://huggingface.co/openbmb/MiniCPM-V-2_6'], + }, + ], + }, + { + architecture: 'Phi3VForCausalLM', + models: [ + { + name: 'phi3_v', + links: [ + 'https://huggingface.co/microsoft/Phi-3-vision-128k-instruct', + 'https://huggingface.co/microsoft/Phi-3.5-vision-instruct', + ], + notesLink: '#phi3_v-notes', + }, + ], + }, + { + architecture: 'Phi4MMForCausalLM', + models: [ + { + name: 'phi4mm', + links: [ + 'https://huggingface.co/microsoft/Phi-4-multimodal-instruct', + ], + notesLink: '#phi4mm-notes' + }, + ], + }, + { + architecture: 'Qwen2-VL', 
+ models: [ + { + name: 'Qwen2-VL', + links: [ + 'https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct', + 'https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct', + 'https://huggingface.co/Qwen/Qwen2-VL-2B', + 'https://huggingface.co/Qwen/Qwen2-VL-7B', + ], + }, + ], + }, + { + architecture: 'Qwen2.5-VL', + models: [ + { + name: 'Qwen2.5-VL', + links: [ + 'https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct', + 'https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct', + ], + }, + ], + }, + { + architecture: 'Qwen3-VL', + models: [ + { + name: 'Qwen3-VL', + links: [ + 'https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct', + 'https://huggingface.co/Qwen/Qwen3-VL-2B-Thinking', + 'https://huggingface.co/Qwen/Qwen3-VL-4B-Instruct', + 'https://huggingface.co/Qwen/Qwen3-VL-4B-Thinking', + 'https://huggingface.co/Qwen/Qwen3-VL-8B-Instruct', + 'https://huggingface.co/Qwen/Qwen3-VL-8B-Thinking', + 'https://huggingface.co/Qwen/Qwen3-VL-32B-Instruct', + 'https://huggingface.co/Qwen/Qwen3-VL-32B-Thinking', + ], + notesLink: '#qwen3_vl-notes', + }, + ], + }, + { + architecture: 'Gemma3ForConditionalGeneration', + models: [ + { + name: 'gemma3', + links: [ + 'https://huggingface.co/google/gemma-3-4b-it', + 'https://huggingface.co/google/gemma-3-12b-it', + 'https://huggingface.co/google/gemma-3-27b-it', + ], + }, + ], + }, +]; diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/whisper-models-table/index.tsx b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/whisper-models-table/index.tsx new file mode 100644 index 0000000..65a1ae5 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/whisper-models-table/index.tsx @@ -0,0 +1,27 @@ +import React from 'react'; +import { BaseModelsTable, LinksCell } from '../base-models-table'; +import { WHISPER_MODELS } from './models'; + +export default function WhisperModelsTable(): React.JSX.Element { + const headers = ['Architecture', 'Models', 
'Example HuggingFace Models']; + + const rows = WHISPER_MODELS.map(({ architecture, models }) => ( + <> + + + {architecture} + + {models[0].name} + + + {models.slice(1).map(({ name, links }) => ( + + {name} + + + ))} + + )); + + return ; +} diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/whisper-models-table/models.ts b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/whisper-models-table/models.ts new file mode 100644 index 0000000..ac84eae --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/_components/whisper-models-table/models.ts @@ -0,0 +1,37 @@ +type WhisperModelType = { + architecture: string; + models: Array<{ + name: string; + links: string[]; + }>; +}; + +export const WHISPER_MODELS: WhisperModelType[] = [ + { + architecture: 'WhisperForConditionalGeneration', + models: [ + { + name: 'Whisper', + links: [ + 'https://huggingface.co/openai/whisper-tiny', + 'https://huggingface.co/openai/whisper-tiny.en', + 'https://huggingface.co/openai/whisper-base', + 'https://huggingface.co/openai/whisper-base.en', + 'https://huggingface.co/openai/whisper-small', + 'https://huggingface.co/openai/whisper-small.en', + 'https://huggingface.co/openai/whisper-medium', + 'https://huggingface.co/openai/whisper-medium.en', + 'https://huggingface.co/openai/whisper-large-v3', + ], + }, + { + name: 'Distil-Whisper', + links: [ + 'https://huggingface.co/distil-whisper/distil-small.en', + 'https://huggingface.co/distil-whisper/distil-medium.en', + 'https://huggingface.co/distil-whisper/distil-large-v3', + ], + }, + ], + }, +]; diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/index.mdx b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/index.mdx new file mode 100644 index 0000000..6e6d886 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/supported-models/index.mdx @@ -0,0 +1,139 @@ +import 
ImageGenerationModelsTable from './_components/image-generation-models-table'; +import VideoGenerationModelsTable from './_components/video-generation-models-table'; +import LLMModelsTable from './_components/llm-models-table'; +import VLMModelsTable from './_components/vlm-models-table'; +import WhisperModelsTable from './_components/whisper-models-table'; +import TextEmbeddingsModelsTable from './_components/text-embeddings-models-table'; +import SpeechGenerationModelsTable from './_components/speech-generation-models-table'; +import TextRerankModelsTable from './_components/text-rerank-models-table'; + + +# Supported Models + +:::info Models Compatibility +Other models with similar architectures may also work successfully even if not explicitly validated. +Consider testing any unlisted models to verify compatibility with your specific use case. +::: + +## Large Language Models (LLMs) + +:::tip LoRA Support +LLM pipeline supports LoRA adapters. +::: + + + +::::info + +The LLM pipeline can work with other similar topologies produced by `optimum-intel` with the same model signature. +The model is required to have the following inputs after the conversion: + +1. `input_ids` contains the tokens. +2. `attention_mask` is filled with `1`. +3. `beam_idx` selects beams. +4. `position_ids` (optional) encodes the position of the token currently being generated in the sequence. In addition to these inputs, the model must have a single `logits` output. + +:::note + +Models should belong to the same family and have the same tokenizers. + +::: + +:::: + +## Image Generation Models + + + +## Video Generation Models + + + +## Visual Language Models (VLMs) + +:::tip LoRA Support +VLM pipeline supports LoRA adapters applied to the language-model (LLM) part. +LoRA adapters targeting the vision encoder or other multimodal components are not supported.
+::: + + + +:::warning VLM Models Notes +#### InternVL2 {#internvl2-notes} + +To convert InternVL2 models, `timm` and `einops` are required: + +```bash +pip install timm einops +``` +#### MiniCPMO {#minicpm-o-notes} + +1. `openbmb/MiniCPM-o-2_6` doesn't support `transformers>=4.52` which is required for `optimum-cli` export. +2. `--task image-text-to-text` is required for `optimum-cli export openvino --trust-remote-code` because `image-text-to-text` isn't `MiniCPM-o-2_6`'s native task. + +#### phi3_v {#phi3_v-notes} + +Models' configs aren't consistent. It's required to override the default `eos_token_id` with the one from a tokenizer: +```python +generation_config.set_eos_token_id(pipe.get_tokenizer().get_eos_token_id()) +``` +#### phi4mm {#phi4mm-notes} + +Apply https://huggingface.co/microsoft/Phi-4-multimodal-instruct/discussions/78/files to fix the model export for `transformers>=4.50` + +#### Qwen3-VL {#qwen3_vl-notes} + +The model requires `transformers>=4.57` for the export with `optimum-cli`. + +#### nanoLLaVA {#nanollava-notes} + +The model requires `transformers>=4.48` for the export with `optimum-cli`. +::: + +## Speech Recognition Models (Whisper-based) + +:::info LoRA Support +Speech recognition pipeline does **not** support LoRA adapters. +::: + + + +## Speech Generation Models + +:::info LoRA Support +Speech generation pipeline does **not** support LoRA adapters. +::: + + + +## Text Embeddings Models + +:::info LoRA Support +Text embeddings pipeline does **not** support LoRA adapters. +::: + + + +:::warning Text Embeddings Models Notes +Qwen3 Embedding models require `--task feature-extraction` during the conversion with `optimum-cli`. +::: + +## Text Rerank Models + +:::info LoRA Support +Text rerank pipeline does **not** support LoRA adapters. +::: + + + +:::warning Text Rerank Models Notes +Text Rerank models require appropriate `--task` provided during the conversion with `optimum-cli`. Task can be found in the table above. 
+::: + +___ + +:::info Hugging Face Notes +Some models require submitting an access request on their Hugging Face page before they can be downloaded. + +If https://huggingface.co/ is down, the conversion step won't be able to download the models. +::: diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/_category_.json b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/_category_.json new file mode 100644 index 0000000..0b39f7e --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "Use Cases", + "position": 2, + "link": { + "type": "generated-index", + "description": "OpenVINO GenAI provides support for the following use cases" + } +} diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/_shared/_basic_generation_configuration.mdx b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/_shared/_basic_generation_configuration.mdx new file mode 100644 index 0000000..b89c8a9 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/_shared/_basic_generation_configuration.mdx @@ -0,0 +1,19 @@ +#### Basic Generation Configuration + +{/* Python and C++ code examples */} +{props.children} + +:::info Understanding Basic Generation Parameters + +- `max_new_tokens`: The maximum number of tokens to generate, excluding the number of tokens in the prompt. `max_new_tokens` has priority over `max_length`. +- `temperature`: Controls the level of creativity in AI-generated text: + - Low temperature (e.g. 0.2) leads to more focused and deterministic output, choosing tokens with the highest probability. + - Medium temperature (e.g. 1.0) maintains a balance between creativity and focus, selecting tokens based on their probabilities without significant bias. + - High temperature (e.g. 2.0) makes output more creative and adventurous, increasing the chances of selecting less likely tokens. +- `top_k`: Limits token selection to the k most likely next tokens.
Higher values allow more diverse outputs. +- `top_p`: Selects from the smallest set of tokens whose cumulative probability exceeds p. Helps balance diversity and quality. +- `repetition_penalty`: Reduces the likelihood of repeating tokens. Values above 1.0 discourage repetition. + +For the full list of generation parameters, refer to the [Generation Config API](https://docs.openvino.ai/2026/api/genai_api/_autosummary/openvino_genai.GenerationConfig.html#openvino-genai-generationconfig). + +::: diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/_shared/_beam_search_generation.mdx b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/_shared/_beam_search_generation.mdx new file mode 100644 index 0000000..0690531 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/_shared/_beam_search_generation.mdx @@ -0,0 +1,17 @@ +#### Optimizing Generation with Grouped Beam Search + +Beam search helps explore multiple possible text completions simultaneously, often leading to higher-quality outputs. + +{/* Python and C++ code examples */} +{props.children} + +:::info Understanding Beam Search Generation Parameters + +- `max_new_tokens`: The maximum number of tokens to generate, excluding the number of tokens in the prompt. `max_new_tokens` has priority over `max_length`. +- `num_beams`: The number of beams for beam search. 1 disables beam search. +- `num_beam_groups`: The number of groups to divide `num_beams` into in order to ensure diversity among different groups of beams. +- `diversity_penalty`: The value subtracted from a beam's score if it generates the same token as any beam from another group at a particular time step. + +For the full list of generation parameters, refer to the [Generation Config API](https://docs.openvino.ai/2026/api/genai_api/_autosummary/openvino_genai.GenerationConfig.html#openvino-genai-generationconfig).
+ +::: diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/_shared/_chat_scenario.mdx b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/_shared/_chat_scenario.mdx new file mode 100644 index 0000000..2e115c7 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/_shared/_chat_scenario.mdx @@ -0,0 +1,3 @@ +### Use OpenVINO GenAI in Chat Scenario + +Refer to the [Chat Scenario](/docs/guides/chat-scenario) guide for more information on using OpenVINO GenAI in chat applications. diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/_shared/_convert_model.mdx b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/_shared/_convert_model.mdx new file mode 100644 index 0000000..8d6e115 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/_shared/_convert_model.mdx @@ -0,0 +1,8 @@ +## Convert and Optimize Model + +{/* optimum-cli export code examples */} +{props.children} + +:::info +Refer to the [Model Preparation](/docs/category/model-preparation) guide for detailed instructions on how to download, convert and optimize models for OpenVINO GenAI. +::: diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/_shared/_generation_configuration_workflow.mdx b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/_shared/_generation_configuration_workflow.mdx new file mode 100644 index 0000000..88ba3f7 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/_shared/_generation_configuration_workflow.mdx @@ -0,0 +1,8 @@ +#### Generation Configuration Workflow + +1. Get the model default config with `get_generation_config()` +2. Modify parameters +3. Apply the updated config using one of the following methods: + - Use `set_generation_config(config)` + - Pass config directly to `generate()` (e.g. `generate(prompt, config)`) + - Specify options as inputs in the `generate()` method (e.g. 
`generate(prompt, max_new_tokens=100)`) diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/_shared/_streaming.mdx b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/_shared/_streaming.mdx new file mode 100644 index 0000000..f09a14c --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/_shared/_streaming.mdx @@ -0,0 +1,3 @@ +### Streaming the Output + +Refer to the [Streaming](/docs/guides/streaming) guide for more information on streaming the output with OpenVINO GenAI. diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/_sections/_run_model/_image2image_cpp.mdx b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/_sections/_run_model/_image2image_cpp.mdx new file mode 100644 index 0000000..38192a3 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/_sections/_run_model/_image2image_cpp.mdx @@ -0,0 +1,19 @@ +import CodeBlock from '@theme/CodeBlock'; + + +{`#include "openvino/genai/image_generation/image2image_pipeline.hpp" +#include "load_image.hpp" +#include "imwrite.hpp" + +int main(int argc, char* argv[]) { + const std::string models_path = argv[1], prompt = argv[2], image_path = argv[3]; + + ov::Tensor input_image = utils::load_image(image_path); + + ov::genai::Image2ImagePipeline pipe(models_path, "${props.device || 'CPU'}"); + ov::Tensor generated_image = pipe.generate(prompt, input_image, ov::genai::strength(0.8f)); + + imwrite("image.bmp", generated_image, true); +} +`} + diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/_sections/_run_model/_image2image_python.mdx b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/_sections/_run_model/_image2image_python.mdx new file mode 100644 index 0000000..542e821 --- /dev/null +++ 
b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/_sections/_run_model/_image2image_python.mdx @@ -0,0 +1,22 @@ +import CodeBlock from '@theme/CodeBlock'; + + +{`import openvino_genai as ov_genai +import openvino as ov +from PIL import Image +import numpy as np + +def read_image(path: str) -> ov.Tensor: + pic = Image.open(path).convert("RGB") + image_data = np.array(pic)[None] + return ov.Tensor(image_data) + +input_image_data = read_image("input_image.jpg") + +pipe = ov_genai.Image2ImagePipeline(model_path, "${props.device || 'CPU'}") +image_tensor = pipe.generate(prompt, image=input_image_data, strength=0.8) + +image = Image.fromarray(image_tensor.data[0]) +image.save("image.bmp") +`} + diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/_sections/_run_model/_inpainting_cpp.mdx b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/_sections/_run_model/_inpainting_cpp.mdx new file mode 100644 index 0000000..751628a --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/_sections/_run_model/_inpainting_cpp.mdx @@ -0,0 +1,20 @@ +import CodeBlock from '@theme/CodeBlock'; + + +{`#include "openvino/genai/image_generation/inpainting_pipeline.hpp" +#include "load_image.hpp" +#include "imwrite.hpp" + +int main(int argc, char* argv[]) { + const std::string models_path = argv[1], prompt = argv[2]; + + ov::Tensor input_image = utils::load_image(argv[3]); + ov::Tensor mask_image = utils::load_image(argv[4]); + + ov::genai::InpaintingPipeline pipe(models_path, "${props.device || 'CPU'}"); + ov::Tensor generated_image = pipe.generate(prompt, input_image, mask_image); + + imwrite("image.bmp", generated_image, true); +} +`} + diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/_sections/_run_model/_inpainting_python.mdx 
b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/_sections/_run_model/_inpainting_python.mdx new file mode 100644 index 0000000..1576a43 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/_sections/_run_model/_inpainting_python.mdx @@ -0,0 +1,23 @@ +import CodeBlock from '@theme/CodeBlock'; + + +{`import openvino_genai as ov_genai +import openvino as ov +from PIL import Image +import numpy as np + +def read_image(path: str) -> ov.Tensor: + pic = Image.open(path).convert("RGB") + image_data = np.array(pic)[None] + return ov.Tensor(image_data) + +input_image_data = read_image("input_image.jpg") +mask_image = read_image("mask.jpg") + +pipe = ov_genai.InpaintingPipeline(model_path, "${props.device || 'CPU'}") +image_tensor = pipe.generate(prompt, image=input_image_data, mask_image=mask_image) + +image = Image.fromarray(image_tensor.data[0]) +image.save("image.bmp") +`} + diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/_sections/_run_model/_text2image_cpp.mdx b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/_sections/_run_model/_text2image_cpp.mdx new file mode 100644 index 0000000..24054cd --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/_sections/_run_model/_text2image_cpp.mdx @@ -0,0 +1,16 @@ +import CodeBlock from '@theme/CodeBlock'; + + +{`#include "openvino/genai/image_generation/text2image_pipeline.hpp" +#include "imwrite.hpp" + +int main(int argc, char* argv[]) { + const std::string models_path = argv[1], prompt = argv[2]; + + ov::genai::Text2ImagePipeline pipe(models_path, "${props.device || 'CPU'}"); + ov::Tensor image = pipe.generate(prompt); + + imwrite("image.bmp", image, true); +} +`} + diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/_sections/_run_model/_text2image_python.mdx 
b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/_sections/_run_model/_text2image_python.mdx new file mode 100644 index 0000000..f32ac70 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/_sections/_run_model/_text2image_python.mdx @@ -0,0 +1,13 @@ +import CodeBlock from '@theme/CodeBlock'; + + +{`import openvino_genai as ov_genai +from PIL import Image + +pipe = ov_genai.Text2ImagePipeline(model_path, "${props.device || 'CPU'}") +image_tensor = pipe.generate(prompt) + +image = Image.fromarray(image_tensor.data[0]) +image.save("image.bmp") +`} + diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/_sections/_run_model/index.mdx b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/_sections/_run_model/index.mdx new file mode 100644 index 0000000..7941e2a --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/_sections/_run_model/index.mdx @@ -0,0 +1,103 @@ +import Text2ImageCPP from './_text2image_cpp.mdx'; +import Text2ImagePython from './_text2image_python.mdx'; + +import Image2ImageCPP from './_image2image_cpp.mdx'; +import Image2ImagePython from './_image2image_python.mdx'; + +import InpaintingCPP from './_inpainting_cpp.mdx'; +import InpaintingPython from './_inpainting_python.mdx'; + +## Run Model Using OpenVINO GenAI + +OpenVINO GenAI supports the following diffusion model pipelines: +- [`Text2ImagePipeline`](https://docs.openvino.ai/2026/api/genai_api/_autosummary/openvino_genai.Text2ImagePipeline.html) for creating images from text prompts. +- [`Image2ImagePipeline`](https://docs.openvino.ai/2026/api/genai_api/_autosummary/openvino_genai.Image2ImagePipeline.html) for modifying existing images based on prompts. 
+- [`InpaintingPipeline`](https://docs.openvino.ai/2026/api/genai_api/_autosummary/openvino_genai.InpaintingPipeline.html) for selectively replacing portions of images using masks. + +### `Text2ImagePipeline` + + + + + + + + + + + + + + + + :::info + + Code below requires installation of C++ compatible package. + See [here](https://docs.openvino.ai/2026/get-started/install-openvino/install-openvino-genai.html#archive-installation) for additional setup details, + or [How to Build OpenVINO™ GenAI APP in C++](https://medium.com/openvino-toolkit/how-to-build-openvino-genai-app-in-c-32dcbe42fa67) blog for full instruction. + + ::: + + + + + + + + + +:::tip + +Use CPU or GPU as devices without any other code change. + +::: + +### `Image2ImagePipeline` + + + + + + + + + + + + + + + + + + + + + + + + +### `InpaintingPipeline` + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/_sections/_usage_options/index.mdx b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/_sections/_usage_options/index.mdx new file mode 100644 index 0000000..268993e --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/_sections/_usage_options/index.mdx @@ -0,0 +1,82 @@ +import GenerationConfigurationWorkflow from '@site/docs/use-cases/_shared/_generation_configuration_workflow.mdx'; + +## Additional Usage Options + +:::tip +Check out [Python](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/python/image_generation) and [C++](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/cpp/image_generation) image generation samples. 
+::: + +### Use Different Generation Parameters + + + +#### Image Generation Configuration + +You can adjust several parameters to control the image generation process, including dimensions and the number of inference steps: + + + + ```python + import openvino_genai as ov_genai + from PIL import Image + + pipe = ov_genai.Text2ImagePipeline(model_path, "CPU") + image_tensor = pipe.generate( + prompt, + # highlight-start + width=512, + height=512, + num_images_per_prompt=1, + num_inference_steps=30, + guidance_scale=7.5 + # highlight-end + ) + + image = Image.fromarray(image_tensor.data[0]) + image.save("image.bmp") + ``` + + + ```cpp + #include "openvino/genai/image_generation/text2image_pipeline.hpp" + #include "imwrite.hpp" + + int main(int argc, char* argv[]) { + const std::string models_path = argv[1], prompt = argv[2]; + + ov::genai::Text2ImagePipeline pipe(models_path, "CPU"); + ov::Tensor image = pipe.generate( + prompt, + // highlight-start + ov::genai::width(512), + ov::genai::height(512), + ov::genai::num_images_per_prompt(1), + ov::genai::num_inference_steps(30), + ov::genai::guidance_scale(7.5f) + // highlight-end + ); + + imwrite("image.bmp", image, true); + } + ``` + + + +:::info Understanding Image Generation Parameters + +- `width`: The width of resulting image(s). +- `height`: The height of resulting image(s). +- `num_images_per_prompt`: Specifies how many image variations to generate in a single request for the same prompt. +- `num_inference_steps`: Defines denoising iteration count. Higher values increase quality and generation time, lower values generate faster with less detail. +- `guidance_scale`: Balances prompt adherence vs. creativity. Higher values follow prompt more strictly, lower values allow more creative freedom. +- `rng_seed`: Controls randomness for reproducible results. Same seed produces identical images across runs. 
+ +For the full list of generation parameters, refer to the [Image Generation Config API](https://docs.openvino.ai/2026/api/genai_api/_autosummary/openvino_genai.ImageGenerationConfig.html). + +::: + +### Working with LoRA Adapters + +For image generation models like Stable Diffusion, LoRA adapters can modify the generation process to produce images with specific artistic styles, content types, or quality enhancements. + +Refer to the [LoRA Adapters](/docs/guides/lora-adapters.mdx) for more details on working with LoRA adapters. diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/index.mdx b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/index.mdx new file mode 100644 index 0000000..9552385 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-generation/index.mdx @@ -0,0 +1,21 @@ +--- +sidebar_position: 3 +--- +import OptimumCLI from '@site/src/components/OptimumCLI'; +import ConvertModelSection from '../_shared/_convert_model.mdx'; +import RunModelSection from './_sections/_run_model/index.mdx'; +import UsageOptionsSection from './_sections/_usage_options/index.mdx'; + +# Image Generation Using Diffusers + + + Download and convert model (e.g. [stabilityai/stable-diffusion-xl-base-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)) to OpenVINO format from Hugging Face: + + + + See all supported [Image Generation Models](/docs/supported-models/#image-generation-models). 
+ + + + + diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-processing/_sections/_run_model/_code_example_cpp.mdx b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-processing/_sections/_run_model/_code_example_cpp.mdx new file mode 100644 index 0000000..1d348ce --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-processing/_sections/_run_model/_code_example_cpp.mdx @@ -0,0 +1,24 @@ +import CodeBlock from '@theme/CodeBlock'; + + +{`#include "openvino/genai/visual_language/pipeline.hpp" +#include "load_image.hpp" +#include <iostream> + +int main(int argc, char* argv[]) { + std::string models_path = argv[1], images_path = argv[2]; + std::vector<ov::Tensor> images = utils::load_images(images_path); + + ov::genai::VLMPipeline pipe(models_path, "${props.device || 'CPU'}"); + ov::genai::VLMDecodedResults result = pipe.generate( + prompt, + ov::genai::images(images), + ov::genai::max_new_tokens(100) + ); + std::cout << result.texts[0] << std::endl; + + // To input video frames, use 'ov::genai::videos' property, frames tensor layout = [Frame, H, W, C] + // pipe.generate(prompt, ov::genai::videos(std::vector<ov::Tensor>{frames}), ov::genai::max_new_tokens(100)); +} +`} + diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-processing/_sections/_run_model/_code_example_js.mdx b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-processing/_sections/_run_model/_code_example_js.mdx new file mode 100644 index 0000000..f2a495c --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-processing/_sections/_run_model/_code_example_js.mdx @@ -0,0 +1,40 @@ +import CodeBlock from '@theme/CodeBlock'; + + +{`import { addon as ov } from "openvino-node"; +import { VLMPipeline } from "openvino-genai-node"; +import { stat, readdir } from "node:fs/promises"; +import sharp from "sharp"; +import path from "node:path"; + +async function readImage(imagePath) { + const img =
sharp(imagePath); + const metadata = await img.metadata(); + const { width, height, channels } = metadata; + const imageBuffer = await img.raw().toBuffer(); + return new ov.Tensor(ov.element.u8, [height, width, channels], imageBuffer); +} + +async function readImages(imagePath) { + const stats = await stat(imagePath); + if (stats.isDirectory()) { + const files = await readdir(imagePath); + return Promise.all(files.sort().map((file) => readImage(path.join(imagePath, file)))); + } + return [await readImage(imagePath)]; +} + +const images = await readImages("./images"); + +const pipe = await VLMPipeline(modelPath, "${props.device || 'CPU'}"); + +const result = await pipe.generate(prompt, { + images, + generationConfig: { max_new_tokens: 100 }, +}); +console.log(result.texts[0]); + +// To input videos frames, use 'videos' option, frames tensor layout = [Frame, H, W, C] +// const result = await pipe.generate(prompt, { videos: [frames], generationConfig: { max_new_tokens: 100 } }); +`} + diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-processing/_sections/_run_model/_code_example_python.mdx b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-processing/_sections/_run_model/_code_example_python.mdx new file mode 100644 index 0000000..36f6729 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-processing/_sections/_run_model/_code_example_python.mdx @@ -0,0 +1,30 @@ +import CodeBlock from '@theme/CodeBlock'; + + +{`import openvino_genai as ov_genai +import openvino as ov +from PIL import Image +import numpy as np +from pathlib import Path + +def read_image(path: str) -> ov.Tensor: + pic = Image.open(path).convert("RGB") + image_data = np.array(pic)[None] + return ov.Tensor(image_data) + +def read_images(path: str) -> list[ov.Tensor]: + entry = Path(path) + if entry.is_dir(): + return [read_image(str(file)) for file in sorted(entry.iterdir())] + return [read_image(path)] + +images = 
read_images("./images") + +pipe = ov_genai.VLMPipeline(model_path, "${props.device || 'CPU'}") +result = pipe.generate(prompt, images=images, max_new_tokens=100) +print(result.texts[0]) + +# To input videos frames, use 'videos=', frames tensor layout = [Frame, H, W, C] +# result = pipe.generate(prompt, videos=[frames], max_new_tokens=100) +`} + diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-processing/_sections/_run_model/index.mdx b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-processing/_sections/_run_model/index.mdx new file mode 100644 index 0000000..e015ce8 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-processing/_sections/_run_model/index.mdx @@ -0,0 +1,47 @@ +import CodeExampleCPP from './_code_example_cpp.mdx'; +import CodeExamplePython from './_code_example_python.mdx'; +import CodeExampleJS from './_code_example_js.mdx'; + +## Run Model Using OpenVINO GenAI + +OpenVINO GenAI introduces the [`VLMPipeline`](https://docs.openvino.ai/2026/api/genai_api/_autosummary/openvino_genai.VLMPipeline.html) pipeline for inference of multimodal text-generation Vision Language Models (VLMs). +It can generate text from a text prompt and images as inputs. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +:::tip + +Use CPU or GPU as devices without any other code change. 
+ +::: diff --git a/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-processing/_sections/_usage_options/index.mdx b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-processing/_sections/_usage_options/index.mdx new file mode 100644 index 0000000..6af81a7 --- /dev/null +++ b/src/resources/openvino.genai-2026.1.0.0/site/docs/use-cases/image-processing/_sections/_usage_options/index.mdx @@ -0,0 +1,120 @@ +import BasicGenerationConfiguration from '@site/docs/use-cases/_shared/_basic_generation_configuration.mdx'; +import ChatScenario from '@site/docs/use-cases/_shared/_chat_scenario.mdx'; +import GenerationConfigurationWorkflow from '@site/docs/use-cases/_shared/_generation_configuration_workflow.mdx'; +import Streaming from '@site/docs/use-cases/_shared/_streaming.mdx'; + +## Additional Usage Options + +:::tip +Check out [Python](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/python/visual_language_chat) and [C++](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/cpp/visual_language_chat) visual language chat samples. +::: + +### Use Image or Video Tags in Prompt + +The prompt can contain `<ov_genai_image_i>` with `i` replaced with an actual zero-based index to refer to an image. Reference to images used in previous prompts isn't implemented. A model's native image tag can be used instead of `<ov_genai_image_i>`. These tags are: +1. InternVL2: `<image>\n` +2. llava-1.5-7b-hf: `<image>` +3. LLaVA-NeXT: `<image>` +4. LLaVA-NeXT-Video: `<image>` +5. nanoLLaVA: `<image>\n` +6. nanoLLaVA-1.5: `<image>\n` +7. MiniCPM-o-2_6: `<image>./</image>\n` +8. MiniCPM-V-2_6: `<image>./</image>\n` +9. Phi-3-vision: `<|image_i|>\n` - the index starts with one +10. Phi-4-multimodal-instruct: `<|image_i|>\n` - the index starts with one +11. Qwen2-VL: `<|vision_start|><|image_pad|><|vision_end|>` +12. Qwen2.5-VL: `<|vision_start|><|image_pad|><|vision_end|>` +13. Qwen3-VL: `<|vision_start|><|image_pad|><|vision_end|>` +14. gemma-3-4b-it: `<start_of_image>` + +A model's native video tag can be used to refer to a video.
These tags are: +1. LLaVA-NeXT-Video: `