diff --git a/AIDevGallery/Controls/WcrModelDownloader.xaml b/AIDevGallery/Controls/WcrModelDownloader.xaml index fcc0b7cc..0a6a20fb 100644 --- a/AIDevGallery/Controls/WcrModelDownloader.xaml +++ b/AIDevGallery/Controls/WcrModelDownloader.xaml @@ -31,7 +31,7 @@ IsTextSelectionEnabled="True" TextAlignment="Center" TextWrapping="Wrap"> - + A Copilot+ PC with Windows 11 Build 26120.3073 or higher is required - Windows Update + + + Windows Update diff --git a/AIDevGallery/Controls/WcrModelDownloader.xaml.cs b/AIDevGallery/Controls/WcrModelDownloader.xaml.cs index c620b0ed..ff166008 100644 --- a/AIDevGallery/Controls/WcrModelDownloader.xaml.cs +++ b/AIDevGallery/Controls/WcrModelDownloader.xaml.cs @@ -166,6 +166,15 @@ public Task SetDownloadOperation(ModelType modelType, string sampleId, Fun ? Visibility.Visible : Visibility.Collapsed; + // TODO: Remove once the Speech Recognition ships through Windows Update + var isSpeechRecognition = modelType == ModelType.SpeechRecognition; + ModelDownloadInfoRun.Text = isSpeechRecognition + ? "This Windows AI API requires a one-time model download." + : "This Windows AI API requires a one-time model download via Windows Update."; + WindowsUpdateTrackingText.Visibility = isSpeechRecognition + ? Visibility.Collapsed + : Visibility.Visible; + if (exisitingOperation != null && exisitingOperation.Status == AsyncStatus.Started) { // don't reuse same one because we can only have one Progress delegate diff --git a/AIDevGallery/Package.Store.appxmanifest b/AIDevGallery/Package.Store.appxmanifest index 86a6319c..6a677c7e 100644 --- a/AIDevGallery/Package.Store.appxmanifest +++ b/AIDevGallery/Package.Store.appxmanifest @@ -62,5 +62,6 @@ + diff --git a/AIDevGallery/Package.appxmanifest b/AIDevGallery/Package.appxmanifest index 168a7315..fc2714af 100644 --- a/AIDevGallery/Package.appxmanifest +++ b/AIDevGallery/Package.appxmanifest @@ -62,5 +62,6 @@ + diff --git a/AIDevGallery/Samples/Definitions/WcrApis/WcrApiCodeSnippet.cs b/AIDevGallery/Samples/Definitions/WcrApis/WcrApiCodeSnippet.cs index b9f7ec1e..69cfa2c6 100644 --- a/AIDevGallery/Samples/Definitions/WcrApis/WcrApiCodeSnippet.cs +++ b/AIDevGallery/Samples/Definitions/WcrApis/WcrApiCodeSnippet.cs @@ -721,6 +721,65 @@ public async void IndexStatisticsSample() } } """"" + }, + { + ModelType.SpeechRecognition, """" + using Microsoft.Windows.AI; + using Microsoft.Windows.AI.MachineLearning; + using Microsoft.Windows.AI.Speech; + + // Speech recognition runs on the device's execution providers (CPU / GPU / NPU). + var catalog = ExecutionProviderCatalog.GetDefault(); + await catalog.EnsureAndRegisterCertifiedAsync(); + + var readyState = SpeechRecognitionModel.GetReadyState(); + if (readyState is AIFeatureReadyState.Ready or AIFeatureReadyState.NotReady) + { + if (readyState == AIFeatureReadyState.NotReady) + { + var ensureOp = await SpeechRecognitionModel.EnsureReadyAsync(); + if (ensureOp.Status != AIFeatureReadyResultState.Success) + { + throw new InvalidOperationException("Speech model could not be prepared."); + } + } + + var modelResult = await SpeechRecognitionModel.TryCreateAsync(); + if (modelResult.ExtendedError != null) + { + throw modelResult.ExtendedError; + } + + using SpeechRecognitionModel speechModel = modelResult.SpeechModel; + + // Stream audio from the default microphone. Pass an empty deviceId to use the system default. + var audioConfig = AudioConfiguration.FromAudioDevice(string.Empty); + + using var streaming = new StreamingRecognition(audioConfig, speechModel); + + streaming.Recognizing += (_, args) => + { + // Interim hypothesis (updates frequently as more audio arrives). + Console.WriteLine($"[interim] {args.Text}"); + }; + + streaming.Recognized += (_, args) => + { + // Final result for a stable utterance. + Console.WriteLine($"[final] offset={args.Offset:F2}s duration={args.Duration:F2}s: {args.Text}"); + }; + + await streaming.StartContinuousRecognitionAsync(); + + // ... let captions stream in. When done: + streaming.StopContinuousRecognition(); + } + else + { + // DisabledByUser or NotSupportedOnCurrentSystem. + throw new InvalidOperationException($"Speech recognition is not available: {readyState}."); + } + """" } }; } \ No newline at end of file diff --git a/AIDevGallery/Samples/Definitions/WcrApis/WcrApiHelpers.cs b/AIDevGallery/Samples/Definitions/WcrApis/WcrApiHelpers.cs index 0becb093..d6b37a13 100644 --- a/AIDevGallery/Samples/Definitions/WcrApis/WcrApiHelpers.cs +++ b/AIDevGallery/Samples/Definitions/WcrApis/WcrApiHelpers.cs @@ -5,10 +5,13 @@ using AIDevGallery.Utils; using Microsoft.Windows.AI; using Microsoft.Windows.AI.Imaging; +using Microsoft.Windows.AI.MachineLearning; +using Microsoft.Windows.AI.Speech; using Microsoft.Windows.AI.Text; using Microsoft.Windows.AI.Video; using System; using System.Collections.Generic; +using System.Runtime.InteropServices.WindowsRuntime; using Windows.Foundation; namespace AIDevGallery.Samples; @@ -80,6 +83,9 @@ internal static class WcrApiHelpers }, { ModelType.VideoSuperRes, VideoScaler.GetReadyState + }, + { + ModelType.SpeechRecognition, SpeechRecognitionModel.GetReadyState } }; @@ -132,9 +138,29 @@ internal static class WcrApiHelpers }, { ModelType.VideoSuperRes, VideoScaler.EnsureReadyAsync + }, + { + ModelType.SpeechRecognition, EnsureSpeechRecognitionModelReadyAsync } }; + // SpeechRecognitionModel.EnsureReadyAsync reports progress as SpeechRecognitionModelProgress, + // so adapt it to the IAsyncOperationWithProgress shape the gallery expects. + private static IAsyncOperationWithProgress EnsureSpeechRecognitionModelReadyAsync() + { + return AsyncInfo.Run(async (cancellationToken, progress) => + { + progress.Report(0); + var catalog = ExecutionProviderCatalog.GetDefault(); + await catalog.EnsureAndRegisterCertifiedAsync().AsTask(cancellationToken); + + var inner = SpeechRecognitionModel.EnsureReadyAsync(); + inner.Progress = (_, p) => progress.Report(p.Progress); + using var registration = cancellationToken.Register(() => inner.Cancel()); + return await inner; + }); + } + // this is a workaround for GetReadyState not returning Ready after EnsureReadyAsync is called // for now, we will track when EnsureReadyAsync succeeds for each model to ensure we are not // blocking the samples from running until this bug is fixed diff --git a/AIDevGallery/Samples/Definitions/WcrApis/apis.json b/AIDevGallery/Samples/Definitions/WcrApis/apis.json index 61b79841..7311baee 100644 --- a/AIDevGallery/Samples/Definitions/WcrApis/apis.json +++ b/AIDevGallery/Samples/Definitions/WcrApis/apis.json @@ -218,6 +218,17 @@ "ReadmeUrl": "https://github.com/MicrosoftDocs/windows-ai-docs/blob/docs/docs/apis/video-super-resolution.md", "License": "ms-pl", "SampleIdToShowInDocs": "c3252e18-1d47-4689-adae-78fc66968650" + }, + "SpeechRecognition": { + "Id": "0d4f1c2a-7e3b-4c9e-9b8a-1f2d3c4b5a60", + "Name": "Speech Recognition", + "Icon": "WCRAPI.svg", + "IconGlyph": "\uE720", + "Description": "Continuously transcribe audio using local speech recognition.", + "ReadmeUrl": "https://github.com/MicrosoftDocs/windows-ai-docs/blob/docs/docs/apis/speech-recognition.md", + "License": "ms-pl", + "SampleIdToShowInDocs": "9c5b2e8a-1f7d-4d3c-9e6a-3b1c8e7f4d20", + "Category": "Speech" } } } diff --git a/AIDevGallery/Samples/WCRAPIs/SpeechRecognition.xaml b/AIDevGallery/Samples/WCRAPIs/SpeechRecognition.xaml new file mode 100644 index 00000000..654b365b --- /dev/null +++ b/AIDevGallery/Samples/WCRAPIs/SpeechRecognition.xaml @@ -0,0 +1,97 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + +