Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions AIDevGallery/Controls/WcrModelDownloader.xaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
IsTextSelectionEnabled="True"
TextAlignment="Center"
TextWrapping="Wrap">
<Run Text="This Windows AI API requires a one-time model download via Windows Update." /><LineBreak /> <LineBreak />
<Run x:Name="ModelDownloadInfoRun" Text="This Windows AI API requires a one-time model download via Windows Update." /><LineBreak /> <LineBreak />
<Hyperlink NavigateUri="https://learn.microsoft.com/windows/ai/apis/model-setup#prerequisites" UnderlineStyle="None">A Copilot+ PC with Windows 11 Build 26120.3073 or higher is required</Hyperlink>
</TextBlock>
<TextBlock
Expand Down Expand Up @@ -71,7 +71,15 @@
TextWrapping="WrapWholeWords">
<Run Text="Requesting model.." /> <LineBreak />
<LineBreak />
<Run FontSize="12" Text="Download progress can also be tracked in " /><Hyperlink Click="WindowsUpdateHyperlinkClicked" FontSize="12">Windows Update</Hyperlink><LineBreak />
</TextBlock>
<!-- Windows Update tracking hint, kept as its own named TextBlock so the code-behind can toggle its Visibility (it is collapsed when the model type is SpeechRecognition). -->
<TextBlock
x:Name="WindowsUpdateTrackingText"
Margin="0,0,0,24"
HorizontalAlignment="Center"
Foreground="{ThemeResource TextFillColorSecondaryBrush}"
TextAlignment="Center"
TextWrapping="WrapWholeWords">
<Run FontSize="12" Text="Download progress can also be tracked in " /><Hyperlink Click="WindowsUpdateHyperlinkClicked" FontSize="12">Windows Update</Hyperlink>
</TextBlock>
</StackPanel>

Expand Down
9 changes: 9 additions & 0 deletions AIDevGallery/Controls/WcrModelDownloader.xaml.cs
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,15 @@ public Task<bool> SetDownloadOperation(ModelType modelType, string sampleId, Fun
? Visibility.Visible
: Visibility.Collapsed;

// TODO: Remove once the Speech Recognition model ships through Windows Update
var isSpeechRecognition = modelType == ModelType.SpeechRecognition;
ModelDownloadInfoRun.Text = isSpeechRecognition
? "This Windows AI API requires a one-time model download."
: "This Windows AI API requires a one-time model download via Windows Update.";
WindowsUpdateTrackingText.Visibility = isSpeechRecognition
? Visibility.Collapsed
: Visibility.Visible;

if (exisitingOperation != null && exisitingOperation.Status == AsyncStatus.Started)
{
// don't reuse same one because we can only have one Progress delegate
Expand Down
1 change: 1 addition & 0 deletions AIDevGallery/Package.Store.appxmanifest
Original file line number Diff line number Diff line change
Expand Up @@ -62,5 +62,6 @@
<Capabilities>
<rescap:Capability Name="runFullTrust" />
<systemai:Capability Name="systemAIModels"/>
<DeviceCapability Name="microphone" />
</Capabilities>
</Package>
1 change: 1 addition & 0 deletions AIDevGallery/Package.appxmanifest
Original file line number Diff line number Diff line change
Expand Up @@ -62,5 +62,6 @@
<Capabilities>
<rescap:Capability Name="runFullTrust" />
<systemai:Capability Name="systemAIModels"/>
<DeviceCapability Name="microphone" />
</Capabilities>
</Package>
59 changes: 59 additions & 0 deletions AIDevGallery/Samples/Definitions/WcrApis/WcrApiCodeSnippet.cs
Original file line number Diff line number Diff line change
Expand Up @@ -721,6 +721,65 @@ public async void IndexStatisticsSample()
}
}
"""""
},
{
ModelType.SpeechRecognition, """"
using Microsoft.Windows.AI;
using Microsoft.Windows.AI.MachineLearning;
using Microsoft.Windows.AI.Speech;

// Speech recognition runs on the device's execution providers (CPU / GPU / NPU).
var catalog = ExecutionProviderCatalog.GetDefault();
await catalog.EnsureAndRegisterCertifiedAsync();

var readyState = SpeechRecognitionModel.GetReadyState();
if (readyState is AIFeatureReadyState.Ready or AIFeatureReadyState.NotReady)
{
if (readyState == AIFeatureReadyState.NotReady)
{
var ensureOp = await SpeechRecognitionModel.EnsureReadyAsync();
if (ensureOp.Status != AIFeatureReadyResultState.Success)
{
throw new InvalidOperationException("Speech model could not be prepared.");
}
}

var modelResult = await SpeechRecognitionModel.TryCreateAsync();
if (modelResult.ExtendedError != null)
{
throw modelResult.ExtendedError;
}

using SpeechRecognitionModel speechModel = modelResult.SpeechModel;

// Stream audio from the default microphone. Pass an empty deviceId to use the system default.
var audioConfig = AudioConfiguration.FromAudioDevice(string.Empty);

using var streaming = new StreamingRecognition(audioConfig, speechModel);

streaming.Recognizing += (_, args) =>
{
// Interim hypothesis (updates frequently as more audio arrives).
Console.WriteLine($"[interim] {args.Text}");
};

streaming.Recognized += (_, args) =>
{
// Final result for a stable utterance.
Console.WriteLine($"[final] offset={args.Offset:F2}s duration={args.Duration:F2}s: {args.Text}");
};

await streaming.StartContinuousRecognitionAsync();

// ... let captions stream in. When done:
streaming.StopContinuousRecognition();
}
else
{
// DisabledByUser or NotSupportedOnCurrentSystem.
throw new InvalidOperationException($"Speech recognition is not available: {readyState}.");
}
""""
}
};
}
26 changes: 26 additions & 0 deletions AIDevGallery/Samples/Definitions/WcrApis/WcrApiHelpers.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,13 @@
using AIDevGallery.Utils;
using Microsoft.Windows.AI;
using Microsoft.Windows.AI.Imaging;
using Microsoft.Windows.AI.MachineLearning;
using Microsoft.Windows.AI.Speech;
using Microsoft.Windows.AI.Text;
using Microsoft.Windows.AI.Video;
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices.WindowsRuntime;
using Windows.Foundation;

namespace AIDevGallery.Samples;
Expand Down Expand Up @@ -80,6 +83,9 @@ internal static class WcrApiHelpers
},
{
ModelType.VideoSuperRes, VideoScaler.GetReadyState
},
{
ModelType.SpeechRecognition, SpeechRecognitionModel.GetReadyState
}
};

Expand Down Expand Up @@ -132,9 +138,29 @@ internal static class WcrApiHelpers
},
{
ModelType.VideoSuperRes, VideoScaler.EnsureReadyAsync
},
{
ModelType.SpeechRecognition, EnsureSpeechRecognitionModelReadyAsync
}
};

// SpeechRecognitionModel.EnsureReadyAsync reports progress as SpeechRecognitionModelProgress,
// so adapt it to the IAsyncOperationWithProgress<AIFeatureReadyResult, double> shape the gallery expects.
// The adapter also ensures and registers the default execution provider catalog before the model
// is requested, and forwards cancellation of the outer operation to the inner one.
// Returns the AIFeatureReadyResult produced by the inner EnsureReadyAsync call.
private static IAsyncOperationWithProgress<AIFeatureReadyResult, double> EnsureSpeechRecognitionModelReadyAsync()
{
    return AsyncInfo.Run<AIFeatureReadyResult, double>(async (cancellationToken, progress) =>
    {
        // Publish an initial value; no progress is forwarded from the execution-provider step below.
        progress.Report(0);
        var catalog = ExecutionProviderCatalog.GetDefault();
        await catalog.EnsureAndRegisterCertifiedAsync().AsTask(cancellationToken);

        var inner = SpeechRecognitionModel.EnsureReadyAsync();

        // Forward only the Progress value of each update so callers see a plain double.
        inner.Progress = (_, p) => progress.Report(p.Progress);

        // Cancel the inner operation when the outer one is cancelled; the registration
        // is disposed when this lambda exits.
        using var registration = cancellationToken.Register(() => inner.Cancel());
        return await inner;
    });
}

// this is a workaround for GetReadyState not returning Ready after EnsureReadyAsync is called
// for now, we will track when EnsureReadyAsync succeeds for each model to ensure we are not
// blocking the samples from running until this bug is fixed
Expand Down
11 changes: 11 additions & 0 deletions AIDevGallery/Samples/Definitions/WcrApis/apis.json
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,17 @@
"ReadmeUrl": "https://github.com/MicrosoftDocs/windows-ai-docs/blob/docs/docs/apis/video-super-resolution.md",
"License": "ms-pl",
"SampleIdToShowInDocs": "c3252e18-1d47-4689-adae-78fc66968650"
},
"SpeechRecognition": {
"Id": "0d4f1c2a-7e3b-4c9e-9b8a-1f2d3c4b5a60",
"Name": "Speech Recognition",
"Icon": "WCRAPI.svg",
"IconGlyph": "\uE720",
"Description": "Continuously transcribe audio using local speech recognition.",
"ReadmeUrl": "https://github.com/MicrosoftDocs/windows-ai-docs/blob/docs/docs/apis/speech-recognition.md",
"License": "ms-pl",
"SampleIdToShowInDocs": "9c5b2e8a-1f7d-4d3c-9e6a-3b1c8e7f4d20",
"Category": "Speech"
}
}
}
Expand Down
97 changes: 97 additions & 0 deletions AIDevGallery/Samples/WCRAPIs/SpeechRecognition.xaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
<?xml version="1.0" encoding="utf-8" ?>
<samples:BaseSamplePage
x:Class="AIDevGallery.Samples.WCRAPIs.SpeechRecognition"
xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
xmlns:d="http://schemas.microsoft.com/expression/blend/2008"
xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
xmlns:samples="using:AIDevGallery.Samples"
mc:Ignorable="d">
<Grid RowSpacing="12">
<Grid.RowDefinitions>
<RowDefinition Height="*" />
<RowDefinition Height="Auto" />
<RowDefinition Height="Auto" />
</Grid.RowDefinitions>

<!-- Transcript card (grid row 0): a vertically scrollable area holding two stacked, selectable text blocks. -->
<Border
Grid.Row="0"
Background="{ThemeResource CardBackgroundFillColorDefaultBrush}"
BorderBrush="{ThemeResource ControlStrongStrokeColorDefaultBrush}"
BorderThickness="1"
CornerRadius="{StaticResource OverlayCornerRadius}">
<ScrollViewer
x:Name="TranscriptionScrollViewer"
Padding="16"
HorizontalScrollBarVisibility="Disabled"
VerticalScrollBarVisibility="Auto">
<StackPanel Spacing="4">
<!-- Shows placeholder text until it is replaced. NOTE(review): presumably the code-behind writes finalized results here; confirm in SpeechRecognition.xaml.cs. -->
<TextBlock
x:Name="FinalTranscriptionTextBlock"
AutomationProperties.LiveSetting="Polite"
AutomationProperties.Name="Final transcription"
FontFamily="Cascadia Code"
FontSize="16"
IsTextSelectionEnabled="True"
Text="Press Start to begin speech recognition..."
TextWrapping="Wrap" />
<!-- Starts empty; italic with the secondary text brush so it reads differently from the block above. NOTE(review): presumably holds in-progress hypotheses; confirm in the code-behind. -->
<TextBlock
x:Name="InterimTranscriptionTextBlock"
AutomationProperties.LiveSetting="Polite"
AutomationProperties.Name="Interim transcription"
FontFamily="Cascadia Code"
FontSize="16"
FontStyle="Italic"
Foreground="{ThemeResource TextFillColorSecondaryBrush}"
IsTextSelectionEnabled="True"
TextWrapping="Wrap" />
</StackPanel>
</ScrollViewer>
</Border>

<InfoBar
x:Name="StatusInfoBar"
Grid.Row="1"
IsClosable="False"
IsOpen="False"
Severity="Informational" />

<StackPanel
Grid.Row="2"
HorizontalAlignment="Center"
Orientation="Horizontal"
Spacing="8">
<!-- Input source picker. SelectedIndex="0" makes the first item, "Microphone (live)", the initial selection. -->
<!-- NOTE(review): the Tag values (Microphone, FileBatch, FileStreaming) presumably identify the mode for the code-behind; confirm in SpeechRecognition.xaml.cs. -->
<ComboBox
x:Name="InputSourceComboBox"
MinWidth="240"
AutomationProperties.Name="Input source"
SelectedIndex="0"
ToolTipService.ToolTip="Choose what to transcribe, then press Start.">
<ComboBoxItem
Content="Microphone (live)"
Tag="Microphone"
ToolTipService.ToolTip="Live transcription from the default microphone using StreamingRecognition." />
<ComboBoxItem
Content="Audio file - full transcript"
Tag="FileBatch"
ToolTipService.ToolTip="BatchRecognition.RecognizeFromFile: one-shot call that returns the complete transcript." />
<ComboBoxItem
Content="Audio file - incremental"
Tag="FileStreaming"
ToolTipService.ToolTip="StreamingRecognition over a file: raises incremental Recognizing/Recognized events, like the microphone." />
</ComboBox>
<Button
x:Name="StartStopButton"
MinWidth="160"
Click="StartStopButton_Click"
Content="Start recognition"
Style="{StaticResource AccentButtonStyle}" />
Comment thread
Copilot marked this conversation as resolved.
<Button
x:Name="ClearButton"
MinWidth="120"
AutomationProperties.Name="Clear transcription"
Click="ClearButton_Click"
Content="Clear" />
</StackPanel>
</Grid>
</samples:BaseSamplePage>
Loading
Loading