diff --git a/AuthoritativeVoiceClient.cs b/AuthoritativeVoiceClient.cs new file mode 100644 index 0000000..1484421 --- /dev/null +++ b/AuthoritativeVoiceClient.cs @@ -0,0 +1,466 @@ +using System.Buffers.Binary; +using System.Net; +using System.Net.Sockets; +using System.Text; +using System.Threading; + +namespace OpenVoiceSharp +{ + /// + /// UDP client for the OpenVoiceSharp authoritative voice server. + /// Handles room join/leave, voice packet send/receive, and peer events. + /// + public sealed class AuthoritativeVoiceClient : IDisposable + { + // protocol packet types + private const byte ClientHello = 1; + private const byte ClientVoice = 2; + private const byte ClientLeave = 3; + private const byte ClientPing = 4; + private const byte ClientAuthHello = 5; + + private const byte ServerWelcome = 11; + private const byte ServerVoiceRelay = 12; + private const byte ServerError = 13; + private const byte ServerPeerJoined = 14; + private const byte ServerPeerLeft = 15; + private const byte ServerPong = 16; + + // events + public delegate void ConnectedEvent(Guid clientId); + public event ConnectedEvent? Connected; + + public delegate void PeerJoinedEvent(Guid clientId, string userName); + public event PeerJoinedEvent? PeerJoined; + + public delegate void PeerLeftEvent(Guid clientId); + public event PeerLeftEvent? PeerLeft; + + public delegate void VoicePacketReceivedEvent(Guid speakerClientId, uint sequence, byte[] payload, int length); + public event VoicePacketReceivedEvent? VoicePacketReceived; + + public delegate void ServerErrorEvent(byte errorCode, string message); + public event ServerErrorEvent? ErrorReceived; + + public delegate void PongReceivedEvent(Guid clientId); + public event PongReceivedEvent? PongReceived; + + public delegate void DisconnectedEvent(); + public event DisconnectedEvent? Disconnected; + + // settings + public string ServerHost { get; } + public int ServerPort { get; } + public string RoomName { get; private set; } + public string UserName { get; private set; } + public string? AuthToken { get; } + public Guid ClientId { get; } + public bool IsConnected { get; private set; } + + private IPEndPoint? ServerEndpoint; + private UdpClient? UdpClient; + private CancellationTokenSource? ReceiveCancellationTokenSource; + private Task? ReceiveTask; + private TaskCompletionSource? PendingWelcomeTaskCompletionSource; + private int NextVoiceSequence; + private bool IsDisposed; + + public AuthoritativeVoiceClient( + string serverHost, + int serverPort, + string roomName, + string userName, + string? authToken = null, + Guid? clientId = null + ) + { + if (string.IsNullOrWhiteSpace(serverHost)) + throw new ArgumentException("Server host is required.", nameof(serverHost)); + if (serverPort <= 0 || serverPort > 65535) + throw new ArgumentOutOfRangeException(nameof(serverPort)); + if (string.IsNullOrWhiteSpace(roomName)) + throw new ArgumentException("Room name is required.", nameof(roomName)); + if (string.IsNullOrWhiteSpace(userName)) + throw new ArgumentException("User name is required.", nameof(userName)); + + ServerHost = serverHost; + ServerPort = serverPort; + RoomName = roomName.Trim(); + UserName = userName.Trim(); + AuthToken = string.IsNullOrWhiteSpace(authToken) ? null : authToken.Trim(); + ClientId = clientId ?? Guid.NewGuid(); + } + + /// + /// Connects to the authoritative server and waits for welcome. + /// + /// Handshake timeout in milliseconds. + public async Task ConnectAsync(int handshakeTimeoutMs = 5000) + { + ThrowIfDisposed(); + if (IsConnected) + return; + if (handshakeTimeoutMs <= 0) + throw new ArgumentOutOfRangeException(nameof(handshakeTimeoutMs)); + + IPAddress[] addresses = await Dns.GetHostAddressesAsync(ServerHost).ConfigureAwait(false); + IPAddress? address = Array.Find(addresses, static ip => ip.AddressFamily == AddressFamily.InterNetwork) + ?? Array.Find(addresses, static ip => ip.AddressFamily == AddressFamily.InterNetworkV6); + + if (address is null) + throw new InvalidOperationException($"Could not resolve server host: {ServerHost}"); + + ServerEndpoint = new IPEndPoint(address, ServerPort); + UdpClient = new UdpClient(address.AddressFamily); + ReceiveCancellationTokenSource = new CancellationTokenSource(); + PendingWelcomeTaskCompletionSource = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + ReceiveTask = Task.Run(ReceiveLoopAsync); + + byte[] helloPacket = string.IsNullOrWhiteSpace(AuthToken) + ? BuildHelloPacket(ClientId, RoomName, UserName) + : BuildAuthHelloPacket(ClientId, RoomName, UserName, AuthToken); + await UdpClient.SendAsync(helloPacket, helloPacket.Length, ServerEndpoint).ConfigureAwait(false); + + Task delayTask = Task.Delay(handshakeTimeoutMs); + Task completedTask = await Task.WhenAny(PendingWelcomeTaskCompletionSource.Task, delayTask).ConfigureAwait(false); + if (completedTask != PendingWelcomeTaskCompletionSource.Task) + { + await DisconnectAsync().ConfigureAwait(false); + throw new TimeoutException("Did not receive welcome packet from server in time."); + } + + // Observe the task in case it faulted. + _ = await PendingWelcomeTaskCompletionSource.Task.ConfigureAwait(false); + } + + /// + /// Sends an encoded Opus payload to the server for room broadcast. + /// + public async Task SendVoiceAsync(byte[] encodedOpusPayload, int length) + { + ThrowIfDisposed(); + if (!IsConnected || UdpClient is null || ServerEndpoint is null) + throw new InvalidOperationException("Client is not connected."); + if (encodedOpusPayload is null) + throw new ArgumentNullException(nameof(encodedOpusPayload)); + if (length <= 0 || length > encodedOpusPayload.Length) + throw new ArgumentOutOfRangeException(nameof(length)); + if (length > ushort.MaxValue) + throw new ArgumentOutOfRangeException(nameof(length), "Payload length must be <= 65535."); + + uint sequence = unchecked((uint)Interlocked.Increment(ref NextVoiceSequence)); + byte[] packet = BuildVoicePacket(ClientId, sequence, encodedOpusPayload, length); + await UdpClient.SendAsync(packet, packet.Length, ServerEndpoint).ConfigureAwait(false); + } + + /// + /// Sends a ping packet. + /// + public async Task PingAsync() + { + ThrowIfDisposed(); + if (!IsConnected || UdpClient is null || ServerEndpoint is null) + throw new InvalidOperationException("Client is not connected."); + + byte[] pingPacket = BuildSingleGuidPacket(ClientPing, ClientId); + await UdpClient.SendAsync(pingPacket, pingPacket.Length, ServerEndpoint).ConfigureAwait(false); + } + + /// + /// Disconnects from the server and stops receive loop. + /// + public async Task DisconnectAsync() + { + if (UdpClient is not null && ServerEndpoint is not null) + { + try + { + byte[] leavePacket = BuildSingleGuidPacket(ClientLeave, ClientId); + await UdpClient.SendAsync(leavePacket, leavePacket.Length, ServerEndpoint).ConfigureAwait(false); + } + catch + { + // ignore transport errors during disconnect + } + } + + await StopNetworkingAsync().ConfigureAwait(false); + } + + private async Task ReceiveLoopAsync() + { + if (UdpClient is null) + return; + + try + { + while (!ReceiveCancellationTokenSource!.IsCancellationRequested) + { + UdpReceiveResult result = await UdpClient.ReceiveAsync().ConfigureAwait(false); + if (ServerEndpoint is null || !EndpointsEqual(result.RemoteEndPoint, ServerEndpoint)) + continue; + + HandleServerPacket(result.Buffer); + } + } + catch (ObjectDisposedException) + { + // expected on shutdown + } + catch (SocketException) + { + // expected on shutdown/close path + } + catch (Exception exception) + { + ErrorReceived?.Invoke(0, $"Receive loop failed: {exception.Message}"); + } + finally + { + if (IsConnected) + { + IsConnected = false; + Disconnected?.Invoke(); + } + } + } + + private void HandleServerPacket(byte[] packet) + { + if (packet.Length == 0) + return; + + switch (packet[0]) + { + case ServerWelcome: + HandleWelcome(packet); + break; + case ServerVoiceRelay: + HandleVoiceRelay(packet); + break; + case ServerError: + HandleServerError(packet); + break; + case ServerPeerJoined: + HandlePeerJoined(packet); + break; + case ServerPeerLeft: + HandlePeerLeft(packet); + break; + case ServerPong: + HandlePong(packet); + break; + } + } + + private void HandleWelcome(byte[] packet) + { + if (packet.Length != 17) + return; + + Guid welcomeClientId = ReadGuid(packet, 1); + if (welcomeClientId != ClientId) + return; + + IsConnected = true; + PendingWelcomeTaskCompletionSource?.TrySetResult(welcomeClientId); + Connected?.Invoke(welcomeClientId); + } + + private void HandleVoiceRelay(byte[] packet) + { + if (packet.Length < 23) + return; + + Guid speakerClientId = ReadGuid(packet, 1); + uint sequence = BinaryPrimitives.ReadUInt32LittleEndian(packet.AsSpan(17, 4)); + ushort payloadLength = BinaryPrimitives.ReadUInt16LittleEndian(packet.AsSpan(21, 2)); + if (packet.Length != 23 + payloadLength) + return; + + byte[] payload = new byte[payloadLength]; + Buffer.BlockCopy(packet, 23, payload, 0, payloadLength); + VoicePacketReceived?.Invoke(speakerClientId, sequence, payload, payload.Length); + } + + private void HandleServerError(byte[] packet) + { + if (packet.Length < 3) + return; + + byte errorCode = packet[1]; + byte messageLength = packet[2]; + if (packet.Length != 3 + messageLength) + return; + + string message = Encoding.UTF8.GetString(packet, 3, messageLength); + ErrorReceived?.Invoke(errorCode, message); + } + + private void HandlePeerJoined(byte[] packet) + { + if (packet.Length < 18) + return; + + Guid peerClientId = ReadGuid(packet, 1); + byte userNameLength = packet[17]; + if (packet.Length != 18 + userNameLength) + return; + + string userName = Encoding.UTF8.GetString(packet, 18, userNameLength); + PeerJoined?.Invoke(peerClientId, userName); + } + + private void HandlePeerLeft(byte[] packet) + { + if (packet.Length != 17) + return; + + Guid peerClientId = ReadGuid(packet, 1); + PeerLeft?.Invoke(peerClientId); + } + + private void HandlePong(byte[] packet) + { + if (packet.Length != 17) + return; + + Guid pongClientId = ReadGuid(packet, 1); + if (pongClientId == ClientId) + PongReceived?.Invoke(pongClientId); + } + + private async Task StopNetworkingAsync() + { + ReceiveCancellationTokenSource?.Cancel(); + + try + { + UdpClient?.Close(); + } + catch + { + // ignore close errors + } + + if (ReceiveTask is not null) + { + try + { + await ReceiveTask.ConfigureAwait(false); + } + catch + { + // ignore receive loop errors on shutdown + } + } + + ReceiveTask = null; + PendingWelcomeTaskCompletionSource = null; + UdpClient?.Dispose(); + UdpClient = null; + ReceiveCancellationTokenSource?.Dispose(); + ReceiveCancellationTokenSource = null; + ServerEndpoint = null; + + if (IsConnected) + { + IsConnected = false; + Disconnected?.Invoke(); + } + } + + public void Dispose() + { + if (IsDisposed) + return; + + IsDisposed = true; + StopNetworkingAsync().GetAwaiter().GetResult(); + } + + private void ThrowIfDisposed() + { + if (IsDisposed) + throw new ObjectDisposedException(nameof(AuthoritativeVoiceClient)); + } + + private static bool EndpointsEqual(IPEndPoint a, IPEndPoint b) + => a.Port == b.Port && Equals(a.Address, b.Address); + + private static Guid ReadGuid(byte[] data, int startIndex) + { + byte[] guidBytes = new byte[16]; + Buffer.BlockCopy(data, startIndex, guidBytes, 0, 16); + return new Guid(guidBytes); + } + + private static byte[] BuildSingleGuidPacket(byte packetType, Guid clientId) + { + byte[] packet = new byte[17]; + packet[0] = packetType; + clientId.TryWriteBytes(packet.AsSpan(1, 16)); + return packet; + } + + private static byte[] BuildHelloPacket(Guid clientId, string roomName, string userName) + { + byte[] roomNameBytes = Encoding.UTF8.GetBytes(roomName); + byte[] userNameBytes = Encoding.UTF8.GetBytes(userName); + if (roomNameBytes.Length > byte.MaxValue) + throw new ArgumentException("Room name is too long after UTF8 encoding.", nameof(roomName)); + if (userNameBytes.Length > byte.MaxValue) + throw new ArgumentException("User name is too long after UTF8 encoding.", nameof(userName)); + + byte[] packet = new byte[19 + roomNameBytes.Length + userNameBytes.Length]; + packet[0] = ClientHello; + clientId.TryWriteBytes(packet.AsSpan(1, 16)); + packet[17] = (byte)roomNameBytes.Length; + roomNameBytes.CopyTo(packet.AsSpan(18)); + + int userLengthIndex = 18 + roomNameBytes.Length; + packet[userLengthIndex] = (byte)userNameBytes.Length; + userNameBytes.CopyTo(packet.AsSpan(userLengthIndex + 1)); + return packet; + } + + private static byte[] BuildVoicePacket(Guid clientId, uint sequence, byte[] payload, int payloadLength) + { + byte[] packet = new byte[23 + payloadLength]; + packet[0] = ClientVoice; + clientId.TryWriteBytes(packet.AsSpan(1, 16)); + BinaryPrimitives.WriteUInt32LittleEndian(packet.AsSpan(17, 4), sequence); + BinaryPrimitives.WriteUInt16LittleEndian(packet.AsSpan(21, 2), (ushort)payloadLength); + Buffer.BlockCopy(payload, 0, packet, 23, payloadLength); + return packet; + } + + private static byte[] BuildAuthHelloPacket(Guid clientId, string roomName, string userName, string authToken) + { + byte[] roomNameBytes = Encoding.UTF8.GetBytes(roomName); + byte[] userNameBytes = Encoding.UTF8.GetBytes(userName); + byte[] authTokenBytes = Encoding.UTF8.GetBytes(authToken); + if (roomNameBytes.Length > byte.MaxValue) + throw new ArgumentException("Room name is too long after UTF8 encoding.", nameof(roomName)); + if (userNameBytes.Length > byte.MaxValue) + throw new ArgumentException("User name is too long after UTF8 encoding.", nameof(userName)); + if (authTokenBytes.Length > ushort.MaxValue) + throw new ArgumentException("Auth token is too long after UTF8 encoding.", nameof(authToken)); + + byte[] packet = new byte[21 + roomNameBytes.Length + userNameBytes.Length + authTokenBytes.Length]; + packet[0] = ClientAuthHello; + clientId.TryWriteBytes(packet.AsSpan(1, 16)); + packet[17] = (byte)roomNameBytes.Length; + roomNameBytes.CopyTo(packet.AsSpan(18)); + + int userLengthIndex = 18 + roomNameBytes.Length; + packet[userLengthIndex] = (byte)userNameBytes.Length; + userNameBytes.CopyTo(packet.AsSpan(userLengthIndex + 1)); + + int tokenLengthIndex = userLengthIndex + 1 + userNameBytes.Length; + BinaryPrimitives.WriteUInt16LittleEndian(packet.AsSpan(tokenLengthIndex, 2), (ushort)authTokenBytes.Length); + authTokenBytes.CopyTo(packet.AsSpan(tokenLengthIndex + 2)); + return packet; + } + } +} diff --git a/AuthoritativeVoiceSession.cs b/AuthoritativeVoiceSession.cs new file mode 100644 index 0000000..a206ade --- /dev/null +++ b/AuthoritativeVoiceSession.cs @@ -0,0 +1,302 @@ +using WebRtcVadSharp; + +namespace OpenVoiceSharp +{ + /// + /// High-level helper that wires microphone capture, Opus encode/decode, + /// and authoritative server networking into one session. + /// + public sealed class AuthoritativeVoiceSession : IDisposable + { + public delegate void DecodedVoiceFrameEvent(Guid speakerClientId, uint sequence, byte[] pcmData, int length); + public event DecodedVoiceFrameEvent? VoiceFrameDecoded; + + public delegate void SessionErrorEvent(string message, Exception? exception); + public event SessionErrorEvent? SessionError; + + public AuthoritativeVoiceClient Client { get; } + public VoiceChatInterface VoiceChatInterface { get; } + public BasicMicrophoneRecorder Recorder { get; } + + public bool GateOutgoingByVoiceActivity { get; set; } = true; + public bool EnableJitterBuffer { get; set; } = true; + public bool EnableSpeakerPlaybackBuffers { get; set; } = true; + public int JitterTargetPackets { get; } + public int JitterMaxPackets { get; } + public bool IsRunning { get; private set; } + + private readonly int ExpectedPcmFrameSize; + private readonly Dictionary SpeakerJitterBuffers = new(); + private readonly Dictionary SpeakerPlaybackBuffers = new(); + private bool IsDisposed; + private bool IsSubscribed; + + public AuthoritativeVoiceSession( + string serverHost, + int serverPort, + string roomName, + string userName, + string? authToken = null, + int bitrate = VoiceChatInterface.DefaultBitrate, + bool stereo = false, + bool enableNoiseSuppression = true, + bool favorAudioStreaming = false, + OperatingMode? vadOperatingMode = null, + int jitterTargetPackets = 3, + int jitterMaxPackets = 24 + ) + { + if (jitterTargetPackets < 1) + throw new ArgumentOutOfRangeException(nameof(jitterTargetPackets)); + if (jitterMaxPackets < jitterTargetPackets + 2) + throw new ArgumentOutOfRangeException(nameof(jitterMaxPackets)); + + Client = new AuthoritativeVoiceClient(serverHost, serverPort, roomName, userName, authToken); + VoiceChatInterface = new VoiceChatInterface( + bitrate, + stereo, + enableNoiseSuppression, + favorAudioStreaming, + vadOperatingMode + ); + Recorder = new BasicMicrophoneRecorder(stereo); + ExpectedPcmFrameSize = VoiceUtilities.GetSampleSize(stereo ? 2 : 1); + JitterTargetPackets = jitterTargetPackets; + JitterMaxPackets = jitterMaxPackets; + } + + /// + /// Connects to the authoritative server and starts microphone capture. + /// + public async Task StartAsync(int handshakeTimeoutMs = 5000) + { + ThrowIfDisposed(); + if (IsRunning) + return; + + SubscribeEvents(); + + try + { + await Client.ConnectAsync(handshakeTimeoutMs).ConfigureAwait(false); + Recorder.StartRecording(); + IsRunning = true; + } + catch (Exception exception) + { + SessionError?.Invoke("Failed to start voice session.", exception); + await StopAsync().ConfigureAwait(false); + throw; + } + } + + /// + /// Stops microphone capture and disconnects from the server. + /// + public async Task StopAsync() + { + if (!IsRunning && !IsSubscribed) + return; + + try + { + if (Recorder.IsRecording) + Recorder.StopRecording(); + } + catch (Exception exception) + { + SessionError?.Invoke("Failed to stop microphone recorder cleanly.", exception); + } + + try + { + await Client.DisconnectAsync().ConfigureAwait(false); + } + catch (Exception exception) + { + SessionError?.Invoke("Failed to disconnect voice client cleanly.", exception); + } + + UnsubscribeEvents(); + SpeakerJitterBuffers.Clear(); + lock (SpeakerPlaybackBuffers) + SpeakerPlaybackBuffers.Clear(); + IsRunning = false; + } + + private void SubscribeEvents() + { + if (IsSubscribed) + return; + + Recorder.DataAvailable += OnMicrophoneDataAvailable; + Client.VoicePacketReceived += OnVoicePacketReceived; + Client.ErrorReceived += OnClientErrorReceived; + IsSubscribed = true; + } + + private void UnsubscribeEvents() + { + if (!IsSubscribed) + return; + + Recorder.DataAvailable -= OnMicrophoneDataAvailable; + Client.VoicePacketReceived -= OnVoicePacketReceived; + Client.ErrorReceived -= OnClientErrorReceived; + IsSubscribed = false; + } + + private void OnClientErrorReceived(byte errorCode, string message) + => SessionError?.Invoke($"Server error ({errorCode}): {message}", null); + + private void OnMicrophoneDataAvailable(byte[] pcmData, int length) + { + if (!IsRunning) + return; + + if (length != ExpectedPcmFrameSize) + return; + + if (GateOutgoingByVoiceActivity && !VoiceChatInterface.IsSpeaking(pcmData)) + return; + + try + { + (byte[] encodedData, int encodedLength) = VoiceChatInterface.SubmitAudioData(pcmData, length); + _ = SendEncodedFrameAsync(encodedData, encodedLength); + } + catch (Exception exception) + { + SessionError?.Invoke("Failed to encode or enqueue outgoing voice frame.", exception); + } + } + + private async Task SendEncodedFrameAsync(byte[] encodedData, int encodedLength) + { + try + { + await Client.SendVoiceAsync(encodedData, encodedLength).ConfigureAwait(false); + } + catch (Exception exception) + { + SessionError?.Invoke("Failed to send outgoing voice frame.", exception); + } + } + + private void OnVoicePacketReceived(Guid speakerClientId, uint sequence, byte[] payload, int length) + { + if (!EnableJitterBuffer) + { + DecodeAndEmit(speakerClientId, sequence, payload, length); + return; + } + + try + { + if (!SpeakerJitterBuffers.TryGetValue(speakerClientId, out VoiceJitterBuffer? buffer)) + { + buffer = new VoiceJitterBuffer(JitterTargetPackets, JitterMaxPackets); + SpeakerJitterBuffers[speakerClientId] = buffer; + } + + buffer.Add(sequence, payload, length); + foreach ((uint bufferedSequence, byte[] bufferedPayload) in buffer.DrainReady()) + DecodeAndEmit(speakerClientId, bufferedSequence, bufferedPayload, bufferedPayload.Length); + } + catch (Exception exception) + { + SessionError?.Invoke("Failed to decode incoming voice frame.", exception); + } + } + + private void DecodeAndEmit(Guid speakerClientId, uint sequence, byte[] payload, int length) + { + try + { + (byte[] decodedData, int decodedLength) = VoiceChatInterface.WhenDataReceived(payload, length); + + if (EnableSpeakerPlaybackBuffers) + { + VoicePlaybackBuffer playbackBuffer; + lock (SpeakerPlaybackBuffers) + { + if (!SpeakerPlaybackBuffers.TryGetValue(speakerClientId, out playbackBuffer!)) + { + playbackBuffer = new VoicePlaybackBuffer(); + SpeakerPlaybackBuffers[speakerClientId] = playbackBuffer; + } + } + playbackBuffer.Enqueue(decodedData, decodedLength); + } + + VoiceFrameDecoded?.Invoke(speakerClientId, sequence, decodedData, decodedLength); + } + catch (Exception exception) + { + SessionError?.Invoke("Failed to decode incoming voice frame.", exception); + } + } + + /// + /// Reads speaker PCM into output and fills missing bytes with silence. + /// Returns copied PCM bytes before silence fill. + /// + public int ReadSpeakerPlayback(Guid speakerClientId, byte[] output, int count, int offset = 0) + { + if (output is null) + throw new ArgumentNullException(nameof(output)); + + VoicePlaybackBuffer? buffer; + lock (SpeakerPlaybackBuffers) + SpeakerPlaybackBuffers.TryGetValue(speakerClientId, out buffer); + + if (buffer is null) + { + if (count > 0) + Array.Clear(output, offset, count); + return 0; + } + + return buffer.ReadAndFillSilence(output, count, offset); + } + + /// + /// Drains and returns all remaining speaker PCM bytes. + /// + public byte[] FlushSpeakerPlayback(Guid speakerClientId) + { + VoicePlaybackBuffer? buffer; + lock (SpeakerPlaybackBuffers) + SpeakerPlaybackBuffers.TryGetValue(speakerClientId, out buffer); + + return buffer?.Flush() ?? Array.Empty(); + } + + /// + /// Returns active speaker ids with playback buffers. + /// + public Guid[] GetSpeakersWithPlayback() + { + lock (SpeakerPlaybackBuffers) + return SpeakerPlaybackBuffers.Keys.ToArray(); + } + + public void Dispose() + { + if (IsDisposed) + return; + + IsDisposed = true; + StopAsync().GetAwaiter().GetResult(); + Recorder.Dispose(); + VoiceChatInterface.Dispose(); + Client.Dispose(); + } + + private void ThrowIfDisposed() + { + if (IsDisposed) + throw new ObjectDisposedException(nameof(AuthoritativeVoiceSession)); + } + } +} diff --git a/BasicMicrophoneRecorder.cs b/BasicMicrophoneRecorder.cs index b2e1696..aee5551 100644 --- a/BasicMicrophoneRecorder.cs +++ b/BasicMicrophoneRecorder.cs @@ -1,21 +1,21 @@ -using NAudio.Wave; +using NAudio.Wave; namespace OpenVoiceSharp { /// /// Handles basic microphone recording for a voice chat interface using NAudio. /// - public sealed class BasicMicrophoneRecorder + public sealed class BasicMicrophoneRecorder : IDisposable { // events public delegate void AudioInputChangedEvent(int index, WaveInCapabilities microphone); - public event AudioInputChangedEvent AudioInputChanged; + public event AudioInputChangedEvent? AudioInputChanged; public delegate void MicrophoneDataAvailableEvent(byte[] pcmData, int length); - public event MicrophoneDataAvailableEvent DataAvailable; + public event MicrophoneDataAvailableEvent? DataAvailable; public delegate void MicrophoneStoppedRecordingEvent(StoppedEventArgs arguments); - public event MicrophoneStoppedRecordingEvent RecordingStopped; + public event MicrophoneStoppedRecordingEvent? RecordingStopped; // wave format/recorder private readonly WaveFormat WaveFormat; @@ -24,18 +24,30 @@ public sealed class BasicMicrophoneRecorder // setting to a specific microphone public int CurrentMicrophoneIndex { get; private set; } = 0; // default public WaveInCapabilities CurrentMicrophone { get; private set; } + private bool IsDisposed; public void SetMicrophone(int index) { - if (index < 0) return; + ThrowIfDisposed(); WaveInCapabilities[] microphones = GetMicrophones(); - if (index > microphones.Length - 1) return; + if (microphones.Length == 0) + throw new InvalidOperationException("No microphones are available."); + if (index < 0 || index >= microphones.Length) + throw new ArgumentOutOfRangeException(nameof(index)); - CurrentMicrophone = GetMicrophones()[index]; + bool wasRecording = IsRecording; + if (wasRecording) + StopRecording(); + + CurrentMicrophone = microphones[index]; CurrentMicrophoneIndex = index; + MicrophoneRecorder.DeviceNumber = CurrentMicrophoneIndex; AudioInputChanged?.Invoke(CurrentMicrophoneIndex, CurrentMicrophone); + + if (wasRecording) + StartRecording(); } public static WaveInCapabilities[] GetMicrophones() @@ -52,24 +64,28 @@ public static WaveInCapabilities[] GetMicrophones() // recording public bool IsRecording { get; private set; } = false; + public void StartRecording() { - if (IsRecording) return; + ThrowIfDisposed(); + if (IsRecording) return; IsRecording = true; MicrophoneRecorder.StartRecording(); } + public void StopRecording() { + ThrowIfDisposed(); if (!IsRecording) return; IsRecording = false; MicrophoneRecorder.StopRecording(); } + private void WhenRecordingStopped(object? sender, StoppedEventArgs e) => RecordingStopped?.Invoke(e); private void WhenDataAvailable(object? sender, WaveInEventArgs e) => DataAvailable?.Invoke(e.Buffer, e.BytesRecorded); - public BasicMicrophoneRecorder(bool stereo = false) { // wave format for recording @@ -88,5 +104,31 @@ public BasicMicrophoneRecorder(bool stereo = false) // set to default microphone SetToDefaultMicrophone(); } + + public void Dispose() + { + if (IsDisposed) return; + + try + { + if (IsRecording) + MicrophoneRecorder.StopRecording(); + } + catch + { + // ignored: dispose should not throw on shutdown paths + } + + MicrophoneRecorder.DataAvailable -= WhenDataAvailable; + MicrophoneRecorder.RecordingStopped -= WhenRecordingStopped; + MicrophoneRecorder.Dispose(); + IsDisposed = true; + } + + private void ThrowIfDisposed() + { + if (IsDisposed) + throw new ObjectDisposedException(nameof(BasicMicrophoneRecorder)); + } } } diff --git a/CircularAudioBuffer.cs b/CircularAudioBuffer.cs index 0b1a823..f4ae565 100644 --- a/CircularAudioBuffer.cs +++ b/CircularAudioBuffer.cs @@ -1,4 +1,4 @@ -using System; +using System; namespace OpenVoiceSharp { @@ -14,7 +14,7 @@ public enum RecommendedChunkAmount /// Useful for Unity or other engines that do not support streamed pcm reading by default. /// /// Byte/short/float depending on your needs. - public struct CircularAudioBuffer where T: struct + public struct CircularAudioBuffer where T : struct { /// /// The raw length of the buffer, in samples. @@ -26,7 +26,7 @@ public struct CircularAudioBuffer where T: struct public int ChunkSize { get; private set; } public readonly int BufferAvailable => ChunksAvailable * ChunkSize; - public int ChunksAvailable = 0; + public int ChunksAvailable; private readonly T[] Buffer; @@ -34,8 +34,6 @@ public struct CircularAudioBuffer where T: struct public readonly bool CanReadChunk => ChunksAvailable > 0; - // no need to do a for loop to rewrite the buffer. - // just dont give it. /// /// Reads the first chunk available at the front of the buffer. /// @@ -43,13 +41,16 @@ public struct CircularAudioBuffer where T: struct public T[] ReadChunk() { if (!CanReadChunk) - throw new Exception("No chunks are available."); + throw new InvalidOperationException("No chunks are available."); - // slice the chunk - T[] chunk = Buffer[ChunksAvailable..ChunkSize]; + // copy the first chunk out + T[] chunk = new T[ChunkSize]; + Array.Copy(Buffer, 0, chunk, 0, ChunkSize); - // grab the rest and put it at the front - chunk.CopyTo(Buffer[ChunkSize..BufferLength], 0); + // shift remaining data to the front + int remaining = (ChunksAvailable - 1) * ChunkSize; + if (remaining > 0) + Array.Copy(Buffer, ChunkSize, Buffer, 0, remaining); ChunksAvailable--; @@ -61,7 +62,15 @@ public T[] ReadChunk() /// /// The target buffer to which it'll be copied to /// The begin offset to which it'll begin copying to - public void ReadChunkTo(T[] target, int offset = 0) => ReadChunk().CopyTo(target, offset); + public void ReadChunkTo(T[] target, int offset = 0) + { + if (target is null) + throw new ArgumentNullException(nameof(target)); + if (offset < 0 || offset > target.Length - ChunkSize) + throw new ArgumentOutOfRangeException(nameof(offset)); + + ReadChunk().CopyTo(target, offset); + } /// /// Reads all the buffer that is available. @@ -83,7 +92,16 @@ public T[] ReadAllBuffer() /// /// The target buffer to which it'll be copied to /// The begin offset to which it'll begin copying to - public void ReadAllBufferTo(T[] target, int offset = 0) => ReadAllBuffer().CopyTo(target, offset); + public void ReadAllBufferTo(T[] target, int offset = 0) + { + if (target is null) + throw new ArgumentNullException(nameof(target)); + int available = BufferAvailable; + if (offset < 0 || offset > target.Length - available) + throw new ArgumentOutOfRangeException(nameof(offset)); + + ReadAllBuffer().CopyTo(target, offset); + } /// /// Pushes a chunk into the buffer. Will be ignored if the buffer is full. (no overflow) @@ -91,13 +109,16 @@ public T[] ReadAllBuffer() /// Chunk/frame public void PushChunk(T[] chunk) { - // push chunk back if not full + if (chunk is null) + throw new ArgumentNullException(nameof(chunk)); if (BufferFull) return; - // if size doesnt match - if (chunk.Length != ChunkSize) throw new Exception($"Invalid chunk size. Submitted {chunk.Length} - should be {ChunkSize}"); + if (chunk.Length != ChunkSize) + throw new ArgumentException( + $"Invalid chunk size. Submitted {chunk.Length} - should be {ChunkSize}", + nameof(chunk) + ); - // copy to Array.Copy(chunk, 0, Buffer, BufferAvailable, chunk.Length); ChunksAvailable++; } @@ -109,10 +130,17 @@ public void PushChunk(T[] chunk) /// Amount of chunks the circular audio buffer can take in. Higher values are usually more stable and lower values usually cause more audio cracking, but will do more latency (20 * amountOfChunks ms). public CircularAudioBuffer(int chunkSize, int amountOfChunks = 18) { + if (chunkSize <= 0) + throw new ArgumentOutOfRangeException(nameof(chunkSize)); + if (amountOfChunks <= 0) + throw new ArgumentOutOfRangeException(nameof(amountOfChunks)); + ChunkSize = chunkSize; BufferLength = chunkSize * amountOfChunks; Buffer = new T[BufferLength]; + ChunksAvailable = 0; } + /// /// Creates a circular audio buffer. /// diff --git a/Example.cs b/Example.cs index 3bdfbb7..6e9ee24 100644 --- a/Example.cs +++ b/Example.cs @@ -2,12 +2,13 @@ { internal static class Example { - static VoiceChatInterface voiceChatInterface = new(); + private static readonly VoiceChatInterface VoiceChatInterface = new(); + private static readonly int ExpectedFrameSize = VoiceUtilities.GetSampleSize(1); // basic recorder public static void Main() { - BasicMicrophoneRecorder recorder = new(); + using BasicMicrophoneRecorder recorder = new(); recorder.DataAvailable += WhenDataAvailable; recorder.StartRecording(); @@ -16,7 +17,11 @@ public static void Main() // encoding & sending private static void WhenDataAvailable(byte[] pcmData, int length) { - (byte[] encodedData, int encodedLength) = voiceChatInterface.SubmitAudioData(pcmData, length); + // Ignore incomplete frames that can occasionally happen at stream boundaries. + if (length != ExpectedFrameSize) + return; + + (byte[] encodedData, int encodedLength) = VoiceChatInterface.SubmitAudioData(pcmData, length); Send(encodedData, encodedLength); } private static void Send(byte[] encodedData, int encodedLength) @@ -28,7 +33,7 @@ private static void Send(byte[] encodedData, int encodedLength) private static void WhenVoicePacketReceived(byte[] encodedData, int encodedLength) { // here we assume that encodedData contains the bytes of the opus encoded data - (byte[] decodedData, int decodedLength) = voiceChatInterface.WhenDataReceived(encodedData, encodedLength); + (byte[] decodedData, int decodedLength) = VoiceChatInterface.WhenDataReceived(encodedData, encodedLength); SubmitBuffer(decodedData, decodedLength); } diff --git a/OpenVoiceSharp.AuthoritativeServer/ClientSession.cs b/OpenVoiceSharp.AuthoritativeServer/ClientSession.cs new file mode 100644 index 0000000..e76fb03 --- /dev/null +++ b/OpenVoiceSharp.AuthoritativeServer/ClientSession.cs @@ -0,0 +1,24 @@ +using System.Net; + +namespace OpenVoiceSharp.AuthoritativeServer; + +internal sealed class ClientSession +{ + public Guid ClientId { get; } + public string RoomName { get; set; } + public string UserName { get; set; } + public IPEndPoint Endpoint { get; set; } + public DateTime LastSeenUtc { get; set; } + public TokenBucket VoiceRateLimiter { get; } + public SequenceWindow VoiceSequenceWindow { get; } = new(); + + public ClientSession(Guid clientId, string roomName, string userName, IPEndPoint endpoint, int maxVoicePacketsPerSecond) + { + ClientId = clientId; + RoomName = roomName; + UserName = userName; + Endpoint = endpoint; + LastSeenUtc = DateTime.UtcNow; + VoiceRateLimiter = new TokenBucket(maxVoicePacketsPerSecond); + } +} diff --git a/OpenVoiceSharp.AuthoritativeServer/OpenVoiceSharp.AuthoritativeServer.csproj b/OpenVoiceSharp.AuthoritativeServer/OpenVoiceSharp.AuthoritativeServer.csproj new file mode 100644 index 0000000..ed9781c --- /dev/null +++ b/OpenVoiceSharp.AuthoritativeServer/OpenVoiceSharp.AuthoritativeServer.csproj @@ -0,0 +1,10 @@ + + + + Exe + net10.0 + enable + enable + + + diff --git a/OpenVoiceSharp.AuthoritativeServer/Program.cs b/OpenVoiceSharp.AuthoritativeServer/Program.cs new file mode 100644 index 0000000..1186286 --- /dev/null +++ b/OpenVoiceSharp.AuthoritativeServer/Program.cs @@ -0,0 +1,12 @@ +using OpenVoiceSharp.AuthoritativeServer; + +var options = ServerOptions.FromArgs(args); + +using var server = new VoiceAuthoritativeServer(options); +Console.CancelKeyPress += (_, eventArgs) => +{ + eventArgs.Cancel = true; + server.RequestStop(); +}; + +await server.RunAsync(); diff --git a/OpenVoiceSharp.AuthoritativeServer/Protocol.cs b/OpenVoiceSharp.AuthoritativeServer/Protocol.cs new file mode 100644 index 0000000..1510aff --- /dev/null +++ b/OpenVoiceSharp.AuthoritativeServer/Protocol.cs @@ -0,0 +1,217 @@ +using System.Buffers.Binary; +using System.Text; + +namespace OpenVoiceSharp.AuthoritativeServer; + +internal enum ClientPacketType : byte +{ + Hello = 1, + Voice = 2, + Leave = 3, + Ping = 4, + AuthHello = 5 +} + +internal enum ServerPacketType : byte +{ + Welcome = 11, + VoiceRelay = 12, + Error = 13, + PeerJoined = 14, + PeerLeft = 15, + Pong = 16 +} + +internal enum ErrorCode : byte +{ + InvalidPacket = 1, + NotRegistered = 2, + UnauthorizedEndpoint = 3, + RoomFull = 4, + RateLimited = 5, + PayloadTooLarge = 6, + AuthFailed = 7 +} + +internal static class Protocol +{ + public static bool TryReadHello( + ReadOnlySpan packet, + out Guid clientId, + out string room, + out string userName + ) + { + clientId = Guid.Empty; + room = string.Empty; + userName = string.Empty; + + // Type(1) + ClientId(16) + RoomLen(1) + UserLen(1) + if (packet.Length < 19 || packet[0] != (byte)ClientPacketType.Hello) + return false; + + clientId = new Guid(packet.Slice(1, 16)); + byte roomLength = packet[17]; + int roomStart = 18; + int userLenIndex = roomStart + roomLength; + + if (packet.Length < userLenIndex + 1) + return false; + + byte userLength = packet[userLenIndex]; + int userStart = userLenIndex + 1; + if (packet.Length != userStart + userLength) + return false; + + room = Encoding.UTF8.GetString(packet.Slice(roomStart, roomLength)); + userName = Encoding.UTF8.GetString(packet.Slice(userStart, userLength)); + return room.Length > 0 && userName.Length > 0; + } + + public static bool TryReadVoice( + ReadOnlySpan packet, + out Guid clientId, + out uint sequence, + out ReadOnlySpan payload + ) + { + clientId = Guid.Empty; + sequence = 0; + payload = default; + + // Type(1) + ClientId(16) + Sequence(4) + PayloadLen(2) + if (packet.Length < 23 || packet[0] != (byte)ClientPacketType.Voice) + return false; + + clientId = new Guid(packet.Slice(1, 16)); + sequence = BinaryPrimitives.ReadUInt32LittleEndian(packet.Slice(17, 4)); + ushort payloadLength = BinaryPrimitives.ReadUInt16LittleEndian(packet.Slice(21, 2)); + + if (packet.Length != 23 + payloadLength) + return false; + + payload = packet.Slice(23, payloadLength); + return true; + } + + public static bool TryReadAuthHello( + ReadOnlySpan packet, + out Guid clientId, + out string room, + out string userName, + out string authToken + ) + { + clientId = Guid.Empty; + room = string.Empty; + userName = string.Empty; + authToken = string.Empty; + + // Type(1) + ClientId(16) + RoomLen(1) + UserLen(1) + TokenLen(2) + if (packet.Length < 21 || packet[0] != (byte)ClientPacketType.AuthHello) + return false; + + clientId = new Guid(packet.Slice(1, 16)); + byte roomLength = packet[17]; + int roomStart = 18; + int userLenIndex = roomStart + roomLength; + if (packet.Length < userLenIndex + 1) + return false; + + byte userLength = packet[userLenIndex]; + int userStart = userLenIndex + 1; + int tokenLenIndex = userStart + userLength; + if (packet.Length < tokenLenIndex + 2) + return false; + + ushort tokenLength = BinaryPrimitives.ReadUInt16LittleEndian(packet.Slice(tokenLenIndex, 2)); + int tokenStart = tokenLenIndex + 2; + if (packet.Length != tokenStart + tokenLength) + return false; + + room = Encoding.UTF8.GetString(packet.Slice(roomStart, roomLength)); + userName = Encoding.UTF8.GetString(packet.Slice(userStart, userLength)); + authToken = Encoding.UTF8.GetString(packet.Slice(tokenStart, tokenLength)); + return room.Length > 0 && userName.Length > 0 && authToken.Length > 0; + } + + public static bool TryReadLeave(ReadOnlySpan packet, out Guid clientId) + { + clientId = Guid.Empty; + if (packet.Length != 17 || packet[0] != (byte)ClientPacketType.Leave) + return false; + + clientId = new Guid(packet.Slice(1, 16)); + return true; + } + + public static bool TryReadPing(ReadOnlySpan packet, out Guid clientId) + { + clientId = Guid.Empty; + if (packet.Length != 17 || packet[0] != (byte)ClientPacketType.Ping) + return false; + + clientId = new Guid(packet.Slice(1, 16)); + return true; + } + + public static byte[] BuildWelcome(Guid clientId) + { + byte[] packet = new byte[17]; + packet[0] = (byte)ServerPacketType.Welcome; + clientId.TryWriteBytes(packet.AsSpan(1, 16)); + return packet; + } + + public static byte[] BuildPeerJoined(Guid clientId, string userName) + { + byte[] userNameBytes = Encoding.UTF8.GetBytes(userName); + byte[] packet = new byte[18 + userNameBytes.Length]; + packet[0] = (byte)ServerPacketType.PeerJoined; + clientId.TryWriteBytes(packet.AsSpan(1, 16)); + packet[17] = checked((byte)userNameBytes.Length); + userNameBytes.CopyTo(packet.AsSpan(18)); + return packet; + } + + public static byte[] BuildPeerLeft(Guid clientId) + { + byte[] packet = new byte[17]; + packet[0] = (byte)ServerPacketType.PeerLeft; + clientId.TryWriteBytes(packet.AsSpan(1, 16)); + return packet; + } + + public static byte[] BuildVoiceRelay(Guid speakerClientId, uint sequence, ReadOnlySpan payload) + { + byte[] packet = new byte[23 + payload.Length]; + packet[0] = (byte)ServerPacketType.VoiceRelay; + speakerClientId.TryWriteBytes(packet.AsSpan(1, 16)); + BinaryPrimitives.WriteUInt32LittleEndian(packet.AsSpan(17, 4), sequence); + BinaryPrimitives.WriteUInt16LittleEndian(packet.AsSpan(21, 2), checked((ushort)payload.Length)); + payload.CopyTo(packet.AsSpan(23)); + return packet; + } + + public static byte[] BuildError(ErrorCode code, string message) + { + byte[] messageBytes = Encoding.UTF8.GetBytes(message); + if (messageBytes.Length > byte.MaxValue) + messageBytes = messageBytes[..byte.MaxValue]; + + byte[] packet = new byte[3 + messageBytes.Length]; + packet[0] = (byte)ServerPacketType.Error; + packet[1] = (byte)code; + packet[2] = (byte)messageBytes.Length; + messageBytes.CopyTo(packet.AsSpan(3)); + return packet; + } + + public static byte[] BuildPong(Guid clientId) + { + byte[] packet = new byte[17]; + packet[0] = (byte)ServerPacketType.Pong; + clientId.TryWriteBytes(packet.AsSpan(1, 16)); + return packet; + } +} diff --git a/OpenVoiceSharp.AuthoritativeServer/SequenceWindow.cs b/OpenVoiceSharp.AuthoritativeServer/SequenceWindow.cs new file mode 100644 index 0000000..47f2900 --- /dev/null +++ b/OpenVoiceSharp.AuthoritativeServer/SequenceWindow.cs @@ -0,0 +1,42 @@ +namespace OpenVoiceSharp.AuthoritativeServer; + +/// +/// Sliding anti-replay window for monotonic packet sequences. +/// Accepts out-of-order packets within a 64-sequence window. +/// +internal sealed class SequenceWindow +{ + private bool Initialized; + private uint NewestSequence; + private ulong SeenBitmap; + + public bool TryAccept(uint sequence) + { + if (!Initialized) + { + Initialized = true; + NewestSequence = sequence; + SeenBitmap = 1UL; + return true; + } + + if (sequence > NewestSequence) + { + uint shift = sequence - NewestSequence; + SeenBitmap = shift >= 64 ? 1UL : (SeenBitmap << (int)shift) | 1UL; + NewestSequence = sequence; + return true; + } + + uint delta = NewestSequence - sequence; + if (delta >= 64) + return false; + + ulong mask = 1UL << (int)delta; + if ((SeenBitmap & mask) != 0) + return false; + + SeenBitmap |= mask; + return true; + } +} diff --git a/OpenVoiceSharp.AuthoritativeServer/ServerMetrics.cs b/OpenVoiceSharp.AuthoritativeServer/ServerMetrics.cs new file mode 100644 index 0000000..d4478bc --- /dev/null +++ b/OpenVoiceSharp.AuthoritativeServer/ServerMetrics.cs @@ -0,0 +1,19 @@ +namespace OpenVoiceSharp.AuthoritativeServer; + +internal sealed class ServerMetrics +{ + public long PacketsReceived; + public long HelloPackets; + public long AuthHelloPackets; + public long VoicePacketsReceived; + public long VoicePacketsRelayed; + public long ErrorPacketsSent; + public long InvalidPacketsDropped; + public long RateLimitedDropped; + public long ReplayDropped; + public long PayloadDropped; + public long AuthFailures; + public long ClientsJoined; + public long ClientsLeft; + public long PingPackets; +} diff --git a/OpenVoiceSharp.AuthoritativeServer/ServerOptions.cs b/OpenVoiceSharp.AuthoritativeServer/ServerOptions.cs new file mode 100644 index 0000000..125b11a --- /dev/null +++ b/OpenVoiceSharp.AuthoritativeServer/ServerOptions.cs @@ -0,0 +1,70 @@ +namespace OpenVoiceSharp.AuthoritativeServer; + +internal sealed record ServerOptions +{ + public int Port { get; init; } = 7777; + public int MaxRoomMembers { get; init; } = 64; + public int MaxVoicePayloadBytes { get; init; } = 4096; + public int MaxVoicePacketsPerSecond { get; init; } = 80; + public int ClientTimeoutSeconds { get; init; } = 30; + public string? WordPressVerifyUrl { get; init; } + public string? WordPressSharedSecret { get; init; } + public int WordPressTimeoutSeconds { get; init; } = 5; + public int StatsPort { get; init; } = 0; + + public static ServerOptions FromArgs(string[] args) + { + ServerOptions options = new(); + + for (int i = 0; i < args.Length; i++) + { + string arg = args[i]; + if (i + 1 >= args.Length) + break; + + switch (arg) + { + case "--port": + if (int.TryParse(args[++i], out int port) && port > 0 && port <= 65535) + options = options with { Port = port }; + break; + case "--max-room-members": + if (int.TryParse(args[++i], out int maxRoomMembers) && maxRoomMembers > 1) + options = options with { MaxRoomMembers = maxRoomMembers }; + break; + case "--max-voice-bytes": + if (int.TryParse(args[++i], out int maxVoiceBytes) && maxVoiceBytes >= 128) + options = options with { MaxVoicePayloadBytes = maxVoiceBytes }; + break; + case "--max-pps": + if (int.TryParse(args[++i], out int maxPps) && maxPps >= 10) + options = options with { MaxVoicePacketsPerSecond = maxPps }; + break; + case "--timeout-seconds": + if (int.TryParse(args[++i], out int timeoutSeconds) && timeoutSeconds >= 5) + options = options with { ClientTimeoutSeconds = timeoutSeconds }; + break; + case "--wp-verify-url": + string verifyUrl = args[++i].Trim(); + if (!string.IsNullOrWhiteSpace(verifyUrl)) + options = options with { WordPressVerifyUrl = verifyUrl }; + break; + case "--wp-shared-secret": + string sharedSecret = args[++i].Trim(); + if (!string.IsNullOrWhiteSpace(sharedSecret)) + options = options with { WordPressSharedSecret = sharedSecret }; + break; + case "--wp-timeout-seconds": + if (int.TryParse(args[++i], out int wpTimeoutSeconds) && wpTimeoutSeconds >= 1) + options = options with { WordPressTimeoutSeconds = wpTimeoutSeconds }; + break; + case "--stats-port": + if (int.TryParse(args[++i], out int statsPort) && statsPort >= 0 && statsPort <= 65535) + options = options with { StatsPort = statsPort }; + break; + } + } + + return options; + } +} diff --git a/OpenVoiceSharp.AuthoritativeServer/TokenBucket.cs b/OpenVoiceSharp.AuthoritativeServer/TokenBucket.cs new file mode 100644 index 0000000..cd32223 --- /dev/null +++ b/OpenVoiceSharp.AuthoritativeServer/TokenBucket.cs @@ -0,0 +1,39 @@ +namespace OpenVoiceSharp.AuthoritativeServer; + +internal sealed class TokenBucket +{ + private readonly double FillRatePerSecond; + private readonly double Capacity; + private double Tokens; + private DateTime LastRefillUtc; + + public TokenBucket(int fillRatePerSecond) + { + FillRatePerSecond = fillRatePerSecond; + Capacity = fillRatePerSecond; + Tokens = fillRatePerSecond; + LastRefillUtc = DateTime.UtcNow; + } + + public bool TryConsume(int tokens) + { + Refill(); + + if (Tokens < tokens) + return false; + + Tokens -= tokens; + return true; + } + + private void Refill() + { + DateTime now = DateTime.UtcNow; + double elapsedSeconds = (now - LastRefillUtc).TotalSeconds; + if (elapsedSeconds <= 0) + return; + + Tokens = Math.Min(Capacity, Tokens + elapsedSeconds * FillRatePerSecond); + LastRefillUtc = now; + } +} diff --git a/OpenVoiceSharp.AuthoritativeServer/VoiceAuthoritativeServer.cs b/OpenVoiceSharp.AuthoritativeServer/VoiceAuthoritativeServer.cs new file mode 100644 index 0000000..eaa5c6a --- /dev/null +++ b/OpenVoiceSharp.AuthoritativeServer/VoiceAuthoritativeServer.cs @@ -0,0 +1,529 @@ +using System.Net; +using System.Net.Sockets; +using System.Text; +using System.Text.Json; +using System.Threading; + +namespace OpenVoiceSharp.AuthoritativeServer; + +internal sealed class VoiceAuthoritativeServer : IDisposable +{ + private readonly ServerOptions Options; + private readonly UdpClient Socket; + private readonly WordPressAuthVerifier? WordPressAuthVerifier; + private readonly Dictionary SessionsById = []; + private readonly Dictionary> RoomMembers = new(StringComparer.Ordinal); + private readonly CancellationTokenSource StopTokenSource = new(); + private readonly ServerMetrics Metrics = new(); + private readonly object StateLock = new(); + private TcpListener? StatsListener; + + public VoiceAuthoritativeServer(ServerOptions options) + { + Options = options; + Socket = new UdpClient(new IPEndPoint(IPAddress.Any, Options.Port)); + if (!string.IsNullOrWhiteSpace(Options.WordPressVerifyUrl)) + { + WordPressAuthVerifier = new WordPressAuthVerifier( + Options.WordPressVerifyUrl, + Options.WordPressSharedSecret, + Options.WordPressTimeoutSeconds + ); + } + } + + public void RequestStop() => StopTokenSource.Cancel(); + + public async Task RunAsync() + { + Console.WriteLine($"Authoritative voice server listening on udp://0.0.0.0:{Options.Port}"); + if (WordPressAuthVerifier is not null) + Console.WriteLine("WordPress auth verification is enabled."); + if (Options.StatsPort > 0) + Console.WriteLine($"Stats endpoint enabled on http://127.0.0.1:{Options.StatsPort}/stats"); + using PeriodicTimer cleanupTimer = new(TimeSpan.FromSeconds(1)); + + Task receiveLoop = ReceiveLoopAsync(StopTokenSource.Token); + Task cleanupLoop = CleanupLoopAsync(cleanupTimer, StopTokenSource.Token); + Task statsLoop = Options.StatsPort > 0 + ? StatsLoopAsync(StopTokenSource.Token) + : Task.CompletedTask; + + await Task.WhenAny(receiveLoop, cleanupLoop, statsLoop); + StopTokenSource.Cancel(); + + try + { + await Task.WhenAll(receiveLoop, cleanupLoop, statsLoop); + } + catch (OperationCanceledException) + { + // expected on shutdown + } + } + + private async Task ReceiveLoopAsync(CancellationToken cancellationToken) + { + while (!cancellationToken.IsCancellationRequested) + { + UdpReceiveResult result = await Socket.ReceiveAsync(cancellationToken); + await HandlePacketAsync(result.Buffer, result.RemoteEndPoint, cancellationToken).ConfigureAwait(false); + } + } + + private async Task CleanupLoopAsync(PeriodicTimer timer, CancellationToken cancellationToken) + { + int ticks = 0; + while (await timer.WaitForNextTickAsync(cancellationToken)) + { + DateTime cutoffUtc = DateTime.UtcNow.AddSeconds(-Options.ClientTimeoutSeconds); + List staleClientIds = []; + + lock (StateLock) + { + foreach (KeyValuePair pair in SessionsById) + { + if (pair.Value.LastSeenUtc < cutoffUtc) + staleClientIds.Add(pair.Key); + } + } + + foreach (Guid staleClientId in staleClientIds) + RemoveClient(staleClientId, notifyRoom: true); + + ticks++; + if (ticks % 10 == 0) + LogMetrics(); + } + } + + private async Task HandlePacketAsync(byte[] packet, IPEndPoint remoteEndpoint, CancellationToken cancellationToken) + { + if (packet.Length == 0) + return; + Interlocked.Increment(ref Metrics.PacketsReceived); + + ClientPacketType packetType = (ClientPacketType)packet[0]; + switch (packetType) + { + case ClientPacketType.Hello: + Interlocked.Increment(ref Metrics.HelloPackets); + await HandleHelloAsync(packet, remoteEndpoint, isAuthHello: false, cancellationToken).ConfigureAwait(false); + break; + case ClientPacketType.AuthHello: + Interlocked.Increment(ref Metrics.AuthHelloPackets); + await HandleHelloAsync(packet, remoteEndpoint, isAuthHello: true, cancellationToken).ConfigureAwait(false); + break; + case ClientPacketType.Voice: + Interlocked.Increment(ref Metrics.VoicePacketsReceived); + HandleVoice(packet, remoteEndpoint); + break; + case ClientPacketType.Leave: + HandleLeave(packet, remoteEndpoint); + break; + case ClientPacketType.Ping: + Interlocked.Increment(ref Metrics.PingPackets); + HandlePing(packet, remoteEndpoint); + break; + default: + SendToEndpoint(Protocol.BuildError(ErrorCode.InvalidPacket, "Unknown packet type"), remoteEndpoint); + Interlocked.Increment(ref Metrics.InvalidPacketsDropped); + break; + } + } + + private async Task HandleHelloAsync( + byte[] packet, + IPEndPoint remoteEndpoint, + bool isAuthHello, + CancellationToken cancellationToken + ) + { + Guid clientId; + string room; + string userName; + string authToken = string.Empty; + + bool parsed = isAuthHello + ? Protocol.TryReadAuthHello(packet, out clientId, out room, out userName, out authToken) + : Protocol.TryReadHello(packet, out clientId, out room, out userName); + if (!parsed) + { + SendToEndpoint(Protocol.BuildError(ErrorCode.InvalidPacket, "Invalid hello packet"), remoteEndpoint); + Interlocked.Increment(ref Metrics.InvalidPacketsDropped); + return; + } + + if (WordPressAuthVerifier is not null) + { + if (!isAuthHello || string.IsNullOrWhiteSpace(authToken)) + { + SendToEndpoint(Protocol.BuildError(ErrorCode.AuthFailed, "Authentication token is required."), remoteEndpoint); + Interlocked.Increment(ref Metrics.AuthFailures); + return; + } + + (bool valid, string message) = await WordPressAuthVerifier.VerifyAsync(authToken, cancellationToken).ConfigureAwait(false); + if (!valid) + { + SendToEndpoint(Protocol.BuildError(ErrorCode.AuthFailed, message), remoteEndpoint); + Interlocked.Increment(ref Metrics.AuthFailures); + LogEvent("auth_failed", ("endpoint", remoteEndpoint.ToString()), ("reason", message)); + return; + } + } + + room = room.Trim(); + userName = userName.Trim(); + if (room.Length == 0 || room.Length > 64 || userName.Length == 0 || userName.Length > 64) + { + SendToEndpoint(Protocol.BuildError(ErrorCode.InvalidPacket, "Invalid room or user name"), remoteEndpoint); + Interlocked.Increment(ref Metrics.InvalidPacketsDropped); + return; + } + + lock (StateLock) + { + if (!RoomMembers.TryGetValue(room, out HashSet? existingRoomMembers)) + { + existingRoomMembers = []; + RoomMembers[room] = existingRoomMembers; + } + + if (!SessionsById.TryGetValue(clientId, out ClientSession? session)) + { + if (existingRoomMembers.Count >= Options.MaxRoomMembers) + { + SendToEndpoint(Protocol.BuildError(ErrorCode.RoomFull, "Room is full"), remoteEndpoint); + return; + } + + session = new ClientSession(clientId, room, userName, remoteEndpoint, Options.MaxVoicePacketsPerSecond); + SessionsById[clientId] = session; + existingRoomMembers.Add(clientId); + Interlocked.Increment(ref Metrics.ClientsJoined); + + SendToEndpoint(Protocol.BuildWelcome(clientId), remoteEndpoint); + BroadcastToRoom(room, Protocol.BuildPeerJoined(clientId, userName), exceptClientId: clientId); + + LogEvent("client_joined", ("clientId", clientId), ("room", room), ("user", userName), ("endpoint", remoteEndpoint.ToString())); + return; + } + + // Rejoin or endpoint migration for existing client id. + if (!StringComparer.Ordinal.Equals(session.RoomName, room)) + { + RemoveFromRoom(session.ClientId, session.RoomName); + if (existingRoomMembers.Count >= Options.MaxRoomMembers) + { + SendToEndpoint(Protocol.BuildError(ErrorCode.RoomFull, "Room is full"), remoteEndpoint); + return; + } + + existingRoomMembers.Add(session.ClientId); + session.RoomName = room; + } + + if (!StringComparer.Ordinal.Equals(session.UserName, userName)) + session.UserName = userName; + + session.Endpoint = remoteEndpoint; + session.LastSeenUtc = DateTime.UtcNow; + + SendToEndpoint(Protocol.BuildWelcome(clientId), remoteEndpoint); + LogEvent("client_refreshed", ("clientId", clientId), ("room", room), ("endpoint", remoteEndpoint.ToString())); + } + } + + private void HandleVoice(byte[] packet, IPEndPoint remoteEndpoint) + { + if (!Protocol.TryReadVoice(packet, out Guid clientId, out uint sequence, out ReadOnlySpan payload)) + { + SendToEndpoint(Protocol.BuildError(ErrorCode.InvalidPacket, "Invalid voice packet"), remoteEndpoint); + Interlocked.Increment(ref Metrics.InvalidPacketsDropped); + return; + } + + lock (StateLock) + { + if (!SessionsById.TryGetValue(clientId, out ClientSession? session)) + { + SendToEndpoint(Protocol.BuildError(ErrorCode.NotRegistered, "Client not registered"), remoteEndpoint); + return; + } + + if (!EndpointsEqual(session.Endpoint, remoteEndpoint)) + { + SendToEndpoint(Protocol.BuildError(ErrorCode.UnauthorizedEndpoint, "Invalid source endpoint"), remoteEndpoint); + Interlocked.Increment(ref Metrics.InvalidPacketsDropped); + return; + } + + if (payload.Length == 0 || payload.Length > Options.MaxVoicePayloadBytes) + { + SendToEndpoint(Protocol.BuildError(ErrorCode.PayloadTooLarge, "Voice payload out of range"), remoteEndpoint); + Interlocked.Increment(ref Metrics.PayloadDropped); + return; + } + + if (!session.VoiceRateLimiter.TryConsume(1)) + { + SendToEndpoint(Protocol.BuildError(ErrorCode.RateLimited, "Voice packet rate exceeded"), remoteEndpoint); + Interlocked.Increment(ref Metrics.RateLimitedDropped); + return; + } + + if (!session.VoiceSequenceWindow.TryAccept(sequence)) + { + Interlocked.Increment(ref Metrics.ReplayDropped); + return; + } + session.LastSeenUtc = DateTime.UtcNow; + + byte[] relayPacket = Protocol.BuildVoiceRelay(clientId, sequence, payload); + BroadcastToRoom(session.RoomName, relayPacket, exceptClientId: clientId); + Interlocked.Increment(ref Metrics.VoicePacketsRelayed); + } + } + + private void HandleLeave(byte[] packet, IPEndPoint remoteEndpoint) + { + if (!Protocol.TryReadLeave(packet, out Guid clientId)) + { + SendToEndpoint(Protocol.BuildError(ErrorCode.InvalidPacket, "Invalid leave packet"), remoteEndpoint); + Interlocked.Increment(ref Metrics.InvalidPacketsDropped); + return; + } + + lock (StateLock) + { + if (!SessionsById.TryGetValue(clientId, out ClientSession? session)) + return; + + if (!EndpointsEqual(session.Endpoint, remoteEndpoint)) + { + SendToEndpoint(Protocol.BuildError(ErrorCode.UnauthorizedEndpoint, "Invalid source endpoint"), remoteEndpoint); + Interlocked.Increment(ref Metrics.InvalidPacketsDropped); + return; + } + + RemoveClient(clientId, notifyRoom: true); + } + } + + private void HandlePing(byte[] packet, IPEndPoint remoteEndpoint) + { + if (!Protocol.TryReadPing(packet, out Guid clientId)) + return; + + lock (StateLock) + { + if (SessionsById.TryGetValue(clientId, out ClientSession? session) && EndpointsEqual(session.Endpoint, remoteEndpoint)) + session.LastSeenUtc = DateTime.UtcNow; + } + + SendToEndpoint(Protocol.BuildPong(clientId), remoteEndpoint); + } + + private void BroadcastToRoom(string room, byte[] packet, Guid? exceptClientId = null) + { + lock (StateLock) + { + if (!RoomMembers.TryGetValue(room, out HashSet? memberIds)) + return; + + foreach (Guid memberId in memberIds) + { + if (exceptClientId.HasValue && memberId == exceptClientId.Value) + continue; + if (!SessionsById.TryGetValue(memberId, out ClientSession? memberSession)) + continue; + + SendToEndpoint(packet, memberSession.Endpoint); + } + } + } + + private void RemoveClient(Guid clientId, bool notifyRoom) + { + lock (StateLock) + { + if (!SessionsById.TryGetValue(clientId, out ClientSession? session)) + return; + + SessionsById.Remove(clientId); + RemoveFromRoom(clientId, session.RoomName); + + if (notifyRoom) + BroadcastToRoom(session.RoomName, Protocol.BuildPeerLeft(clientId), exceptClientId: clientId); + Interlocked.Increment(ref Metrics.ClientsLeft); + + LogEvent("client_left", ("clientId", clientId), ("room", session.RoomName)); + } + } + + private void RemoveFromRoom(Guid clientId, string roomName) + { + if (!RoomMembers.TryGetValue(roomName, out HashSet? memberIds)) + return; + + memberIds.Remove(clientId); + if (memberIds.Count == 0) + RoomMembers.Remove(roomName); + } + + private void SendToEndpoint(byte[] packet, IPEndPoint endpoint) + { + if (packet.Length > 0 && packet[0] == (byte)ServerPacketType.Error) + Interlocked.Increment(ref Metrics.ErrorPacketsSent); + + _ = Socket.SendAsync(packet, endpoint); + } + + private static bool EndpointsEqual(IPEndPoint a, IPEndPoint b) + => a.Port == b.Port && Equals(a.Address, b.Address); + + public void Dispose() + { + StopTokenSource.Cancel(); + WordPressAuthVerifier?.Dispose(); + StatsListener?.Stop(); + StatsListener = null; + Socket.Dispose(); + StopTokenSource.Dispose(); + } + + private void LogMetrics() + { + int activeClients; + int activeRooms; + lock (StateLock) + { + activeClients = SessionsById.Count; + activeRooms = RoomMembers.Count; + } + + LogEvent( + "metrics", + ("activeClients", activeClients), + ("activeRooms", activeRooms), + ("packetsReceived", Interlocked.Read(ref Metrics.PacketsReceived)), + ("helloPackets", Interlocked.Read(ref Metrics.HelloPackets)), + ("authHelloPackets", Interlocked.Read(ref Metrics.AuthHelloPackets)), + ("voicePacketsReceived", Interlocked.Read(ref Metrics.VoicePacketsReceived)), + ("voicePacketsRelayed", Interlocked.Read(ref Metrics.VoicePacketsRelayed)), + ("pingPackets", Interlocked.Read(ref Metrics.PingPackets)), + ("errorPacketsSent", Interlocked.Read(ref Metrics.ErrorPacketsSent)), + ("invalidPacketsDropped", Interlocked.Read(ref Metrics.InvalidPacketsDropped)), + ("payloadDropped", Interlocked.Read(ref Metrics.PayloadDropped)), + ("rateLimitedDropped", Interlocked.Read(ref Metrics.RateLimitedDropped)), + ("replayDropped", Interlocked.Read(ref Metrics.ReplayDropped)), + ("authFailures", Interlocked.Read(ref Metrics.AuthFailures)), + ("clientsJoined", Interlocked.Read(ref Metrics.ClientsJoined)), + ("clientsLeft", Interlocked.Read(ref Metrics.ClientsLeft)) + ); + } + + private static void LogEvent(string eventName, params (string key, object? value)[] fields) + { + Dictionary payload = new(StringComparer.Ordinal) + { + ["ts"] = DateTimeOffset.UtcNow.ToString("O"), + ["event"] = eventName + }; + + foreach ((string key, object? value) in fields) + payload[key] = value; + + Console.WriteLine(JsonSerializer.Serialize(payload)); + } + + private async Task StatsLoopAsync(CancellationToken cancellationToken) + { + StatsListener = new TcpListener(IPAddress.Loopback, Options.StatsPort); + StatsListener.Start(); + + try + { + while (!cancellationToken.IsCancellationRequested) + { + TcpClient client = await StatsListener.AcceptTcpClientAsync(cancellationToken).ConfigureAwait(false); + _ = Task.Run(() => HandleStatsClientAsync(client), cancellationToken); + } + } + catch (OperationCanceledException) + { + // expected on shutdown + } + catch (ObjectDisposedException) + { + // expected on shutdown + } + } + + private async Task HandleStatsClientAsync(TcpClient client) + { + await using NetworkStream stream = client.GetStream(); + using StreamReader reader = new(stream, Encoding.ASCII, detectEncodingFromByteOrderMarks: false, leaveOpen: true); + + string? requestLine = await reader.ReadLineAsync().ConfigureAwait(false); + if (string.IsNullOrWhiteSpace(requestLine)) + return; + + while (true) + { + string? headerLine = await reader.ReadLineAsync().ConfigureAwait(false); + if (string.IsNullOrEmpty(headerLine)) + break; + } + + bool isStatsRequest = requestLine.StartsWith("GET /stats", StringComparison.OrdinalIgnoreCase); + string body = isStatsRequest ? BuildStatsSnapshotJson() : "{\"error\":\"not_found\"}"; + string status = isStatsRequest ? "200 OK" : "404 Not Found"; + + byte[] bodyBytes = Encoding.UTF8.GetBytes(body); + string responseHeaders = + $"HTTP/1.1 {status}\r\n" + + "Content-Type: application/json\r\n" + + $"Content-Length: {bodyBytes.Length}\r\n" + + "Connection: close\r\n\r\n"; + + byte[] headerBytes = Encoding.ASCII.GetBytes(responseHeaders); + await stream.WriteAsync(headerBytes, 0, headerBytes.Length).ConfigureAwait(false); + await stream.WriteAsync(bodyBytes, 0, bodyBytes.Length).ConfigureAwait(false); + } + + private string BuildStatsSnapshotJson() + { + int activeClients; + int activeRooms; + lock (StateLock) + { + activeClients = SessionsById.Count; + activeRooms = RoomMembers.Count; + } + + Dictionary snapshot = new(StringComparer.Ordinal) + { + ["ts"] = DateTimeOffset.UtcNow.ToString("O"), + ["activeClients"] = activeClients, + ["activeRooms"] = activeRooms, + ["packetsReceived"] = Interlocked.Read(ref Metrics.PacketsReceived), + ["helloPackets"] = Interlocked.Read(ref Metrics.HelloPackets), + ["authHelloPackets"] = Interlocked.Read(ref Metrics.AuthHelloPackets), + ["voicePacketsReceived"] = Interlocked.Read(ref Metrics.VoicePacketsReceived), + ["voicePacketsRelayed"] = Interlocked.Read(ref Metrics.VoicePacketsRelayed), + ["pingPackets"] = Interlocked.Read(ref Metrics.PingPackets), + ["errorPacketsSent"] = Interlocked.Read(ref Metrics.ErrorPacketsSent), + ["invalidPacketsDropped"] = Interlocked.Read(ref Metrics.InvalidPacketsDropped), + ["payloadDropped"] = Interlocked.Read(ref Metrics.PayloadDropped), + ["rateLimitedDropped"] = Interlocked.Read(ref Metrics.RateLimitedDropped), + ["replayDropped"] = Interlocked.Read(ref Metrics.ReplayDropped), + ["authFailures"] = Interlocked.Read(ref Metrics.AuthFailures), + ["clientsJoined"] = Interlocked.Read(ref Metrics.ClientsJoined), + ["clientsLeft"] = Interlocked.Read(ref Metrics.ClientsLeft) + }; + + return JsonSerializer.Serialize(snapshot); + } +} diff --git a/OpenVoiceSharp.AuthoritativeServer/WordPressAuthVerifier.cs b/OpenVoiceSharp.AuthoritativeServer/WordPressAuthVerifier.cs new file mode 100644 index 0000000..b19a84f --- /dev/null +++ b/OpenVoiceSharp.AuthoritativeServer/WordPressAuthVerifier.cs @@ -0,0 +1,112 @@ +using System.Net.Http.Headers; +using System.Text.Json; + +namespace OpenVoiceSharp.AuthoritativeServer; + +internal sealed class WordPressAuthVerifier : IDisposable +{ + private readonly Uri VerifyUri; + private readonly string? SharedSecret; + private readonly HttpClient HttpClient; + + public WordPressAuthVerifier(string verifyUrl, string? sharedSecret, int timeoutSeconds) + { + VerifyUri = new Uri(verifyUrl, UriKind.Absolute); + SharedSecret = string.IsNullOrWhiteSpace(sharedSecret) ? null : sharedSecret; + HttpClient = new HttpClient + { + Timeout = TimeSpan.FromSeconds(timeoutSeconds) + }; + } + + public async Task<(bool isValid, string message)> VerifyAsync(string token, CancellationToken cancellationToken) + { + using HttpRequestMessage request = new(HttpMethod.Get, VerifyUri); + request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", token); + if (!string.IsNullOrWhiteSpace(SharedSecret)) + request.Headers.Add("X-OpenVoiceSharp-Secret", SharedSecret); + + HttpResponseMessage response; + try + { + response = await HttpClient.SendAsync(request, cancellationToken).ConfigureAwait(false); + } + catch (Exception exception) + { + return (false, $"WordPress verify request failed: {exception.Message}"); + } + + string responseText = await response.Content.ReadAsStringAsync(cancellationToken).ConfigureAwait(false); + if (!response.IsSuccessStatusCode) + return (false, $"WordPress verify rejected token ({(int)response.StatusCode})."); + + if (TryParseValidFlag(responseText, out bool valid)) + return valid ? (true, "ok") : (false, "WordPress token invalid."); + + // If endpoint returns 200 with non-standard body, treat as failure for safety. + return (false, "WordPress verify response did not contain a valid=true flag."); + } + + private static bool TryParseValidFlag(string json, out bool valid) + { + valid = false; + if (string.IsNullOrWhiteSpace(json)) + return false; + + try + { + using JsonDocument document = JsonDocument.Parse(json); + JsonElement root = document.RootElement; + + if (TryReadBoolean(root, "valid", out valid) || + TryReadBoolean(root, "success", out valid) || + TryReadBoolean(root, "authenticated", out valid)) + return true; + + if (root.TryGetProperty("data", out JsonElement data) && data.ValueKind == JsonValueKind.Object) + { + if (TryReadBoolean(data, "valid", out valid) || + TryReadBoolean(data, "success", out valid) || + TryReadBoolean(data, "authenticated", out valid)) + return true; + } + } + catch + { + return false; + } + + return false; + } + + private static bool TryReadBoolean(JsonElement element, string propertyName, out bool value) + { + value = false; + if (!element.TryGetProperty(propertyName, out JsonElement property)) + return false; + + if (property.ValueKind == JsonValueKind.True) + { + value = true; + return true; + } + if (property.ValueKind == JsonValueKind.False) + { + value = false; + return true; + } + if (property.ValueKind == JsonValueKind.String) + { + string text = property.GetString() ?? string.Empty; + if (bool.TryParse(text, out bool parsed)) + { + value = parsed; + return true; + } + } + + return false; + } + + public void Dispose() => HttpClient.Dispose(); +} diff --git a/OpenVoiceSharp.csproj b/OpenVoiceSharp.csproj index a5656ee..1193b26 100644 --- a/OpenVoiceSharp.csproj +++ b/OpenVoiceSharp.csproj @@ -2,6 +2,8 @@ netstandard2.1;net6.0 + x64 + x64 enable enable OpenVoiceSharp @@ -23,7 +25,16 @@ - + + + + + + + + + + diff --git a/OpenVoiceSharp.sln b/OpenVoiceSharp.sln index c7c9fcd..d57a94c 100644 --- a/OpenVoiceSharp.sln +++ b/OpenVoiceSharp.sln @@ -5,16 +5,22 @@ VisualStudioVersion = 17.7.34024.191 MinimumVisualStudioVersion = 10.0.40219.1 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "OpenVoiceSharp", "OpenVoiceSharp.csproj", "{9AB99863-C04D-4CC2-A43D-71F174566DEC}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "OpenVoiceSharp.AuthoritativeServer", "OpenVoiceSharp.AuthoritativeServer\OpenVoiceSharp.AuthoritativeServer.csproj", "{3D18F3C8-5B28-4809-B0B7-F49FD9FDD700}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|Any CPU = Debug|Any CPU - Release|Any CPU = Release|Any CPU + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution - {9AB99863-C04D-4CC2-A43D-71F174566DEC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {9AB99863-C04D-4CC2-A43D-71F174566DEC}.Debug|Any CPU.Build.0 = Debug|Any CPU - {9AB99863-C04D-4CC2-A43D-71F174566DEC}.Release|Any CPU.ActiveCfg = Release|Any CPU - {9AB99863-C04D-4CC2-A43D-71F174566DEC}.Release|Any CPU.Build.0 = Release|Any CPU + {9AB99863-C04D-4CC2-A43D-71F174566DEC}.Debug|x64.ActiveCfg = Debug|x64 + {9AB99863-C04D-4CC2-A43D-71F174566DEC}.Debug|x64.Build.0 = Debug|x64 + {9AB99863-C04D-4CC2-A43D-71F174566DEC}.Release|x64.ActiveCfg = Release|x64 + {9AB99863-C04D-4CC2-A43D-71F174566DEC}.Release|x64.Build.0 = Release|x64 + {3D18F3C8-5B28-4809-B0B7-F49FD9FDD700}.Debug|x64.ActiveCfg = Debug|Any CPU + {3D18F3C8-5B28-4809-B0B7-F49FD9FDD700}.Debug|x64.Build.0 = Debug|Any CPU + {3D18F3C8-5B28-4809-B0B7-F49FD9FDD700}.Release|x64.ActiveCfg = Release|Any CPU + {3D18F3C8-5B28-4809-B0B7-F49FD9FDD700}.Release|x64.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/README.md b/README.md index 62072ab..3a3c16b 100644 --- a/README.md +++ b/README.md @@ -1,83 +1,169 @@ # OpenVoiceSharp OpenVoiceSharp -Agnostic VoIP Voice Chat and Audio Streaming C# library. +A C# voice chat and audio streaming library with an authoritative UDP server. -## Introduction +## Project Status ->[!WARNING] ->This package is not ready to use and is in work in progress. Please come back later! +OpenVoiceSharp is usable today for Windows x64 projects that want low-latency voice with Opus encoding and a server-authoritative relay model. -**OpenVoiceSharp** is an extremely simple, basic compact library that allows for real time VoIP (Voice over IP) voice chat and audio streaming. It allows for any app or game to embed voice chatting functionality. +Current status by area: -**OpenVoiceSharp** utilizes **Opus** as codec under the hood and **RNNoise** for basic (toggleable) noise suppression and **WebRTC** VAD (voice activity detection). +- Core C# library: working +- Authoritative UDP server: working +- Unity package: integration scaffold with managed/native dependency packaging +- Godot package: integration scaffold with demo scene +- Unreal package: protocol/client scaffold -**OpenVoiceSharp** also has a dedicated class (`VoiceUtilities`) for converting PCM to float formats depending on use cases for engines. +## What You Get -## Why did I make this? +- Opus encode/decode voice pipeline (`VoiceChatInterface`) +- Optional RNNoise suppression and WebRTC VAD +- Microphone capture helper (`BasicMicrophoneRecorder`) +- Authoritative UDP server project (`OpenVoiceSharp.AuthoritativeServer`) +- High-level client/session helpers: + - `AuthoritativeVoiceClient` + - `AuthoritativeVoiceSession` +- Playback remainder handling helpers: + - `ReadSpeakerPlayback(...)` + - `FlushSpeakerPlayback(...)` -I believe that voice chat, proximity or not is an essential functionality for game immersion or multiplayer, or discussion. Though, when searching for a friendly open source/free alternative other than Steam's Voice API or Epic Online Services's Voice API, I could not find any, and to make matters worse, it was extremely difficult to get information around how voice chat/audio streaming essentially worked under the hood, which can make difficult for people to make their own voice chat implementation. +## Requirements -Most alternatives are paid (Vivox, Photon Voice, Dissonance etc...) & are mostly compatible for Unity, which can cause an issue for developers using their own game engine, app stack, or game engines like the Godot Engine. +- Windows x64 +- .NET 6+ (library also targets .NET Standard 2.1) +- Native codec/runtime dependencies for Opus/RNNoise/WebRTC VAD -So upon learning how to make VoIP myself, I decided to share the knowledge into this library to make sure no one has to ever struggle with VoIP again, because I also believe that implementation for such things should be easy to use and implement. +## Repository Layout -## Features +- `OpenVoiceSharp.AuthoritativeServer` - authoritative UDP voice server +- `docs/AUTHORITATIVE_SERVER_PROTOCOL.md` - packet protocol +- `unity/OpenVoiceSharp.Unity` - Unity integration scaffold +- `godot/OpenVoiceSharp.Godot` - Godot 4 C# integration scaffold and demo +- `unreal/OpenVoiceSharpUnreal` - Unreal starter plugin scaffold -- 🕓 Agnostic: no specific engine/environment required! -- ⚡ Easy and friendly to use: all you need is a way to record and playback the audio! -- 🎙️ Basic microphone recorder class: no way to record the audio correctly or easily? `BasicMicrophoneRecorder` does that! -- 💥 Low memory footprint: using **Opus**, the packets are ***tiny***! And **OpenVoiceSharp** aims to be as memory efficient and performant as possible. -- 🎵 Audio streaming favoring: option to encode less for better quality packets for audio streaming and more! -- 😯 Low latency: **OpenVoiceSharp** aims to be as low latency as possible. **One opus frame is only 20ms!** -- 🔊 Customizable bitrate: make audio **crystal crisp** or not, it depends on you! (Supports from 8kbps up to 512kbps) -- 🍃 Basic noise suppression using **RNNoise** (can be toggled) -- 🧪 Basic voice conversion utilies: convert 16 bit PCM to float 32 PCM and so on. -- 🗣️ Voice activity detection (VAD): out of the box basic voice activity detection using **WebRTC's VAD**. +## Quick Start -> [!NOTE] -> **OpenVoiceSharp** is meant to be extremely basic and straightforward. Audio playback, modification (effects or more) and features such as groups, teams, muting should be left to implement by yourself. -> _**OpenVoiceSharp** just provides a basic way to encode and decode voice packets along with a basic microphone recorder._ +### 1. Run the server -## Requirements +```bash +dotnet run --project OpenVoiceSharp.AuthoritativeServer -- --port 7777 +``` + +Optional stats endpoint: + +```bash +dotnet run --project OpenVoiceSharp.AuthoritativeServer -- --port 7777 --stats-port 9090 +``` + +Read stats: + +```bash +curl http://127.0.0.1:9090/stats +``` + +Optional WordPress auth verification: + +```bash +dotnet run --project OpenVoiceSharp.AuthoritativeServer -- \ + --port 7777 \ + --wp-verify-url "https://your-site.com/wp-json/openvoicesharp/v1/verify" \ + --wp-shared-secret "server-to-wp-shared-secret" +``` + +### 2. Connect from client code + +```csharp +var session = new AuthoritativeVoiceSession( + "127.0.0.1", + 7777, + "lobby", + "PlayerOne", + authToken: "wordpress-access-token" +); + +session.VoiceFrameDecoded += (speakerId, sequence, pcmData, length) => +{ + // Send PCM to your playback pipeline. +}; + +await session.StartAsync(); +``` + +### 3. Read playback safely (no stuck tail audio) + +```csharp +Guid speakerId = /* target speaker */; +byte[] pcmOut = new byte[1920]; +int copied = session.ReadSpeakerPlayback(speakerId, pcmOut, pcmOut.Length); +// `copied` bytes are real audio; remainder is silence-filled. +``` + +## Engine Integrations + +### Unity + +- Path: `unity/OpenVoiceSharp.Unity` +- Intended use: production client integration with packaged managed/native dependencies for Windows x64. + +### Unity 5-Minute Checklist + +1. Start the server locally: + - `dotnet run --project OpenVoiceSharp.AuthoritativeServer -- --port 7777` +2. Copy `unity/OpenVoiceSharp.Unity` into your Unity project (recommended under `Assets/OpenVoiceSharp.Unity`). +3. Verify managed plugin DLLs exist under: + - `Assets/OpenVoiceSharp.Unity/Plugins/OpenVoiceSharp/` +4. Verify native plugin DLLs exist under: + - `Assets/OpenVoiceSharp.Unity/Plugins/x86_64/` +5. In Unity, create/connect an `AuthoritativeVoiceSession` using: + - host `127.0.0.1`, port `7777`, room `"lobby"`, user `"PlayerOne"` +6. On decoded frames, submit PCM to your playback path; for fixed-size callbacks use: + - `ReadSpeakerPlayback(...)` to avoid stuck remainder audio. +7. If using WordPress auth on server, pass your token in `authToken` when creating the client/session. + +### Godot (4 C#) -- Windows (64 bit) -- .NET 6.0 and higher or support for .NET Standard 2.1 -- Visual Studio +- Path: `godot/OpenVoiceSharp.Godot` +- Includes demo: `godot/OpenVoiceSharp.Godot/demo/DemoVoiceUI.tscn` -> [!WARNING] -> **OpenVoiceSharp** currently only supports Windows 64 bit, atleast the dependencies do. -> I currently do not plan on integrating native support for MacOS, Linux, Android or others but you can compile the libraries yourself and link them. +### Unreal -## Installation & Usage +- Path: `unreal/OpenVoiceSharpUnreal` +- Scope: starter plugin scaffold and UDP protocol client API. -Everything you need to know or do can be found [in the wiki](https://github.com/realcoloride/OpenVoiceSharp/wiki). +## Architecture -## Contribute +- Server is authoritative and room-based. +- Clients send encoded voice frames. +- Server validates endpoint/session and relays to room peers. +- Clients decode and feed playback. +- Optional jitter/playback buffering smooths network variation. -If you wish to contribute, you have a few ways: +See full packet definitions in: -**By supporting the project:** -- 🗣️ Feedback, ideas, or bug reports can be opened talked through the Discord server or through [Issues](https://github.com/realcoloride/OpenVoiceSharp/issues). -- 🌟 Leave a star or share this project. +- `docs/AUTHORITATIVE_SERVER_PROTOCOL.md` -**Or sending me a coffee:** +## Notes and Limits -💖 **I do those projects out of pure passion and love for the craft.** All my open source projects will remain free to use forever. +- Current native dependency setup is focused on Windows x64. +- Engine folders are integration layers around the core server/protocol design. +- Advanced gameplay voice features (teams, proximity rules, moderation policy, etc.) are intended to be implemented at the game/application layer. -☕ **If you wish to support me, send me a coffee on ko.fi:** https://ko-fi.com/coloride +## Contributing -## Licenses & Disclaimer +Contributions, bug reports, and feedback are welcome: -**OpenVoiceSharp** uses the following libraries, so by using **OpenVoiceSharp**, you accept their license's conditions. +- Issues: [GitHub Issues](https://github.com/realcoloride/OpenVoiceSharp/issues) -> [!TIP] -> Most of the libraries used by **OpenVoiceSharp** are MIT licensed, except for WebRTC's VAD, which contains the license from WebRTC. +## License -- [NAudio](https://github.com/naudio/NAudio) - Licensed [MIT](https://github.com/naudio/NAudio/blob/master/license.txt) -- [OpusDotNet](https://github.com/mrphil2105/OpusDotNet) & [Opus](https://opus-codec.org/) - Licensed [MIT](https://github.com/mrphil2105/OpusDotNet/blob/master/LICENSE.md) & [BSD](https://opus-codec.org/license/) -- [WebRtcVadSharp](https://github.com/ladenedge/WebRtcVadSharp) & [WebRTC](https://webrtc.org/) - Licensed [MIT](https://github.com/ladenedge/WebRtcVadSharp/blob/main/LICENSE) & [Other](https://webrtc.org/support/license) -- [YellowDogMan.RRNoise.NET](https://github.com/Yellow-Dog-Man/RNNoise.Net) & [RNNoise](https://github.com/xiph/rnnoise) - Licensed [MIT](https://github.com/Yellow-Dog-Man/RNNoise.Net/blob/main/LICENSE) & [BSD](https://github.com/xiph/rnnoise/blob/main/COPYING) +OpenVoiceSharp is MIT licensed. -_As of this library, just good old MIT._ +This project depends on third-party libraries with their own licenses: -##### © (real)coloride - 2024 | Licensed MIT +- [NAudio](https://github.com/naudio/NAudio) - MIT +- [OpusDotNet](https://github.com/mrphil2105/OpusDotNet) - MIT +- [Opus](https://opus-codec.org/) - BSD +- [WebRtcVadSharp](https://github.com/ladenedge/WebRtcVadSharp) - MIT +- [WebRTC VAD](https://webrtc.org/support/license) - WebRTC license terms +- [YellowDogMan.RRNoise.NET](https://github.com/Yellow-Dog-Man/RNNoise.Net) - MIT +- [RNNoise](https://github.com/xiph/rnnoise) - BSD diff --git a/VoiceChatInterface.cs b/VoiceChatInterface.cs index 24292a7..e58bba6 100644 --- a/VoiceChatInterface.cs +++ b/VoiceChatInterface.cs @@ -1,10 +1,10 @@ -using OpusDotNet; +using OpusDotNet; using RNNoise.NET; using WebRtcVadSharp; namespace OpenVoiceSharp { - public sealed class VoiceChatInterface + public sealed class VoiceChatInterface : IDisposable { /// /// Opus frame length. (20ms) @@ -43,6 +43,7 @@ public sealed class VoiceChatInterface public bool FavorAudioStreaming { get; private set; } = false; private int ChannelsAmount => Stereo ? 2 : 1; + private int PcmFrameSize => VoiceUtilities.GetSampleSize(ChannelsAmount); // instances private readonly OpusEncoder OpusEncoder; @@ -54,7 +55,6 @@ public sealed class VoiceChatInterface SampleRate = WebRtcVadSharp.SampleRate.Is48kHz }; - /// /// Returns if voice activity was detected using the WebRTC VAD. /// @@ -64,30 +64,53 @@ public sealed class VoiceChatInterface // stores float samples if needed private readonly float[] FloatSamples; + private readonly byte[] EncodeBuffer; + private readonly byte[] DecodeBuffer; + private readonly object EncodeLock = new(); + private readonly object DecodeLock = new(); + private bool IsDisposed; - private void ApplyNoiseSuppression(byte[] pcmData) + private void ApplyNoiseSuppression(byte[] pcmData, int length) { // convert to float32 - VoiceUtilities.Convert16BitToFloat(pcmData, FloatSamples); + VoiceUtilities.Convert16BitToFloat(pcmData, FloatSamples, length); // apply noise suppression Denoiser.Denoise(FloatSamples); // convert back to 16 bit pcm - VoiceUtilities.ConvertFloatTo16Bit(FloatSamples, pcmData); + VoiceUtilities.ConvertFloatTo16Bit(FloatSamples, pcmData, length); } /// /// Encodes and processes audio data. Also handles noise suppression if needed. /// /// The 16 bit PCM data according to your needs. + /// The length of the data /// The encoded Opus data, along with its length. public (byte[] encodedOpusData, int encodedLength) SubmitAudioData(byte[] pcmData, int length) { - if (EnableNoiseSuppression) - ApplyNoiseSuppression(pcmData); - - return (OpusEncoder.Encode(pcmData, length, out int encodedLength), encodedLength); + ThrowIfDisposed(); + if (pcmData is null) + throw new ArgumentNullException(nameof(pcmData)); + if (length <= 0 || length > pcmData.Length) + throw new ArgumentOutOfRangeException(nameof(length)); + if (length != PcmFrameSize) + throw new ArgumentException( + $"Expected exactly one {FrameLength}ms PCM frame ({PcmFrameSize} bytes), got {length}.", + nameof(length) + ); + + lock (EncodeLock) + { + if (EnableNoiseSuppression) + ApplyNoiseSuppression(pcmData, length); + + int encodedLength = OpusEncoder.Encode(pcmData, length, EncodeBuffer, EncodeBuffer.Length); + byte[] encodedData = new byte[encodedLength]; + Buffer.BlockCopy(EncodeBuffer, 0, encodedData, 0, encodedLength); + return (encodedData, encodedLength); + } } /// @@ -97,7 +120,21 @@ private void ApplyNoiseSuppression(byte[] pcmData) /// The length of the data /// The decoded Opus data, along with its length. public (byte[] decodedOpusData, int decodedLength) WhenDataReceived(byte[] encodedData, int length) - => (OpusDecoder.Decode(encodedData, length, out int decodedLength), decodedLength); + { + ThrowIfDisposed(); + if (encodedData is null) + throw new ArgumentNullException(nameof(encodedData)); + if (length <= 0 || length > encodedData.Length) + throw new ArgumentOutOfRangeException(nameof(length)); + + lock (DecodeLock) + { + int decodedLength = OpusDecoder.Decode(encodedData, length, DecodeBuffer, DecodeBuffer.Length); + byte[] decodedData = new byte[decodedLength]; + Buffer.BlockCopy(DecodeBuffer, 0, decodedData, 0, decodedLength); + return (decodedData, decodedLength); + } + } /// /// Creates a brand new OpenVoiceSharp voice chat interface to manage voice chat. @@ -108,12 +145,15 @@ private void ApplyNoiseSuppression(byte[] pcmData) /// Favor audio streaming and less compressed packets to favor audio quality. /// The VAD (voice activity detection) operating mode. public VoiceChatInterface( - int bitrate = DefaultBitrate, - bool stereo = false, + int bitrate = DefaultBitrate, + bool stereo = false, bool enableNoiseSuppression = true, - bool favorAudioStreaming = false, + bool favorAudioStreaming = false, OperatingMode? vadOperatingMode = null ) { + if (bitrate < 8_000 || bitrate > 512_000) + throw new ArgumentOutOfRangeException(nameof(bitrate), "Bitrate must be between 8000 and 512000."); + Bitrate = bitrate; Stereo = stereo; EnableNoiseSuppression = enableNoiseSuppression; @@ -121,7 +161,9 @@ public VoiceChatInterface( int channels = ChannelsAmount; // fill float samples for noise suppression - FloatSamples = new float[VoiceUtilities.GetSampleSize(channels) / 2]; + FloatSamples = new float[PcmFrameSize / 2]; + EncodeBuffer = new byte[PcmFrameSize]; + DecodeBuffer = new byte[PcmFrameSize]; // create opus encoder/decoder OpusEncoder = new( @@ -129,15 +171,34 @@ public VoiceChatInterface( SampleRate, channels ) { - Bitrate = Bitrate, VBR = false, ForceChannels = Stereo ? ForceChannels.Stereo : ForceChannels.Mono }; + // OpusDotNet 1.0.3 exposes bitrate through an obsolete property but still provides the setter method. + typeof(OpusEncoder).GetMethod("set_Bitrate")?.Invoke(OpusEncoder, new object[] { Bitrate }); - OpusDecoder = new(FrameLength, SampleRate, channels); + // OpusDecoder takes (sampleRate, channels) — FrameLength is not a parameter + OpusDecoder = new(SampleRate, channels); if (vadOperatingMode != null) VoiceActivityDetector.OperatingMode = (OperatingMode)vadOperatingMode; } + + public void Dispose() + { + if (IsDisposed) return; + + OpusEncoder.Dispose(); + OpusDecoder.Dispose(); + Denoiser.Dispose(); + VoiceActivityDetector.Dispose(); + IsDisposed = true; + } + + private void ThrowIfDisposed() + { + if (IsDisposed) + throw new ObjectDisposedException(nameof(VoiceChatInterface)); + } } } diff --git a/VoiceJitterBuffer.cs b/VoiceJitterBuffer.cs new file mode 100644 index 0000000..de5438d --- /dev/null +++ b/VoiceJitterBuffer.cs @@ -0,0 +1,93 @@ +namespace OpenVoiceSharp +{ + /// + /// Per-speaker jitter buffer for packet reordering and limited loss tolerance. + /// + public sealed class VoiceJitterBuffer + { + private readonly SortedDictionary Packets = new(); + private readonly int TargetDelayPackets; + private readonly int MaxBufferPackets; + + private bool Started; + private uint ExpectedSequence; + private int MissingSequenceSkips; + + public VoiceJitterBuffer(int targetDelayPackets = 3, int maxBufferPackets = 24) + { + if (targetDelayPackets < 1) + throw new ArgumentOutOfRangeException(nameof(targetDelayPackets)); + if (maxBufferPackets < targetDelayPackets + 2) + throw new ArgumentOutOfRangeException(nameof(maxBufferPackets)); + + TargetDelayPackets = targetDelayPackets; + MaxBufferPackets = maxBufferPackets; + } + + public void Add(uint sequence, byte[] payload, int length) + { + if (payload is null) + throw new ArgumentNullException(nameof(payload)); + if (length <= 0 || length > payload.Length) + throw new ArgumentOutOfRangeException(nameof(length)); + + if (Started && sequence < ExpectedSequence) + return; + if (Packets.ContainsKey(sequence)) + return; + + byte[] copy = new byte[length]; + Buffer.BlockCopy(payload, 0, copy, 0, length); + Packets[sequence] = copy; + + // Prevent unbounded growth under extreme disorder. + while (Packets.Count > MaxBufferPackets) + { + uint oldest = GetFirstKey(); + Packets.Remove(oldest); + } + } + + public IEnumerable<(uint sequence, byte[] payload)> DrainReady() + { + if (!Started) + { + if (Packets.Count < TargetDelayPackets) + yield break; + + ExpectedSequence = GetFirstKey(); + Started = true; + MissingSequenceSkips = 0; + } + + while (Packets.Count > 0) + { + if (Packets.TryGetValue(ExpectedSequence, out byte[]? payload)) + { + Packets.Remove(ExpectedSequence); + uint sequence = ExpectedSequence; + ExpectedSequence++; + MissingSequenceSkips = 0; + yield return (sequence, payload); + continue; + } + + bool forceAdvance = Packets.Count >= MaxBufferPackets || MissingSequenceSkips >= TargetDelayPackets; + if (!forceAdvance) + yield break; + + ExpectedSequence++; + MissingSequenceSkips++; + } + } + + private uint GetFirstKey() + { + using IEnumerator enumerator = Packets.Keys.GetEnumerator(); + if (!enumerator.MoveNext()) + throw new InvalidOperationException("Jitter buffer is empty."); + + return enumerator.Current; + } + } +} diff --git a/VoicePlaybackBuffer.cs b/VoicePlaybackBuffer.cs new file mode 100644 index 0000000..745f8d0 --- /dev/null +++ b/VoicePlaybackBuffer.cs @@ -0,0 +1,121 @@ +namespace OpenVoiceSharp +{ + /// + /// Thread-safe PCM playback buffer with partial-read support and silence fill. + /// Useful for audio callbacks that request fixed-size chunks. + /// + public sealed class VoicePlaybackBuffer + { + private readonly Queue> Segments = new(); + private readonly object Sync = new(); + private int AvailableBytes; + + /// + /// Number of queued PCM bytes available to read. + /// + public int Available + { + get + { + lock (Sync) + return AvailableBytes; + } + } + + /// + /// Enqueues decoded PCM bytes. + /// + public void Enqueue(byte[] pcmData, int length) + { + if (pcmData is null) + throw new ArgumentNullException(nameof(pcmData)); + if (length <= 0 || length > pcmData.Length) + throw new ArgumentOutOfRangeException(nameof(length)); + + byte[] copy = new byte[length]; + Buffer.BlockCopy(pcmData, 0, copy, 0, length); + + lock (Sync) + { + Segments.Enqueue(new ArraySegment(copy, 0, copy.Length)); + AvailableBytes += copy.Length; + } + } + + /// + /// Reads up to bytes into . + /// Any missing bytes are filled with zero (silence). + /// Returns the amount of real PCM bytes copied before silence fill. + /// + public int ReadAndFillSilence(byte[] output, int count, int offset = 0) + { + if (output is null) + throw new ArgumentNullException(nameof(output)); + if (count < 0) + throw new ArgumentOutOfRangeException(nameof(count)); + if (offset < 0 || offset > output.Length - count) + throw new ArgumentOutOfRangeException(nameof(offset)); + + int copied = 0; + lock (Sync) + { + while (copied < count && Segments.Count > 0) + { + ArraySegment current = Segments.Dequeue(); + int take = Math.Min(current.Count, count - copied); + + Buffer.BlockCopy(current.Array!, current.Offset, output, offset + copied, take); + copied += take; + AvailableBytes -= take; + + int remaining = current.Count - take; + if (remaining > 0) + { + Segments.Enqueue(new ArraySegment(current.Array!, current.Offset + take, remaining)); + } + } + } + + if (copied < count) + Array.Clear(output, offset + copied, count - copied); + + return copied; + } + + /// + /// Drains and returns all queued bytes. + /// + public byte[] Flush() + { + lock (Sync) + { + if (AvailableBytes == 0) + return Array.Empty(); + + byte[] output = new byte[AvailableBytes]; + int offset = 0; + while (Segments.Count > 0) + { + ArraySegment segment = Segments.Dequeue(); + Buffer.BlockCopy(segment.Array!, segment.Offset, output, offset, segment.Count); + offset += segment.Count; + } + + AvailableBytes = 0; + return output; + } + } + + /// + /// Clears all queued PCM bytes. + /// + public void Clear() + { + lock (Sync) + { + Segments.Clear(); + AvailableBytes = 0; + } + } + } +} diff --git a/VoiceUtilities.cs b/VoiceUtilities.cs index 8e250cd..f18c00a 100644 --- a/VoiceUtilities.cs +++ b/VoiceUtilities.cs @@ -28,14 +28,33 @@ public static int GetSampleSize(int channels) /// The output data in which the result will be returned. /// The 16 bit byte array. public static void Convert16BitToFloat(byte[] input, float[] output) + => Convert16BitToFloat(input, output, input.Length); + + /// + /// Converts 16 bit PCM data into float 32 for a specific input length. + /// + /// The 16 bit PCM data according to your needs. + /// The output data in which the result will be returned. + /// How many bytes to convert from . + public static void Convert16BitToFloat(byte[] input, float[] output, int inputLengthBytes) { - int outputIndex = 0; - short sample; + if (input is null) + throw new ArgumentNullException(nameof(input)); + if (output is null) + throw new ArgumentNullException(nameof(output)); + if (inputLengthBytes < 0 || inputLengthBytes > input.Length) + throw new ArgumentOutOfRangeException(nameof(inputLengthBytes)); + if ((inputLengthBytes & 1) != 0) + throw new ArgumentException("Input length must be even for 16-bit PCM.", nameof(inputLengthBytes)); + + int samples = inputLengthBytes / 2; + if (output.Length < samples) + throw new ArgumentException("Output buffer is too small for the requested input length.", nameof(output)); - for (int n = 0; n < output.Length; n++) + for (int n = 0; n < samples; n++) { - sample = BitConverter.ToInt16(input, n * 2); - output[outputIndex++] = sample / 32768f; + short sample = BitConverter.ToInt16(input, n * 2); + output[n] = sample / 32768f; } } @@ -47,12 +66,37 @@ public static void Convert16BitToFloat(byte[] input, float[] output) /// The output data in which the result will be returned. /// The float32 PCM array. public static void ConvertFloatTo16Bit(float[] input, byte[] output) + => ConvertFloatTo16Bit(input, output, output.Length); + + /// + /// Converts float 32 PCM data into 16 bit for a specific output length. + /// + /// The float 32 PCM data according to your needs. + /// The output data in which the result will be returned. + /// How many bytes to write into . + public static void ConvertFloatTo16Bit(float[] input, byte[] output, int outputLengthBytes) { - int sampleIndex = 0, pcmIndex = 0; + if (input is null) + throw new ArgumentNullException(nameof(input)); + if (output is null) + throw new ArgumentNullException(nameof(output)); + if (outputLengthBytes < 0 || outputLengthBytes > output.Length) + throw new ArgumentOutOfRangeException(nameof(outputLengthBytes)); + if ((outputLengthBytes & 1) != 0) + throw new ArgumentException("Output length must be even for 16-bit PCM.", nameof(outputLengthBytes)); - while (sampleIndex < input.Length) + int samples = outputLengthBytes / 2; + if (input.Length < samples) + throw new ArgumentException("Input buffer is too small for the requested output length.", nameof(input)); + + int sampleIndex = 0, pcmIndex = 0; + while (sampleIndex < samples) { - short outsample = (short)(input[sampleIndex] * short.MaxValue); + float sample = input[sampleIndex]; + if (sample > 1f) sample = 1f; + else if (sample < -1f) sample = -1f; + + short outsample = (short)(sample * short.MaxValue); output[pcmIndex] = (byte)(outsample & 0xff); output[pcmIndex + 1] = (byte)((outsample >> 8) & 0xff); diff --git a/docs/AUTHORITATIVE_SERVER_PROTOCOL.md b/docs/AUTHORITATIVE_SERVER_PROTOCOL.md new file mode 100644 index 0000000..348836f --- /dev/null +++ b/docs/AUTHORITATIVE_SERVER_PROTOCOL.md @@ -0,0 +1,172 @@ +# OpenVoiceSharp Authoritative UDP Protocol + +This protocol is room-based and server-authoritative for voice packet routing. + +## Transport + +- UDP only. +- Default server endpoint: `0.0.0.0:7777`. +- Packet layout uses little-endian integers. + +## Client -> Server + +### `Hello` (`type=1`) + +Register or refresh a client session. + +```text +byte Type = 1 +bytes16 ClientId (GUID bytes) +byte RoomLength (0-255) +bytesN RoomUtf8 +byte UserLength (0-255) +bytesM UserUtf8 +``` + +### `Voice` (`type=2`) + +Submit one encoded Opus frame. + +```text +byte Type = 2 +bytes16 ClientId +uint32 Sequence +uint16 PayloadLength +bytesN OpusPayload +``` + +### `Leave` (`type=3`) + +```text +byte Type = 3 +bytes16 ClientId +``` + +### `Ping` (`type=4`) + +```text +byte Type = 4 +bytes16 ClientId +``` + +### `AuthHello` (`type=5`) + +`AuthHello` is required when WordPress verification is enabled on the server. + +```text +byte Type = 5 +bytes16 ClientId (GUID bytes) +byte RoomLength +bytesN RoomUtf8 +byte UserLength +bytesM UserUtf8 +uint16 TokenLength +bytesK AuthTokenUtf8 +``` + +## Server -> Client + +### `Welcome` (`type=11`) + +```text +byte Type = 11 +bytes16 ClientId +``` + +### `VoiceRelay` (`type=12`) + +Forwarded voice packet from another peer in the same room. + +```text +byte Type = 12 +bytes16 SpeakerClientId +uint32 Sequence +uint16 PayloadLength +bytesN OpusPayload +``` + +### `Error` (`type=13`) + +```text +byte Type = 13 +byte ErrorCode +byte MessageLength +bytesN MessageUtf8 +``` + +Codes: + +- `1` InvalidPacket +- `2` NotRegistered +- `3` UnauthorizedEndpoint +- `4` RoomFull +- `5` RateLimited +- `6` PayloadTooLarge +- `7` AuthFailed + +### `PeerJoined` (`type=14`) + +```text +byte Type = 14 +bytes16 ClientId +byte UserLength +bytesN UserUtf8 +``` + +### `PeerLeft` (`type=15`) + +```text +byte Type = 15 +bytes16 ClientId +``` + +### `Pong` (`type=16`) + +```text +byte Type = 16 +bytes16 ClientId +``` + +## Server Behavior + +- A client can only send voice from the endpoint that sent `Hello`. +- Voice packets are rate-limited per client. +- Voice sequence uses a 64-packet anti-replay window (duplicates/replays are dropped, limited out-of-order packets are accepted). +- Voice payload max size is configurable (`--max-voice-bytes`). +- Inactive clients are removed automatically (`--timeout-seconds`). + +## Running Server + +```bash +dotnet run --project OpenVoiceSharp.AuthoritativeServer -- \ + --port 7777 \ + --max-room-members 64 \ + --max-voice-bytes 4096 \ + --max-pps 80 \ + --timeout-seconds 30 \ + --stats-port 9090 +``` + +Read live stats: + +```bash +curl http://127.0.0.1:9090/stats +``` + +## WordPress Verification Mode + +Enable token verification against a WordPress endpoint: + +```bash +dotnet run --project OpenVoiceSharp.AuthoritativeServer -- \ + --port 7777 \ + --wp-verify-url "https://your-site.com/wp-json/openvoicesharp/v1/verify" \ + --wp-shared-secret "server-to-wp-shared-secret" \ + --wp-timeout-seconds 5 +``` + +Server request behavior: + +- Sends `Authorization: Bearer ` to your `--wp-verify-url`. +- Sends `X-OpenVoiceSharp-Secret` header when `--wp-shared-secret` is provided. +- Expects JSON containing one of: `valid`, `success`, or `authenticated` boolean fields (either at root or under `data`). diff --git a/godot/OpenVoiceSharp.Godot/README.md b/godot/OpenVoiceSharp.Godot/README.md new file mode 100644 index 0000000..2ea6e32 --- /dev/null +++ b/godot/OpenVoiceSharp.Godot/README.md @@ -0,0 +1,75 @@ +# OpenVoiceSharp Godot (Starter Package) + +This is a Godot 4 C# integration scaffold for connecting Godot clients to `OpenVoiceSharp.AuthoritativeServer`. + +## What you get + +- `addons/openvoicesharp/OpenVoiceSharpSessionNode.cs` + - Wraps `AuthoritativeVoiceSession` + - Connect/start/stop helpers + - Godot signals for decoded voice and lifecycle + - Helper to read per-speaker playback remainder +- `addons/openvoicesharp/plugin.cfg` and `OpenVoiceSharpPlugin.cs` starter plugin shell + +## What this does not include yet + +- Native Godot microphone capture/playback graph implementation +- Opus/RNNoise/WebRTC native runtime packaging per platform +- UI for rooms/users/device selection + +## Install in your Godot project + +1. Copy this folder into your Godot project: + - `res://addons/openvoicesharp/` +2. Add managed OpenVoiceSharp dependencies to your Godot project: + - `OpenVoiceSharp.dll` + - `OpusDotNet.dll` + - `RNNoise.NET.dll` + - `WebRtcVadSharp.dll` + - `NAudio.Core.dll` (if used on your target runtime path) + - `NAudio.WinMM.dll` (Windows microphone path) +3. Ensure native runtime DLLs are available for your target platform (for example Windows x64). +4. Enable the plugin in Godot: `Project -> Project Settings -> Plugins -> OpenVoiceSharp`. +5. Add `OpenVoiceSharpSessionNode` to a scene and set exported properties. + +## Minimal usage + +```csharp +public override async void _Ready() +{ + var voice = GetNode("OpenVoiceSharpSessionNode"); + voice.ServerHost = "127.0.0.1"; + voice.ServerPort = 7777; + voice.RoomName = "lobby"; + voice.UserName = "PlayerOne"; + voice.AuthToken = ""; + + bool ok = await voice.StartSessionAsync(); +GD.Print($"Voice connected: {ok}"); +} +``` + +## Demo scene included + +- Scene: `res://demo/DemoVoiceUI.tscn` +- Script: `res://demo/DemoVoiceUI.cs` + +The demo includes: + +- Server/room/user/token inputs +- Connect and disconnect buttons +- Push-to-talk mode toggle +- Hold-to-talk button (active only when push-to-talk mode is enabled) +- Basic playback pumping from `ReadSpeakerPlayback(...)` + +To run: + +1. Copy `godot/OpenVoiceSharp.Godot` contents into a Godot 4 C# project. +2. Ensure the `addons/openvoicesharp` plugin is enabled. +3. Add dependencies (managed + native) from `addons/openvoicesharp/lib/README.md`. +4. Open `res://demo/DemoVoiceUI.tscn` and run scene. + +## Architecture note + +- Godot package = client integration layer. +- `OpenVoiceSharp.AuthoritativeServer` = authoritative UDP voice relay server. diff --git a/godot/OpenVoiceSharp.Godot/addons/openvoicesharp/OpenVoiceSharpPlugin.cs b/godot/OpenVoiceSharp.Godot/addons/openvoicesharp/OpenVoiceSharpPlugin.cs new file mode 100644 index 0000000..5fefac8 --- /dev/null +++ b/godot/OpenVoiceSharp.Godot/addons/openvoicesharp/OpenVoiceSharpPlugin.cs @@ -0,0 +1,13 @@ +using Godot; + +[Tool] +public partial class OpenVoiceSharpPlugin : EditorPlugin +{ + public override void _EnterTree() + { + } + + public override void _ExitTree() + { + } +} diff --git a/godot/OpenVoiceSharp.Godot/addons/openvoicesharp/OpenVoiceSharpSessionNode.cs b/godot/OpenVoiceSharp.Godot/addons/openvoicesharp/OpenVoiceSharpSessionNode.cs new file mode 100644 index 0000000..3c0308a --- /dev/null +++ b/godot/OpenVoiceSharp.Godot/addons/openvoicesharp/OpenVoiceSharpSessionNode.cs @@ -0,0 +1,199 @@ +using Godot; +using System; +using System.Threading.Tasks; + +[GlobalClass] +public partial class OpenVoiceSharpSessionNode : Node +{ + [Signal] + public delegate void SessionStartedEventHandler(); + + [Signal] + public delegate void SessionStoppedEventHandler(); + + [Signal] + public delegate void SessionErrorEventHandler(string message); + + [Signal] + public delegate void VoiceFrameDecodedEventHandler(string speakerId, long sequence, byte[] pcmData, int length); + + [Export] + public string ServerHost { get; set; } = "127.0.0.1"; + + [Export] + public int ServerPort { get; set; } = 7777; + + [Export] + public string RoomName { get; set; } = "lobby"; + + [Export] + public string UserName { get; set; } = "Player"; + + [Export] + public string AuthToken { get; set; } = string.Empty; + + [Export] + public int Bitrate { get; set; } = 24000; + + [Export] + public bool UseNoiseSuppression { get; set; } = true; + + [Export] + public bool PushToTalkMode { get; set; } + + private AuthoritativeVoiceSession? _session; + private bool _isStarting; + private bool _isStopping; + + public bool IsSessionRunning => _session != null; + + public async Task StartSessionAsync() + { + if (_session != null || _isStarting) + { + return true; + } + + _isStarting = true; + try + { + var session = new AuthoritativeVoiceSession( + ServerHost, + ServerPort, + RoomName, + UserName, + AuthToken, + bitrate: Bitrate, + enableNoiseSuppression: UseNoiseSuppression); + + session.VoiceFrameDecoded += OnVoiceFrameDecoded; + + await session.StartAsync().ConfigureAwait(false); + + _session = session; + + if (PushToTalkMode && _session.Recorder.IsRecording) + { + _session.Recorder.StopRecording(); + } + + CallDeferred(nameof(EmitSessionStarted)); + return true; + } + catch (Exception ex) + { + CallDeferred(nameof(EmitSessionError), ex.Message); + return false; + } + finally + { + _isStarting = false; + } + } + + public async Task StopSessionAsync() + { + if (_session == null || _isStopping) + { + return; + } + + _isStopping = true; + try + { + var activeSession = _session; + _session = null; + + activeSession.VoiceFrameDecoded -= OnVoiceFrameDecoded; + await activeSession.StopAsync().ConfigureAwait(false); + activeSession.Dispose(); + + CallDeferred(nameof(EmitSessionStopped)); + } + catch (Exception ex) + { + CallDeferred(nameof(EmitSessionError), ex.Message); + } + finally + { + _isStopping = false; + } + } + + public int ReadSpeakerPlayback(string speakerId, byte[] pcmOut, int length) + { + if (_session == null) + { + return 0; + } + + if (!Guid.TryParse(speakerId, out Guid parsedSpeakerId)) + { + return 0; + } + + return _session.ReadSpeakerPlayback(parsedSpeakerId, pcmOut, length); + } + + public void BeginPushToTalk() + { + if (_session == null || !PushToTalkMode) + { + return; + } + + if (!_session.Recorder.IsRecording) + { + _session.Recorder.StartRecording(); + } + } + + public void EndPushToTalk() + { + if (_session == null || !PushToTalkMode) + { + return; + } + + if (_session.Recorder.IsRecording) + { + _session.Recorder.StopRecording(); + } + } + + public override void _ExitTree() + { + if (_session != null) + { + var activeSession = _session; + _session = null; + activeSession.VoiceFrameDecoded -= OnVoiceFrameDecoded; + activeSession.Dispose(); + } + } + + private void OnVoiceFrameDecoded(Guid speakerId, uint sequence, byte[] pcmData, int length) + { + CallDeferred(nameof(EmitVoiceFrameDecoded), speakerId.ToString(), (long)sequence, pcmData, length); + } + + private void EmitSessionStarted() + { + EmitSignal(SignalName.SessionStarted); + } + + private void EmitSessionStopped() + { + EmitSignal(SignalName.SessionStopped); + } + + private void EmitSessionError(string message) + { + EmitSignal(SignalName.SessionError, message); + } + + private void EmitVoiceFrameDecoded(string speakerId, long sequence, byte[] pcmData, int length) + { + EmitSignal(SignalName.VoiceFrameDecoded, speakerId, sequence, pcmData, length); + } +} diff --git a/godot/OpenVoiceSharp.Godot/addons/openvoicesharp/lib/README.md b/godot/OpenVoiceSharp.Godot/addons/openvoicesharp/lib/README.md new file mode 100644 index 0000000..339c365 --- /dev/null +++ b/godot/OpenVoiceSharp.Godot/addons/openvoicesharp/lib/README.md @@ -0,0 +1,18 @@ +# Managed and Native Dependencies + +Place required managed and native runtime dependencies here (or in your Godot project's preferred plugin runtime path). + +Typical managed dependencies: + +- `OpenVoiceSharp.dll` +- `OpusDotNet.dll` +- `RNNoise.NET.dll` +- `WebRtcVadSharp.dll` +- `NAudio.Core.dll` +- `NAudio.WinMM.dll` (Windows microphone path) + +Typical native dependencies (Windows x64): + +- `opus.dll` +- `rnnoise.dll` +- `WebRtcVad.dll` diff --git a/godot/OpenVoiceSharp.Godot/addons/openvoicesharp/plugin.cfg b/godot/OpenVoiceSharp.Godot/addons/openvoicesharp/plugin.cfg new file mode 100644 index 0000000..2ee7021 --- /dev/null +++ b/godot/OpenVoiceSharp.Godot/addons/openvoicesharp/plugin.cfg @@ -0,0 +1,6 @@ +[plugin] +name="OpenVoiceSharp" +description="OpenVoiceSharp Godot C# integration starter." +author="OpenVoiceSharp" +version="0.1.0" +script="res://addons/openvoicesharp/OpenVoiceSharpPlugin.cs" diff --git a/godot/OpenVoiceSharp.Godot/demo/DemoVoiceUI.cs b/godot/OpenVoiceSharp.Godot/demo/DemoVoiceUI.cs new file mode 100644 index 0000000..1c1ee57 --- /dev/null +++ b/godot/OpenVoiceSharp.Godot/demo/DemoVoiceUI.cs @@ -0,0 +1,201 @@ +using Godot; +using System; +using System.Text; +using System.Threading.Tasks; + +public partial class DemoVoiceUI : Control +{ + private LineEdit _serverHost = null!; + private SpinBox _serverPort = null!; + private LineEdit _roomName = null!; + private LineEdit _userName = null!; + private LineEdit _authToken = null!; + private Label _status = null!; + private RichTextLabel _log = null!; + private Button _connectButton = null!; + private Button _disconnectButton = null!; + private CheckButton _pushToTalkMode = null!; + private Button _holdToTalkButton = null!; + private AudioStreamPlayer _playbackPlayer = null!; + private Timer _playbackTimer = null!; + + private OpenVoiceSharpSessionNode _voiceSession = null!; + private AudioStreamGeneratorPlayback? _playback; + private string _activeSpeakerId = string.Empty; + private byte[] _pcmScratch = Array.Empty(); + + private const int SampleRate = 48000; + private const float PlaybackBufferSeconds = 0.3f; + private const int PlaybackChunkSamples = 960; // 20ms @ 48kHz + + public override void _Ready() + { + _serverHost = GetNode("Root/Margin/VBox/ConnectionGrid/ServerHost"); + _serverPort = GetNode("Root/Margin/VBox/ConnectionGrid/ServerPort"); + _roomName = GetNode("Root/Margin/VBox/ConnectionGrid/RoomName"); + _userName = GetNode("Root/Margin/VBox/ConnectionGrid/UserName"); + _authToken = GetNode("Root/Margin/VBox/ConnectionGrid/AuthToken"); + _status = GetNode