Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion kos-py/pykos/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from pykos.services.process_manager import ProcessManagerServiceClient
from pykos.services.sim import SimServiceClient
from pykos.services.sound import SoundServiceClient
from pykos.services.speech import SpeechServiceClient


class KOS:
Expand All @@ -23,6 +24,13 @@ class KOS:

Attributes:
imu (IMUServiceClient): Client for the IMU service.
actuator (ActuatorServiceClient): Client for the actuator service.
led_matrix (LEDMatrixServiceClient): Client for the LED matrix service.
sound (SoundServiceClient): Client for the sound service.
process_manager (ProcessManagerServiceClient): Client for the process manager service.
inference (InferenceServiceClient): Client for the inference service.
sim (SimServiceClient): Client for the simulation service.
speech (SpeechServiceClient): Client for the speech service.
"""

def __init__(self, ip: str = "localhost", port: int = 50051) -> None:
Expand All @@ -36,6 +44,7 @@ def __init__(self, ip: str = "localhost", port: int = 50051) -> None:
self._process_manager: ProcessManagerServiceClient | None = None
self._inference: InferenceServiceClient | None = None
self._sim: SimServiceClient | None = None
self._speech: SpeechServiceClient | None = None

@property
def imu(self) -> IMUServiceClient:
Expand Down Expand Up @@ -79,14 +88,21 @@ def sim(self) -> SimServiceClient:
raise RuntimeError("Sim client not initialized! Must call __aenter__() first.")
return self._sim

@property
def speech(self) -> SpeechServiceClient:
    """Return the speech service client.

    Raises:
        RuntimeError: If ``_speech`` is still ``None``, i.e. no client has
            been created yet.
    """
    client = self._speech
    if client is not None:
        return client
    raise RuntimeError("Speech client not initialized! Must call __aenter__() first.")

async def connect(self) -> None:
    """Connect to the gRPC server and initialize service clients.

    Opens an insecure ``grpc.aio`` channel to ``{ip}:{port}`` and creates one
    client per service, all sharing that channel.
    """
    self._channel = grpc.aio.insecure_channel(f"{self.ip}:{self.port}")
    self._imu = IMUServiceClient(self._channel)
    self._actuator = ActuatorServiceClient(self._channel)
    self._led_matrix = LEDMatrixServiceClient(self._channel)
    self._sound = SoundServiceClient(self._channel)
    self._process_manager = ProcessManagerServiceClient(self._channel)
    self._speech = SpeechServiceClient(self._channel)
    self._inference = InferenceServiceClient(self._channel)
    self._sim = SimServiceClient(self._channel)

Expand Down
61 changes: 61 additions & 0 deletions kos-py/pykos/services/speech.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
"""Speech service client."""

import grpc
import grpc.aio

from kos_protos import speech_pb2, speech_pb2_grpc


class SpeechServiceClient:
    """Client for the SpeechService.

    This service provides text-to-speech synthesis and speech-to-text transcription.
    """

    def __init__(self, channel: grpc.aio.Channel) -> None:
        """Initialize the speech service client.

        Args:
            channel: gRPC channel to use for communication.
        """
        self.stub = speech_pb2_grpc.SpeechServiceStub(channel)

    async def synthesize(self, text: str) -> str:
        """Synthesize speech from text.

        Args:
            text: Text to synthesize.

        Returns:
            The ``file_path`` field of the server's response, i.e. the path of
            the file holding the synthesized speech.

        Raises:
            RuntimeError: If the response has its ``error`` field set.
        """
        request = speech_pb2.SynthesizeRequest(text=text)

        response = await self.stub.Synthesize(request)
        # `error` is a message field, so presence (not truthiness) signals failure.
        if response.HasField("error"):
            raise RuntimeError(f"Synthesis error: {response.error}")
        return response.file_path

    async def transcribe(self, audio_data: str) -> str:
        """Transcribe speech to text.

        Args:
            audio_data: Audio data to transcribe, sent as the request's
                ``audio_data`` string field.

        Returns:
            Transcribed text.

        Raises:
            RuntimeError: If the response has its ``error`` field set.
        """
        request = speech_pb2.TranscribeRequest(audio_data=audio_data)

        response = await self.stub.Transcribe(request)
        if response.HasField("error"):
            raise RuntimeError(f"Transcription error: {response.error}")
        return response.text
52 changes: 0 additions & 52 deletions kos-py/tests/test_pykos.py

This file was deleted.

12 changes: 11 additions & 1 deletion kos-stub/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,21 @@
mod actuator;
mod imu;
mod process_manager;
mod speech;
use crate::actuator::StubActuator;
use crate::imu::StubIMU;
use crate::process_manager::StubProcessManager;
use crate::speech::StubSpeech;
use async_trait::async_trait;
use kos::hal::Operation;
use kos::kos_proto::actuator::actuator_service_server::ActuatorServiceServer;
use kos::kos_proto::imu::imu_service_server::ImuServiceServer;
use kos::kos_proto::process_manager::process_manager_service_server::ProcessManagerServiceServer;
use kos::services::{ActuatorServiceImpl, IMUServiceImpl, ProcessManagerServiceImpl};
use kos::kos_proto::speech::speech_service_server::SpeechServiceServer;
use kos::services::{
ActuatorServiceImpl, IMUServiceImpl, ProcessManagerServiceImpl, SpeechServiceImpl,
};

use kos::{services::OperationsServiceImpl, Platform, ServiceEnum};
use std::future::Future;
use std::pin::Pin;
Expand Down Expand Up @@ -52,6 +58,7 @@ impl Platform for StubPlatform {
let actuator = StubActuator::new(operations_service.clone());
let imu = StubIMU::new(operations_service.clone());
let process_manager = StubProcessManager::new();
let speech = StubSpeech::new();

Ok(vec![
ServiceEnum::Actuator(ActuatorServiceServer::new(ActuatorServiceImpl::new(
Expand All @@ -61,6 +68,9 @@ impl Platform for StubPlatform {
ProcessManagerServiceImpl::new(Arc::new(process_manager)),
)),
ServiceEnum::Imu(ImuServiceServer::new(IMUServiceImpl::new(Arc::new(imu)))),
ServiceEnum::Speech(SpeechServiceServer::new(SpeechServiceImpl::new(Arc::new(
speech,
)))),
])
})
}
Expand Down
32 changes: 32 additions & 0 deletions kos-stub/src/speech.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
use async_trait::async_trait;
use eyre::Result;
use kos::hal::Speech;
use kos::kos_proto::speech::SynthesizeResponse;
use std::process::Command;
use uuid::Uuid;
/// Field-less stand-in for a speech backend.
pub struct StubSpeech {}

impl StubSpeech {
    /// Creates a new stub; there is no state to set up.
    pub fn new() -> Self {
        Self {}
    }
}

impl Default for StubSpeech {
    /// Same as [`StubSpeech::new`].
    fn default() -> Self {
        StubSpeech::new()
    }
}

#[async_trait]
impl Speech for StubSpeech {
    /// Stub synthesis: no audio is produced and no file is written.
    ///
    /// The input text is ignored (hence the `_text` name, which avoids an
    /// unused-variable warning). The response only carries a freshly generated
    /// `synthesize_<uuid>.wav` file name and no error.
    async fn synthesize(&self, _text: String) -> Result<SynthesizeResponse> {
        // A random v4 UUID keeps the returned name unique per call.
        let file_path = format!("synthesize_{}.wav", Uuid::new_v4());

        Ok(SynthesizeResponse {
            file_path,
            error: None,
        })
    }
}
6 changes: 5 additions & 1 deletion kos/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@ fn main() {
let proto_root = "proto";

// Where to output the compiled Rust files
let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());
let out_dir = PathBuf::from(match env::var("OUT_DIR") {
Ok(dir) => dir,
Err(e) => panic!("Failed to get OUT_DIR: {}", e),
});

// List of Protobuf files
let protos = [
Expand All @@ -19,6 +22,7 @@ fn main() {
"kos/system.proto",
"kos/led_matrix.proto",
"kos/sound.proto",
"kos/speech.proto",
"google/longrunning/operations.proto",
];

Expand Down
44 changes: 44 additions & 0 deletions kos/proto/kos/speech.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
syntax = "proto3";

package kos.speech;

import "google/protobuf/empty.proto";
import "kos/common.proto";

option go_package = "kos/speech;speech";
option java_package = "com.kos.speech";
option csharp_namespace = "KOS.Speech";

// The SpeechService provides methods to transcribe or synthesize speech
// Both RPCs are unary: one request message in, one response message out.
service SpeechService {
// Transcribes speech to text
rpc Transcribe(TranscribeRequest) returns (TranscribeResponse);

// Synthesizes speech from text
rpc Synthesize(SynthesizeRequest) returns (SynthesizeResponse);
}

// Value type of SynthesizeRequest.model.
// NOTE(review): the names suggest which synthesis backend to use (eSpeak NG,
// a "kmodel", a local model) - confirm the exact meaning with the server side.
enum Model {
// Zero value, so this is what an unset `model` field reads as in proto3.
ESPEAK_NG = 0;
KMODEL = 1;
LOCAL = 2;
}

// Request message for SpeechService.Transcribe.
message TranscribeRequest {
// Audio to transcribe.
// NOTE(review): typed as `string`, not `bytes`; presumably a file path or an
// encoded payload - confirm the expected format with the server side.
string audio_data = 1;
}

// Response message for SpeechService.Transcribe.
message TranscribeResponse {
// Transcribed text.
string text = 1;
// Error details; presumably left unset on success - confirm with the server side.
kos.common.Error error = 2;
}

// Request message for SpeechService.Synthesize.
message SynthesizeRequest {
// Text to synthesize.
string text = 1;
// Model to use; an unset field reads as the enum's zero value (ESPEAK_NG).
Model model = 2;
}

// Response message for SpeechService.Synthesize.
message SynthesizeResponse {
// Path of the file holding the synthesized speech.
// NOTE(review): presumably a path on the machine running the service - confirm.
string file_path = 1;
// Error details; presumably left unset on success - confirm with the server side.
kos.common.Error error = 2;
}
1 change: 1 addition & 0 deletions kos/src/daemon.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ fn add_service_to_router(
ServiceEnum::Inference(svc) => router.add_service(svc),
ServiceEnum::LEDMatrix(svc) => router.add_service(svc),
ServiceEnum::Sound(svc) => router.add_service(svc),
ServiceEnum::Speech(svc) => router.add_service(svc),
}
}

Expand Down
4 changes: 4 additions & 0 deletions kos/src/grpc_interface.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ pub mod kos {
pub mod sound {
tonic::include_proto!("kos/kos.sound");
}

/// Generated Rust bindings for the `kos.speech` protobuf package.
pub mod speech {
// Pulls in the code that the build step generated for this package.
tonic::include_proto!("kos/kos.speech");
}
}

pub mod google {
Expand Down
19 changes: 12 additions & 7 deletions kos/src/hal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ pub use crate::grpc_interface::kos;
pub use crate::grpc_interface::kos::common::ActionResponse;
pub use crate::kos_proto::{
actuator::*, common::ActionResult, imu::*, inference::*, led_matrix::*, process_manager::*,
sound::*,
sound::*, speech::*,
};
use async_trait::async_trait;
use bytes::Bytes;
Expand Down Expand Up @@ -44,12 +44,6 @@ pub trait IMU: Send + Sync {
async fn get_quaternion(&self) -> Result<QuaternionResponse>;
}

#[async_trait]
pub trait ProcessManager: Send + Sync {
async fn start_kclip(&self, action: String) -> Result<KClipStartResponse>;
async fn stop_kclip(&self) -> Result<KClipStopResponse>;
}

#[async_trait]
pub trait Inference: Send + Sync {
async fn upload_model(
Expand Down Expand Up @@ -107,6 +101,17 @@ pub trait Sound: Send + Sync {
async fn stop_recording(&self) -> Result<ActionResponse, tonic::Status>;
}

/// Hardware-abstraction trait for the process manager service.
///
/// Implementors must be `Send + Sync`.
#[async_trait]
pub trait ProcessManager: Send + Sync {
/// Starts a KClip for the given `action` string.
async fn start_kclip(&self, action: String) -> Result<KClipStartResponse>;
/// Stops the KClip.
async fn stop_kclip(&self) -> Result<KClipStopResponse>;
}

/// Hardware-abstraction trait for the speech service.
///
/// Implementors must be `Send + Sync`. Only synthesis is part of this trait;
/// it has no transcription method.
#[async_trait]
pub trait Speech: Send + Sync {
/// Synthesizes speech for `text` and returns the resulting `SynthesizeResponse`.
async fn synthesize(&self, text: String) -> Result<SynthesizeResponse>;
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CalibrationStatus {
Calibrating,
Expand Down
10 changes: 9 additions & 1 deletion kos/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,11 @@ use hal::inference_service_server::InferenceServiceServer;
use hal::led_matrix_service_server::LedMatrixServiceServer;
use hal::process_manager_service_server::ProcessManagerServiceServer;
use hal::sound_service_server::SoundServiceServer;
use hal::speech_service_server::SpeechServiceServer;
use services::OperationsServiceImpl;
use services::{
ActuatorServiceImpl, IMUServiceImpl, InferenceServiceImpl, LEDMatrixServiceImpl,
ProcessManagerServiceImpl, SoundServiceImpl,
ProcessManagerServiceImpl, SoundServiceImpl, SpeechServiceImpl,
};
use std::fmt::Debug;
use std::future::Future;
Expand Down Expand Up @@ -66,6 +67,12 @@ impl Debug for SoundServiceImpl {
}
}

impl Debug for SpeechServiceImpl {
    /// Formats the value as the fixed type name; no internal state is printed.
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        formatter.write_str("SpeechServiceImpl")
    }
}

#[derive(Debug)]
pub enum ServiceEnum {
Actuator(ActuatorServiceServer<ActuatorServiceImpl>),
Expand All @@ -74,6 +81,7 @@ pub enum ServiceEnum {
Inference(InferenceServiceServer<InferenceServiceImpl>),
LEDMatrix(LedMatrixServiceServer<LEDMatrixServiceImpl>),
Sound(SoundServiceServer<SoundServiceImpl>),
Speech(SpeechServiceServer<SpeechServiceImpl>),
}

#[async_trait]
Expand Down
Loading