diff --git a/acestep/streaming/config.py b/acestep/streaming/config.py
index 65fadf56..c66ab167 100644
--- a/acestep/streaming/config.py
+++ b/acestep/streaming/config.py
@@ -52,6 +52,14 @@ class SessionConfig:
     fixture_name: str | None = None
     use_server_fixture: bool = False
     stem_source_mode: str | None = None
+    # Text-to-music mode: no input audio at all. The server synthesizes a
+    # silent source of ``text2music_duration_s`` seconds and conditions the
+    # diffusion purely on the prompt (canonical silence latent = the
+    # model's trained "no reference" signal), so generation is text-only.
+    # The client sends NO binary PCM frame during the handshake when this
+    # is set (mirrors ``use_server_fixture``).
+    text2music: bool = False
+    text2music_duration_s: float = 60.0
     enabled_loras: list = field(default_factory=list)
     lora_strengths: dict = field(default_factory=dict)
     lora_paths: list = field(default_factory=list)
diff --git a/acestep/streaming/session.py b/acestep/streaming/session.py
index 0814b207..14333203 100644
--- a/acestep/streaming/session.py
+++ b/acestep/streaming/session.py
@@ -114,6 +114,7 @@
     _resolve_bpm_key_source,
     _try_load_sidecar,
     SAMPLE_RATE,
+    resolve_text2music_source,
 )
 from acestep.streaming.state import SessionState
 from acestep.streaming.stems import (
@@ -935,11 +936,19 @@ def _apply_swap_if_pending(self) -> None:
             tags = state.swap_pending.get("tags")
             requested_key = state.swap_pending.get("key")
             requested_time_sig = state.swap_pending.get("time_signature")
-            new_fixture_name = state.swap_pending.get("fixture_name")
-            new_stem_source_mode = resolve_upload_stem_source_mode(
-                new_fixture_name,
-                state.swap_pending.get("stem_source_mode"),
-                known_fixtures=KNOWN_FIXTURES,
+            new_text2music = bool(state.swap_pending.get("text2music"))
+            # Text-to-music swap: the placeholder waveform carries no
+            # fixture identity and the stem machinery doesn't apply.
+            new_fixture_name = (
+                None if new_text2music
+                else state.swap_pending.get("fixture_name")
+            )
+            new_stem_source_mode = None if new_text2music else (
+                resolve_upload_stem_source_mode(
+                    new_fixture_name,
+                    state.swap_pending.get("stem_source_mode"),
+                    known_fixtures=KNOWN_FIXTURES,
+                )
             )
             if new_wf is None:
                 return
@@ -949,6 +958,7 @@ def _apply_swap_if_pending(self) -> None:
             state.swap_pending["time_signature"] = None
             state.swap_pending["fixture_name"] = None
             state.swap_pending["stem_source_mode"] = None
+            state.swap_pending["text2music"] = False
 
         # Initialized to None so the finally below can None-guard
         # cleanly in the (rare) case an exception fires between the
@@ -1028,16 +1038,27 @@ def _apply_swap_if_pending(self) -> None:
                     return
 
             new_audio_in = Audio(waveform=new_wf, sample_rate=SAMPLE_RATE)
-            new_source, new_bpm, new_key, new_time_sig = (
-                _resolve_bpm_key_source(
-                    self.session,
-                    audio_in=new_audio_in,
-                    fixture_name=new_fixture_name,
-                    samples=int(new_wf.shape[1]),
-                    key_override=requested_key,
-                    time_signature_override=requested_time_sig,
+            if new_text2music:
+                new_source, new_bpm, new_key, new_time_sig = (
+                    resolve_text2music_source(
+                        self.session, samples=int(new_wf.shape[1]),
+                    )
+                )
+                if requested_key:
+                    new_key = requested_key
+                if requested_time_sig:
+                    new_time_sig = requested_time_sig
+            else:
+                new_source, new_bpm, new_key, new_time_sig = (
+                    _resolve_bpm_key_source(
+                        self.session,
+                        audio_in=new_audio_in,
+                        fixture_name=new_fixture_name,
+                        samples=int(new_wf.shape[1]),
+                        key_override=requested_key,
+                        time_signature_override=requested_time_sig,
+                    )
                 )
-            )
             new_upload_stems, new_stem_error, new_source, new_wf = (
                 extract_and_select_upload_stem(
                     new_wf,
@@ -1858,11 +1879,16 @@ def swap_source(
         time_signature: str | None = None,
         fixture_name: str | None = None,
         stem_source_mode: str | None = None,
+        text2music: bool = False,
         origin: CommandOrigin = CommandOrigin.PRIMARY,
     ) -> None:
         """Stage a source swap. The runner applies it inside
         ``before_tick``; publishes :class:`SwapReady` or
-        :class:`SwapFailed` when the swap completes."""
+        :class:`SwapFailed` when the swap completes.
+
+        ``text2music=True`` marks ``audio`` as a synthesized silent
+        placeholder: the swap resolves the canonical-silence source
+        (prompt-only conditioning) instead of encoding the waveform."""
         state = self.state
         state.last_activity_ts = time.monotonic()
         effective_tags = tags or state.prompt_text
@@ -1877,6 +1903,7 @@ def swap_source(
             state.swap_pending["stem_source_mode"] = normalize_stem_source_mode(
                 stem_source_mode,
             )
+            state.swap_pending["text2music"] = bool(text2music)
 
     @requires_capability("write_audio", "write_audio")
     def write_audio(
@@ -2090,8 +2117,12 @@ def create(
         fast_vae = config.fast_vae
         walk_window = config.walk_window
         walk_window_s = config.walk_window_s
-        fixture_name = config.fixture_name
-        stem_source_mode = resolve_upload_stem_source_mode(
+        # Text-to-music: no input audio. The waveform is the synthesized
+        # silence placeholder; fixture / stem machinery doesn't apply
+        # (running Mel-Band RoFormer on zeros would be pure waste).
+        text2music = config.text2music
+        fixture_name = None if text2music else config.fixture_name
+        stem_source_mode = None if text2music else resolve_upload_stem_source_mode(
             fixture_name,
             normalize_stem_source_mode(config.stem_source_mode),
             known_fixtures=KNOWN_FIXTURES,
@@ -2242,14 +2273,21 @@ def create(
 
             audio_in = Audio(waveform=waveform, sample_rate=SAMPLE_RATE)
 
-            source, detected_bpm, detected_key, detected_time_signature = (
-                _resolve_bpm_key_source(
-                    engine_session,
-                    audio_in=audio_in,
-                    fixture_name=fixture_name,
-                    samples=int(waveform.shape[1]),
+            if text2music:
+                source, detected_bpm, detected_key, detected_time_signature = (
+                    resolve_text2music_source(
+                        engine_session, samples=int(waveform.shape[1]),
+                    )
+                )
+            else:
+                source, detected_bpm, detected_key, detected_time_signature = (
+                    _resolve_bpm_key_source(
+                        engine_session,
+                        audio_in=audio_in,
+                        fixture_name=fixture_name,
+                        samples=int(waveform.shape[1]),
+                    )
                 )
-            )
 
             upload_stems, stem_error, source, waveform = (
                 extract_and_select_upload_stem(
diff --git a/acestep/streaming/source.py b/acestep/streaming/source.py
index a9ffef81..2d2f7111 100644
--- a/acestep/streaming/source.py
+++ b/acestep/streaming/source.py
@@ -133,6 +133,79 @@ def _load_clip_waveform(name: str) -> torch.Tensor:
     return _load_waveform_from_path(str(resolve_audio_clip(name)))
 
 
+# ---------------------------------------------------------------------------
+# Text-to-music (no input audio)
+# ---------------------------------------------------------------------------
+
+# Conditioning defaults (BPM / key / time signature) reported by
+# ``resolve_text2music_source`` when there is no audio to detect them from.
+# 120 BPM / C major / 4 are the model family's most-supported values; the
+# operator can re-steer key / time signature live via the ``prompt`` command.
+TEXT2MUSIC_BPM = 120
+TEXT2MUSIC_KEY = "C major"
+TEXT2MUSIC_TIME_SIGNATURE = "4"
+
+# Floor in seconds for the synthesized silent source (applied by
+# ``text2music_waveform``). Below ~10 s the loop seam dominates; the ceiling
+# is the caller's TRT profile cap (same clamp every uploaded source gets).
+TEXT2MUSIC_MIN_DURATION_S = 10.0
+
+# Samples per latent frame (SAMPLE_RATE // 25): 48 kHz audio, 25 fps latents.
+_SAMPLES_PER_LATENT_FRAME = SAMPLE_RATE // 25
+
+
+def text2music_waveform(duration_s: float, *, max_seconds: float) -> torch.Tensor:
+    """Synthesize the silent stereo placeholder for a text-to-music source.
+
+    Only seeds the playback ring buffer (the user hears silence until
+    generated slices land) and sets the session length; callers apply the
+    same TRT-cap / pool-alignment trim as for an upload. Clamped to
+    [floor, ``max_seconds``]; NaN (``json.loads`` accepts it) -> floor.
+    """
+    dur = float(duration_s)
+    dur = TEXT2MUSIC_MIN_DURATION_S if dur != dur else dur  # NaN != NaN
+    dur = min(max(dur, TEXT2MUSIC_MIN_DURATION_S), float(max_seconds))
+    return torch.zeros(2, int(dur * SAMPLE_RATE))
+
+
+def resolve_text2music_source(
+    session: Session, *, samples: int,
+) -> tuple[PreparedSource, int, str, str]:
+    """Build the prompt-only source; analog of :func:`_resolve_bpm_key_source`.
+
+    Args:
+        session: engine session; its ``model`` is handed to ``EmptyLatent``.
+        samples: placeholder waveform length in samples (sets ``frames``).
+
+    Returns:
+        ``(source, bpm, key, time_signature)``; the last three are the
+        fixed ``TEXT2MUSIC_*`` conditioning defaults.
+
+    Source and context latents are both the checkpoint's CANONICAL silence
+    latent (``EmptyLatent``), not a VAE encode of digital zeros: it is what
+    the model was trained to read as "no reference audio" (its forward uses
+    ``silence_latent`` to simulate text2music mode), so structure
+    conditioning is absent rather than "semantic hints of an all-zero
+    clip". Also skips the VAE encode + semantic extract and librosa beat /
+    CNN key detection (both meaningless on silence — beat_track returns 0
+    BPM, which would poison the text conditioning).
+    """
+    from acestep.nodes.vae_nodes import EmptyLatent
+
+    n_frames = samples // _SAMPLES_PER_LATENT_FRAME
+    node_out = EmptyLatent().execute(model=session.model, frames=n_frames)
+    silence = node_out["latent"]
+    prepared = PreparedSource(
+        latent=silence, context_latent=Latent(tensor=silence.tensor.clone()),
+    )
+    seconds = samples / SAMPLE_RATE
+    logger.info(
+        "text2music_source_ready frames={} duration_s={:.1f}", n_frames, seconds,
+    )
+    defaults = (TEXT2MUSIC_BPM, TEXT2MUSIC_KEY, TEXT2MUSIC_TIME_SIGNATURE)
+    return (prepared, *defaults)
+
+
 _VALID_TIME_SIG_STRS = frozenset(str(s) for s in VALID_TIME_SIGNATURES)
 
 
diff --git a/acestep/streaming/state.py b/acestep/streaming/state.py
index 3e294e88..c55bf9e2 100644
--- a/acestep/streaming/state.py
+++ b/acestep/streaming/state.py
@@ -38,6 +38,7 @@ def _default_swap_pending() -> dict:
         "time_signature": None,
         "fixture_name": None,
         "stem_source_mode": None,
+        "text2music": False,
     }
 
 
diff --git a/demos/realtime_motion_graph_web/protocol.py b/demos/realtime_motion_graph_web/protocol.py
index 2a7f8e45..f52a9083 100644
--- a/demos/realtime_motion_graph_web/protocol.py
+++ b/demos/realtime_motion_graph_web/protocol.py
@@ -380,13 +380,19 @@ class EventSpec:
                       description="When true, the server loads the named source "
                                   "off its own disk and NO binary frame is "
                                   "sent."),
+            FieldSpec("text2music", "bool",
+                      description="When true, the server swaps to a synthesized "
+                                  "silent source (text-to-music mode: generation "
+                                  "is conditioned on the prompt alone) and NO "
+                                  "binary frame is sent. fixture_name is "
+                                  "ignored."),
         ),
         binary=True,
         binary_optional=True,
         requires="swap",
         description="Replace the playback source in-flight. A binary PCM frame "
-                    "follows UNLESS use_server_source is set. Acked by "
-                    "swap_ready (+ binary buffer) / swap_failed.",
+                    "follows UNLESS use_server_source or text2music is set. "
+                    "Acked by swap_ready (+ binary buffer) / swap_failed.",
     ),
     CommandSpec(
         "write_audio",
diff --git a/demos/realtime_motion_graph_web/web/app/globals.css b/demos/realtime_motion_graph_web/web/app/globals.css
index 7ffb55e8..9560a501 100644
--- a/demos/realtime_motion_graph_web/web/app/globals.css
+++ b/demos/realtime_motion_graph_web/web/app/globals.css
@@ -7102,6 +7102,45 @@ body.curve-open #install-video-area #graph {
   opacity: 1;
 }
 
+/* Pinned text-to-music sleeve — prompt-only generation, no input audio.
+   Sits in the pinned region above the upload sleeve, separated from the
+   scroll list by the same hairline treatment. */
+.audio-source-sleeve--text2music {
+  flex-shrink: 0;
+  position: relative;
+  margin-top: 2px;
+  padding-top: 9px;
+  color: var(--accent-hover);
+  text-transform: uppercase;
+  letter-spacing: var(--tracking-wide);
+  font-size: 10px;
+}
+.audio-source-sleeve--text2music::before {
+  content: "";
+  position: absolute;
+  top: 0;
+  left: 8px;
+  right: 8px;
+  height: 1px;
+  background: linear-gradient(90deg,
+    transparent,
+    var(--frame-line) 25%,
+    var(--frame-line) 75%,
+    transparent);
+}
+.audio-source-sleeve--text2music:hover {
+  background: var(--accent-medium);
+  color: var(--accent-hover);
+}
+.audio-source-sleeve-art--text2music {
+  border: none;
+  color: var(--accent);
+  width: 14px;
+  height: 14px;
+  opacity: 1;
+}
+.audio-source-sleeve-art--text2music::after { display: none; }
+
 /* Pinned upload sleeve — always rendered, separated from the scroll list
    above by a thin hairline so it reads as a distinct "you" slot. */
 .audio-source-sleeve--upload {
diff --git a/demos/realtime_motion_graph_web/web/components/Performance/AudioSourceCrate.tsx b/demos/realtime_motion_graph_web/web/components/Performance/AudioSourceCrate.tsx
index e0ed79d0..9c66703f 100644
--- a/demos/realtime_motion_graph_web/web/components/Performance/AudioSourceCrate.tsx
+++ b/demos/realtime_motion_graph_web/web/components/Performance/AudioSourceCrate.tsx
@@ -16,6 +16,11 @@ import { useUploadOnboardingHint } from "@/hooks/useUploadOnboardingHint";
 import { trimAudioBuffer } from "@/lib/audio/trimAudioBuffer";
 import { useConfig } from "@/lib/config";
 import { LOCAL_MODE } from "@/lib/runtime";
+import {
+  TEXT2MUSIC_LABEL,
+  TEXT2MUSIC_SOURCE,
+  isText2Music,
+} from "@/lib/text2music";
 import { useCustomTracksStore } from "@/store/useCustomTracksStore";
 import { usePerformanceStore } from "@/store/usePerformanceStore";
 import { useSessionStore } from "@/store/useSessionStore";
@@ -85,6 +90,28 @@ function MicIcon({ size = 14 }: { size?: number }) {
   );
 }
 
+function TextPromptIcon({ size = 14 }: { size?: number }) {
+  // Glyph: three shrinking "text" lines with a small note (stem + head)
+  // beside them; strokes use currentColor so the parent's CSS color applies.
+  const pen = {
+    fill: "none",
+    stroke: "currentColor",
+    strokeWidth: 1.4,
+    strokeLinecap: "round",
+    strokeLinejoin: "round",
+  } as const;
+  const box = { viewBox: "0 0 16 16", width: size, height: size };
+  return (
+    <svg {...box} {...pen} aria-hidden="true">
+      <path d="M2.5 3.5h11" />
+      <path d="M2.5 6.5h7" />
+      <path d="M2.5 9.5h5" />
+      <path d="M13 7.5v4" />
+      <circle cx="11.6" cy="11.9" r="1.4" />
+    </svg>
+  );
+}
+
 function NoteIcon({ size = 16 }: { size?: number }) {
   return (
     <svg
@@ -278,7 +305,9 @@ export function AudioSourceCrate() {
     ...fixtures.map((name) => ({ name, kind: "fixture" as const })),
     ...customNames.map((name) => ({ name, kind: "custom" as const })),
   ];
-  const displayedName = fixture || (tracks[0]?.name ?? "—");
+  const displayedName = isText2Music(fixture)
+    ? TEXT2MUSIC_LABEL
+    : fixture || (tracks[0]?.name ?? "—");
 
   return (
     <>
@@ -424,6 +453,36 @@ export function AudioSourceCrate() {
               );
             })}
           </div>
+          {/* Text-to-music sleeve — generation from the prompt alone, no
+              input audio. Pinned with the upload sleeve so it's always
+              visible regardless of fixture count. */}
+          <button
+            type="button"
+            role="menuitem"
+            className={[
+              "audio-source-sleeve",
+              "audio-source-sleeve--text2music",
+              isText2Music(fixture) ? "audio-source-sleeve--current" : "",
+            ]
+              .filter(Boolean)
+              .join(" ")}
+            onClick={async () => {
+              setOpen(false);
+              if (!(await gate("track_change"))) return;
+              setFixture(TEXT2MUSIC_SOURCE);
+            }}
+            data-dd-tooltip="Generate music from your text prompt alone — no input audio"
+          >
+            <span
+              className="audio-source-sleeve-art audio-source-sleeve-art--text2music"
+              aria-hidden="true"
+            >
+              <TextPromptIcon />
+            </span>
+            <span className="audio-source-sleeve-label">
+              {TEXT2MUSIC_LABEL}
+            </span>
+          </button>
           {/* Upload sleeve is pinned outside the scroll region so it stays
               visible regardless of fixture count. Always rendered. */}
           <button
diff --git a/demos/realtime_motion_graph_web/web/components/Performance/LiteTrackCarousel.tsx b/demos/realtime_motion_graph_web/web/components/Performance/LiteTrackCarousel.tsx
index 0bcc492e..608a7a55 100644
--- a/demos/realtime_motion_graph_web/web/components/Performance/LiteTrackCarousel.tsx
+++ b/demos/realtime_motion_graph_web/web/components/Performance/LiteTrackCarousel.tsx
@@ -15,6 +15,7 @@ import { commitUploadedTrack } from "@/lib/audio/commitUploadedTrack";
 import { trimAudioBuffer } from "@/lib/audio/trimAudioBuffer";
 import { useConfig } from "@/lib/config";
 import { LOCAL_MODE } from "@/lib/runtime";
+import { TEXT2MUSIC_LABEL, TEXT2MUSIC_SOURCE } from "@/lib/text2music";
 import { useCustomTracksStore } from "@/store/useCustomTracksStore";
 import { usePerformanceStore } from "@/store/usePerformanceStore";
 import { useSessionStore } from "@/store/useSessionStore";
@@ -221,6 +222,9 @@ export function LiteTrackCarousel() {
               ))}
             </optgroup>
           )}
+          <option value={TEXT2MUSIC_SOURCE}>
+            ♪  {TEXT2MUSIC_LABEL} (no input audio)
+          </option>
           <option value={MIC_VALUE}>
             ●  Record from microphone…
           </option>
diff --git a/demos/realtime_motion_graph_web/web/components/Performance/TrackPicker.tsx b/demos/realtime_motion_graph_web/web/components/Performance/TrackPicker.tsx
index 047285c6..229fa9a8 100644
--- a/demos/realtime_motion_graph_web/web/components/Performance/TrackPicker.tsx
+++ b/demos/realtime_motion_graph_web/web/components/Performance/TrackPicker.tsx
@@ -15,6 +15,7 @@ import { commitUploadedTrack } from "@/lib/audio/commitUploadedTrack";
 import { trimAudioBuffer } from "@/lib/audio/trimAudioBuffer";
 import { useConfig } from "@/lib/config";
 import { LOCAL_MODE } from "@/lib/runtime";
+import { TEXT2MUSIC_LABEL, TEXT2MUSIC_SOURCE } from "@/lib/text2music";
 import { useCustomTracksStore } from "@/store/useCustomTracksStore";
 import { usePerformanceStore } from "@/store/usePerformanceStore";
 import { useSessionStore } from "@/store/useSessionStore";
@@ -147,7 +148,7 @@ export function TrackPicker() {
       <RefSelect
         label="input track"
         value={fixture || ""}
-        pinned={[]}
+        pinned={[{ value: TEXT2MUSIC_SOURCE, label: TEXT2MUSIC_LABEL }]}
         groups={[
           {
             label: "Library",
diff --git a/demos/realtime_motion_graph_web/web/hooks/useFixtureSwap.ts b/demos/realtime_motion_graph_web/web/hooks/useFixtureSwap.ts
index d19298a6..a98e475e 100644
--- a/demos/realtime_motion_graph_web/web/hooks/useFixtureSwap.ts
+++ b/demos/realtime_motion_graph_web/web/hooks/useFixtureSwap.ts
@@ -11,6 +11,7 @@ import {
   getConfig,
   resolveLoraCapForSource,
 } from "@/lib/config";
+import { isText2Music } from "@/lib/text2music";
 import { useCustomTracksStore } from "@/store/useCustomTracksStore";
 import { usePerformanceStore } from "@/store/usePerformanceStore";
 import { useSessionStore } from "@/store/useSessionStore";
@@ -62,13 +63,16 @@ export function useFixtureSwap() {
       // re-rip stems. The playback buffer still comes back in the
       // swap_ready echo. Only tracks that live solely in browser memory
       // (no-pod fallback, MCP mirror) take the decode + upload path.
-      const serverResident = useCustomTracksStore
+      // Text-to-music swap: no audio to load — the server synthesizes
+      // the silent source and conditions on the prompt alone.
+      const text2music = isText2Music(name);
+      const serverResident = !text2music && useCustomTracksStore
         .getState()
         .isServerResident(name);
 
       let interleaved: Float32Array | null = null;
       let channels = 0;
-      if (!serverResident) {
+      if (!serverResident && !text2music) {
         setStatus("ready", `Loading ${name}…`);
         try {
           const decoded = await loadFixtureAudio(name);
@@ -211,7 +215,9 @@ export function useFixtureSwap() {
         // fixture's sidecar.key on the server side.
         // Operator overrides flow through the OperatorStrip dropdown's
         // onChange handler (sendPrompt), not through swap_source.
-        const sent = serverResident
+        const sent = text2music
+          ? remote.sendSwapTextToMusic(perf.promptA)
+          : serverResident
           ? remote.sendSwapSourceByName(
               name,
               perf.promptA,
@@ -261,14 +267,23 @@ export function useFixtureSwap() {
         // the next legitimate swap reverts to the normal behaviour.
         const perfState = usePerformanceStore.getState();
         const gate = getConfig().denoise_session_gate;
-        if (perfState.skipNextDenoiseGate) {
+        if (text2music) {
+          // Text-to-music: the source is silence — gating denoise to 0
+          // would play nothing. Full generation immediately, and the
+          // "drag to start" affordance doesn't apply.
+          perfState.setSliderDirect("denoise", 1);
+          perfState.setRemixStarted(true);
+        } else if (perfState.skipNextDenoiseGate) {
           perfState.setSkipNextDenoiseGate(false);
+          perfState.setRemixStarted(false);
         } else if (gate.enabled) {
           const prevDenoise = perfState.sliderTargets["denoise"] ?? 0;
           perfState.setSliderDirect("denoise", 0);
           perfState.animateSliderDisplayFrom("denoise", prevDenoise, gate.glide_ms);
+          perfState.setRemixStarted(false);
+        } else {
+          perfState.setRemixStarted(false);
         }
-        perfState.setRemixStarted(false);
       }
       setStatus("ready", "Playing");
     };
diff --git a/demos/realtime_motion_graph_web/web/hooks/useStartSession.ts b/demos/realtime_motion_graph_web/web/hooks/useStartSession.ts
index 5cc79e99..a2d4aacb 100644
--- a/demos/realtime_motion_graph_web/web/hooks/useStartSession.ts
+++ b/demos/realtime_motion_graph_web/web/hooks/useStartSession.ts
@@ -15,6 +15,7 @@ import {
   resolveLoraCapForSource,
 } from "@/lib/config";
 import { wirePromptTransform } from "@/lib/loraTriggers";
+import { isText2Music } from "@/lib/text2music";
 import { useCustomTracksStore } from "@/store/useCustomTracksStore";
 import { useLoraStore } from "@/store/useLoraStore";
 import { usePerformanceStore, type RefSource } from "@/store/usePerformanceStore";
@@ -119,6 +120,33 @@ function buildConfig(
   // standalone shell wires no getter, so this is null and the field is
   // omitted; demon-public-demo wires PostHog's distinct_id).
   const clientId = getClientId();
+  // Text-to-music: the sentinel never goes on the wire as a
+  // fixture_name; the server synthesizes a silent source and conditions
+  // on the prompt alone. No binary PCM frame is sent (the SDK skips it
+  // when config.text2music is set).
+  if (isText2Music(fixtureName)) {
+    return {
+      telemetry_version: 1,
+      sde: cfg.sde,
+      lora: cfg.lora,
+      depth: cfg.depth,
+      vae_window: cfg.vae_window,
+      crop: cfg.crop,
+      steps: cfg.steps,
+      fast_vae: cfg.fast_vae,
+      walk_window: cfg.walk_window ?? false,
+      walk_window_s: cfg.walk_window_s ?? 60,
+      lead_floor_s: cfg.lead_floor_s,
+      lead_ceiling_s: cfg.lead_ceiling_s,
+      lead_release_tau_s: cfg.lead_release_tau_s,
+      enabled_loras: enabledLoras,
+      prompt: perf.promptA,
+      prompt_b: perf.promptB,
+      lora_strengths: loraStrengths,
+      text2music: true,
+      ...(clientId ? { client_id: clientId } : {}),
+    };
+  }
   return {
     telemetry_version: 1,
     sde: cfg.sde,
@@ -371,6 +399,17 @@ interface ResolvedFixture {
  */
 async function resolveFixtureForConnect(): Promise<ResolvedFixture | null> {
   let fixtureName = usePerformanceStore.getState().fixture;
+  // Text-to-music: nothing to load or probe — there is no input audio.
+  // The empty interleaved array mirrors the server-side-fixture path
+  // (the SDK sends no PCM frame when config.text2music is set).
+  if (isText2Music(fixtureName)) {
+    return {
+      fixtureName,
+      useServerFixture: false,
+      interleaved: new Float32Array(0),
+      channels: 2,
+    };
+  }
   if (!fixtureName) {
     const list = await listFixtures();
     fixtureName = pickDefaultFixture(list);
@@ -743,14 +782,23 @@ export function useStartSession() {
     // so their gate behaviour is unchanged.
     const perfState = usePerformanceStore.getState();
     const gate = getConfig().denoise_session_gate;
-    if (perfState.skipNextDenoiseGate) {
+    if (isText2Music(sessionFixture.fixtureName)) {
+      // Text-to-music: the "source" is silence, so the hear-source-first
+      // gate (and any partial denoise) would just play nothing / blend
+      // toward silence. Full generation from the first slice.
+      perfState.setSliderDirect("denoise", 1);
+      perfState.setRemixStarted(true);
+    } else if (perfState.skipNextDenoiseGate) {
       perfState.setSkipNextDenoiseGate(false);
+      perfState.setRemixStarted(false);
     } else if (gate.enabled) {
       const prevDenoise = perfState.sliderTargets["denoise"] ?? 0;
       perfState.setSliderDirect("denoise", 0);
       perfState.animateSliderDisplayFrom("denoise", prevDenoise, gate.glide_ms);
+      perfState.setRemixStarted(false);
+    } else {
+      perfState.setRemixStarted(false);
     }
-    perfState.setRemixStarted(false);
 
     setSession(remote, player);
     setStatus("ready", "Playing");
diff --git a/demos/realtime_motion_graph_web/web/lib/text2music.ts b/demos/realtime_motion_graph_web/web/lib/text2music.ts
new file mode 100644
index 00000000..ddd6693c
--- /dev/null
+++ b/demos/realtime_motion_graph_web/web/lib/text2music.ts
@@ -0,0 +1,18 @@
+// Text-to-music mode: generate music from the prompt alone, no input
+// audio. Represented client-side as a sentinel "fixture" name so every
+// existing source-selection surface (crate fan, CORE-tab picker, lite
+// carousel) and the fixture-swap subscription work unchanged; the two
+// wire send sites (session config in useStartSession, swap_source in
+// useFixtureSwap) translate the sentinel into the contract's
+// `text2music` flag instead of a fixture_name.
+
+/** Sentinel stored in usePerformanceStore.fixture; never sent as a fixture_name. */
+export const TEXT2MUSIC_SOURCE = "__text2music__";
+
+/** Display label for the sentinel across pickers / placards. */
+export const TEXT2MUSIC_LABEL = "Text to music";
+
+/** True only for the sentinel itself; null / undefined / any other name → false. */
+export function isText2Music(name: string | null | undefined): boolean {
+  return name === TEXT2MUSIC_SOURCE;
+}
diff --git a/demos/realtime_motion_graph_web/ws_adapter.py b/demos/realtime_motion_graph_web/ws_adapter.py
index 3c8353c2..8e07613e 100644
--- a/demos/realtime_motion_graph_web/ws_adapter.py
+++ b/demos/realtime_motion_graph_web/ws_adapter.py
@@ -41,6 +41,7 @@
 import numpy as np
 
 from acestep.audio.key_detection import detect_key
+from acestep.constants import DURATION_MAX
 from acestep.engine.obs import logger, spawn_thread
 from acestep.engine.session import Session
 from acestep.fixtures import KNOWN_FIXTURES
@@ -91,6 +92,7 @@
     _load_clip_waveform,
     _load_known_fixture_waveform,
     _normalize_time_signature,
+    text2music_waveform,
 )
 from acestep.streaming.stems import (
     extract_upload_stems,
@@ -924,7 +926,20 @@ def _ms(stage: str) -> None:
     # download→decode→re-upload round-trip and read the waveform
     # straight from the pod's fixture cache.
     fixture_name = config_dict.get("fixture_name")
-    if config_dict.get("use_server_fixture") and fixture_name in KNOWN_FIXTURES:
+    if config_dict.get("text2music"):
+        # Text-to-music: no input audio at all. The client sends NO
+        # binary frame; synthesize the silent placeholder here. The
+        # generous ``DURATION_MAX`` cap just bounds the zeros buffer —
+        # ``StreamingSession.create`` re-trims to the TRT profile
+        # ceiling like any uploaded source.
+        fixture_name = None
+        try:
+            _t2m_dur = float(config_dict.get("text2music_duration_s") or 60.0)
+        except (TypeError, ValueError):
+            _t2m_dur = 60.0
+        waveform = text2music_waveform(_t2m_dur, max_seconds=DURATION_MAX)
+        _ms("audio_text2music_synthesized")
+    elif config_dict.get("use_server_fixture") and fixture_name in KNOWN_FIXTURES:
         try:
             waveform = _load_known_fixture_waveform(fixture_name)
             _ms("audio_serverside_loaded")
@@ -1547,7 +1562,16 @@ def _recv_binary_payload(fail_type: str):
                 # stem caches hit (no prepare_source, no Mel-Band RoFormer
                 # re-rip) instead of treating a re-decoded, re-uploaded PCM
                 # buffer as a brand-new source.
-                if data.get("use_server_source"):
+                if data.get("text2music"):
+                    # Text-to-music swap: no binary frame on the wire;
+                    # synthesize the silent placeholder at the session's
+                    # configured text2music duration (capped at the same
+                    # TRT ceiling every swap honors).
+                    wf = text2music_waveform(
+                        streaming.config.text2music_duration_s,
+                        max_seconds=streaming.max_seconds,
+                    )
+                elif data.get("use_server_source"):
                     name = data.get("fixture_name")
                     try:
                         wf = _load_clip_waveform(str(name))
@@ -1582,6 +1606,7 @@ def _recv_binary_payload(fail_type: str):
                     time_signature=data.get("time_signature"),
                     fixture_name=data.get("fixture_name"),
                     stem_source_mode=data.get("stem_source_mode"),
+                    text2music=bool(data.get("text2music")),
                     origin=origin,
                 )
             elif mtype == "write_audio":
diff --git a/packages/demon-client/protocol.ts b/packages/demon-client/protocol.ts
index 57f5a11f..e10008a0 100644
--- a/packages/demon-client/protocol.ts
+++ b/packages/demon-client/protocol.ts
@@ -394,16 +394,18 @@ export class RemoteBackend extends EventTarget {
       ws.onopen = () => {
         if (!this._pending) return;
         this._updateTrace({ openAt: Date.now(), phase: "open" });
-        // Phase 1: JSON config, then (unless server-side fixture) the
-        // binary audio upload. For known fixtures the pod loads the
-        // waveform from its own cache, so re-uploading ~20 MB of PCM
-        // here is pure waste (~11 s on the measured cold path). When
-        // `use_server_fixture` is set the server skips its audio recv,
-        // so we must skip the send to match.
+        // Phase 1: JSON config, then (unless the server sources its own
+        // audio) the binary audio upload. For known fixtures the pod
+        // loads the waveform from its own cache, so re-uploading ~20 MB
+        // of PCM here is pure waste (~11 s on the measured cold path);
+        // in text-to-music mode there is no input audio at all (the
+        // server synthesizes silence). In both cases the server skips
+        // its audio recv, so we must skip the send to match.
         ws.send(JSON.stringify(this._pending.config));
-        const useServerFixture =
-          this._pending.config.use_server_fixture === true;
-        if (!useServerFixture) {
+        const serverSourcesAudio =
+          this._pending.config.use_server_fixture === true ||
+          this._pending.config.text2music === true;
+        if (!serverSourcesAudio) {
           const { interleaved, channels } = this._pending;
           ws.send(packPcmFrame(interleaved, channels));
         }
@@ -1211,6 +1213,36 @@ export class RemoteBackend extends EventTarget {
     }
   }
 
+  /**
+   * Mid-session switch to text-to-music: only the JSON `swap_source`
+   * command (`text2music: true`) goes out — NO PCM. The server synthesizes
+   * a silent source (at the session's configured text2music_duration_s)
+   * and conditions on the prompt alone; it replies with the same
+   * swap_ready + binary buffer as sendSwapSource (silence that generated
+   * slices overwrite). Returns false if the socket isn't OPEN or send throws.
+   */
+  sendSwapTextToMusic(
+    tags?: string,
+    key?: string,
+    timeSignature?: string,
+  ): boolean {
+    if (this.ws?.readyState !== WebSocket.OPEN) return false;
+    const msg: SwapSourceCommand = {
+      type: "swap_source",
+      text2music: true,
+      ...(tags ? { tags } : {}),
+      ...(key ? { key } : {}),
+      ...(timeSignature ? { time_signature: timeSignature } : {}),
+    };
+    try {
+      this.ws.send(JSON.stringify(msg));
+      return true;
+    } catch (e) {
+      console.error("[protocol] sendSwapTextToMusic failed:", e);
+      return false;
+    }
+  }
+
   close(): void {
     this.closedByUser = true;
     this._updateTrace({ closedByUser: true });
diff --git a/packages/demon-client/types/wireContract.gen.ts b/packages/demon-client/types/wireContract.gen.ts
index 9c6d62c5..3b29227b 100644
--- a/packages/demon-client/types/wireContract.gen.ts
+++ b/packages/demon-client/types/wireContract.gen.ts
@@ -242,6 +242,8 @@ export interface SwapSourceCommand {
   stem_source_mode?: "full" | "vocals" | "instruments";
   /** When true, the server loads the named source off its own disk and NO binary frame is sent. */
   use_server_source?: boolean;
+  /** When true, the server swaps to a synthesized silent source (text-to-music mode: generation is conditioned on the prompt alone) and NO binary frame is sent. fixture_name is ignored. */
+  text2music?: boolean;
 }
 
 export interface WriteAudioCommand {
@@ -459,6 +461,8 @@ export interface SessionConfigPayload {
   fixture_name?: string | null;
   use_server_fixture?: boolean;
   stem_source_mode?: string | null;
+  text2music?: boolean;
+  text2music_duration_s?: number;
   enabled_loras?: unknown[];
   lora_strengths?: Record<string, unknown>;
   lora_paths?: unknown[];