From 9068e78a62b800253e5949df5b61f7e260b5385a Mon Sep 17 00:00:00 2001 From: "se.cherkasov" Date: Fri, 13 Mar 2026 16:21:30 +0300 Subject: [PATCH] docs: add audio output design spec and implementation plan Co-Authored-By: Claude Opus 4.6 --- .../plans/2026-03-13-audio-output.md | 1032 +++++++++++++++++ .../specs/2026-03-13-audio-output-design.md | 245 ++++ 2 files changed, 1277 insertions(+) create mode 100644 docs/superpowers/plans/2026-03-13-audio-output.md create mode 100644 docs/superpowers/specs/2026-03-13-audio-output-design.md diff --git a/docs/superpowers/plans/2026-03-13-audio-output.md b/docs/superpowers/plans/2026-03-13-audio-output.md new file mode 100644 index 0000000..4dfb793 --- /dev/null +++ b/docs/superpowers/plans/2026-03-13-audio-output.md @@ -0,0 +1,1032 @@ +# Audio Output Implementation Plan + +> **For agentic workers:** REQUIRED: Use superpowers:subagent-driven-development (if subagents available) or superpowers:executing-plans to implement this plan. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add full 5-channel APU mixing and real audio output via cpal to the desktop NES emulator client, with a volume slider in the header bar. + +**Architecture:** The APU gains timer/sequencer state so it can report per-channel output levels. The AudioMixer uses these to produce properly mixed f32 samples. A lock-free SPSC ring buffer bridges the emulation (GTK main thread) and cpal's audio callback (OS thread). The desktop client replaces its stub AudioSink with a CpalAudioSink that writes to the ring buffer. 
+ +**Tech Stack:** Rust, cpal 0.15, GTK4 0.8, std::sync::atomic + +**Spec:** `docs/superpowers/specs/2026-03-13-audio-output-design.md` + +--- + +## Chunk 1: APU Channel Output State + +### Task 1: Add timer/sequencer fields to Apu struct + +**Files:** +- Modify: `src/native_core/apu/types.rs:19-51` (Apu struct) +- Modify: `src/native_core/apu/types.rs:53-84` (ApuStateTail struct) + +- [ ] **Step 1: Add new fields to `Apu` struct** + +In `src/native_core/apu/types.rs`, add these fields after line 50 (`pending_frame_irq_inhibit: bool,`), before the closing `}`: + +```rust + // Pulse channel timers & duty sequencers + pub(crate) pulse_timer_counter: [u16; 2], + pub(crate) pulse_duty_step: [u8; 2], + // Triangle channel timer & sequencer + pub(crate) triangle_timer_counter: u16, + pub(crate) triangle_step: u8, + // Noise channel timer & LFSR + pub(crate) noise_timer_counter: u16, + pub(crate) noise_lfsr: u16, +``` + +- [ ] **Step 2: Add matching fields to `ApuStateTail` struct** + +In the same file, add matching fields after line 83 (`pending_frame_irq_inhibit: bool,`), before the closing `}`: + +```rust + pub pulse_timer_counter: [u16; 2], + pub pulse_duty_step: [u8; 2], + pub triangle_timer_counter: u16, + pub triangle_step: u8, + pub noise_timer_counter: u16, + pub noise_lfsr: u16, +``` + +- [ ] **Step 3: Initialize new fields in `Apu::new()`** + +In `src/native_core/apu/api.rs`, add to the `Self { ... 
}` block in `new()` (after line 36, `pending_frame_irq_inhibit: false,`): + +```rust + pulse_timer_counter: [0; 2], + pulse_duty_step: [0; 2], + triangle_timer_counter: 0, + triangle_step: 0, + noise_timer_counter: 0, + noise_lfsr: 1, // LFSR initialized to 1 per NES hardware +``` + +- [ ] **Step 4: Update `save_state_tail`** + +In `src/native_core/apu/api.rs`, at the end of `save_state_tail()` (before the closing `}`), add: + +```rust + out.extend_from_slice(&self.pulse_timer_counter[0].to_le_bytes()); + out.extend_from_slice(&self.pulse_timer_counter[1].to_le_bytes()); + out.extend_from_slice(&self.pulse_duty_step); + out.extend_from_slice(&self.triangle_timer_counter.to_le_bytes()); + out.push(self.triangle_step); + out.extend_from_slice(&self.noise_timer_counter.to_le_bytes()); + out.extend_from_slice(&self.noise_lfsr.to_le_bytes()); +``` + +- [ ] **Step 5: Update `load_state_tail`** + +In `src/native_core/apu/api.rs`, at the end of `load_state_tail()` (before the closing `}`), add: + +```rust + self.pulse_timer_counter = state.pulse_timer_counter; + self.pulse_duty_step = [state.pulse_duty_step[0] & 0x07, state.pulse_duty_step[1] & 0x07]; + self.triangle_timer_counter = state.triangle_timer_counter; + self.triangle_step = state.triangle_step & 0x1F; + self.noise_timer_counter = state.noise_timer_counter; + self.noise_lfsr = if state.noise_lfsr == 0 { 1 } else { state.noise_lfsr }; +``` + +- [ ] **Step 6: Update `bus/state.rs` deserialization** + +In `src/native_core/bus/state.rs`, add deserialization of the new fields after line 114 (`let pending_frame_irq_inhibit = ...;`) and before the `self.apu.load_state_tail(ApuStateTail {` block (line 115): + +```rust + let pulse_timer_counter = [ + u16::from_le_bytes([ + sio::take_u8(data, &mut cursor, BUS_STATE_CTX)?, + sio::take_u8(data, &mut cursor, BUS_STATE_CTX)?, + ]), + u16::from_le_bytes([ + sio::take_u8(data, &mut cursor, BUS_STATE_CTX)?, + sio::take_u8(data, &mut cursor, BUS_STATE_CTX)?, + ]), + ]; + let mut 
pulse_duty_step = [0u8; 2]; + pulse_duty_step.copy_from_slice(sio::take_exact(data, &mut cursor, 2, BUS_STATE_CTX)?); + let triangle_timer_counter = u16::from_le_bytes([ + sio::take_u8(data, &mut cursor, BUS_STATE_CTX)?, + sio::take_u8(data, &mut cursor, BUS_STATE_CTX)?, + ]); + let triangle_step = sio::take_u8(data, &mut cursor, BUS_STATE_CTX)?; + let noise_timer_counter = u16::from_le_bytes([ + sio::take_u8(data, &mut cursor, BUS_STATE_CTX)?, + sio::take_u8(data, &mut cursor, BUS_STATE_CTX)?, + ]); + let noise_lfsr = u16::from_le_bytes([ + sio::take_u8(data, &mut cursor, BUS_STATE_CTX)?, + sio::take_u8(data, &mut cursor, BUS_STATE_CTX)?, + ]); +``` + +Then add the new fields to the `ApuStateTail { ... }` constructor (after `pending_frame_irq_inhibit,` on line 145): + +```rust + pulse_timer_counter, + pulse_duty_step, + triangle_timer_counter, + triangle_step, + noise_timer_counter, + noise_lfsr, +``` + +- [ ] **Step 7: Bump `SAVE_STATE_VERSION`** + +In `src/runtime/constants.rs`, change line 4: + +```rust +pub const SAVE_STATE_VERSION: u32 = 2; +``` + +- [ ] **Step 8: Verify it compiles** + +Run: `cargo build 2>&1 | head -30` +Expected: successful build (or warnings only) + +- [ ] **Step 9: Commit** + +```bash +git add src/native_core/apu/types.rs src/native_core/apu/api.rs src/native_core/bus/state.rs src/runtime/constants.rs +git commit -m "feat(apu): add timer/sequencer/LFSR fields for channel output tracking" +``` + +### Task 2: Clock new timer/sequencer state in APU + +**Files:** +- Modify: `src/native_core/apu/timing.rs` (add clocking logic) +- Modify: `src/native_core/apu/api.rs:137-158` (clock_cpu_cycle calls new clocking) + +- [ ] **Step 1: Add pulse timer period helper** + +The APU already has `pulse_timer_period()` at `timing.rs:213`. We need triangle and noise period helpers. 
Add to the end of `src/native_core/apu/timing.rs` (before the closing `}`): + +```rust + pub(crate) fn triangle_timer_period(&self) -> u16 { + let lo = self.io[0x0A] as u16; + let hi = (self.io[0x0B] as u16 & 0x07) << 8; + hi | lo + } + + pub(crate) fn noise_timer_period(&self) -> u16 { + const NOISE_PERIOD_TABLE: [u16; 16] = [ + 4, 8, 16, 32, 64, 96, 128, 160, 202, 254, 380, 508, 762, 1016, 2034, 4068, + ]; + let idx = (self.io[0x0E] & 0x0F) as usize; + NOISE_PERIOD_TABLE[idx] + } +``` + +- [ ] **Step 2: Add channel clocking methods** + +Add to `src/native_core/apu/timing.rs`, before the closing `}`: + +```rust + pub(crate) fn clock_pulse_timers(&mut self) { + if self.cpu_cycle_parity { + return; // pulse timers tick every other CPU cycle + } + for ch in 0..2usize { + if self.pulse_timer_counter[ch] == 0 { + let reg_offset = ch * 4; + let period = self.pulse_timer_period(reg_offset + 2); + self.pulse_timer_counter[ch] = period; + self.pulse_duty_step[ch] = (self.pulse_duty_step[ch] + 1) & 0x07; + } else { + self.pulse_timer_counter[ch] -= 1; + } + } + } + + pub(crate) fn clock_triangle_timer(&mut self) { + if self.triangle_timer_counter == 0 { + self.triangle_timer_counter = self.triangle_timer_period(); + if self.length_counters[2] > 0 && self.triangle_linear_counter > 0 { + self.triangle_step = (self.triangle_step + 1) & 0x1F; + } + } else { + self.triangle_timer_counter -= 1; + } + } + + pub(crate) fn clock_noise_timer(&mut self) { + if self.cpu_cycle_parity { + return; // noise timer ticks every other CPU cycle + } + if self.noise_timer_counter == 0 { + self.noise_timer_counter = self.noise_timer_period(); + let mode_flag = (self.io[0x0E] & 0x80) != 0; + let feedback_bit = if mode_flag { 6 } else { 1 }; + let feedback = (self.noise_lfsr & 1) ^ ((self.noise_lfsr >> feedback_bit) & 1); + self.noise_lfsr = (self.noise_lfsr >> 1) | (feedback << 14); + } else { + self.noise_timer_counter -= 1; + } + } +``` + +- [ ] **Step 3: Call new clocking from 
`clock_cpu_cycle()`** + +In `src/native_core/apu/api.rs`, in the `clock_cpu_cycle()` method, add three lines before `self.cpu_cycle_parity = !self.cpu_cycle_parity;` (line 157): + +```rust + self.clock_pulse_timers(); + self.clock_triangle_timer(); + self.clock_noise_timer(); +``` + +- [ ] **Step 4: Reset sequencers on channel writes** + +In `src/native_core/apu/api.rs`, in the `write()` method, update the `0x4003` arm (line 61-64) to also reset the pulse 1 duty step: + +```rust + 0x4003 => { + self.reload_length_counter(0, value >> 3); + self.envelope_start_flags |= 1 << 0; + self.pulse_duty_step[0] = 0; + self.pulse_timer_counter[0] = self.pulse_timer_period(0x02); + } +``` + +And the `0x4007` arm (line 68-71) for pulse 2: + +```rust + 0x4007 => { + self.reload_length_counter(1, value >> 3); + self.envelope_start_flags |= 1 << 1; + self.pulse_duty_step[1] = 0; + self.pulse_timer_counter[1] = self.pulse_timer_period(0x06); + } +``` + +- [ ] **Step 5: Verify it compiles and existing tests pass** + +Run: `cargo test 2>&1 | tail -20` +Expected: all existing tests pass + +- [ ] **Step 6: Commit** + +```bash +git add src/native_core/apu/timing.rs src/native_core/apu/api.rs +git commit -m "feat(apu): clock pulse/triangle/noise timers and sequencers" +``` + +### Task 3: Add `ChannelOutputs` struct and `channel_outputs()` method + +**Files:** +- Modify: `src/native_core/apu/types.rs` (add ChannelOutputs) +- Modify: `src/native_core/apu/api.rs` (add channel_outputs method) +- Modify: `src/native_core/apu/mod.rs` (re-export ChannelOutputs) +- Modify: `src/native_core/bus.rs` (expose via bus) +- Modify: `src/lib.rs` (re-export from crate root) + +- [ ] **Step 1: Add `ChannelOutputs` struct** + +At the top of `src/native_core/apu/types.rs` (before the Apu struct), add: + +```rust +#[derive(Debug, Clone, Copy, Default)] +pub struct ChannelOutputs { + pub pulse1: u8, + pub pulse2: u8, + pub triangle: u8, + pub noise: u8, + pub dmc: u8, +} +``` + +- [ ] **Step 2: Add 
`channel_outputs()` to Apu** + +At the end of `src/native_core/apu/api.rs` (before the closing `}` of `impl Apu`), add: + +```rust + pub fn channel_outputs(&self) -> ChannelOutputs { + const PULSE_DUTY_TABLE: [[u8; 8]; 4] = [ + [0, 1, 0, 0, 0, 0, 0, 0], // 12.5% + [0, 1, 1, 0, 0, 0, 0, 0], // 25% + [0, 1, 1, 1, 1, 0, 0, 0], // 50% + [1, 0, 0, 1, 1, 1, 1, 1], // 75% (negated 25%) + ]; + const TRIANGLE_SEQUENCE: [u8; 32] = [ + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + ]; + + let pulse1 = { + let duty = (self.io[0x00] >> 6) as usize; + let step = self.pulse_duty_step[0] as usize; + let volume = if (self.io[0x00] & 0x10) != 0 { + self.io[0x00] & 0x0F + } else { + self.envelope_decay[0] + }; + let active = (self.channel_enable_mask & 0x01) != 0 + && self.length_counters[0] > 0 + && PULSE_DUTY_TABLE[duty][step] != 0 + && !self.sweep_mutes_channel(0, 0x02); + if active { volume } else { 0 } + }; + + let pulse2 = { + let duty = (self.io[0x04] >> 6) as usize; + let step = self.pulse_duty_step[1] as usize; + let volume = if (self.io[0x04] & 0x10) != 0 { + self.io[0x04] & 0x0F + } else { + self.envelope_decay[1] + }; + let active = (self.channel_enable_mask & 0x02) != 0 + && self.length_counters[1] > 0 + && PULSE_DUTY_TABLE[duty][step] != 0 + && !self.sweep_mutes_channel(1, 0x06); + if active { volume } else { 0 } + }; + + let triangle = { + let active = (self.channel_enable_mask & 0x04) != 0 + && self.length_counters[2] > 0 + && self.triangle_linear_counter > 0; + if active { + TRIANGLE_SEQUENCE[self.triangle_step as usize & 0x1F] + } else { + 0 + } + }; + + let noise = { + let volume = if (self.io[0x0C] & 0x10) != 0 { + self.io[0x0C] & 0x0F + } else { + self.envelope_decay[2] + }; + let active = (self.channel_enable_mask & 0x08) != 0 + && self.length_counters[3] > 0 + && (self.noise_lfsr & 1) == 0; + if active { volume } else { 0 } + }; + + let dmc = self.dmc_output_level; + + ChannelOutputs { pulse1, 
pulse2, triangle, noise, dmc } + } +``` + +- [ ] **Step 3: Update `api.rs` import** + +At the top of `src/native_core/apu/api.rs`, change: + +```rust +use super::types::{Apu, ApuStateTail}; +``` + +to: + +```rust +use super::types::{Apu, ApuStateTail, ChannelOutputs}; +``` + +- [ ] **Step 4: Re-export `ChannelOutputs` from apu mod** + +In `src/native_core/apu/mod.rs`, change: + +```rust +pub use types::{Apu, ApuStateTail}; +``` + +to: + +```rust +pub use types::{Apu, ApuStateTail, ChannelOutputs}; +``` + +- [ ] **Step 5: Expose via bus** + +In `src/native_core/bus.rs`, add after `apu_registers()` method (after line 59): + +```rust + pub fn apu_channel_outputs(&self) -> crate::native_core::apu::ChannelOutputs { + self.apu.channel_outputs() + } +``` + +- [ ] **Step 6: Re-export from crate root** + +In `src/lib.rs`, update line 19: + +```rust +pub use native_core::apu::{Apu, ApuStateTail, ChannelOutputs}; +``` + +- [ ] **Step 7: Verify it compiles** + +Run: `cargo build 2>&1 | head -30` +Expected: successful build + +- [ ] **Step 8: Commit** + +```bash +git add src/native_core/apu/ src/native_core/bus.rs src/lib.rs +git commit -m "feat(apu): add ChannelOutputs struct and channel_outputs() method" +``` + +### Task 4: Rewrite AudioMixer with 5-channel mixing + +**Files:** +- Modify: `src/runtime/audio.rs` (rewrite push_cycles) +- Modify: `src/runtime/core.rs:104-116` (pass ChannelOutputs) +- Modify: `src/runtime/mod.rs` (re-export ChannelOutputs) + +- [ ] **Step 1: Rewrite `AudioMixer::push_cycles()`** + +Replace the entire `src/runtime/audio.rs` with: + +```rust +use crate::native_core::apu::ChannelOutputs; +use crate::runtime::VideoMode; + +#[derive(Debug)] +pub struct AudioMixer { + sample_rate: u32, + samples_per_cpu_cycle: f64, + sample_accumulator: f64, +} + +impl AudioMixer { + pub fn new(sample_rate: u32, mode: VideoMode) -> Self { + let cpu_hz = mode.cpu_hz(); + Self { + sample_rate, + samples_per_cpu_cycle: sample_rate as f64 / cpu_hz, + sample_accumulator: 
0.0, + } + } + + pub fn sample_rate(&self) -> u32 { + self.sample_rate + } + + pub fn reset(&mut self) { + self.sample_accumulator = 0.0; + } + + pub fn push_cycles(&mut self, cpu_cycles: u8, channels: ChannelOutputs, out: &mut Vec<f32>) { + self.sample_accumulator += self.samples_per_cpu_cycle * f64::from(cpu_cycles); + let samples = self.sample_accumulator.floor() as usize; + self.sample_accumulator -= samples as f64; + + let pulse_out = 0.00752 * (f32::from(channels.pulse1) + f32::from(channels.pulse2)); + let tnd_out = 0.00851 * f32::from(channels.triangle) + + 0.00494 * f32::from(channels.noise) + + 0.00335 * f32::from(channels.dmc); + let mixed = pulse_out + tnd_out; + let sample = mixed * 2.0 - 1.0; + + out.extend(std::iter::repeat_n(sample, samples)); + } +} +``` + +- [ ] **Step 2: Update `run_until_frame_complete_with_audio` in core.rs** + +In `src/runtime/core.rs`, change the method (lines 104-116): + +```rust + pub fn run_until_frame_complete_with_audio( + &mut self, + mixer: &mut AudioMixer, + out_samples: &mut Vec<f32>, + ) -> Result<(), RuntimeError> { + self.bus.begin_frame(); + while !self.bus.take_frame_complete() { + let cycles = self.step_instruction()?; + mixer.push_cycles(cycles, self.bus.apu_channel_outputs(), out_samples); + } + self.frame_number = self.frame_number.saturating_add(1); + Ok(()) + } +``` + +- [ ] **Step 3: Add mixer unit test** + +Add to the end of `src/runtime/audio.rs`: + +```rust +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn mixer_silent_channels_produce_negative_one() { + let mut mixer = AudioMixer::new(44_100, VideoMode::Ntsc); + let channels = ChannelOutputs::default(); // all zeros + let mut out = Vec::new(); + mixer.push_cycles(50, channels, &mut out); + assert!(!out.is_empty()); + // All channels at 0 → mixed = 0.0, sample = 0.0 * 2.0 - 1.0 = -1.0 + for &s in &out { + assert!((s - (-1.0)).abs() < 1e-6, "expected -1.0, got {s}"); + } + } + + #[test] + fn mixer_max_channels_produce_positive() { + let mut mixer = 
AudioMixer::new(44_100, VideoMode::Ntsc); + let channels = ChannelOutputs { + pulse1: 15, + pulse2: 15, + triangle: 15, + noise: 15, + dmc: 127, + }; + let mut out = Vec::new(); + mixer.push_cycles(50, channels, &mut out); + assert!(!out.is_empty()); + for &s in &out { + assert!(s > 0.0, "expected positive sample, got {s}"); + } + } +} +``` + +- [ ] **Step 4: Verify it compiles and tests pass** + +Run: `cargo test 2>&1 | tail -30` +Expected: mixer tests pass. Regression hash test fails (audio hash changed). Note the new hash. + +- [ ] **Step 5: Update the regression hash** + +In `tests/public_api.rs`, update `expected_audio_hash` (line 215) to the new value printed by the failed test. + +- [ ] **Step 6: Verify all tests pass** + +Run: `cargo test 2>&1 | tail -20` +Expected: all tests pass + +- [ ] **Step 7: Commit** + +```bash +git add src/runtime/audio.rs src/runtime/core.rs tests/public_api.rs +git commit -m "feat(mixer): 5-channel APU mixing with linear approximation formula" +``` + +--- + +## Chunk 2: Ring Buffer + cpal Audio Backend + Volume Slider + +### Task 5: Implement lock-free SPSC ring buffer + +**Files:** +- Create: `src/runtime/ring_buffer.rs` +- Modify: `src/runtime/mod.rs` (add module) + +- [ ] **Step 1: Write ring buffer unit tests first** + +Create `src/runtime/ring_buffer.rs` with tests: + +```rust +use std::cell::UnsafeCell; +use std::sync::atomic::{AtomicUsize, Ordering}; + +pub struct RingBuffer { + buffer: UnsafeCell<Box<[f32]>>, + capacity: usize, + head: AtomicUsize, + tail: AtomicUsize, +} + +// SAFETY: RingBuffer is an SPSC queue. The producer (push) only writes to +// positions between head and the next head, while the consumer (pop) only +// reads positions between tail and head. Atomic operations on head/tail +// with Acquire/Release ordering ensure proper synchronization. 
+unsafe impl Send for RingBuffer {} +unsafe impl Sync for RingBuffer {} + +impl RingBuffer { + pub fn new(capacity: usize) -> Self { + assert!(capacity > 0); + Self { + buffer: UnsafeCell::new(vec![0.0; capacity].into_boxed_slice()), + capacity, + head: AtomicUsize::new(0), + tail: AtomicUsize::new(0), + } + } + + pub fn push(&self, samples: &[f32]) -> usize { + let head = self.head.load(Ordering::Relaxed); + let tail = self.tail.load(Ordering::Acquire); + let available = self.capacity - self.len_internal(head, tail) - 1; + let to_write = samples.len().min(available); + + let buf = self.buffer.get(); + for i in 0..to_write { + let idx = (head + i) % self.capacity; + // SAFETY: single producer — only one thread writes to positions + // between current head and new head. Consumer never reads here + // until head is updated with Release ordering below. + unsafe { (*buf)[idx] = samples[i]; } + } + + self.head.store((head + to_write) % self.capacity, Ordering::Release); + to_write + } + + pub fn pop(&self, out: &mut [f32]) -> usize { + let tail = self.tail.load(Ordering::Relaxed); + let head = self.head.load(Ordering::Acquire); + let available = self.len_internal(head, tail); + let to_read = out.len().min(available); + + let buf = self.buffer.get(); + for i in 0..to_read { + let idx = (tail + i) % self.capacity; + // SAFETY: single consumer — only one thread reads positions + // between current tail and head. Producer never writes here. + unsafe { out[i] = (*buf)[idx]; } + } + + self.tail.store((tail + to_read) % self.capacity, Ordering::Release); + to_read + } + + /// Clear the buffer. Must only be called when no concurrent push/pop + /// is in progress (e.g., when the audio stream is paused or dropped). 
+ pub fn clear(&self) { + self.tail.store(0, Ordering::SeqCst); + self.head.store(0, Ordering::SeqCst); + } + + fn len_internal(&self, head: usize, tail: usize) -> usize { + if head >= tail { + head - tail + } else { + self.capacity - tail + head + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn push_pop_basic() { + let rb = RingBuffer::new(8); + let input = [1.0, 2.0, 3.0]; + assert_eq!(rb.push(&input), 3); + let mut out = [0.0; 3]; + assert_eq!(rb.pop(&mut out), 3); + assert_eq!(out, [1.0, 2.0, 3.0]); + } + + #[test] + fn underrun_returns_zero_count() { + let rb = RingBuffer::new(8); + let mut out = [0.0; 4]; + assert_eq!(rb.pop(&mut out), 0); + } + + #[test] + fn overrun_drops_new_samples() { + let rb = RingBuffer::new(4); // usable capacity = 3 + let input = [1.0, 2.0, 3.0, 4.0, 5.0]; + let written = rb.push(&input); + assert_eq!(written, 3); + let mut out = [0.0; 3]; + rb.pop(&mut out); + assert_eq!(out, [1.0, 2.0, 3.0]); + } + + #[test] + fn clear_resets() { + let rb = RingBuffer::new(8); + rb.push(&[1.0, 2.0]); + rb.clear(); + let mut out = [0.0; 2]; + assert_eq!(rb.pop(&mut out), 0); + } + + #[test] + fn wraparound() { + let rb = RingBuffer::new(4); // usable = 3 + rb.push(&[1.0, 2.0, 3.0]); + let mut out = [0.0; 2]; + rb.pop(&mut out); + assert_eq!(out, [1.0, 2.0]); + rb.push(&[4.0, 5.0]); + let mut out2 = [0.0; 3]; + let read = rb.pop(&mut out2); + assert_eq!(read, 3); + assert_eq!(out2, [3.0, 4.0, 5.0]); + } +} +``` + +- [ ] **Step 2: Add module to mod.rs** + +In `src/runtime/mod.rs`, add after `mod audio;` (line 3): + +```rust +pub mod ring_buffer; +``` + +And add to the exports (after `pub use audio::AudioMixer;` on line 14): + +```rust +pub use ring_buffer::RingBuffer; +``` + +- [ ] **Step 3: Run tests** + +Run: `cargo test ring_buffer 2>&1` +Expected: all 5 ring buffer tests pass + +- [ ] **Step 4: Commit** + +```bash +git add src/runtime/ring_buffer.rs src/runtime/mod.rs +git commit -m "feat: add lock-free SPSC ring buffer for 
audio streaming" +``` + +### Task 6: Implement CpalAudioSink in desktop client + +**Files:** +- Modify: `crates/nesemu-desktop/Cargo.toml` (add cpal) +- Modify: `crates/nesemu-desktop/src/main.rs` (replace AudioSink with CpalAudioSink) + +- [ ] **Step 1: Add cpal dependency** + +In `crates/nesemu-desktop/Cargo.toml`, add after `cairo-rs = "0.19"`: + +```toml +cpal = "0.15" +``` + +- [ ] **Step 2: Replace AudioSink with CpalAudioSink** + +In `crates/nesemu-desktop/src/main.rs`, replace lines 411-420 (the audio stub section) with: + +```rust +use std::sync::atomic::{AtomicU32, Ordering as AtomicOrdering}; +use std::sync::Arc; + +struct CpalAudioSink { + _stream: Option<cpal::Stream>, + ring: Arc<nesemu::RingBuffer>, + volume: Arc<AtomicU32>, +} + +impl CpalAudioSink { + fn new(volume: Arc<AtomicU32>) -> Self { + let ring = Arc::new(nesemu::RingBuffer::new(4096)); + let ring_for_cb = Arc::clone(&ring); + let vol = Arc::clone(&volume); + + let stream = Self::try_build_stream(ring_for_cb, vol); + Self { + _stream: stream, + ring, + volume, + } + } + + fn try_build_stream( + ring: Arc<nesemu::RingBuffer>, + volume: Arc<AtomicU32>, + ) -> Option<cpal::Stream> { + use cpal::traits::{DeviceTrait, HostTrait, StreamTrait}; + + let host = cpal::default_host(); + let device = match host.default_output_device() { + Some(d) => d, + None => { + eprintln!("No audio output device found — running without sound"); + return None; + } + }; + + let config = cpal::StreamConfig { + channels: 1, + sample_rate: cpal::SampleRate(SAMPLE_RATE), + buffer_size: cpal::BufferSize::Default, + }; + + let stream = match device.build_output_stream( + &config, + move |data: &mut [f32], _: &cpal::OutputCallbackInfo| { + let read = ring.pop(data); + // Fill remainder with silence on underrun + for sample in &mut data[read..] 
{ + *sample = 0.0; + } + // Apply volume to the samples actually read; the silence padding above is already 0.0 + let vol = f32::from_bits(volume.load(AtomicOrdering::Relaxed)); + for sample in &mut data[..read] { + *sample *= vol; + } + }, + move |err| { + eprintln!("Audio stream error: {err}"); + }, + None, + ) { + Ok(s) => s, + Err(err) => { + eprintln!("Failed to build audio stream: {err} — running without sound"); + return None; + } + }; + + if let Err(err) = stream.play() { + eprintln!("Failed to start audio stream: {err} — running without sound"); + return None; + } + + Some(stream) + } + + fn clear(&self) { + self.ring.clear(); + } +} + +impl nesemu::AudioOutput for CpalAudioSink { + fn push_samples(&mut self, samples: &[f32]) { + self.ring.push(samples); + } +} +``` + +- [ ] **Step 3: Update `DesktopApp` to use CpalAudioSink** + +In `crates/nesemu-desktop/src/main.rs`, update `DesktopApp` struct (lines 426-432): + +```rust +struct DesktopApp { + host: Option>>, + input: InputState, + audio: CpalAudioSink, + frame_rgba: Vec<u8>, + state: EmulationState, +} +``` + +- [ ] **Step 4: Update `DesktopApp::new()` to accept volume** + +Change `new()` (lines 434-443): + +```rust + fn new(volume: Arc<AtomicU32>) -> Self { + Self { + host: None, + input: InputState::default(), + audio: CpalAudioSink::new(volume), + frame_rgba: vec![0; FRAME_RGBA_BYTES], + state: EmulationState::Paused, + } + } +``` + +- [ ] **Step 5: Clear ring buffer on ROM load and reset** + +In `load_rom_from_path()`, add `self.audio.clear();` before `self.state = EmulationState::Running;`. + +In `reset()`, add `self.audio.clear();` before `self.state = EmulationState::Running;`. 
+ +- [ ] **Step 6: Create shared volume atomic in `build_ui()`** + +In `build_ui()`, after the constants/beginning of the function, add: + +```rust + let volume = Arc::new(AtomicU32::new(f32::to_bits(0.75))); +``` + +And update the state creation (line 102): + +```rust + let desktop = Rc::new(RefCell::new(DesktopApp::new(Arc::clone(&volume)))); +``` + +Add necessary imports at the top of the file: + +```rust +use std::sync::atomic::{AtomicU32, Ordering as AtomicOrdering}; +use std::sync::Arc; +``` + +(Remove the duplicate `use` inside the CpalAudioSink section — move it to file level.) + +- [ ] **Step 7: Verify it compiles** + +Run: `cargo build -p nesemu-desktop 2>&1 | head -30` +Expected: successful build + +- [ ] **Step 8: Commit** + +```bash +git add crates/nesemu-desktop/ +git commit -m "feat(desktop): replace audio stub with cpal backend and ring buffer" +``` + +### Task 7: Add volume slider to header bar + +**Files:** +- Modify: `crates/nesemu-desktop/src/main.rs` (add Scale widget) + +- [ ] **Step 1: Create the volume slider** + +In `build_ui()`, after the reset_button creation and before `header.pack_start(&open_button);` (line 76), add: + +```rust + let volume_scale = gtk::Scale::with_range(gtk::Orientation::Horizontal, 0.0, 1.0, 0.05); + volume_scale.set_value(0.75); + volume_scale.set_draw_value(false); + volume_scale.set_width_request(100); + volume_scale.set_tooltip_text(Some("Volume")); + volume_scale.set_focusable(false); +``` + +- [ ] **Step 2: Pack slider into header bar** + +After `header.pack_start(&reset_button);` (line 78), add: + +```rust + let volume_box = gtk::Box::new(gtk::Orientation::Horizontal, 4); + let volume_icon = gtk::Image::from_icon_name("audio-volume-high-symbolic"); + volume_box.append(&volume_icon); + volume_box.append(&volume_scale); + header.pack_end(&volume_box); +``` + +- [ ] **Step 3: Connect volume slider to atomic** + +After the volume slider creation, connect the signal: + +```rust + { + let volume = 
Arc::clone(&volume); + volume_scale.connect_value_changed(move |scale| { + let val = scale.value() as f32; + volume.store(f32::to_bits(val), AtomicOrdering::Relaxed); + }); + } +``` + +- [ ] **Step 4: Verify it compiles and runs** + +Run: `cargo build -p nesemu-desktop 2>&1 | head -20` +Expected: successful build + +- [ ] **Step 5: Commit** + +```bash +git add crates/nesemu-desktop/src/main.rs +git commit -m "feat(desktop): add volume slider to header bar" +``` + +### Task 8: Export RingBuffer from crate root and final verification + +**Files:** +- Modify: `src/lib.rs` (export RingBuffer) + +- [ ] **Step 1: Export RingBuffer from lib.rs** + +In `src/lib.rs`, add `RingBuffer` to the runtime re-exports (line 27-33). Add it to the `pub use runtime::{...}` block: + +```rust +pub use runtime::{ + AudioMixer, AudioOutput, ClientRuntime, EmulationState, FRAME_HEIGHT, FRAME_RGBA_BYTES, + FRAME_WIDTH, FrameClock, FramePacer, HostConfig, InputProvider, JOYPAD_BUTTON_ORDER, + JOYPAD_BUTTONS_COUNT, JoypadButton, JoypadButtons, NesRuntime, NoopClock, NullAudio, NullInput, + NullVideo, PacingClock, RingBuffer, RuntimeError, RuntimeHostLoop, SAVE_STATE_VERSION, VideoMode, + VideoOutput, button_pressed, set_button_pressed, +}; +``` + +- [ ] **Step 2: Run full test suite** + +Run: `cargo test 2>&1 | tail -30` +Expected: all tests pass + +- [ ] **Step 3: Build desktop client** + +Run: `cargo build -p nesemu-desktop 2>&1 | head -20` +Expected: successful build + +- [ ] **Step 4: Commit** + +```bash +git add src/lib.rs +git commit -m "feat: export RingBuffer from crate root" +``` + +- [ ] **Step 5: Final integration commit (if any loose changes)** + +Run: `cargo clippy --all-targets 2>&1 | head -30` +Fix any warnings, then: + +```bash +git add -A +git commit -m "chore: fix clippy warnings after audio implementation" +``` diff --git a/docs/superpowers/specs/2026-03-13-audio-output-design.md b/docs/superpowers/specs/2026-03-13-audio-output-design.md new file mode 100644 index 
0000000..0b8bbf2 --- /dev/null +++ b/docs/superpowers/specs/2026-03-13-audio-output-design.md @@ -0,0 +1,245 @@ +# Audio Output Design — Full 5-Channel Mixer + cpal Backend + +## Overview + +Add real audio output to the desktop NES emulator client. This involves two independent pieces of work: + +1. **Full APU mixer** — replace the current DMC-only mixer with proper 5-channel mixing (Pulse 1, Pulse 2, Triangle, Noise, DMC) using NES hardware-accurate formulas. +2. **cpal audio backend** — replace the stub `AudioSink` in the desktop client with a real audio output using `cpal`, connected via a lock-free ring buffer. Add a volume slider to the GTK4 header bar. + +## 1. Full APU Mixer + +### Current State + +`AudioMixer::push_cycles()` in `src/runtime/audio.rs` reads only `apu_regs[0x11]` (DMC output level) and generates a single-channel signal. All other channels are ignored. + +### Design + +#### 1.1 Channel Outputs Struct + +Add to `src/native_core/apu/`: + +```rust +#[derive(Debug, Clone, Copy, Default)] +pub struct ChannelOutputs { + pub pulse1: u8, // 0–15 + pub pulse2: u8, // 0–15 + pub triangle: u8, // 0–15 + pub noise: u8, // 0–15 + pub dmc: u8, // 0–127 +} +``` + +#### 1.2 New APU Internal State + +The current `Apu` struct lacks timer counters and sequencer state needed to compute channel outputs. 
The following fields must be added: + +**Pulse channels (×2):** +- `pulse_timer_counter: [u16; 2]` — countdown timer, clocked every other CPU cycle +- `pulse_duty_step: [u8; 2]` — position in 8-step duty cycle sequence (0–7) + +**Triangle channel:** +- `triangle_timer_counter: u16` — countdown timer, clocked every CPU cycle +- `triangle_step: u8` — position in 32-step triangle sequence (0–31) + +**Noise channel:** +- `noise_timer_counter: u16` — countdown timer, clocked every other CPU cycle +- `noise_lfsr: u16` — 15-bit linear feedback shift register, initialized to 1 + +These must be clocked in `Apu::clock_cpu_cycle()`: +- Pulse and noise timers decrement every **2** CPU cycles (APU rate, tracked via existing `cpu_cycle_parity`) +- Triangle timer decrements every **1** CPU cycle +- When a timer reaches 0, it reloads from the period register and advances the corresponding sequencer + +#### 1.3 APU Method + +Add `Apu::channel_outputs(&self) -> ChannelOutputs` that computes the current output level of each channel: + +- **Pulse 1/2:** Output is 0 if length counter is 0, or sweep mutes the channel, or duty cycle sequencer output is 0. Otherwise output is the envelope volume (0–15). +- **Triangle:** Output is the value from the 32-step triangle waveform lookup at `triangle_step`. Muted (output 0) if length counter or linear counter is 0. +- **Noise:** Output is 0 if length counter is 0 or LFSR bit 0 is 1. Otherwise output is the envelope volume (0–15). +- **DMC:** Output is `dmc_output_level` (0–127), already tracked. + +#### 1.4 Save-State Compatibility + +Adding new fields to `Apu` changes the save-state binary format. The `save_state_tail()` and `load_state_tail()` methods must be updated to serialize/deserialize the new fields. This is a **breaking change** to the save-state format — old save states will not be compatible. Since the project is pre-1.0, this is acceptable without a migration strategy. 
+ +#### 1.5 Bus Exposure + +Add `NativeBus::apu_channel_outputs(&self) -> ChannelOutputs` to expose channel outputs alongside the existing `apu_registers()`. + +#### 1.6 Mixer Update + +Change `AudioMixer::push_cycles()` signature: + +```rust +// Before: +pub fn push_cycles(&mut self, cpu_cycles: u8, apu_regs: &[u8; 0x20], out: &mut Vec<f32>) + +// After: +pub fn push_cycles(&mut self, cpu_cycles: u8, channels: ChannelOutputs, out: &mut Vec<f32>) +``` + +Mixing formula (nesdev wiki linear approximation): + +``` +pulse_out = 0.00752 * (pulse1 + pulse2) +tnd_out = 0.00851 * triangle + 0.00494 * noise + 0.00335 * dmc +output = pulse_out + tnd_out +``` + +Output range is approximately [0.0, 1.0]. Normalize to [-1.0, 1.0] by: `sample = output * 2.0 - 1.0`. + +**Known simplifications:** +- This uses the linear approximation, not the more accurate nonlinear lookup tables from real NES hardware. Nonlinear mixing can be added later as an enhancement. +- The current `repeat_n` resampling approach (nearest-neighbor) produces aliasing. A low-pass filter or bandlimited interpolation can be added later. +- Real NES hardware applies two first-order high-pass filters (~90Hz and ~440Hz). Without these, channel enable/disable will cause audible pops. Deferred for a future iteration. + +#### 1.7 Runtime Integration + +Update `NesRuntime::run_until_frame_complete_with_audio()` in `src/runtime/core.rs` to pass `ChannelOutputs` (from `self.bus.apu_channel_outputs()`) instead of the register slice to the mixer. + +## 2. Lock-Free Ring Buffer + +### Location + +New file: `src/runtime/ring_buffer.rs`.
+ +### Design + +SPSC (single-producer, single-consumer) ring buffer using `AtomicUsize` for head/tail indices: + +- **Capacity:** 4096 f32 samples (~85ms at 48kHz) — enough to absorb frame timing jitter +- **Producer:** emulation thread writes samples after each frame via `push()` +- **Consumer:** cpal audio callback reads samples via `pop()` +- **Underrun (buffer empty):** consumer outputs silence (0.0) +- **Overrun (buffer full):** producer **drops new samples** (standard SPSC behavior — only the consumer moves the tail pointer) + +```rust +pub struct RingBuffer { + buffer: Box<[f32]>, + capacity: usize, + head: AtomicUsize, // write position (producer only) + tail: AtomicUsize, // read position (consumer only) +} + +impl RingBuffer { + pub fn new(capacity: usize) -> Self; + pub fn push(&self, samples: &[f32]) -> usize; // returns samples actually written + pub fn pop(&self, out: &mut [f32]) -> usize; // returns samples actually read + pub fn clear(&self); // reset both pointers (call when no concurrent access) +} +``` + +Thread safety: `RingBuffer` is `Send + Sync`. Shared via `Arc<RingBuffer>`. + +## 3. 
Desktop cpal Audio Backend + +### Dependencies + +Add to `crates/nesemu-desktop/Cargo.toml`: + +```toml +cpal = "0.15" +``` + +### CpalAudioSink + +```rust +pub struct CpalAudioSink { + _stream: cpal::Stream, // keeps the audio stream alive + ring: Arc<RingBuffer>, + volume: Arc<AtomicU32>, // f32 bits stored atomically +} +``` + +- Implements `nesemu::AudioOutput` — `push_samples()` writes to ring buffer +- Created when a ROM is loaded; the ring buffer is cleared on ROM change to prevent stale samples +- cpal callback: reads from ring buffer, multiplies each sample by volume, writes to output buffer +- On pause: emulation stops producing samples → callback outputs silence (underrun behavior) +- On ROM change: old stream is dropped, ring buffer cleared, new stream created + +### Error Handling + +If no audio device is available, or the requested format is unsupported, or the stream fails to build: +- Log the error to stderr +- Fall back to `NullAudio` behavior (discard samples silently) +- The emulator continues to work without sound + +The cpal error callback also logs errors to stderr without crashing. + +### Stream Configuration + +- Sample rate: 48,000 Hz +- Channels: 1 (mono — NES is mono) +- Sample format: f32 +- Buffer size: let cpal choose (typically 256–1024 frames) + +### Volume + +- `Arc<AtomicU32>` shared between UI and cpal callback +- Stored as `f32::to_bits()` / `f32::from_bits()` +- Default: 0.75 (75%) +- Applied in cpal callback: `sample * volume` + +## 4. UI — Volume Slider + +### Widget + +`gtk::Scale` (horizontal) added to the header bar: + +- Range: 0.0 to 1.0 (displayed as 0–100%) +- Default: 0.75 +- `connect_value_changed` → atomically update volume + +### Placement + +In the header bar, after the existing control buttons (open, pause, reset), with a small speaker icon label. + +## 5. 
Threading Model + +- **GTK main thread:** runs emulation via `glib::timeout_add_local` (~16ms tick), UI events, volume slider updates +- **cpal OS thread:** audio callback reads from ring buffer — this is the only cross-thread boundary +- The ring buffer (`Arc<RingBuffer>`) and volume (`Arc<AtomicU32>`) are the only shared state between threads + +## 6. Data Flow + +``` +CPU instruction step (GTK main thread) + → APU.clock_cpu_cycle() [updates internal channel state] + → AudioMixer.push_cycles(cycles, apu.channel_outputs()) + → mix 5 channels → f32 sample + → append to frame audio buffer (Vec<f32>) + +Per frame (GTK main thread): + → FrameExecutor collects audio_buffer + → CpalAudioSink.push_samples(audio_buffer) + → write to Arc<RingBuffer> + +cpal callback (separate OS thread): + → read from Arc<RingBuffer> + → multiply by volume (Arc<AtomicU32>) + → write to hardware audio buffer +``` + +## 7. Files Changed + +| File | Change | +|------|--------| +| `src/native_core/apu/types.rs` | Add `ChannelOutputs` struct, new timer/sequencer fields to `Apu` and `ApuStateTail` | +| `src/native_core/apu/api.rs` | Add `channel_outputs()` method, update `save_state_tail`/`load_state_tail` | +| `src/native_core/apu/timing.rs` | Clock new timer/sequencer fields in `clock_cpu_cycle()` | +| `src/native_core/bus.rs` | Add `apu_channel_outputs()` | +| `src/runtime/audio.rs` | Rewrite mixer with 5-channel formula | +| `src/runtime/ring_buffer.rs` (new) | Lock-free SPSC ring buffer | +| `src/runtime/core.rs` | Pass `channel_outputs()` to mixer in `run_until_frame_complete_with_audio()` | +| `src/runtime/mod.rs` | Export `ring_buffer`, `ChannelOutputs` | +| `crates/nesemu-desktop/Cargo.toml` | Add `cpal` dependency | +| `crates/nesemu-desktop/src/main.rs` | Replace stub AudioSink with CpalAudioSink, add volume slider | + +## 8. Testing + +- Existing tests in `tests/public_api.rs` must continue to pass (they use NullAudio). 
**Note:** the regression hash test (`public_api_regression_hashes_for_reference_rom`) will produce a different audio hash due to the mixer change — the expected hash must be updated. +- Unit test for ring buffer: push/pop, underrun, overrun, clear +- Unit test for mixer: known channel outputs → expected sample values +- Manual test: load a ROM, verify audible sound through speakers