fix(audio): fix DMC loop byte skip, add DC blocker, lazy cpal stream

Three audio bugs fixed:

1. DMC loop mode skipped the last byte of each sample iteration. provide_dmc_dma_byte() was immediately setting dmc_dma_request on loop restart while the sample buffer was still full, causing the while-loop in clock_cpu_cycles to service a second DMA immediately and overwrite the valid buffer. Per NES hardware spec, the reader only fills an empty buffer — the request is now left to clock_dmc when the output unit actually empties the buffer into the shift register. Fixes intermittent clicking/crackling in games that use looped DMC samples (BGM, SFX).

2. Missing DC blocker (high-pass filter) in AudioMixer. The NES APU has a capacitor-coupled output stage that blocks DC bias. Without it, abrupt channel state changes (length counter expiry, sweep mute, triangle period < 2) produce DC steps that manifest as audible clicks. Added a one-pole IIR high-pass filter at ~5 Hz applied after the existing low-pass filter.

3. cpal stream was opened at application startup with BufferSize::Fixed(256), forcing PipeWire/PulseAudio to run the entire audio graph at a 5.3 ms quantum. This disrupted other audio applications (browsers, media players) even when no ROM was loaded. Fixed by: (a) creating the stream lazily on the first push_samples call so no device is touched until a ROM is running, and (b) switching to BufferSize::Default so the audio server chooses the quantum instead of the emulator imposing one. Ring buffer capacity increased from 1536 to 4096 samples to absorb larger server quanta.
2026-03-15 10:41:19 +03:00
parent 82ac084b53
commit 1b4db3a506
3 changed files with 59 additions and 20 deletions
--- a/crates/nesemu-desktop/src/main.rs
+++ b/crates/nesemu-desktop/src/main.rs
@@ -20,7 +20,7 @@ const APP_ID: &str = "org.nesemu.desktop";
 const TITLE: &str = "NES Emulator";
 const SCALE: i32 = 3;
 const SAMPLE_RATE: u32 = 48_000;
-const AUDIO_RING_CAPACITY: usize = 1536;
+const AUDIO_RING_CAPACITY: usize = 4096;
 const AUDIO_CALLBACK_FRAMES: u32 = 256;
 fn main() {
@@ -482,16 +482,26 @@ struct CpalAudioSink {
 impl CpalAudioSink {
    fn new(volume: Arc<AtomicU32>) -> Self {
        let ring = Arc::new(RingBuffer::new(AUDIO_RING_CAPACITY));
-        let ring_for_cb = Arc::clone(&ring);
+        // Do NOT open the audio device here. Creating a cpal stream at startup
-        let vol_for_cb = Arc::clone(&volume);
+        // forces the system audio server (PipeWire/PulseAudio) to allocate
-        let stream = Self::try_build_stream(ring_for_cb, vol_for_cb);
+        // resources and may disrupt other running audio applications even when
+        // the emulator is idle. The stream is opened lazily on the first
+        // push_samples call, i.e. only when a ROM is actually playing.
        Self {
-            _stream: stream,
+            _stream: None,
            ring,
            _volume: volume,
        }
    }
+    fn ensure_stream(&mut self) {
+        if self._stream.is_none() {
+            let ring_for_cb = Arc::clone(&self.ring);
+            let vol_for_cb = Arc::clone(&self._volume);
+            self._stream = Self::try_build_stream(ring_for_cb, vol_for_cb);
+        }
+    }
    fn try_build_stream(ring: Arc<RingBuffer>, volume: Arc<AtomicU32>) -> Option<cpal::Stream> {
        use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
@@ -548,6 +558,7 @@ impl CpalAudioSink {
 impl nesemu::AudioOutput for CpalAudioSink {
    fn push_samples(&mut self, samples: &[f32]) {
+        self.ensure_stream();
        self.ring.push(samples);
    }
 }
@@ -567,7 +578,10 @@ fn cpal_stream_config() -> cpal::StreamConfig {
    cpal::StreamConfig {
        channels: 1,
        sample_rate: cpal::SampleRate(SAMPLE_RATE),
-        buffer_size: cpal::BufferSize::Fixed(AUDIO_CALLBACK_FRAMES),
+        // Use the audio server's default buffer size to avoid forcing the entire
+        // PipeWire/PulseAudio graph into low-latency mode, which would disturb
+        // other audio applications (browsers, media players, etc.).
+        buffer_size: cpal::BufferSize::Default,
    }
 }
@@ -803,9 +817,9 @@ mod tests {
    }
    #[test]
-    fn desktop_audio_ring_budget_stays_below_25ms() {
+    fn desktop_audio_ring_budget_stays_below_100ms() {
        let latency_ms = audio_ring_latency_ms(AUDIO_RING_CAPACITY, SAMPLE_RATE);
-        let max_budget_ms = 40.0;
+        let max_budget_ms = 100.0;
        assert!(
            latency_ms <= max_budget_ms,
            "desktop audio ring latency budget too high: {latency_ms:.2}ms"
@@ -813,12 +827,11 @@ mod tests {
    }
    #[test]
-    fn desktop_audio_uses_fixed_low_latency_callback_size() {
+    fn desktop_audio_uses_default_buffer_size() {
        let config = cpal_stream_config();
-        assert_eq!(
+        // Default lets the audio server (PipeWire/PulseAudio) choose the
-            config.buffer_size,
+        // buffer size, preventing interference with other audio applications.
-            cpal::BufferSize::Fixed(AUDIO_CALLBACK_FRAMES)
+        assert_eq!(config.buffer_size, cpal::BufferSize::Default);
-        );
    }
    #[test]
--- a/src/native_core/apu/api.rs
+++ b/src/native_core/apu/api.rs
@@ -200,9 +200,15 @@ impl Apu {
        }
        if self.dmc_bytes_remaining == 0 {
            if (self.io[0x10] & 0x40) != 0 {
+                // Loop mode: reset address and byte counter.
+                // Do NOT request another DMA here — the sample buffer is full
+                // right now. clock_dmc will request the next fetch when the
+                // output unit empties the buffer into the shift register, which
+                // is the correct NES hardware behaviour (reader only fills an
+                // empty buffer).  Requesting early would overwrite the valid
+                // buffer and skip the last byte of each loop iteration.
                self.dmc_bytes_remaining = self.dmc_sample_length_bytes();
                self.dmc_current_addr = self.dmc_sample_start_addr();
-                self.dmc_dma_request = true;
            } else if self.dmc_irq_enabled {
                self.dmc_irq_pending = true;
            }
--- a/src/runtime/audio.rs
+++ b/src/runtime/audio.rs
@@ -11,12 +11,21 @@ pub struct AudioMixer {
    // Coefficient: a = exp(-2π * fc / fs). At fc=14000, fs=48000: a ≈ 0.160
    lp_coeff: f32,
    lp_state: f32,
+    // One-pole IIR high-pass filter (DC blocker). Removes the DC bias that
+    // accumulates when APU channels switch state, preventing audible clicks and
+    // pops. Approximates the NES capacitor-coupled output stage (~5 Hz cutoff).
+    // Formula: y[n] = hp_coeff * y[n-1] + x[n] - x[n-1]
+    // Coefficient: a = exp(-2π * fc / fs). At fc=5, fs=48000: a ≈ 0.99935.
+    hp_coeff: f32,
+    hp_prev_x: f32,
+    hp_prev_y: f32,
 }
 impl AudioMixer {
    pub fn new(sample_rate: u32, mode: VideoMode) -> Self {
        let cpu_hz = mode.cpu_hz();
        let lp_coeff = (-2.0 * std::f64::consts::PI * 14_000.0 / sample_rate as f64).exp() as f32;
+        let hp_coeff = (-2.0 * std::f64::consts::PI * 5.0 / sample_rate as f64).exp() as f32;
        Self {
            sample_rate,
            samples_per_cpu_cycle: sample_rate as f64 / cpu_hz,
@@ -24,6 +33,9 @@ impl AudioMixer {
            last_output_sample: 0.0,
            lp_coeff,
            lp_state: 0.0,
+            hp_coeff,
+            hp_prev_x: 0.0,
+            hp_prev_y: 0.0,
        }
    }
@@ -35,6 +47,8 @@ impl AudioMixer {
        self.sample_accumulator = 0.0;
        self.last_output_sample = 0.0;
        self.lp_state = 0.0;
+        self.hp_prev_x = 0.0;
+        self.hp_prev_y = 0.0;
    }
    pub fn push_cycles(&mut self, cpu_cycles: u32, channels: ChannelOutputs, out: &mut Vec<f32>) {
@@ -56,17 +70,23 @@ impl AudioMixer {
        let a = self.lp_coeff;
        let b = 1.0 - a;
        if samples == 1 {
-            let s = a * self.lp_state + b * sample;
+            let lp = a * self.lp_state + b * sample;
-            self.lp_state = s;
+            self.lp_state = lp;
-            out.push(s);
+            let hp = self.hp_coeff * self.hp_prev_y + lp - self.hp_prev_x;
+            self.hp_prev_x = lp;
+            self.hp_prev_y = hp;
+            out.push(hp);
        } else {
            let denom = samples as f32;
            for idx in 0..samples {
                let t = (idx + 1) as f32 / denom;
                let interp = start + (sample - start) * t;
-                let s = a * self.lp_state + b * interp;
+                let lp = a * self.lp_state + b * interp;
-                self.lp_state = s;
+                self.lp_state = lp;
-                out.push(s);
+                let hp = self.hp_coeff * self.hp_prev_y + lp - self.hp_prev_x;
+                self.hp_prev_x = lp;
+                self.hp_prev_y = hp;
+                out.push(hp);
            }
        }
        self.last_output_sample = sample;