Compare commits

...

2 Commits

Author SHA1 Message Date
Se.Cherkasov
08f7ad8bdf fix(audio): fix DMC loop byte skip, add DC blocker, lazy cpal stream
Some checks failed
CI / rust (push) Has been cancelled
Three audio bugs fixed:

1. DMC loop mode skipped the last byte of each sample iteration.
   provide_dmc_dma_byte() was immediately setting dmc_dma_request on
   loop restart while the sample buffer was still full, causing the
   while-loop in clock_cpu_cycles to service a second DMA immediately
   and overwrite the valid buffer. On real NES hardware the memory
   reader only fills an empty buffer, so the request is now raised by
   clock_dmc only once the output unit actually empties the buffer into
   the shift register. Fixes intermittent clicking/crackling in games
   that use looped DMC samples (BGM, SFX).

2. Missing DC blocker (high-pass filter) in AudioMixer. The NES APU
   has a capacitor-coupled output stage that blocks DC bias. Without
   it, abrupt channel state changes (length counter expiry, sweep
   mute, triangle period < 2) produce DC steps that manifest as
   audible clicks. Added a one-pole IIR high-pass filter at ~5 Hz
   applied after the existing low-pass filter.

3. cpal stream was opened at application startup with
   BufferSize::Fixed(256), forcing PipeWire/PulseAudio to run the
   entire audio graph at a 5.3 ms quantum. This disrupted other audio
   applications (browsers, media players) even when no ROM was loaded.
   Fixed by: (a) creating the stream lazily on the first push_samples
   call so no device is touched until a ROM is running, and (b)
   switching to BufferSize::Default so the audio server chooses the
   quantum instead of the emulator imposing one. Ring buffer capacity
   increased from 1536 to 4096 samples to absorb larger server quanta.
2026-03-15 10:41:19 +03:00
Se.Cherkasov
1e7a6a9e48 fix(apu): correct frame counter timing, add LP filter, mute aliased triangle
- Fix frame counter running at 2× speed: clock_frame_counter now skips
  odd CPU cycles (APU cycle = CPU/2), so envelope, sweep, and length
  counters tick at the correct rate. Fixes sweep-driven whistle in Mega Man 2.

- Switch audio sampling to per-CPU-cycle granularity in
  run_until_frame_complete_with_audio to eliminate square-wave harmonic
  aliasing caused by sampling only once per instruction.

- Add IIR one-pole low-pass filter (~14 kHz) to AudioMixer to smooth
  abrupt level transitions (crackling) introduced by correct envelope timing.

- Mute triangle channel when timer_period < 2 (~28 kHz and above), which
  aliases into the audible range at 48 kHz. The real NES RC circuit removes
  these ultrasonics; the emulator must suppress them explicitly.

- Update all APU bus tests to use correct (doubled) CPU cycle counts.
2026-03-14 17:35:35 +03:00
6 changed files with 100 additions and 31 deletions

View File

@@ -20,7 +20,7 @@ const APP_ID: &str = "org.nesemu.desktop";
const TITLE: &str = "NES Emulator";
const SCALE: i32 = 3;
const SAMPLE_RATE: u32 = 48_000;
const AUDIO_RING_CAPACITY: usize = 1536;
const AUDIO_RING_CAPACITY: usize = 4096;
const AUDIO_CALLBACK_FRAMES: u32 = 256;
fn main() {
@@ -482,16 +482,26 @@ struct CpalAudioSink {
impl CpalAudioSink {
fn new(volume: Arc<AtomicU32>) -> Self {
let ring = Arc::new(RingBuffer::new(AUDIO_RING_CAPACITY));
let ring_for_cb = Arc::clone(&ring);
let vol_for_cb = Arc::clone(&volume);
let stream = Self::try_build_stream(ring_for_cb, vol_for_cb);
// Do NOT open the audio device here. Creating a cpal stream at startup
// forces the system audio server (PipeWire/PulseAudio) to allocate
// resources and may disrupt other running audio applications even when
// the emulator is idle. The stream is opened lazily on the first
// push_samples call, i.e. only when a ROM is actually playing.
Self {
_stream: stream,
_stream: None,
ring,
_volume: volume,
}
}
/// Opens the cpal output stream the first time it is needed.
///
/// Does nothing when a stream is already held. Otherwise it passes fresh
/// `Arc` handles to the ring buffer and the volume into `try_build_stream`
/// and stores whatever that returns. A failed build leaves `_stream` as
/// `None`, so the next call attempts the build again.
fn ensure_stream(&mut self) {
    if self._stream.is_some() {
        return;
    }
    let shared_ring = Arc::clone(&self.ring);
    let shared_volume = Arc::clone(&self._volume);
    self._stream = Self::try_build_stream(shared_ring, shared_volume);
}
fn try_build_stream(ring: Arc<RingBuffer>, volume: Arc<AtomicU32>) -> Option<cpal::Stream> {
use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
@@ -548,6 +558,7 @@ impl CpalAudioSink {
// Audio sink implementation: emulator samples are queued in the shared ring
// buffer that was also handed to the stream builder.
impl nesemu::AudioOutput for CpalAudioSink {
/// Queues `samples` for playback.
///
/// The output stream is created on demand before the samples are pushed, so
/// no stream is built until the first call arrives.
fn push_samples(&mut self, samples: &[f32]) {
// Must run before the push: this is the only place the stream gets opened.
self.ensure_stream();
self.ring.push(samples);
}
}
@@ -567,7 +578,10 @@ fn cpal_stream_config() -> cpal::StreamConfig {
cpal::StreamConfig {
channels: 1,
sample_rate: cpal::SampleRate(SAMPLE_RATE),
buffer_size: cpal::BufferSize::Fixed(AUDIO_CALLBACK_FRAMES),
// Use the audio server's default buffer size to avoid forcing the entire
// PipeWire/PulseAudio graph into low-latency mode, which would disturb
// other audio applications (browsers, media players, etc.).
buffer_size: cpal::BufferSize::Default,
}
}
@@ -803,9 +817,9 @@ mod tests {
}
#[test]
fn desktop_audio_ring_budget_stays_below_25ms() {
fn desktop_audio_ring_budget_stays_below_100ms() {
let latency_ms = audio_ring_latency_ms(AUDIO_RING_CAPACITY, SAMPLE_RATE);
let max_budget_ms = 40.0;
let max_budget_ms = 100.0;
assert!(
latency_ms <= max_budget_ms,
"desktop audio ring latency budget too high: {latency_ms:.2}ms"
@@ -813,12 +827,11 @@ mod tests {
}
#[test]
fn desktop_audio_uses_fixed_low_latency_callback_size() {
fn desktop_audio_uses_default_buffer_size() {
let config = cpal_stream_config();
assert_eq!(
config.buffer_size,
cpal::BufferSize::Fixed(AUDIO_CALLBACK_FRAMES)
);
// Default lets the audio server (PipeWire/PulseAudio) choose the
// buffer size, preventing interference with other audio applications.
assert_eq!(config.buffer_size, cpal::BufferSize::Default);
}
#[test]

View File

@@ -200,9 +200,15 @@ impl Apu {
}
if self.dmc_bytes_remaining == 0 {
if (self.io[0x10] & 0x40) != 0 {
// Loop mode: reset address and byte counter.
// Do NOT request another DMA here — the sample buffer is full
// right now. clock_dmc will request the next fetch when the
// output unit empties the buffer into the shift register, which
// is the correct NES hardware behaviour (reader only fills an
// empty buffer). Requesting early would overwrite the valid
// buffer and skip the last byte of each loop iteration.
self.dmc_bytes_remaining = self.dmc_sample_length_bytes();
self.dmc_current_addr = self.dmc_sample_start_addr();
self.dmc_dma_request = true;
} else if self.dmc_irq_enabled {
self.dmc_irq_pending = true;
}
@@ -331,9 +337,13 @@ impl Apu {
};
let triangle = {
// Timer period < 2 produces ultrasonic output (~28-56 kHz) that aliases
// to audible frequencies when sampled at 48 kHz. Real hardware filters
// this via the RC output stage; mute here to match that behaviour.
let active = (self.channel_enable_mask & 0x04) != 0
&& self.length_counters[2] > 0
&& self.triangle_linear_counter > 0;
&& self.triangle_linear_counter > 0
&& self.triangle_timer_period() >= 2;
if active {
TRIANGLE_SEQUENCE[self.triangle_step as usize & 0x1F]
} else {

View File

@@ -14,6 +14,9 @@ impl Apu {
status
}
pub(crate) fn clock_frame_counter(&mut self) {
if self.cpu_cycle_parity {
return;
}
let seq_len = if self.frame_mode_5step {
APU_FRAME_SEQ_5_STEP_CYCLES
} else {

View File

@@ -5,7 +5,7 @@ fn apu_frame_irq_asserts_in_4_step_mode() {
let mut bus = NativeBus::new(Box::new(StubMapper));
bus.write(0x4017, 0x00); // 4-step, IRQ enabled
for _ in 0..14_918u32 {
for _ in 0..29_832u32 {
bus.clock_cpu(1);
}
@@ -17,7 +17,7 @@ fn reading_4015_clears_apu_frame_irq_flag() {
let mut bus = NativeBus::new(Box::new(StubMapper));
bus.write(0x4017, 0x00); // 4-step, IRQ enabled
for _ in 0..14_918u32 {
for _ in 0..29_832u32 {
bus.clock_cpu(1);
}
@@ -30,7 +30,7 @@ fn reading_4015_clears_apu_frame_irq_flag() {
fn apu_frame_irq_inhibit_bit_disables_irq_and_clears_pending() {
let mut bus = NativeBus::new(Box::new(StubMapper));
bus.write(0x4017, 0x00); // 4-step, IRQ enabled
for _ in 0..14_918u32 {
for _ in 0..29_832u32 {
bus.clock_cpu(1);
}
assert!(bus.poll_irq());
@@ -46,13 +46,13 @@ fn apu_frame_irq_inhibit_bit_disables_irq_and_clears_pending() {
fn writing_4015_does_not_acknowledge_apu_frame_irq() {
let mut bus = NativeBus::new(Box::new(StubMapper));
bus.write(0x4017, 0x00); // 4-step, IRQ enabled
for _ in 0..14_918u32 {
for _ in 0..29_832u32 {
bus.clock_cpu(1);
}
assert!(bus.poll_irq(), "frame IRQ must be pending");
// Recreate pending frame IRQ and ensure $4015 write does not clear it.
for _ in 0..14_918u32 {
for _ in 0..29_832u32 {
bus.clock_cpu(1);
}
bus.write(0x4015, 0x00);
@@ -183,11 +183,11 @@ fn apu_length_counter_decrements_on_half_frame_when_not_halted() {
bus.write(0x4003, 0x18); // length index 3 => value 2
assert_eq!(bus.apu.length_counters[0], 2);
for _ in 0..7_457u32 {
for _ in 0..14_913u32 {
bus.clock_cpu(1);
}
assert_eq!(bus.apu.length_counters[0], 1);
for _ in 0..7_458u32 {
for _ in 0..14_916u32 {
bus.clock_cpu(1);
}
assert_eq!(bus.apu.length_counters[0], 0);
@@ -218,13 +218,13 @@ fn quarter_frame_clocks_triangle_linear_counter() {
bus.write(0x4008, 0x05); // control=0, reload value=5
bus.write(0x400B, 0x00); // set reload flag
for _ in 0..3_729u32 {
for _ in 0..7_457u32 {
bus.clock_cpu(1);
}
assert_eq!(bus.apu.triangle_linear_counter, 5);
assert!(!bus.apu.triangle_linear_reload_flag);
for _ in 0..3_728u32 {
for _ in 0..7_456u32 {
bus.clock_cpu(1);
}
assert_eq!(bus.apu.triangle_linear_counter, 4);
@@ -238,7 +238,7 @@ fn quarter_frame_envelope_start_reloads_decay() {
bus.write(0x4003, 0x00); // start envelope
assert_ne!(bus.apu.envelope_start_flags & 0x01, 0);
for _ in 0..3_729u32 {
for _ in 0..7_457u32 {
bus.clock_cpu(1);
}
assert_eq!(bus.apu.envelope_decay[0], 15);
@@ -253,7 +253,7 @@ fn sweep_half_frame_updates_pulse_timer_period() {
bus.write(0x4003, 0x02); // timer high => period 0x200
bus.write(0x4001, 0x82); // enable, period=1, negate=0, shift=2
for _ in 0..7_457u32 {
for _ in 0..14_913u32 {
bus.clock_cpu(1);
}
assert_eq!(bus.apu.read(0x4002), 0x80);
@@ -267,7 +267,7 @@ fn sweep_negative_pulse1_uses_ones_complement() {
bus.write(0x4003, 0x02);
bus.write(0x4001, 0x8A); // enable, period=1, negate=1, shift=2
for _ in 0..7_457u32 {
for _ in 0..14_913u32 {
bus.clock_cpu(1);
}
assert_eq!(bus.apu.read(0x4002), 0x7F);

View File

@@ -7,16 +7,35 @@ pub struct AudioMixer {
samples_per_cpu_cycle: f64,
sample_accumulator: f64,
last_output_sample: f32,
// One-pole IIR low-pass filter state (approximates NES ~14 kHz RC filter).
// Coefficient: a = exp(-2π * fc / fs). At fc=14000, fs=48000: a ≈ 0.160
lp_coeff: f32,
lp_state: f32,
// One-pole IIR high-pass filter (DC blocker). Removes the DC bias that
// accumulates when APU channels switch state, preventing audible clicks and
// pops. Approximates the NES capacitor-coupled output stage (~5 Hz cutoff).
// Formula: y[n] = hp_coeff * y[n-1] + x[n] - x[n-1]
// Coefficient: a = exp(-2π * fc / fs). At fc=5, fs=48000: a ≈ 0.99935.
hp_coeff: f32,
hp_prev_x: f32,
hp_prev_y: f32,
}
impl AudioMixer {
/// Builds a mixer that emits `sample_rate` samples per second for `mode`.
///
/// Both filter coefficients use the one-pole form `exp(-2π · fc / fs)`:
/// `fc` = 14 kHz for the low-pass stage and `fc` = 5 Hz for the DC-blocking
/// high-pass stage. The accumulator, the last output sample and all filter
/// memories start at zero.
pub fn new(sample_rate: u32, mode: VideoMode) -> Self {
    let cpu_hz = mode.cpu_hz();
    let fs = sample_rate as f64;
    // Shared coefficient formula for both one-pole filters.
    let one_pole = |cutoff_hz: f64| (-2.0 * std::f64::consts::PI * cutoff_hz / fs).exp() as f32;
    Self {
        sample_rate,
        samples_per_cpu_cycle: fs / cpu_hz,
        sample_accumulator: 0.0,
        last_output_sample: 0.0,
        lp_coeff: one_pole(14_000.0),
        lp_state: 0.0,
        hp_coeff: one_pole(5.0),
        hp_prev_x: 0.0,
        hp_prev_y: 0.0,
    }
}
@@ -27,6 +46,9 @@ impl AudioMixer {
/// Puts the mixer's running state back to the values `new` starts with.
///
/// Clears the memory of both filters, the last emitted sample and the
/// resampling accumulator. The sample rate, the samples-per-cycle ratio and
/// the filter coefficients are left untouched.
pub fn reset(&mut self) {
    // Filter memories (high-pass, then low-pass).
    self.hp_prev_y = 0.0;
    self.hp_prev_x = 0.0;
    self.lp_state = 0.0;
    // Resampler state.
    self.last_output_sample = 0.0;
    self.sample_accumulator = 0.0;
}
pub fn push_cycles(&mut self, cpu_cycles: u32, channels: ChannelOutputs, out: &mut Vec<f32>) {
@@ -45,13 +67,26 @@ impl AudioMixer {
}
let start = self.last_output_sample;
let a = self.lp_coeff;
let b = 1.0 - a;
if samples == 1 {
out.push(sample);
let lp = a * self.lp_state + b * sample;
self.lp_state = lp;
let hp = self.hp_coeff * self.hp_prev_y + lp - self.hp_prev_x;
self.hp_prev_x = lp;
self.hp_prev_y = hp;
out.push(hp);
} else {
let denom = samples as f32;
for idx in 0..samples {
let t = (idx + 1) as f32 / denom;
out.push(start + (sample - start) * t);
let interp = start + (sample - start) * t;
let lp = a * self.lp_state + b * interp;
self.lp_state = lp;
let hp = self.hp_coeff * self.hp_prev_y + lp - self.hp_prev_x;
self.hp_prev_x = lp;
self.hp_prev_y = hp;
out.push(hp);
}
}
self.last_output_sample = sample;

View File

@@ -108,8 +108,16 @@ impl NesRuntime {
) -> Result<(), RuntimeError> {
self.bus.begin_frame();
while !self.bus.take_frame_complete() {
let cycles = self.step_instruction()?;
mixer.push_cycles(cycles, self.bus.apu_channel_outputs(), out_samples);
self.bus.set_joypad_buttons(self.buttons);
let cpu_cycles = self.cpu.step(&mut self.bus).map_err(RuntimeError::Cpu)?;
// Sample APU output once per CPU cycle for better audio resolution.
// OAM DMA cycles (triggered inside cpu.step) are captured in the
// first take_cpu_cycles_since_poll call of this instruction.
for _ in 0..cpu_cycles {
self.bus.clock_cpu(1);
let actual = self.bus.take_cpu_cycles_since_poll();
mixer.push_cycles(actual, self.bus.apu_channel_outputs(), out_samples);
}
}
self.frame_number = self.frame_number.saturating_add(1);
Ok(())