Compare commits

...

2 Commits

Author SHA1 Message Date
d9666c23b4 feat: Hermite resampling, sprite shift registers, controller open bus
Some checks failed
CI / rust (push) Has been cancelled
#3 audio.rs: replace linear interpolation with Catmull-Rom Hermite cubic.
  Stores prev_sample as p0 control point; m1=(p2-p0)/2, m2=(p2-p1)/2
  tangents give continuous first derivative across batch boundaries.

#4 ppu: add per-slot sprite shift registers (spr_shift_lo/hi, spr_x_counter,
  spr_attr_latch). load_sprite_shifters fetches pattern bytes with h-flip at
  dot 1 of each visible scanline. sprite_pixel_from_shifters replaces the
  per-pixel OAM scan; sprite-0 hit detection integrated into the shifter path.

#5 joypad.rs: format_controller_read now preserves bits 1-5,7 as open bus
  (!0x41 mask) instead of zeroing bits 1-4, matching NES hardware behaviour.
2026-03-15 11:30:14 +03:00
c77be7c84b feat(audio): non-linear APU mixing and mapper expansion audio (VRC6, FME-7, Namco163) 2026-03-15 11:17:37 +03:00
12 changed files with 540 additions and 56 deletions

View File

@@ -365,6 +365,6 @@ impl Apu {
let dmc = self.dmc_output_level; let dmc = self.dmc_output_level;
ChannelOutputs { pulse1, pulse2, triangle, noise, dmc } ChannelOutputs { pulse1, pulse2, triangle, noise, dmc, expansion: 0.0 }
} }
} }

View File

@@ -5,6 +5,11 @@ pub struct ChannelOutputs {
pub triangle: u8, pub triangle: u8,
pub noise: u8, pub noise: u8,
pub dmc: u8, pub dmc: u8,
/// Pre-mixed expansion audio from the cartridge mapper (VRC6, FME-7,
/// Namco163, etc.). Normalized to roughly the same amplitude range as
/// the internal NES APU output. Added linearly to the final sample
/// after the non-linear NES APU mixing stage.
pub expansion: f32,
} }
pub(super) const APU_FRAME_SEQ_4_STEP_CYCLES: u32 = 14_915; pub(super) const APU_FRAME_SEQ_4_STEP_CYCLES: u32 = 14_915;

View File

@@ -61,7 +61,9 @@ impl NativeBus {
} }
pub fn apu_channel_outputs(&self) -> crate::native_core::apu::ChannelOutputs { pub fn apu_channel_outputs(&self) -> crate::native_core::apu::ChannelOutputs {
self.apu.channel_outputs() let mut outputs = self.apu.channel_outputs();
outputs.expansion = self.mapper.expansion_audio_sample();
outputs
} }
pub fn render_frame(&self, out_rgba: &mut [u8], frame_number: u32, buttons: [bool; 8]) { pub fn render_frame(&self, out_rgba: &mut [u8], frame_number: u32, buttons: [bool; 8]) {

View File

@@ -67,8 +67,11 @@ impl NativeBus {
} }
fn format_controller_read(&self, bit: u8) -> u8 { fn format_controller_read(&self, bit: u8) -> u8 {
// Controller reads expose serial data in bit0, keep bit6 high, and // The NES controller port drives only bit 0 (serial data); bit 6 is
// preserve open-bus upper bits. // held high by a pull-up on the expansion connector. All other bits
(self.cpu_open_bus & 0xE0) | 0x40 | (bit & 1) // (1-5, 7) float and retain whatever is currently on the CPU data bus
// (open bus). !0x41 clears bits 6 and 0 so we can OR in their
// canonical values without corrupting any open-bus bits.
(self.cpu_open_bus & !0x41u8) | 0x40 | (bit & 1)
} }
} }

View File

@@ -26,6 +26,14 @@ pub trait Mapper {
fn poll_irq(&mut self) -> bool { fn poll_irq(&mut self) -> bool {
false false
} }
/// Current expansion-audio output of the cartridge, already mixed down to a
/// single sample.
///
/// Mappers that carry an on-cartridge sound chip (VRC6, FME-7/Sunsoft 5B,
/// Namco163, etc.) override this and are expected to return a value whose
/// amplitude is comparable to the internal NES APU output range.
///
/// The default implementation is for mappers without expansion audio and
/// always returns silence (`0.0`).
fn expansion_audio_sample(&self) -> f32 {
    0.0_f32
}
fn save_state(&self, out: &mut Vec<u8>); fn save_state(&self, out: &mut Vec<u8>);
fn load_state(&mut self, data: &[u8]) -> Result<(), String>; fn load_state(&mut self, data: &[u8]) -> Result<(), String>;
} }

View File

@@ -16,4 +16,13 @@ pub(crate) struct Fme7 {
pub(super) irq_enabled: bool, pub(super) irq_enabled: bool,
pub(super) irq_counter_enabled: bool, pub(super) irq_counter_enabled: bool,
pub(super) irq_pending: bool, pub(super) irq_pending: bool,
// Sunsoft 5B (YM2149 / AY-3-8910 compatible) expansion audio.
// Registers R0-R13 hold period, mixer, volume, and envelope config.
// Commands 0xC0-0xCF select audio register (low nibble).
pub(super) ay_regs: [u8; 16],
// Per-channel 12-bit period counter and current square-wave state.
pub(super) ay_timer: [u16; 3],
pub(super) ay_state: [bool; 3],
// Prescaler: the AY chip runs at CPU clock / 16.
pub(super) ay_prescaler: u8,
} }

View File

@@ -103,6 +103,10 @@ impl Fme7 {
irq_enabled: false, irq_enabled: false,
irq_counter_enabled: false, irq_counter_enabled: false,
irq_pending: false, irq_pending: false,
ay_regs: [0; 16],
ay_timer: [1; 3],
ay_state: [false; 3],
ay_prescaler: 0,
} }
} }
@@ -137,15 +141,21 @@ impl Mapper for Fme7 {
fn cpu_write(&mut self, addr: u16, value: u8) { fn cpu_write(&mut self, addr: u16, value: u8) {
if (0x8000..=0x9FFF).contains(&addr) { if (0x8000..=0x9FFF).contains(&addr) {
self.command = value & 0x0F; self.command = value;
return; return;
} }
if !(0xA000..=0xBFFF).contains(&addr) { if !(0xA000..=0xBFFF).contains(&addr) {
return; return;
} }
match self.command { // Commands 0xC0-0xCF: Sunsoft 5B (AY-3-8910) audio registers.
0x0..=0x7 => self.chr_banks[self.command as usize] = value, if self.command >= 0xC0 {
self.ay_regs[(self.command & 0x0F) as usize] = value;
return;
}
match self.command & 0x0F {
0x0..=0x7 => self.chr_banks[(self.command & 0x0F) as usize] = value,
0x8 => { 0x8 => {
self.low_bank = value & 0x3F; self.low_bank = value & 0x3F;
self.low_is_ram = (value & 0x40) != 0; self.low_is_ram = (value & 0x40) != 0;
@@ -238,9 +248,7 @@ impl Mapper for Fme7 {
} }
fn clock_cpu(&mut self, cycles: u8) { fn clock_cpu(&mut self, cycles: u8) {
if !self.irq_counter_enabled { if self.irq_counter_enabled {
return;
}
for _ in 0..cycles { for _ in 0..cycles {
if self.irq_counter == 0 { if self.irq_counter == 0 {
self.irq_counter = 0xFFFF; self.irq_counter = 0xFFFF;
@@ -253,6 +261,46 @@ impl Mapper for Fme7 {
} }
} }
// Sunsoft 5B AY-3-8910 timer: chip runs at CPU clock / 16.
// Each time the prescaler wraps, tick all three tone channels.
for _ in 0..cycles {
self.ay_prescaler = self.ay_prescaler.wrapping_add(1);
if self.ay_prescaler < 16 {
continue;
}
self.ay_prescaler = 0;
for ch in 0..3usize {
let period = {
let lo = self.ay_regs[ch * 2] as u16;
let hi = (self.ay_regs[ch * 2 + 1] & 0x0F) as u16;
let p = (hi << 8) | lo;
if p == 0 { 1 } else { p }
};
if self.ay_timer[ch] == 0 {
self.ay_timer[ch] = period;
self.ay_state[ch] = !self.ay_state[ch];
} else {
self.ay_timer[ch] -= 1;
}
}
}
}
/// Mixes the three Sunsoft 5B tone channels into one expansion-audio sample.
///
/// A channel contributes only while its tone-disable bit in mixer register
/// R7 (bits 2:0, 0 = enabled) is clear and its square-wave state is high.
/// The contribution is the low 4 bits of the channel's volume register
/// (R8-R10) multiplied by the same per-step gain used for a NES pulse channel.
/// Only those fields are read here; no other AY register influences the mix.
fn expansion_audio_sample(&self) -> f32 {
    // Per-step gain, chosen to sit at a level similar to a NES pulse channel.
    const STEP_GAIN: f32 = 0.00752;

    let tone_disable_bits = self.ay_regs[7];
    (0..3usize)
        .filter(|&chan| {
            let tone_on = (tone_disable_bits >> chan) & 1 == 0;
            tone_on && self.ay_state[chan]
        })
        // Explicit fold from 0.0 keeps the summation order of a plain
        // accumulator loop.
        .fold(0.0f32, |mix, chan| {
            let level = f32::from(self.ay_regs[8 + chan] & 0x0F);
            mix + level * STEP_GAIN
        })
}
fn poll_irq(&mut self) -> bool { fn poll_irq(&mut self) -> bool {
let out = self.irq_pending; let out = self.irq_pending;
self.irq_pending = false; self.irq_pending = false;
@@ -271,12 +319,14 @@ impl Mapper for Fme7 {
out.push(u8::from(self.irq_counter_enabled)); out.push(u8::from(self.irq_counter_enabled));
out.push(u8::from(self.irq_pending)); out.push(u8::from(self.irq_pending));
out.push(encode_mirroring(self.mirroring)); out.push(encode_mirroring(self.mirroring));
out.extend_from_slice(&self.ay_regs);
write_state_bytes(out, &self.low_ram); write_state_bytes(out, &self.low_ram);
write_chr_state(out, &self.chr_data); write_chr_state(out, &self.chr_data);
} }
fn load_state(&mut self, data: &[u8]) -> Result<(), String> { fn load_state(&mut self, data: &[u8]) -> Result<(), String> {
if data.len() < 21 { // 21 original + 16 ay_regs bytes
if data.len() < 21 + 16 {
return Err("mapper state is truncated".to_string()); return Err("mapper state is truncated".to_string());
} }
let mut cursor = 0usize; let mut cursor = 0usize;
@@ -302,6 +352,8 @@ impl Mapper for Fme7 {
cursor += 1; cursor += 1;
self.mirroring = decode_mirroring(data[cursor]); self.mirroring = decode_mirroring(data[cursor]);
cursor += 1; cursor += 1;
self.ay_regs.copy_from_slice(&data[cursor..cursor + 16]);
cursor += 16;
let low_ram_payload = read_state_bytes(data, &mut cursor)?; let low_ram_payload = read_state_bytes(data, &mut cursor)?;
if low_ram_payload.len() != self.low_ram.len() { if low_ram_payload.len() != self.low_ram.len() {
return Err("mapper state does not match loaded ROM".to_string()); return Err("mapper state does not match loaded ROM".to_string());

View File

@@ -12,6 +12,12 @@ pub(crate) struct Namco163_19 {
irq_counter: u16, irq_counter: u16,
irq_enabled: bool, irq_enabled: bool,
irq_pending: bool, irq_pending: bool,
// Namco163 wavetable audio. Each active channel has a 24-bit phase
// accumulator. Channels 7..7-N+1 are active (N from audio_ram[0x7F]).
// Phase increments by the 18-bit frequency value every 15 CPU cycles
// per active channel (chip cycles sequentially through all channels).
namco_phase: [u32; 8],
namco_cycle: u16,
} }
impl Namco163_19 { impl Namco163_19 {
@@ -28,6 +34,8 @@ impl Namco163_19 {
irq_counter: 0, irq_counter: 0,
irq_enabled: false, irq_enabled: false,
irq_pending: false, irq_pending: false,
namco_phase: [0; 8],
namco_cycle: 0,
} }
} }
@@ -144,9 +152,7 @@ impl Mapper for Namco163_19 {
} }
fn clock_cpu(&mut self, cycles: u8) { fn clock_cpu(&mut self, cycles: u8) {
if !self.irq_enabled { if self.irq_enabled {
return;
}
let sum = self.irq_counter as u32 + cycles as u32; let sum = self.irq_counter as u32 + cycles as u32;
if sum > 0x7FFF { if sum > 0x7FFF {
self.irq_pending = true; self.irq_pending = true;
@@ -154,6 +160,58 @@ impl Mapper for Namco163_19 {
self.irq_counter = (sum as u16) & 0x7FFF; self.irq_counter = (sum as u16) & 0x7FFF;
} }
// Namco163 audio: the chip cycles through all active channels, clocking
// one channel every 15 CPU cycles. When all channels have been clocked
// once, each channel's phase has advanced by its 18-bit frequency value.
let num_active = ((self.audio_ram[0x7F] >> 4) & 0x07) as u16 + 1;
let period = 15 * num_active;
for _ in 0..cycles {
self.namco_cycle += 1;
if self.namco_cycle >= period {
self.namco_cycle = 0;
for j in 0..num_active as usize {
// Channel j registers start at audio_ram[0x40 + j*8].
let base = 0x40 + j * 8;
let freq = (self.audio_ram[base] as u32)
| ((self.audio_ram[base + 2] as u32) << 8)
| (((self.audio_ram[base + 4] & 0x03) as u32) << 16);
self.namco_phase[j] =
(self.namco_phase[j] + freq) & 0x00FF_FFFF;
}
}
}
}
/// Mixes the active Namco163 wavetable channels into one expansion-audio
/// sample.
///
/// The channel count is taken from bits 6:4 of `audio_ram[0x7F]` (plus one).
/// For each channel the current 4-bit wave sample is looked up from the
/// phase accumulator, centred around 8, scaled by the channel's 4-bit volume
/// and averaged over the channel count. The sum is then scaled by 0.02.
///
/// NOTE(review): channels are walked from the register block at 0x40 upward,
/// mirroring the phase update in `clock_cpu`. The struct comment describes
/// the *highest* channels as the active ones - confirm against the Namco 163
/// audio reference and change both places together if needed.
fn expansion_audio_sample(&self) -> f32 {
    let channel_count = ((self.audio_ram[0x7F] >> 4) & 0x07) as usize + 1;
    // Normalisation shared by every channel: 15 volume steps x channel count.
    let divisor = 15.0 * channel_count as f32;

    let mut mix = 0.0f32;
    for ch in 0..channel_count {
        let regs = 0x40 + ch * 8;

        // Upper 6 bits of regs+4 encode the wave length as 256 - 4 * field,
        // so a field of 0 selects the full 256 nibbles.
        let length_field = u16::from(self.audio_ram[regs + 4] >> 2);
        let wave_len = match length_field {
            0 => 256u16,
            field => 256 - field * 4,
        }
        .max(1);
        let wave_start = u16::from(self.audio_ram[regs + 6]);
        let level = f32::from(self.audio_ram[regs + 7] & 0x0F);

        // Nibble index into audio RAM: phase step wrapped to the wave length,
        // offset by the wave start address, wrapped to 8 bits.
        let step = (self.namco_phase[ch] >> 16) as u16 % wave_len;
        let nibble_index = (step + wave_start) & 0xFF;

        // Even nibble indices use the low half of the byte, odd the high half.
        let packed = self.audio_ram[(nibble_index / 2) as usize];
        let shift = (nibble_index & 1) * 4;
        let wave_sample = f32::from((packed >> shift) & 0x0F);

        // Centre at 8 so a mid-scale nibble contributes no DC offset.
        mix += (wave_sample - 8.0) * level / divisor;
    }

    // Bring the result into the NES amplitude range.
    mix * 0.02
}
fn poll_irq(&mut self) -> bool { fn poll_irq(&mut self) -> bool {
let out = self.irq_pending; let out = self.irq_pending;
self.irq_pending = false; self.irq_pending = false;

View File

@@ -18,6 +18,27 @@ pub(crate) struct Vrc6_24 {
irq_mode_cpu: bool, irq_mode_cpu: bool,
irq_pending: bool, irq_pending: bool,
irq_prescaler: i16, irq_prescaler: i16,
// VRC6 expansion audio — 2 pulse channels + 1 sawtooth channel.
// Pulse channel n: 12-bit period timer, 4-bit volume, 3-bit duty (0-7),
// mode flag (ignore duty → always output), gate (enabled) flag.
// Timer decrements each CPU cycle; at 0 reload and advance duty_step (0-15).
// Output: if mode OR duty_step <= duty → volume, else 0.
vrc6_pulse_period: [u16; 2],
vrc6_pulse_counter: [u16; 2],
vrc6_pulse_duty_step: [u8; 2],
vrc6_pulse_duty: [u8; 2],
vrc6_pulse_volume: [u8; 2],
vrc6_pulse_mode: [bool; 2],
vrc6_pulse_enabled: [bool; 2],
// Sawtooth channel: 12-bit period timer, 6-bit accumulator rate.
// Step counter 0-6; on steps 1/3/5 accumulator += rate; on step 6 reset.
// Output: accumulator >> 3.
vrc6_saw_period: u16,
vrc6_saw_counter: u16,
vrc6_saw_step: u8,
vrc6_saw_accumulator: u8,
vrc6_saw_rate: u8,
vrc6_saw_enabled: bool,
} }
impl Vrc6_24 { impl Vrc6_24 {
@@ -44,6 +65,19 @@ impl Vrc6_24 {
irq_mode_cpu: false, irq_mode_cpu: false,
irq_pending: false, irq_pending: false,
irq_prescaler: 341, irq_prescaler: 341,
vrc6_pulse_period: [0; 2],
vrc6_pulse_counter: [0; 2],
vrc6_pulse_duty_step: [0; 2],
vrc6_pulse_duty: [0; 2],
vrc6_pulse_volume: [0; 2],
vrc6_pulse_mode: [false; 2],
vrc6_pulse_enabled: [false; 2],
vrc6_saw_period: 0,
vrc6_saw_counter: 0,
vrc6_saw_step: 0,
vrc6_saw_accumulator: 0,
vrc6_saw_rate: 0,
vrc6_saw_enabled: false,
} }
} }
@@ -118,7 +152,47 @@ impl Mapper for Vrc6_24 {
} }
match self.decode_register(addr) { match self.decode_register(addr) {
0x8000..=0x8003 => self.prg_bank_16k = value & 0x0F, 0x8000..=0x8003 => self.prg_bank_16k = value & 0x0F,
// VRC6 pulse 1 registers ($9000-$9002)
0x9000 => {
self.vrc6_pulse_mode[0] = (value & 0x80) != 0;
self.vrc6_pulse_duty[0] = (value >> 4) & 0x07;
self.vrc6_pulse_volume[0] = value & 0x0F;
}
0x9001 => {
self.vrc6_pulse_period[0] =
(self.vrc6_pulse_period[0] & 0x0F00) | value as u16;
}
0x9002 => {
self.vrc6_pulse_enabled[0] = (value & 0x80) != 0;
self.vrc6_pulse_period[0] =
(self.vrc6_pulse_period[0] & 0x00FF) | (((value & 0x0F) as u16) << 8);
}
0x9003 => self.control = value, 0x9003 => self.control = value,
// VRC6 pulse 2 registers ($A000-$A002)
0xA000 => {
self.vrc6_pulse_mode[1] = (value & 0x80) != 0;
self.vrc6_pulse_duty[1] = (value >> 4) & 0x07;
self.vrc6_pulse_volume[1] = value & 0x0F;
}
0xA001 => {
self.vrc6_pulse_period[1] =
(self.vrc6_pulse_period[1] & 0x0F00) | value as u16;
}
0xA002 => {
self.vrc6_pulse_enabled[1] = (value & 0x80) != 0;
self.vrc6_pulse_period[1] =
(self.vrc6_pulse_period[1] & 0x00FF) | (((value & 0x0F) as u16) << 8);
}
// VRC6 sawtooth registers ($B000-$B002)
0xB000 => self.vrc6_saw_rate = value & 0x3F,
0xB001 => {
self.vrc6_saw_period = (self.vrc6_saw_period & 0x0F00) | value as u16;
}
0xB002 => {
self.vrc6_saw_enabled = (value & 0x80) != 0;
self.vrc6_saw_period =
(self.vrc6_saw_period & 0x00FF) | (((value & 0x0F) as u16) << 8);
}
0xC000..=0xC003 => self.prg_bank_8k = value & 0x1F, 0xC000..=0xC003 => self.prg_bank_8k = value & 0x1F,
0xD000 => self.chr_banks_1k[0] = value, 0xD000 => self.chr_banks_1k[0] = value,
0xD001 => self.chr_banks_1k[1] = value, 0xD001 => self.chr_banks_1k[1] = value,
@@ -193,6 +267,65 @@ impl Mapper for Vrc6_24 {
fn clock_cpu(&mut self, cycles: u8) { fn clock_cpu(&mut self, cycles: u8) {
vrc_irq_clock(cycles, self.irq_state()); vrc_irq_clock(cycles, self.irq_state());
for _ in 0..cycles {
// Pulse channels
for i in 0..2usize {
if !self.vrc6_pulse_enabled[i] {
continue;
}
if self.vrc6_pulse_counter[i] == 0 {
self.vrc6_pulse_counter[i] = self.vrc6_pulse_period[i].max(1);
self.vrc6_pulse_duty_step[i] = (self.vrc6_pulse_duty_step[i] + 1) & 0x0F;
} else {
self.vrc6_pulse_counter[i] -= 1;
}
}
// Sawtooth channel
if self.vrc6_saw_enabled {
if self.vrc6_saw_counter == 0 {
self.vrc6_saw_counter = self.vrc6_saw_period.max(1);
self.vrc6_saw_step += 1;
match self.vrc6_saw_step {
1 | 3 | 5 => {
self.vrc6_saw_accumulator =
self.vrc6_saw_accumulator.wrapping_add(self.vrc6_saw_rate);
}
6 => {
self.vrc6_saw_accumulator = 0;
self.vrc6_saw_step = 0;
}
_ => {}
}
} else {
self.vrc6_saw_counter -= 1;
}
}
}
}
/// Mixes the VRC6 expansion channels (two pulse channels and the sawtooth)
/// into one sample by summing each enabled channel's raw level multiplied
/// by 0.00752. Disabled channels contribute nothing.
fn expansion_audio_sample(&self) -> f32 {
// Pulse 1 & 2: 4-bit output (0-15), scaled like NES pulse channels.
let mut sample = 0.0f32;
for i in 0..2usize {
if self.vrc6_pulse_enabled[i] {
// The pulse outputs its volume when the mode flag is set (duty ignored)
// or while the current duty step has not passed the programmed duty
// value; otherwise it outputs 0.
let raw = if self.vrc6_pulse_mode[i]
|| self.vrc6_pulse_duty_step[i] <= self.vrc6_pulse_duty[i]
{
self.vrc6_pulse_volume[i] as f32
} else {
0.0
};
// Scale to match NES pulse level (0.00752 * 15 ≈ 0.113 max per channel).
sample += raw * 0.00752;
}
}
// Sawtooth: the upper five bits of the 8-bit accumulator are the output.
// With the three rate additions per ramp done in clock_cpu and a 6-bit
// rate, accumulator >> 3 stays within a 0-23 range; scale comparably.
if self.vrc6_saw_enabled {
let raw = (self.vrc6_saw_accumulator >> 3) as f32;
sample += raw * 0.00752;
}
sample
} }
fn poll_irq(&mut self) -> bool { fn poll_irq(&mut self) -> bool {
@@ -214,12 +347,30 @@ impl Mapper for Vrc6_24 {
out.push(u8::from(self.irq_mode_cpu)); out.push(u8::from(self.irq_mode_cpu));
out.push(u8::from(self.irq_pending)); out.push(u8::from(self.irq_pending));
out.extend_from_slice(&self.irq_prescaler.to_le_bytes()); out.extend_from_slice(&self.irq_prescaler.to_le_bytes());
// VRC6 expansion audio state (24 bytes)
for i in 0..2 {
out.extend_from_slice(&self.vrc6_pulse_period[i].to_le_bytes());
out.extend_from_slice(&self.vrc6_pulse_counter[i].to_le_bytes());
out.push(self.vrc6_pulse_duty_step[i]);
out.push(self.vrc6_pulse_duty[i]);
out.push(self.vrc6_pulse_volume[i]);
out.push(
u8::from(self.vrc6_pulse_mode[i]) | (u8::from(self.vrc6_pulse_enabled[i]) << 1),
);
}
out.extend_from_slice(&self.vrc6_saw_period.to_le_bytes());
out.extend_from_slice(&self.vrc6_saw_counter.to_le_bytes());
out.push(self.vrc6_saw_step);
out.push(self.vrc6_saw_accumulator);
out.push(self.vrc6_saw_rate);
out.push(u8::from(self.vrc6_saw_enabled));
write_state_bytes(out, &self.prg_ram); write_state_bytes(out, &self.prg_ram);
write_chr_state(out, &self.chr_data); write_chr_state(out, &self.chr_data);
} }
fn load_state(&mut self, data: &[u8]) -> Result<(), String> { fn load_state(&mut self, data: &[u8]) -> Result<(), String> {
if data.len() < 1 + 1 + 1 + 8 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 2 { // 20 fixed + 24 VRC6 audio bytes
if data.len() < 1 + 1 + 1 + 8 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 2 + 24 {
return Err("mapper state is truncated".to_string()); return Err("mapper state is truncated".to_string());
} }
let mut cursor = 0usize; let mut cursor = 0usize;
@@ -247,6 +398,37 @@ impl Mapper for Vrc6_24 {
cursor += 1; cursor += 1;
self.irq_prescaler = i16::from_le_bytes([data[cursor], data[cursor + 1]]); self.irq_prescaler = i16::from_le_bytes([data[cursor], data[cursor + 1]]);
cursor += 2; cursor += 2;
// VRC6 expansion audio state
for i in 0..2 {
self.vrc6_pulse_period[i] =
u16::from_le_bytes([data[cursor], data[cursor + 1]]);
cursor += 2;
self.vrc6_pulse_counter[i] =
u16::from_le_bytes([data[cursor], data[cursor + 1]]);
cursor += 2;
self.vrc6_pulse_duty_step[i] = data[cursor];
cursor += 1;
self.vrc6_pulse_duty[i] = data[cursor];
cursor += 1;
self.vrc6_pulse_volume[i] = data[cursor];
cursor += 1;
let flags = data[cursor];
cursor += 1;
self.vrc6_pulse_mode[i] = (flags & 0x01) != 0;
self.vrc6_pulse_enabled[i] = (flags & 0x02) != 0;
}
self.vrc6_saw_period = u16::from_le_bytes([data[cursor], data[cursor + 1]]);
cursor += 2;
self.vrc6_saw_counter = u16::from_le_bytes([data[cursor], data[cursor + 1]]);
cursor += 2;
self.vrc6_saw_step = data[cursor];
cursor += 1;
self.vrc6_saw_accumulator = data[cursor];
cursor += 1;
self.vrc6_saw_rate = data[cursor];
cursor += 1;
self.vrc6_saw_enabled = data[cursor] != 0;
cursor += 1;
let prg_ram = read_state_bytes(data, &mut cursor)?; let prg_ram = read_state_bytes(data, &mut cursor)?;
if prg_ram.len() != self.prg_ram.len() { if prg_ram.len() != self.prg_ram.len() {

View File

@@ -44,6 +44,10 @@ impl Ppu {
sprite_count: 0, sprite_count: 0,
next_sprite_indices: [0; 8], next_sprite_indices: [0; 8],
next_sprite_count: 0, next_sprite_count: 0,
spr_shift_lo: [0; 8],
spr_shift_hi: [0; 8],
spr_x_counter: [0; 8],
spr_attr_latch: [0; 8],
} }
} }
@@ -90,18 +94,34 @@ impl Ppu {
(self.read_palette(0), false) (self.read_palette(0), false)
}; };
if !self.sprite0_hit_set() && self.sprite0_hit_at(mapper, y, dot) && bg_opaque { // Advance sprite shift registers for every visible pixel
self.set_sprite0_hit(true); // (unconditional so x-counters stay in sync even when the sprite
} // layer is clipped on the left).
let spr_result = if self.sprites_enabled() {
self.sprite_pixel_from_shifters()
} else {
None
};
let mut final_color = bg_color_index & 0x3F; let mut final_color = bg_color_index & 0x3F;
let sprite_layer_enabled = show_spr && (x >= 8 || show_spr_left); let sprite_layer_enabled = show_spr && (x >= 8 || show_spr_left);
if sprite_layer_enabled if sprite_layer_enabled {
&& let Some((spr_color_index, behind_bg)) = self.sprite_pixel(mapper, x, y) if let Some((spr_color_index, behind_bg, is_sprite0)) = spr_result {
&& !(behind_bg && bg_opaque) // Sprite-0 hit: set when a non-transparent sprite-0 pixel
// overlaps a non-transparent background pixel. Suppressed
// in the left 8 pixels when either clip bit is clear.
if is_sprite0
&& bg_opaque
&& !self.sprite0_hit_set()
&& (x >= 8 || (show_bg_left && show_spr_left))
{ {
self.set_sprite0_hit(true);
}
if !(behind_bg && bg_opaque) {
final_color = spr_color_index & 0x3F; final_color = spr_color_index & 0x3F;
} }
}
}
let (r, g, b) = apply_color_emphasis(nes_rgb(final_color), self.mask); let (r, g, b) = apply_color_emphasis(nes_rgb(final_color), self.mask);
let i = (y * 256 + x) * 4; let i = (y * 256 + x) * 4;
@@ -160,11 +180,13 @@ impl Ppu {
} }
if rendering_active { if rendering_active {
// Transfer pre-evaluated sprite list at the start of each visible scanline, // Transfer pre-evaluated sprite list at the start of each visible
// so dots 1-256 render with the correct sprites for *this* scanline. // scanline, then immediately load the per-slot shift registers so
// that dots 1-256 render with the correct sprites for *this* line.
if scanline < 240 && dot == 1 && self.sprites_enabled() { if scanline < 240 && dot == 1 && self.sprites_enabled() {
self.sprite_count = self.next_sprite_count; self.sprite_count = self.next_sprite_count;
self.sprite_indices = self.next_sprite_indices; self.sprite_indices = self.next_sprite_indices;
self.load_sprite_shifters(mapper, scanline);
} }
if dot == 256 { if dot == 256 {
@@ -284,6 +306,93 @@ impl Ppu {
(count, indices, overflow) (count, indices, overflow)
} }
/// Loads the per-slot sprite shift registers for `scanline`.
///
/// Every slot is first parked (transparent pattern bits, x counter 0xFF,
/// cleared attribute latch). Then, for each evaluated sprite that actually
/// covers `scanline`, the two pattern-plane bytes of the matching sprite row
/// are read through `mapper`, bit-reversed when the sprite is horizontally
/// flipped, and latched together with the sprite's X position and attribute
/// byte. Intended to run once at dot 1 of a visible scanline, before
/// `sprite_pixel_from_shifters` is used for that line.
pub(super) fn load_sprite_shifters(&mut self, mapper: &dyn Mapper, scanline: u32) {
    let tall_sprites = (self.ctrl & 0x20) != 0;
    let height: i16 = if tall_sprites { 16 } else { 8 };

    // Park all eight slots; only slots that hold a sprite covering this
    // scanline are overwritten below.
    self.spr_shift_lo = [0; 8];
    self.spr_shift_hi = [0; 8];
    self.spr_x_counter = [0xFF; 8];
    self.spr_attr_latch = [0; 8];

    let active_slots = (self.sprite_count as usize).min(8);
    for slot in 0..active_slots {
        let entry = self.sprite_indices[slot] as usize * 4;
        let attr = self.oam[entry + 2];
        let tile = self.oam[entry + 1];
        // OAM stores Y minus one, so the sprite's first row is one line lower.
        let top = self.oam[entry] as i16 + 1;

        let line = scanline as i16 - top;
        if !(0..height).contains(&line) {
            continue;
        }
        // Vertical flip mirrors the row within the sprite.
        let line = if (attr & 0x80) != 0 { height - 1 - line } else { line };

        let plane0_addr = if tall_sprites {
            // 8x16: bit 0 of the tile byte selects the pattern table, the
            // remaining bits select the top tile; rows 8-15 use the next tile.
            let bank = u16::from(tile & 1) << 12;
            let tile_index = u16::from((tile & 0xFE).wrapping_add((line / 8) as u8));
            bank + tile_index * 16 + (line & 7) as u16
        } else {
            // 8x8: the pattern table comes from PPUCTRL bit 3.
            let bank: u16 = if (self.ctrl & 0x08) != 0 { 0x1000 } else { 0 };
            bank + u16::from(tile) * 16 + line as u16
        };

        // The second bit plane sits 8 bytes after the first.
        let plane0 = mapper.ppu_read(plane0_addr);
        let plane1 = mapper.ppu_read(plane0_addr + 8);
        // Horizontal flip: reverse the bits so the leftmost pixel is always
        // shifted out of bit 7.
        let (plane0, plane1) = if (attr & 0x40) != 0 {
            (plane0.reverse_bits(), plane1.reverse_bits())
        } else {
            (plane0, plane1)
        };

        self.spr_shift_lo[slot] = plane0;
        self.spr_shift_hi[slot] = plane1;
        self.spr_x_counter[slot] = self.oam[entry + 3];
        self.spr_attr_latch[slot] = attr;
    }
}
/// Steps every active sprite slot by one pixel and reports the sprite pixel
/// for the current dot.
///
/// Returns `Some((color, behind_bg, is_sprite0))` for the lowest-numbered
/// slot that produces a non-transparent pixel, or `None` when no slot does.
/// `color` is the palette lookup result, `behind_bg` reflects attribute
/// bit 5, and `is_sprite0` is `true` when that slot holds OAM sprite 0.
///
/// All slots below `sprite_count` are advanced on every call - either their
/// x counter is decremented or their pattern bits are shifted - regardless
/// of which slot wins, so call this exactly once per visible pixel.
pub(super) fn sprite_pixel_from_shifters(&mut self) -> Option<(u8, bool, bool)> {
    let mut winner = None;
    for slot in 0..usize::from(self.sprite_count) {
        // Still counting down to the sprite's X position: no output yet.
        if let Some(remaining) = self.spr_x_counter[slot].checked_sub(1) {
            self.spr_x_counter[slot] = remaining;
            continue;
        }

        // Take the top bit of each plane, then advance both shifters.
        let plane0 = self.spr_shift_lo[slot];
        let plane1 = self.spr_shift_hi[slot];
        self.spr_shift_lo[slot] = plane0 << 1;
        self.spr_shift_hi[slot] = plane1 << 1;

        let pixel = (plane0 >> 7) | ((plane1 >> 7) << 1);
        // Transparent pixels never win, and once an earlier slot has won the
        // remaining slots are only advanced.
        if winner.is_some() || pixel == 0 {
            continue;
        }

        let attr = self.spr_attr_latch[slot];
        let palette_entry = (u16::from(attr & 0x03) << 2) | u16::from(pixel);
        let color = self.read_palette(0x10 | palette_entry);
        winner = Some((color, (attr & 0x20) != 0, self.sprite_indices[slot] == 0));
    }
    winner
}
pub fn note_scroll_register_write_legacy(&mut self, scanline: usize, dot: u32) { pub fn note_scroll_register_write_legacy(&mut self, scanline: usize, dot: u32) {
let mut target_scanline = scanline; let mut target_scanline = scanline;
let mut x_start = 0u8; let mut x_start = 0u8;

View File

@@ -42,6 +42,15 @@ pub struct Ppu {
pub(super) sprite_count: u8, pub(super) sprite_count: u8,
pub(super) next_sprite_indices: [u8; 8], pub(super) next_sprite_indices: [u8; 8],
pub(super) next_sprite_count: u8, pub(super) next_sprite_count: u8,
// Per-slot sprite shift registers loaded at dot 1 of each visible scanline.
// spr_shift_lo/hi hold the 8-bit pattern row (h-flip already applied).
// spr_x_counter counts down the remaining pixels before a slot becomes
// active; when it reaches 0 the slot starts shifting out pixel bits.
// spr_attr_latch stores the OAM attribute byte (priority, palette, flips).
pub(super) spr_shift_lo: [u8; 8],
pub(super) spr_shift_hi: [u8; 8],
pub(super) spr_x_counter: [u8; 8],
pub(super) spr_attr_latch: [u8; 8],
} }
impl Default for Ppu { impl Default for Ppu {

View File

@@ -7,6 +7,13 @@ pub struct AudioMixer {
samples_per_cpu_cycle: f64, samples_per_cpu_cycle: f64,
sample_accumulator: f64, sample_accumulator: f64,
last_output_sample: f32, last_output_sample: f32,
// Previous output sample (two batches ago) used as the p0 control point
// for Catmull-Rom-style Hermite interpolation. Storing p0 allows the tangent
// at the start of each interpolation interval to be computed as
// m1 = (p2 - p0) / 2
// which gives a smoother transition across batch boundaries than the kink
// introduced by linear ramps. The previous interval ends with the tangent
// (p1 - p0) / 2, so the first derivative is only approximately continuous.
prev_sample: f32,
// One-pole IIR low-pass filter state (approximates NES ~14 kHz RC filter). // One-pole IIR low-pass filter state (approximates NES ~14 kHz RC filter).
// Coefficient: a = exp(-2π * fc / fs). At fc=14000, fs=48000: a ≈ 0.160 // Coefficient: a = exp(-2π * fc / fs). At fc=14000, fs=48000: a ≈ 0.160
lp_coeff: f32, lp_coeff: f32,
@@ -31,6 +38,7 @@ impl AudioMixer {
samples_per_cpu_cycle: sample_rate as f64 / cpu_hz, samples_per_cpu_cycle: sample_rate as f64 / cpu_hz,
sample_accumulator: 0.0, sample_accumulator: 0.0,
last_output_sample: 0.0, last_output_sample: 0.0,
prev_sample: 0.0,
lp_coeff, lp_coeff,
lp_state: 0.0, lp_state: 0.0,
hp_coeff, hp_coeff,
@@ -46,6 +54,7 @@ impl AudioMixer {
pub fn reset(&mut self) { pub fn reset(&mut self) {
self.sample_accumulator = 0.0; self.sample_accumulator = 0.0;
self.last_output_sample = 0.0; self.last_output_sample = 0.0;
self.prev_sample = 0.0;
self.lp_state = 0.0; self.lp_state = 0.0;
self.hp_prev_x = 0.0; self.hp_prev_x = 0.0;
self.hp_prev_y = 0.0; self.hp_prev_y = 0.0;
@@ -56,31 +65,67 @@ impl AudioMixer {
let samples = self.sample_accumulator.floor() as usize; let samples = self.sample_accumulator.floor() as usize;
self.sample_accumulator -= samples as f64; self.sample_accumulator -= samples as f64;
let pulse_out = 0.00752 * (f32::from(channels.pulse1) + f32::from(channels.pulse2)); // NES non-linear APU mixing (Blargg's reference formulas).
let tnd_out = 0.00851 * f32::from(channels.triangle) // Pulse channels use a shared lookup:
+ 0.00494 * f32::from(channels.noise) // pulse_out = 95.88 / (8128 / (p1 + p2) + 100)
+ 0.00335 * f32::from(channels.dmc); // TND channels use a separate lookup:
let sample = pulse_out + tnd_out; // tnd_out = 159.79 / (1 / (tri/8227 + noise/12241 + dmc/22638) + 100)
// Both formulas produce 0.0 when all contributing channels are silent.
let p_sum = f32::from(channels.pulse1) + f32::from(channels.pulse2);
let pulse_out = if p_sum == 0.0 {
0.0
} else {
95.88 / (8128.0 / p_sum + 100.0)
};
let tnd_sum = f32::from(channels.triangle) / 8227.0
+ f32::from(channels.noise) / 12241.0
+ f32::from(channels.dmc) / 22638.0;
let tnd_out = if tnd_sum == 0.0 {
0.0
} else {
159.79 / (1.0 / tnd_sum + 100.0)
};
let sample = pulse_out + tnd_out + channels.expansion;
if samples == 0 { if samples == 0 {
return; return;
} }
let start = self.last_output_sample; // Catmull-Rom Hermite interpolation between the previous batch sample
// (p1 = last_output_sample) and the current batch sample (p2 = sample).
//
// The tangent at p1 uses the two-point central difference:
// m1 = (p2 - p0) / 2, where p0 = prev_sample (two batches ago).
// The tangent at p2 uses the forward difference (p3 approximated as p2,
// i.e. the signal stays flat beyond the current batch):
// m2 = (p2 - p1) / 2.
//
// Hermite basis:
// h00(t) = 2t³ - 3t² + 1
// h10(t) = t³ - 2t² + t
// h01(t) = -2t³ + 3t²
// h11(t) = t³ - t²
// f(t) = h00·p1 + h10·m1 + h01·p2 + h11·m2
//
// For t = 1 this collapses to p2, so the last output of each batch
// always lands exactly on the current APU sample value.
let p0 = self.prev_sample;
let p1 = self.last_output_sample;
let p2 = sample;
let m1 = (p2 - p0) * 0.5;
let m2 = (p2 - p1) * 0.5;
let denom = samples as f32;
let a = self.lp_coeff; let a = self.lp_coeff;
let b = 1.0 - a; let b = 1.0 - a;
if samples == 1 {
let lp = a * self.lp_state + b * sample;
self.lp_state = lp;
let hp = self.hp_coeff * self.hp_prev_y + lp - self.hp_prev_x;
self.hp_prev_x = lp;
self.hp_prev_y = hp;
out.push(hp);
} else {
let denom = samples as f32;
for idx in 0..samples { for idx in 0..samples {
let t = (idx + 1) as f32 / denom; let t = (idx + 1) as f32 / denom;
let interp = start + (sample - start) * t; let t2 = t * t;
let t3 = t2 * t;
let interp = (2.0 * t3 - 3.0 * t2 + 1.0) * p1
+ (t3 - 2.0 * t2 + t) * m1
+ (-2.0 * t3 + 3.0 * t2) * p2
+ (t3 - t2) * m2;
let lp = a * self.lp_state + b * interp; let lp = a * self.lp_state + b * interp;
self.lp_state = lp; self.lp_state = lp;
let hp = self.hp_coeff * self.hp_prev_y + lp - self.hp_prev_x; let hp = self.hp_coeff * self.hp_prev_y + lp - self.hp_prev_x;
@@ -88,7 +133,7 @@ impl AudioMixer {
self.hp_prev_y = hp; self.hp_prev_y = hp;
out.push(hp); out.push(hp);
} }
} self.prev_sample = p1;
self.last_output_sample = sample; self.last_output_sample = sample;
} }
} }
@@ -118,6 +163,7 @@ mod tests {
triangle: 15, triangle: 15,
noise: 15, noise: 15,
dmc: 127, dmc: 127,
expansion: 0.0,
}; };
let mut out = Vec::new(); let mut out = Vec::new();
mixer.push_cycles(50, channels, &mut out); mixer.push_cycles(50, channels, &mut out);
@@ -143,6 +189,7 @@ mod tests {
triangle: 15, triangle: 15,
noise: 15, noise: 15,
dmc: 127, dmc: 127,
expansion: 0.0,
}, },
&mut out, &mut out,
); );