diff --git a/src/native_core/bus/joypad.rs b/src/native_core/bus/joypad.rs
index cdd1dc7..05774f0 100644
--- a/src/native_core/bus/joypad.rs
+++ b/src/native_core/bus/joypad.rs
@@ -67,8 +67,11 @@ impl NativeBus {
     }
 
     fn format_controller_read(&self, bit: u8) -> u8 {
-        // Controller reads expose serial data in bit0, keep bit6 high, and
-        // preserve open-bus upper bits.
+        // Serial controller data arrives on bit 0. Bits 1-4 are driven input
+        // lines that read 0 with only a standard pad attached, so they are
+        // cleared here instead of being copied from the bus. Bits 5-7 are
+        // open bus and keep the last CPU data-bus value; bit 6 is forced
+        // high, matching the $40 address byte normally left on the bus.
         (self.cpu_open_bus & 0xE0) | 0x40 | (bit & 1)
     }
 }
diff --git a/src/native_core/ppu/api.rs b/src/native_core/ppu/api.rs
index 6943e41..814d364 100644
--- a/src/native_core/ppu/api.rs
+++ b/src/native_core/ppu/api.rs
@@ -44,6 +44,10 @@ impl Ppu {
             sprite_count: 0,
             next_sprite_indices: [0; 8],
             next_sprite_count: 0,
+            spr_shift_lo: [0; 8],
+            spr_shift_hi: [0; 8],
+            spr_x_counter: [0; 8],
+            spr_attr_latch: [0; 8],
         }
     }
 
@@ -90,17 +94,33 @@ impl Ppu {
             (self.read_palette(0), false)
         };
 
-        if !self.sprite0_hit_set() && self.sprite0_hit_at(mapper, y, dot) && bg_opaque {
-            self.set_sprite0_hit(true);
-        }
+        // Advance sprite shift registers for every visible pixel
+        // (unconditional so x-counters stay in sync even when the sprite
+        // layer is clipped on the left).
+ let spr_result = if self.sprites_enabled() { + self.sprite_pixel_from_shifters() + } else { + None + }; let mut final_color = bg_color_index & 0x3F; let sprite_layer_enabled = show_spr && (x >= 8 || show_spr_left); - if sprite_layer_enabled - && let Some((spr_color_index, behind_bg)) = self.sprite_pixel(mapper, x, y) - && !(behind_bg && bg_opaque) - { - final_color = spr_color_index & 0x3F; + if sprite_layer_enabled { + if let Some((spr_color_index, behind_bg, is_sprite0)) = spr_result { + // Sprite-0 hit: set when a non-transparent sprite-0 pixel + // overlaps a non-transparent background pixel. Suppressed + // in the left 8 pixels when either clip bit is clear. + if is_sprite0 + && bg_opaque + && !self.sprite0_hit_set() + && (x >= 8 || (show_bg_left && show_spr_left)) + { + self.set_sprite0_hit(true); + } + if !(behind_bg && bg_opaque) { + final_color = spr_color_index & 0x3F; + } + } } let (r, g, b) = apply_color_emphasis(nes_rgb(final_color), self.mask); @@ -160,11 +180,13 @@ impl Ppu { } if rendering_active { - // Transfer pre-evaluated sprite list at the start of each visible scanline, - // so dots 1-256 render with the correct sprites for *this* scanline. + // Transfer pre-evaluated sprite list at the start of each visible + // scanline, then immediately load the per-slot shift registers so + // that dots 1-256 render with the correct sprites for *this* line. if scanline < 240 && dot == 1 && self.sprites_enabled() { self.sprite_count = self.next_sprite_count; self.sprite_indices = self.next_sprite_indices; + self.load_sprite_shifters(mapper, scanline); } if dot == 256 { @@ -284,6 +306,93 @@ impl Ppu { (count, indices, overflow) } + /// Fetch pattern bytes for the sprites evaluated for `scanline` and load + /// them into the per-slot shift registers. Called once at dot 1 of each + /// visible scanline so that `sprite_pixel_from_shifters` can provide + /// cycle-accurate, shift-register-based sprite rendering for dots 1-256. 
+    pub(super) fn load_sprite_shifters(&mut self, mapper: &dyn Mapper, scanline: u32) {
+        let tall_sprites = (self.ctrl & 0x20) != 0;
+        let height: i16 = if tall_sprites { 16 } else { 8 };
+        let active_slots = self.sprite_count as usize;
+        for slot in 0..8usize {
+            // Park the slot first: blank pattern, cleared attributes and a
+            // saturated X counter. Only an in-range sprite overwrites this.
+            self.spr_shift_lo[slot] = 0;
+            self.spr_shift_hi[slot] = 0;
+            self.spr_x_counter[slot] = 0xFF;
+            self.spr_attr_latch[slot] = 0;
+            if slot >= active_slots {
+                continue;
+            }
+            let base = self.sprite_indices[slot] as usize * 4;
+            let attr = self.oam[base + 2];
+            let tile = self.oam[base + 1];
+            // OAM stores Y as one less than the sprite's first scanline.
+            let offset = scanline as i16 - (self.oam[base] as i16 + 1);
+            if !(0..height).contains(&offset) {
+                continue;
+            }
+            // Vertical flip mirrors the row across the whole sprite height.
+            let row = if (attr & 0x80) != 0 { height - 1 - offset } else { offset };
+            let lo_addr = if tall_sprites {
+                // 8x16: tile bit 0 selects the pattern table and rows 8-15
+                // come from the tile after the even-numbered one.
+                let table = ((tile & 1) as u16) << 12;
+                let tile_num = (tile & 0xFE).wrapping_add((row / 8) as u8) as u16;
+                table + tile_num * 16 + (row & 7) as u16
+            } else {
+                // 8x8: ctrl bit 3 selects the pattern table.
+                let table: u16 = if (self.ctrl & 0x08) != 0 { 0x1000 } else { 0 };
+                table + (tile as u16) * 16 + row as u16
+            };
+            // The high bit plane is stored 8 bytes after the low one.
+            let plane_lo = mapper.ppu_read(lo_addr);
+            let plane_hi = mapper.ppu_read(lo_addr + 8);
+            // Horizontal flip: reverse the bits so bit 7 is always the
+            // leftmost pixel when shifting out.
+            let flip_h = (attr & 0x40) != 0;
+            self.spr_shift_lo[slot] = if flip_h { plane_lo.reverse_bits() } else { plane_lo };
+            self.spr_shift_hi[slot] = if flip_h { plane_hi.reverse_bits() } else { plane_hi };
+            // OAM byte 3 is the X position: the delay before the slot goes live.
+            self.spr_x_counter[slot] = self.oam[base + 3];
+            self.spr_attr_latch[slot] = attr;
+        }
+    }
+
+    /// Advance all active sprite shift registers by one pixel and return the
+    /// colour and priority of the first non-transparent sprite pixel found.
+    /// The third element of the tuple is `true` when the winning sprite is
+    /// OAM sprite 0 (used for sprite-0 hit detection).
+    ///
+    /// Every slot below `sprite_count` is stepped on each call, whether or not
+    /// it supplies the pixel, so call this exactly once per visible dot (1-256).
+    pub(super) fn sprite_pixel_from_shifters(&mut self) -> Option<(u8, bool, bool)> {
+        let mut winner: Option<(u8, bool, bool)> = None;
+        for slot in 0..self.sprite_count as usize {
+            // Still left of the sprite: spend one pixel of delay and move on.
+            if self.spr_x_counter[slot] != 0 {
+                self.spr_x_counter[slot] -= 1;
+                continue;
+            }
+            // Take the leftmost pixel (bit 7 of each plane), then shift both planes.
+            let (lo, hi) = (self.spr_shift_lo[slot], self.spr_shift_hi[slot]);
+            self.spr_shift_lo[slot] = lo << 1;
+            self.spr_shift_hi[slot] = hi << 1;
+            let pix = (lo >> 7) | ((hi >> 7) << 1);
+            // The lowest-numbered opaque slot wins; later slots only advance.
+            if pix == 0 || winner.is_some() {
+                continue;
+            }
+            let attr = self.spr_attr_latch[slot];
+            // Attribute bits 0-1 choose the palette; 0x10 is OR-ed into the lookup index.
+            let pal_idx = (u16::from(attr & 0x03) << 2) | u16::from(pix);
+            let color = self.read_palette(0x10 | pal_idx);
+            // Tuple: (colour, behind-background flag, came from OAM sprite 0).
+            winner = Some((color, (attr & 0x20) != 0, self.sprite_indices[slot] == 0));
+        }
+        winner
+    }
+
     pub fn note_scroll_register_write_legacy(&mut self, scanline: usize, dot: u32) {
         let mut target_scanline = scanline;
         let mut x_start = 0u8;
diff --git a/src/native_core/ppu/types.rs b/src/native_core/ppu/types.rs
index b7147d0..f025b36 100644
--- a/src/native_core/ppu/types.rs
+++ b/src/native_core/ppu/types.rs
@@ -42,6 +42,15 @@ pub struct Ppu {
     pub(super) sprite_count: u8,
     pub(super) next_sprite_indices: [u8; 8],
     pub(super) next_sprite_count: u8,
+    // Per-slot sprite shift registers loaded at dot 1 of each visible scanline.
+    // spr_shift_lo/hi hold the 8-bit pattern row (h-flip already applied).
+    // spr_x_counter counts down the remaining pixels before a slot becomes
+    // active; when it reaches 0 the slot starts shifting out pixel bits.
+    // spr_attr_latch stores the OAM attribute byte (priority, palette, flips).
+ pub(super) spr_shift_lo: [u8; 8], + pub(super) spr_shift_hi: [u8; 8], + pub(super) spr_x_counter: [u8; 8], + pub(super) spr_attr_latch: [u8; 8], } impl Default for Ppu { diff --git a/src/runtime/audio.rs b/src/runtime/audio.rs index 855dd0c..ec6cf1e 100644 --- a/src/runtime/audio.rs +++ b/src/runtime/audio.rs @@ -7,6 +7,13 @@ pub struct AudioMixer { samples_per_cpu_cycle: f64, sample_accumulator: f64, last_output_sample: f32, + // Previous output sample (two batches ago) used as the p0 control point + // for Catmull-Rom Hermite interpolation. Storing p0 allows the tangent at + // the start of each interpolation interval to be computed as + // m1 = (p2 - p0) / 2 + // which produces a smooth, continuous first derivative across batch + // boundaries rather than the kink introduced by linear ramps. + prev_sample: f32, // One-pole IIR low-pass filter state (approximates NES ~14 kHz RC filter). // Coefficient: a = exp(-2π * fc / fs). At fc=14000, fs=48000: a ≈ 0.160 lp_coeff: f32, @@ -31,6 +38,7 @@ impl AudioMixer { samples_per_cpu_cycle: sample_rate as f64 / cpu_hz, sample_accumulator: 0.0, last_output_sample: 0.0, + prev_sample: 0.0, lp_coeff, lp_state: 0.0, hp_coeff, @@ -46,6 +54,7 @@ impl AudioMixer { pub fn reset(&mut self) { self.sample_accumulator = 0.0; self.last_output_sample = 0.0; + self.prev_sample = 0.0; self.lp_state = 0.0; self.hp_prev_x = 0.0; self.hp_prev_y = 0.0; @@ -82,29 +91,49 @@ impl AudioMixer { return; } - let start = self.last_output_sample; + // Catmull-Rom Hermite interpolation between the previous batch sample + // (p1 = last_output_sample) and the current batch sample (p2 = sample). + // + // The tangent at p1 uses the two-point central difference: + // m1 = (p2 - p0) / 2, where p0 = prev_sample (two batches ago). + // The tangent at p2 uses the forward difference (p3 approximated as p2, + // i.e. the signal stays flat beyond the current batch): + // m2 = (p2 - p1) / 2. 
+ // + // Hermite basis: + // h00(t) = 2t³ - 3t² + 1 + // h10(t) = t³ - 2t² + t + // h01(t) = -2t³ + 3t² + // h11(t) = t³ - t² + // f(t) = h00·p1 + h10·m1 + h01·p2 + h11·m2 + // + // For t = 1 this collapses to p2, so the last output of each batch + // always lands exactly on the current APU sample value. + let p0 = self.prev_sample; + let p1 = self.last_output_sample; + let p2 = sample; + let m1 = (p2 - p0) * 0.5; + let m2 = (p2 - p1) * 0.5; + + let denom = samples as f32; let a = self.lp_coeff; let b = 1.0 - a; - if samples == 1 { - let lp = a * self.lp_state + b * sample; + for idx in 0..samples { + let t = (idx + 1) as f32 / denom; + let t2 = t * t; + let t3 = t2 * t; + let interp = (2.0 * t3 - 3.0 * t2 + 1.0) * p1 + + (t3 - 2.0 * t2 + t) * m1 + + (-2.0 * t3 + 3.0 * t2) * p2 + + (t3 - t2) * m2; + let lp = a * self.lp_state + b * interp; self.lp_state = lp; let hp = self.hp_coeff * self.hp_prev_y + lp - self.hp_prev_x; self.hp_prev_x = lp; self.hp_prev_y = hp; out.push(hp); - } else { - let denom = samples as f32; - for idx in 0..samples { - let t = (idx + 1) as f32 / denom; - let interp = start + (sample - start) * t; - let lp = a * self.lp_state + b * interp; - self.lp_state = lp; - let hp = self.hp_coeff * self.hp_prev_y + lp - self.hp_prev_x; - self.hp_prev_x = lp; - self.hp_prev_y = hp; - out.push(hp); - } } + self.prev_sample = p1; self.last_output_sample = sample; } }