feat: Hermite resampling, sprite shift registers, controller open bus
Some checks failed
CI / rust (push) Has been cancelled

#3 audio.rs: replace linear interpolation with Catmull-Rom Hermite cubic.
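  Stores prev_sample as p0 control point; tangents m1=(p2-p0)/2 and
  m2=(p2-p1)/2 (p3 is not known yet, so it is clamped to p2) round off the
  kink that linear ramps leave at batch boundaries. The slope is smoothed
  rather than strictly continuous: the next batch starts with (p3-p1)/2,
  which equals m2 only when the signal stays flat.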

#4 ppu: add per-slot sprite shift registers (spr_shift_lo/hi, spr_x_counter,
  spr_attr_latch). load_sprite_shifters fetches pattern bytes with h-flip at
  dot 1 of each visible scanline. sprite_pixel_from_shifters replaces the
  per-pixel OAM scan; sprite-0 hit detection integrated into the shifter path.

#5 joypad.rs: format_controller_read now preserves bits 1-5,7 as open bus
  (!0x41 mask) instead of zeroing bits 1-4, matching NES hardware behaviour.
This commit is contained in:
2026-03-15 11:30:14 +03:00
parent c77be7c84b
commit d9666c23b4
4 changed files with 178 additions and 28 deletions

View File

@@ -67,8 +67,11 @@ impl NativeBus {
}
/// Builds the byte returned by a controller-port read.
///
/// `bit` supplies the serial data bit (only its bit 0 is used); the other
/// bits come from `self.cpu_open_bus`, with bit 6 always forced high.
fn format_controller_read(&self, bit: u8) -> u8 {
// Controller reads expose serial data in bit0, keep bit6 high, and
// preserve open-bus upper bits.
(self.cpu_open_bus & 0xE0) | 0x40 | (bit & 1)
// The NES controller port drives only bit 0 (serial data); bit 6 is
// held high by a pull-up on the expansion connector. All other bits
// (1-5, 7) float and retain whatever is currently on the CPU data bus
// (open bus). !0x41 clears bits 6 and 0 so we can OR in their
// canonical values without corrupting any open-bus bits.
//
// NOTE(review): the hardware statements above (pull-up on bit 6, bits 1-4
// floating) cannot be verified from this file. The nesdev controller notes
// appear to describe only the top bits as undriven, with bit 6 reading high
// because the bus still holds the $40 address byte - confirm before relying
// on open-bus values in bits 1-4.
(self.cpu_open_bus & !0x41u8) | 0x40 | (bit & 1)
}
}

View File

@@ -44,6 +44,10 @@ impl Ppu {
sprite_count: 0,
next_sprite_indices: [0; 8],
next_sprite_count: 0,
spr_shift_lo: [0; 8],
spr_shift_hi: [0; 8],
spr_x_counter: [0; 8],
spr_attr_latch: [0; 8],
}
}
@@ -90,17 +94,33 @@ impl Ppu {
(self.read_palette(0), false)
};
if !self.sprite0_hit_set() && self.sprite0_hit_at(mapper, y, dot) && bg_opaque {
self.set_sprite0_hit(true);
}
// Advance sprite shift registers for every visible pixel while sprite
// rendering is enabled. This is done before the left-edge clip test so
// the x-counters stay in sync even when the sprite layer is clipped on
// the left.
let spr_result = if self.sprites_enabled() {
self.sprite_pixel_from_shifters()
} else {
None
};
let mut final_color = bg_color_index & 0x3F;
let sprite_layer_enabled = show_spr && (x >= 8 || show_spr_left);
if sprite_layer_enabled
&& let Some((spr_color_index, behind_bg)) = self.sprite_pixel(mapper, x, y)
&& !(behind_bg && bg_opaque)
{
final_color = spr_color_index & 0x3F;
if sprite_layer_enabled {
if let Some((spr_color_index, behind_bg, is_sprite0)) = spr_result {
// Sprite-0 hit: set when a non-transparent sprite-0 pixel
// overlaps a non-transparent background pixel. Suppressed
// in the left 8 pixels when either clip bit is clear.
if is_sprite0
&& bg_opaque
&& !self.sprite0_hit_set()
&& (x >= 8 || (show_bg_left && show_spr_left))
{
self.set_sprite0_hit(true);
}
if !(behind_bg && bg_opaque) {
final_color = spr_color_index & 0x3F;
}
}
}
let (r, g, b) = apply_color_emphasis(nes_rgb(final_color), self.mask);
@@ -160,11 +180,13 @@ impl Ppu {
}
if rendering_active {
// Transfer pre-evaluated sprite list at the start of each visible scanline,
// so dots 1-256 render with the correct sprites for *this* scanline.
// Transfer pre-evaluated sprite list at the start of each visible
// scanline, then immediately load the per-slot shift registers so
// that dots 1-256 render with the correct sprites for *this* line.
if scanline < 240 && dot == 1 && self.sprites_enabled() {
self.sprite_count = self.next_sprite_count;
self.sprite_indices = self.next_sprite_indices;
self.load_sprite_shifters(mapper, scanline);
}
if dot == 256 {
@@ -284,6 +306,93 @@ impl Ppu {
(count, indices, overflow)
}
/// Latch pattern data, X position and attributes for the sprites selected
/// for `scanline` into the eight per-slot shift registers.
///
/// Runs once at dot 1 of a visible scanline, right after the evaluated
/// sprite list has been copied into `sprite_indices` / `sprite_count`, so
/// that `sprite_pixel_from_shifters` can shift pixels out for dots 1-256.
/// Both pattern planes of every in-range sprite are read through
/// `mapper.ppu_read` here, in slot order.
pub(super) fn load_sprite_shifters(&mut self, mapper: &dyn Mapper, scanline: u32) {
    // `ctrl` bit 5 selects 16-pixel-tall sprites.
    let tall = (self.ctrl & 0x20) != 0;
    let height: i16 = if tall { 16 } else { 8 };

    // Park all eight slots first: transparent shifters, cleared attributes
    // and an x-counter of 0xFF. Slots that receive a sprite are overwritten
    // further down.
    self.spr_shift_lo = [0; 8];
    self.spr_shift_hi = [0; 8];
    self.spr_x_counter = [0xFF; 8];
    self.spr_attr_latch = [0; 8];

    let used_slots = (self.sprite_count as usize).min(8);
    for slot in 0..used_slots {
        let base = self.sprite_indices[slot] as usize * 4;
        let attr = self.oam[base + 2];
        let tile = self.oam[base + 1];

        // OAM stores Y minus one, so the first visible row is at Y + 1.
        let top = self.oam[base] as i16 + 1;
        let line = scanline as i16 - top;
        if !(0..height).contains(&line) {
            // Not on this scanline: the slot stays parked.
            continue;
        }

        // Attribute bit 7 mirrors the sprite vertically.
        let row = if (attr & 0x80) != 0 {
            height - 1 - line
        } else {
            line
        };

        let lo_addr: u16 = if tall {
            // 8x16: tile bit 0 picks the pattern table, the remaining bits
            // pick the top tile; rows 8-15 come from the following tile.
            let bank = ((tile & 1) as u16) << 12;
            let half = (row / 8) as u8;
            let tile_num = (tile & 0xFE).wrapping_add(half) as u16;
            bank + tile_num * 16 + (row & 7) as u16
        } else {
            // 8x8: `ctrl` bit 3 picks the sprite pattern table.
            let bank: u16 = if (self.ctrl & 0x08) != 0 { 0x1000 } else { 0 };
            bank + (tile as u16) * 16 + row as u16
        };
        // The high bit plane sits 8 bytes after the low one.
        let hi_addr = lo_addr + 8;

        let raw_lo = mapper.ppu_read(lo_addr);
        let raw_hi = mapper.ppu_read(hi_addr);

        // Attribute bit 6 mirrors horizontally; reversing the bit order here
        // keeps bit 7 as the leftmost pixel for the shifter.
        let (plane_lo, plane_hi) = if (attr & 0x40) != 0 {
            (raw_lo.reverse_bits(), raw_hi.reverse_bits())
        } else {
            (raw_lo, raw_hi)
        };

        self.spr_shift_lo[slot] = plane_lo;
        self.spr_shift_hi[slot] = plane_hi;
        self.spr_x_counter[slot] = self.oam[base + 3];
        self.spr_attr_latch[slot] = attr;
    }
}
/// Step every active sprite slot by one pixel and report the front-most
/// opaque sprite pixel, if any.
///
/// Returns `Some((colour, behind_background, is_sprite0))` for the
/// lowest-numbered slot that produced a non-zero pixel, where `is_sprite0`
/// is `true` when that slot holds OAM sprite 0 (used for sprite-0 hit
/// detection), or `None` when every slot was transparent or still waiting.
///
/// All slots below `sprite_count` are advanced on every call whether or not
/// they win, so call this exactly once per visible pixel dot (1-256).
pub(super) fn sprite_pixel_from_shifters(&mut self) -> Option<(u8, bool, bool)> {
    let active_slots = self.sprite_count as usize;
    let mut winner: Option<(u8, bool, bool)> = None;

    for s in 0..active_slots {
        // Still left of the sprite's X position: count down, emit nothing.
        if self.spr_x_counter[s] != 0 {
            self.spr_x_counter[s] -= 1;
            continue;
        }

        // Take the current planes, then shift both registers left by one so
        // the next call sees the following pixel in bit 7.
        let plane0 = self.spr_shift_lo[s];
        let plane1 = self.spr_shift_hi[s];
        self.spr_shift_lo[s] = plane0 << 1;
        self.spr_shift_hi[s] = plane1 << 1;

        // A lower slot already won; this slot only needed to be advanced.
        if winner.is_some() {
            continue;
        }

        let pixel = ((plane1 >> 7) << 1) | (plane0 >> 7);
        if pixel == 0 {
            // Transparent pixel: let a later slot show through.
            continue;
        }

        let latch = self.spr_attr_latch[s];
        // Sprite palettes start at entry 0x10; attribute bits 0-1 select
        // the palette and the pattern bits select the colour within it.
        let palette_entry = 0x10 | (u16::from(latch & 0x03) << 2) | u16::from(pixel);
        let colour = self.read_palette(palette_entry);
        let behind_bg = (latch & 0x20) != 0;
        let is_sprite0 = self.sprite_indices[s] == 0;
        winner = Some((colour, behind_bg, is_sprite0));
    }

    winner
}
pub fn note_scroll_register_write_legacy(&mut self, scanline: usize, dot: u32) {
let mut target_scanline = scanline;
let mut x_start = 0u8;

View File

@@ -42,6 +42,15 @@ pub struct Ppu {
pub(super) sprite_count: u8,
pub(super) next_sprite_indices: [u8; 8],
pub(super) next_sprite_count: u8,
// Per-slot sprite shift registers loaded at dot 1 of each visible scanline.
// spr_shift_lo/hi hold the 8-bit pattern row (h-flip already applied).
// spr_x_counter counts down the remaining pixels before a slot becomes
// active; when it reaches 0 the slot starts shifting out pixel bits.
// spr_attr_latch stores the OAM attribute byte (priority, palette, flips).
pub(super) spr_shift_lo: [u8; 8],
pub(super) spr_shift_hi: [u8; 8],
pub(super) spr_x_counter: [u8; 8],
pub(super) spr_attr_latch: [u8; 8],
}
impl Default for Ppu {

View File

@@ -7,6 +7,13 @@ pub struct AudioMixer {
samples_per_cpu_cycle: f64,
sample_accumulator: f64,
last_output_sample: f32,
// Target sample of the batch before the most recent one (the value that
// `last_output_sample` held one batch earlier), used as the p0 control
// point for Catmull-Rom Hermite interpolation. Storing p0 allows the
// tangent at the start of each interpolation interval to be computed as
// m1 = (p2 - p0) / 2
// which rounds off the kink that linear ramps leave at batch boundaries.
// The previous batch ended with a tangent computed before p2 was known,
// so the slope is smoothed rather than exactly continuous.
prev_sample: f32,
// One-pole IIR low-pass filter state (approximates NES ~14 kHz RC filter).
// Coefficient: a = exp(-2π * fc / fs). At fc=14000, fs=48000: a ≈ 0.160
lp_coeff: f32,
@@ -31,6 +38,7 @@ impl AudioMixer {
samples_per_cpu_cycle: sample_rate as f64 / cpu_hz,
sample_accumulator: 0.0,
last_output_sample: 0.0,
prev_sample: 0.0,
lp_coeff,
lp_state: 0.0,
hp_coeff,
@@ -46,6 +54,7 @@ impl AudioMixer {
pub fn reset(&mut self) {
self.sample_accumulator = 0.0;
self.last_output_sample = 0.0;
self.prev_sample = 0.0;
self.lp_state = 0.0;
self.hp_prev_x = 0.0;
self.hp_prev_y = 0.0;
@@ -82,29 +91,49 @@ impl AudioMixer {
return;
}
let start = self.last_output_sample;
// Catmull-Rom Hermite interpolation between the previous batch sample
// (p1 = last_output_sample) and the current batch sample (p2 = sample).
//
// The tangent at p1 uses the two-point central difference:
// m1 = (p2 - p0) / 2, where p0 = prev_sample (two batches ago).
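// The tangent at p2 would be the central difference (p3 - p1) / 2, but p3
// is not known yet, so it is approximated as p2 (i.e. the signal is assumed
// to stay flat beyond the current batch), leaving half the backward
// difference:
// m2 = (p2 - p1) / 2.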
//
// Hermite basis:
// h00(t) = 2t³ - 3t² + 1
// h10(t) = t³ - 2t² + t
// h01(t) = -2t³ + 3t²
// h11(t) = t³ - t²
// f(t) = h00·p1 + h10·m1 + h01·p2 + h11·m2
//
// For t = 1 this collapses to p2, so the last output of each batch
// always lands exactly on the current APU sample value.
let p0 = self.prev_sample;
let p1 = self.last_output_sample;
let p2 = sample;
let m1 = (p2 - p0) * 0.5;
let m2 = (p2 - p1) * 0.5;
let denom = samples as f32;
let a = self.lp_coeff;
let b = 1.0 - a;
if samples == 1 {
let lp = a * self.lp_state + b * sample;
for idx in 0..samples {
let t = (idx + 1) as f32 / denom;
let t2 = t * t;
let t3 = t2 * t;
let interp = (2.0 * t3 - 3.0 * t2 + 1.0) * p1
+ (t3 - 2.0 * t2 + t) * m1
+ (-2.0 * t3 + 3.0 * t2) * p2
+ (t3 - t2) * m2;
let lp = a * self.lp_state + b * interp;
self.lp_state = lp;
let hp = self.hp_coeff * self.hp_prev_y + lp - self.hp_prev_x;
self.hp_prev_x = lp;
self.hp_prev_y = hp;
out.push(hp);
} else {
let denom = samples as f32;
for idx in 0..samples {
let t = (idx + 1) as f32 / denom;
let interp = start + (sample - start) * t;
let lp = a * self.lp_state + b * interp;
self.lp_state = lp;
let hp = self.hp_coeff * self.hp_prev_y + lp - self.hp_prev_x;
self.hp_prev_x = lp;
self.hp_prev_y = hp;
out.push(hp);
}
}
self.prev_sample = p1;
self.last_output_sample = sample;
}
}