feat: Hermite resampling, sprite shift registers, controller open bus
Some checks failed
CI / rust (push) Has been cancelled
Some checks failed
CI / rust (push) Has been cancelled
#3 audio.rs: replace linear interpolation with Catmull-Rom-style Hermite cubic. Stores prev_sample as the p0 control point; tangents m1=(p2-p0)/2 and m2=(p2-p1)/2 smooth the transition across batch boundaries compared with linear ramps (m2 assumes the signal stays flat after the current sample, so the first derivative matches exactly at a boundary only when the next sample equals the current one). #4 ppu: add per-slot sprite shift registers (spr_shift_lo/hi, spr_x_counter, spr_attr_latch). load_sprite_shifters runs at dot 1 of each visible scanline and fetches the pattern bytes, applying h-flip as they are loaded. sprite_pixel_from_shifters replaces the per-pixel OAM scan; sprite-0 hit detection is integrated into the shifter path. #5 joypad.rs: format_controller_read now preserves bits 1-5,7 as open bus (!0x41 mask) instead of zeroing bits 1-4, matching NES hardware behaviour.
This commit is contained in:
@@ -67,8 +67,11 @@ impl NativeBus {
|
||||
}
|
||||
|
||||
fn format_controller_read(&self, bit: u8) -> u8 {
|
||||
// Controller reads expose serial data in bit0, keep bit6 high, and
|
||||
// preserve open-bus upper bits.
|
||||
(self.cpu_open_bus & 0xE0) | 0x40 | (bit & 1)
|
||||
// The NES controller port drives only bit 0 (serial data); bit 6 is
|
||||
// held high by a pull-up on the expansion connector. All other bits
|
||||
// (1-5, 7) float and retain whatever is currently on the CPU data bus
|
||||
// (open bus). !0x41 clears bits 6 and 0 so we can OR in their
|
||||
// canonical values without corrupting any open-bus bits.
|
||||
(self.cpu_open_bus & !0x41u8) | 0x40 | (bit & 1)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -44,6 +44,10 @@ impl Ppu {
|
||||
sprite_count: 0,
|
||||
next_sprite_indices: [0; 8],
|
||||
next_sprite_count: 0,
|
||||
spr_shift_lo: [0; 8],
|
||||
spr_shift_hi: [0; 8],
|
||||
spr_x_counter: [0; 8],
|
||||
spr_attr_latch: [0; 8],
|
||||
}
|
||||
}
|
||||
|
||||
@@ -90,17 +94,33 @@ impl Ppu {
|
||||
(self.read_palette(0), false)
|
||||
};
|
||||
|
||||
if !self.sprite0_hit_set() && self.sprite0_hit_at(mapper, y, dot) && bg_opaque {
|
||||
self.set_sprite0_hit(true);
|
||||
}
|
||||
// Advance sprite shift registers for every visible pixel
|
||||
// (unconditional so x-counters stay in sync even when the sprite
|
||||
// layer is clipped on the left).
|
||||
let spr_result = if self.sprites_enabled() {
|
||||
self.sprite_pixel_from_shifters()
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let mut final_color = bg_color_index & 0x3F;
|
||||
let sprite_layer_enabled = show_spr && (x >= 8 || show_spr_left);
|
||||
if sprite_layer_enabled
|
||||
&& let Some((spr_color_index, behind_bg)) = self.sprite_pixel(mapper, x, y)
|
||||
&& !(behind_bg && bg_opaque)
|
||||
{
|
||||
final_color = spr_color_index & 0x3F;
|
||||
if sprite_layer_enabled {
|
||||
if let Some((spr_color_index, behind_bg, is_sprite0)) = spr_result {
|
||||
// Sprite-0 hit: set when a non-transparent sprite-0 pixel
|
||||
// overlaps a non-transparent background pixel. Suppressed
|
||||
// in the left 8 pixels when either clip bit is clear.
|
||||
if is_sprite0
|
||||
&& bg_opaque
|
||||
&& !self.sprite0_hit_set()
|
||||
&& (x >= 8 || (show_bg_left && show_spr_left))
|
||||
{
|
||||
self.set_sprite0_hit(true);
|
||||
}
|
||||
if !(behind_bg && bg_opaque) {
|
||||
final_color = spr_color_index & 0x3F;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let (r, g, b) = apply_color_emphasis(nes_rgb(final_color), self.mask);
|
||||
@@ -160,11 +180,13 @@ impl Ppu {
|
||||
}
|
||||
|
||||
if rendering_active {
|
||||
// Transfer pre-evaluated sprite list at the start of each visible scanline,
|
||||
// so dots 1-256 render with the correct sprites for *this* scanline.
|
||||
// Transfer pre-evaluated sprite list at the start of each visible
|
||||
// scanline, then immediately load the per-slot shift registers so
|
||||
// that dots 1-256 render with the correct sprites for *this* line.
|
||||
if scanline < 240 && dot == 1 && self.sprites_enabled() {
|
||||
self.sprite_count = self.next_sprite_count;
|
||||
self.sprite_indices = self.next_sprite_indices;
|
||||
self.load_sprite_shifters(mapper, scanline);
|
||||
}
|
||||
|
||||
if dot == 256 {
|
||||
@@ -284,6 +306,93 @@ impl Ppu {
|
||||
(count, indices, overflow)
|
||||
}
|
||||
|
||||
/// Fetch pattern bytes for the sprites evaluated for `scanline` and load
|
||||
/// them into the per-slot shift registers. Called once at dot 1 of each
|
||||
/// visible scanline so that `sprite_pixel_from_shifters` can provide
|
||||
/// cycle-accurate, shift-register-based sprite rendering for dots 1-256.
|
||||
pub(super) fn load_sprite_shifters(&mut self, mapper: &dyn Mapper, scanline: u32) {
|
||||
let sprite_height = if (self.ctrl & 0x20) != 0 { 16i16 } else { 8i16 };
|
||||
for slot in 0..8usize {
|
||||
// Default: inactive slot — shifters transparent, counter parked.
|
||||
self.spr_shift_lo[slot] = 0;
|
||||
self.spr_shift_hi[slot] = 0;
|
||||
self.spr_x_counter[slot] = 0xFF;
|
||||
self.spr_attr_latch[slot] = 0;
|
||||
if slot >= self.sprite_count as usize {
|
||||
continue;
|
||||
}
|
||||
let i = self.sprite_indices[slot] as usize;
|
||||
let oam_idx = i * 4;
|
||||
let attr = self.oam[oam_idx + 2];
|
||||
let tile = self.oam[oam_idx + 1];
|
||||
let sprite_y = self.oam[oam_idx] as i16 + 1;
|
||||
let mut row = scanline as i16 - sprite_y;
|
||||
if row < 0 || row >= sprite_height {
|
||||
continue;
|
||||
}
|
||||
if (attr & 0x80) != 0 {
|
||||
row = sprite_height - 1 - row; // vertical flip
|
||||
}
|
||||
let (lo_addr, hi_addr) = if sprite_height == 16 {
|
||||
let table = ((tile & 1) as u16) << 12;
|
||||
let tile_num = (tile & 0xFE).wrapping_add((row / 8) as u8) as u16;
|
||||
let row_in_tile = (row & 7) as u16;
|
||||
let lo = table + tile_num * 16 + row_in_tile;
|
||||
(lo, lo + 8)
|
||||
} else {
|
||||
let table = if (self.ctrl & 0x08) != 0 { 0x1000u16 } else { 0u16 };
|
||||
let lo = table + (tile as u16) * 16 + row as u16;
|
||||
(lo, lo + 8)
|
||||
};
|
||||
let mut lo = mapper.ppu_read(lo_addr);
|
||||
let mut hi = mapper.ppu_read(hi_addr);
|
||||
if (attr & 0x40) != 0 {
|
||||
// Horizontal flip: reverse bit order so MSB is always the
|
||||
// leftmost pixel when we shift out from bit 7.
|
||||
lo = lo.reverse_bits();
|
||||
hi = hi.reverse_bits();
|
||||
}
|
||||
self.spr_shift_lo[slot] = lo;
|
||||
self.spr_shift_hi[slot] = hi;
|
||||
self.spr_x_counter[slot] = self.oam[oam_idx + 3];
|
||||
self.spr_attr_latch[slot] = attr;
|
||||
}
|
||||
}
|
||||
|
||||
/// Advance all active sprite shift registers by one pixel and return the
|
||||
/// colour and priority of the first non-transparent sprite pixel found.
|
||||
/// The third element of the tuple is `true` when the winning sprite is
|
||||
/// OAM sprite 0 (used for sprite-0 hit detection).
|
||||
///
|
||||
/// Every active slot is always updated regardless of which slot wins, so
|
||||
/// this must be called exactly once per visible pixel dot (1-256).
|
||||
pub(super) fn sprite_pixel_from_shifters(&mut self) -> Option<(u8, bool, bool)> {
|
||||
let mut result: Option<(u8, bool, bool)> = None;
|
||||
for slot in 0..self.sprite_count as usize {
|
||||
if self.spr_x_counter[slot] > 0 {
|
||||
self.spr_x_counter[slot] -= 1;
|
||||
continue;
|
||||
}
|
||||
// Extract the MSB from each pattern plane then advance the shifter.
|
||||
let lo_bit = (self.spr_shift_lo[slot] >> 7) & 1;
|
||||
let hi_bit = (self.spr_shift_hi[slot] >> 7) & 1;
|
||||
self.spr_shift_lo[slot] <<= 1;
|
||||
self.spr_shift_hi[slot] <<= 1;
|
||||
if result.is_none() {
|
||||
let pix = lo_bit | (hi_bit << 1);
|
||||
if pix != 0 {
|
||||
let attr = self.spr_attr_latch[slot];
|
||||
let pal_idx = (((attr & 0x03) as u16) << 2) | pix as u16;
|
||||
let color = self.read_palette(0x10 | pal_idx);
|
||||
let behind_bg = (attr & 0x20) != 0;
|
||||
let is_sprite0 = self.sprite_indices[slot] == 0;
|
||||
result = Some((color, behind_bg, is_sprite0));
|
||||
}
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
pub fn note_scroll_register_write_legacy(&mut self, scanline: usize, dot: u32) {
|
||||
let mut target_scanline = scanline;
|
||||
let mut x_start = 0u8;
|
||||
|
||||
@@ -42,6 +42,15 @@ pub struct Ppu {
|
||||
pub(super) sprite_count: u8,
|
||||
pub(super) next_sprite_indices: [u8; 8],
|
||||
pub(super) next_sprite_count: u8,
|
||||
// Per-slot sprite shift registers loaded at dot 1 of each visible scanline.
|
||||
// spr_shift_lo/hi hold the 8-bit pattern row (h-flip already applied).
|
||||
// spr_x_counter counts down the remaining pixels before a slot becomes
|
||||
// active; when it reaches 0 the slot starts shifting out pixel bits.
|
||||
// spr_attr_latch stores the OAM attribute byte (priority, palette, flips).
|
||||
pub(super) spr_shift_lo: [u8; 8],
|
||||
pub(super) spr_shift_hi: [u8; 8],
|
||||
pub(super) spr_x_counter: [u8; 8],
|
||||
pub(super) spr_attr_latch: [u8; 8],
|
||||
}
|
||||
|
||||
impl Default for Ppu {
|
||||
|
||||
@@ -7,6 +7,13 @@ pub struct AudioMixer {
|
||||
samples_per_cpu_cycle: f64,
|
||||
sample_accumulator: f64,
|
||||
last_output_sample: f32,
|
||||
// Previous output sample (two batches ago) used as the p0 control point
|
||||
// for Catmull-Rom Hermite interpolation. Storing p0 allows the tangent at
|
||||
// the start of each interpolation interval to be computed as
|
||||
// m1 = (p2 - p0) / 2
|
||||
// which produces a smooth, continuous first derivative across batch
|
||||
// boundaries rather than the kink introduced by linear ramps.
|
||||
prev_sample: f32,
|
||||
// One-pole IIR low-pass filter state (approximates NES ~14 kHz RC filter).
|
||||
// Coefficient: a = exp(-2π * fc / fs). At fc=14000, fs=48000: a ≈ 0.160
|
||||
lp_coeff: f32,
|
||||
@@ -31,6 +38,7 @@ impl AudioMixer {
|
||||
samples_per_cpu_cycle: sample_rate as f64 / cpu_hz,
|
||||
sample_accumulator: 0.0,
|
||||
last_output_sample: 0.0,
|
||||
prev_sample: 0.0,
|
||||
lp_coeff,
|
||||
lp_state: 0.0,
|
||||
hp_coeff,
|
||||
@@ -46,6 +54,7 @@ impl AudioMixer {
|
||||
pub fn reset(&mut self) {
|
||||
self.sample_accumulator = 0.0;
|
||||
self.last_output_sample = 0.0;
|
||||
self.prev_sample = 0.0;
|
||||
self.lp_state = 0.0;
|
||||
self.hp_prev_x = 0.0;
|
||||
self.hp_prev_y = 0.0;
|
||||
@@ -82,29 +91,49 @@ impl AudioMixer {
|
||||
return;
|
||||
}
|
||||
|
||||
let start = self.last_output_sample;
|
||||
// Catmull-Rom Hermite interpolation between the previous batch sample
|
||||
// (p1 = last_output_sample) and the current batch sample (p2 = sample).
|
||||
//
|
||||
// The tangent at p1 uses the two-point central difference:
|
||||
// m1 = (p2 - p0) / 2, where p0 = prev_sample (two batches ago).
|
||||
// The tangent at p2 uses the forward difference (p3 approximated as p2,
|
||||
// i.e. the signal stays flat beyond the current batch):
|
||||
// m2 = (p2 - p1) / 2.
|
||||
//
|
||||
// Hermite basis:
|
||||
// h00(t) = 2t³ - 3t² + 1
|
||||
// h10(t) = t³ - 2t² + t
|
||||
// h01(t) = -2t³ + 3t²
|
||||
// h11(t) = t³ - t²
|
||||
// f(t) = h00·p1 + h10·m1 + h01·p2 + h11·m2
|
||||
//
|
||||
// For t = 1 this collapses to p2, so the last output of each batch
|
||||
// always lands exactly on the current APU sample value.
|
||||
let p0 = self.prev_sample;
|
||||
let p1 = self.last_output_sample;
|
||||
let p2 = sample;
|
||||
let m1 = (p2 - p0) * 0.5;
|
||||
let m2 = (p2 - p1) * 0.5;
|
||||
|
||||
let denom = samples as f32;
|
||||
let a = self.lp_coeff;
|
||||
let b = 1.0 - a;
|
||||
if samples == 1 {
|
||||
let lp = a * self.lp_state + b * sample;
|
||||
for idx in 0..samples {
|
||||
let t = (idx + 1) as f32 / denom;
|
||||
let t2 = t * t;
|
||||
let t3 = t2 * t;
|
||||
let interp = (2.0 * t3 - 3.0 * t2 + 1.0) * p1
|
||||
+ (t3 - 2.0 * t2 + t) * m1
|
||||
+ (-2.0 * t3 + 3.0 * t2) * p2
|
||||
+ (t3 - t2) * m2;
|
||||
let lp = a * self.lp_state + b * interp;
|
||||
self.lp_state = lp;
|
||||
let hp = self.hp_coeff * self.hp_prev_y + lp - self.hp_prev_x;
|
||||
self.hp_prev_x = lp;
|
||||
self.hp_prev_y = hp;
|
||||
out.push(hp);
|
||||
} else {
|
||||
let denom = samples as f32;
|
||||
for idx in 0..samples {
|
||||
let t = (idx + 1) as f32 / denom;
|
||||
let interp = start + (sample - start) * t;
|
||||
let lp = a * self.lp_state + b * interp;
|
||||
self.lp_state = lp;
|
||||
let hp = self.hp_coeff * self.hp_prev_y + lp - self.hp_prev_x;
|
||||
self.hp_prev_x = lp;
|
||||
self.hp_prev_y = hp;
|
||||
out.push(hp);
|
||||
}
|
||||
}
|
||||
self.prev_sample = p1;
|
||||
self.last_output_sample = sample;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user