cenotaph/tools/downscale_sprite.py
Trevor Boddy 073f96c9b1 pickups, native build, enemy/bullet/stage overhaul
- Add pickup system (bomb, spread, rapid, shield) with new sprites
- Replace Docker build with native SGDK compile via m68k-elf-gcc
- Rework enemy spawning, homing math, boss HP/number globals
- Expand chrome: score popups, minimap, pause/game over improvements
- Overhaul stage generation with threat-point system
- Add explosion sprites, shield sprite, powerup sprite
- Add tools/ for sprite downscaling utilities
2026-04-15 08:19:29 -04:00

540 lines
18 KiB
Python

#!/usr/bin/env python3
"""Content-adaptive sprite downscaler (Kopf-Shamir-Peers 2013).
Downscales hand-drawn sprites into Genesis-compatible indexed-color pixel art
using bilateral EM kernels that respect edges and color boundaries.
Dependencies: pip install numpy scikit-image Pillow
"""
import argparse
import sys
import numpy as np
from pathlib import Path
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# Channel intensity levels representable by the Genesis/Mega Drive VDP
# (3 bits per channel -> 8 levels, spread over 0..255).
# NOTE(review): not referenced anywhere in this view of the file — presumably
# used by other tooling or kept for reference; confirm before removing.
GENESIS_LEVELS = np.array([0, 36, 72, 109, 145, 182, 218, 255], dtype=np.uint8)
# ---------------------------------------------------------------------------
# Color utilities
# ---------------------------------------------------------------------------
def rgb_to_lab(rgb: np.ndarray) -> np.ndarray:
    """Convert a float RGB image in [0, 1] to CIELAB via scikit-image."""
    # Imported lazily so the module loads even without scikit-image installed.
    from skimage import color as _color
    return _color.rgb2lab(rgb)
def lab_to_rgb(lab: np.ndarray) -> np.ndarray:
    """Convert a CIELAB image back to float RGB in [0, 1] via scikit-image."""
    # Imported lazily so the module loads even without scikit-image installed.
    from skimage import color as _color
    return _color.lab2rgb(lab)
def normalize_lab(lab: np.ndarray) -> np.ndarray:
    """Map CIELAB channels to roughly [0, 1] each (ranges from the paper).

    L is divided by 100; a is shifted by +87 and scaled by 186; b is shifted
    by +108 and scaled by 203. Inverse of denormalize_lab.
    """
    out = np.empty_like(lab)
    # (shift, scale) per channel: value -> (value + shift) / scale
    for ch, (shift, scale) in enumerate(((0.0, 100.0),
                                         (87.0, 186.0),
                                         (108.0, 203.0))):
        out[..., ch] = (lab[..., ch] + shift) / scale
    return out
def denormalize_lab(nlab: np.ndarray) -> np.ndarray:
    """Map normalized [0, 1] LAB channels back to true CIELAB ranges.

    Exact inverse of normalize_lab: L * 100, a * 186 - 87, b * 203 - 108.
    """
    out = np.empty_like(nlab)
    # (scale, shift) per channel: value -> value * scale + shift
    for ch, (scale, shift) in enumerate(((100.0, 0.0),
                                         (186.0, -87.0),
                                         (203.0, -108.0))):
        out[..., ch] = nlab[..., ch] * scale + shift
    return out
# ---------------------------------------------------------------------------
# Image I/O
# ---------------------------------------------------------------------------
def load_input(path: str, bg_color=None):
    """Load a PNG and return (normalized_lab, alpha_mask).

    RGBA inputs: alpha > 127 marks opaque pixels.  RGB inputs: everything is
    opaque unless bg_color (a 0-255 RGB triple) is given, in which case pixels
    matching it (within a small tolerance) become transparent.  Any other mode
    is converted to RGBA first.
    """
    from PIL import Image
    image = Image.open(path)
    # Anything that isn't already RGB/RGBA goes through an RGBA conversion.
    if image.mode not in ("RGBA", "RGB"):
        image = image.convert("RGBA")
    pixels = np.array(image, dtype=np.float64)
    if image.mode == "RGBA":
        opaque = pixels[:, :, 3] > 127
        rgb01 = pixels[:, :, :3] / 255.0
    else:  # RGB
        rgb01 = pixels / 255.0
        if bg_color is None:
            opaque = np.ones(rgb01.shape[:2], dtype=bool)
        else:
            bg = np.array(bg_color, dtype=np.float64) / 255.0
            # Pixels equal to the background (within tolerance) are transparent.
            opaque = ~np.all(np.abs(rgb01 - bg) < 1e-3, axis=-1)
    return normalize_lab(rgb_to_lab(rgb01)), opaque
def load_palette(path: str):
    """Load a 16-color palette from an indexed PNG.

    Exits with an error message if the image is not mode "P" or carries no
    palette data.  Returns a (16, 3) uint8 RGB array (first 16 entries).
    """
    from PIL import Image
    img = Image.open(path)
    if img.mode != "P":
        print(f"Error: palette image must be indexed-color (mode P), got {img.mode}",
              file=sys.stderr)
        sys.exit(1)
    raw = img.getpalette()
    if raw is None:
        print("Error: palette image has no palette data", file=sys.stderr)
        sys.exit(1)
    # First 48 values = 16 RGB triples.
    return np.array(raw[:48], dtype=np.uint8).reshape(16, 3)
def save_indexed_png(path: str, pixels: np.ndarray, palette: np.ndarray):
    """Write *pixels* (a 2-D index array) as an indexed-color PNG.

    The provided palette is written exactly as-is, zero-padded out to the
    256-entry (768-value) table PIL expects.
    """
    from PIL import Image
    entries = palette.flatten().tolist()
    entries += [0] * (768 - len(entries))
    out = Image.fromarray(pixels, mode="P")
    out.putpalette(entries)
    out.save(path)
# ---------------------------------------------------------------------------
# EM core
# ---------------------------------------------------------------------------
def _invert_2x2(m):
"""Invert a single 2x2 matrix."""
a, b, c, d = m[0, 0], m[0, 1], m[1, 0], m[1, 1]
det = a * d - b * c
if abs(det) < 1e-15:
det = 1e-15
return np.array([[d, -b], [-c, a]], dtype=np.float64) / det
def run_downscale(nlab_image: np.ndarray, alpha_mask: np.ndarray,
                  out_h: int, out_w: int, n_iters: int = 30,
                  verbose: bool = False):
    """Run the full bilateral-EM downscale with per-pixel normalization.

    One Gaussian kernel per output pixel is fitted to the input image; each
    kernel has a spatial mean mu, spatial covariance Sigma, color mean nu,
    and a scalar color bandwidth sigma_c.  Iterations alternate an E-step
    (bilateral responsibilities), an M-step (weighted re-estimation), and a
    correction step (spatial smoothing/clamping, covariance eigenvalue
    clamping, and a shape constraint that widens sigma_c).

    Args:
        nlab_image: (H, W, 3) normalized CIELAB image in [0, 1].
        alpha_mask: (H, W) bool; True = opaque pixel.
        out_h, out_w: output dimensions (each kernel maps to one output pixel).
        n_iters: maximum EM iterations.
        verbose: print per-iteration convergence stats.

    Returns:
        (output_nlab, output_alpha): (out_h, out_w, 3) kernel colors and
        (out_h, out_w) bool opacity mask.

    NOTE(review): if n_iters == 0 the loop never runs and `ky_all`/`kx_all`
    (defined inside the loop, used when building output_alpha) are unbound —
    callers must pass n_iters >= 1.
    """
    in_h, in_w = nlab_image.shape[:2]
    K = out_h * out_w          # number of kernels == number of output pixels
    N = in_h * in_w            # number of input pixels
    rx = in_w / out_w          # downscale ratio, x
    ry = in_h / out_h          # downscale ratio, y
    # Flatten input for indexing
    nlab_flat = nlab_image.reshape(N, 3)
    alpha_flat = alpha_mask.ravel()
    # Precompute all input pixel coordinates (x, y) in input space,
    # flattened row-major to align with nlab_flat/alpha_flat.
    grid_y, grid_x = np.mgrid[:in_h, :in_w]
    coords_flat = np.stack([grid_x.ravel(), grid_y.ravel()], axis=-1).astype(np.float64)
    # --- Initialization ---
    # Output pixel centers expressed in input coordinates.
    oy, ox = np.meshgrid(
        (np.arange(out_h) + 0.5) * ry,
        (np.arange(out_w) + 0.5) * rx,
        indexing="ij",
    )
    centers = np.stack([ox.ravel(), oy.ravel()], axis=-1)  # (K, 2)
    mu = centers.copy()
    # Covariance: diag(rx/3, ry/3) — NOT squared, matching paper
    Sigma = np.zeros((K, 2, 2), dtype=np.float64)
    Sigma[:, 0, 0] = rx / 3.0
    Sigma[:, 1, 1] = ry / 3.0
    # Color mean: local average of the opaque input pixels under each cell;
    # cells with no opaque pixels keep the neutral 0.5 default.
    nu = np.full((K, 3), 0.5, dtype=np.float64)
    for k in range(K):
        cx, cy = centers[k]
        x0 = max(0, int(cx - rx / 2))
        x1 = min(in_w, int(cx + rx / 2) + 1)
        y0 = max(0, int(cy - ry / 2))
        y1 = min(in_h, int(cy + ry / 2) + 1)
        patch = nlab_image[y0:y1, x0:x1]
        mask_patch = alpha_mask[y0:y1, x0:x1]
        if mask_patch.any():
            nu[k] = patch[mask_patch].mean(axis=0)
    # Color variance (scalar), matching paper init
    sigma_c = np.full(K, 0.0001, dtype=np.float64)
    # Precompute R(k): flat indices of input pixels within 2*rx / 2*ry of
    # each kernel center — the only pixels a kernel can influence.
    R = [None] * K
    for k in range(K):
        cx, cy = centers[k]
        x0 = max(0, int(cx - 2 * rx))
        x1 = min(in_w, int(cx + 2 * rx))
        y0 = max(0, int(cy - 2 * ry))
        y1 = min(in_h, int(cy + 2 * ry))
        yy, xx = np.mgrid[y0:y1, x0:x1]
        R[k] = (yy.ravel() * in_w + xx.ravel()).astype(np.int32)
    # Eigenvalue clamp bounds, scaled with downscale ratio.
    # For rx=2 matches paper's [0.05, 0.1]; scales quadratically.
    ev_min = 0.0125 * rx * rx
    ev_max = 0.025 * rx * rx
    # --- EM iterations ---
    prev_nu = None
    for it in range(n_iters):
        # ===================== E-STEP =====================
        # Compute bilateral weights w_k(i) and per-pixel sums for normalization.
        # Per-pixel accumulator: sum of w_k(i) across all kernels k.
        pixel_wsum = np.zeros(N, dtype=np.float64)
        # Store per-kernel weights (sparse: only pixels in R(k)).
        kernel_w = [None] * K
        for k in range(K):
            idx = R[k]
            pi = coords_flat[idx]   # (M, 2) pixel positions
            ci = nlab_flat[idx]     # (M, 3) pixel colors
            ai = alpha_flat[idx]    # (M,) bool opacity
            # Spatial Gaussian: Mahalanobis distance under Sigma[k]
            diff_s = pi - mu[k]
            Si = _invert_2x2(Sigma[k])
            mahal = np.sum(diff_s @ Si * diff_s, axis=-1)
            f_k = np.exp(-0.5 * mahal)
            # Color Gaussian with bandwidth sigma_c[k] (floored for stability)
            diff_c = ci - nu[k]
            color_dist_sq = np.sum(diff_c ** 2, axis=-1)
            sc2 = max(sigma_c[k] * sigma_c[k], 1e-15)
            g_k = np.exp(-color_dist_sq / (2.0 * sc2))
            # Bilateral weight; transparent pixels contribute nothing
            w = f_k * g_k
            w[~ai] = 0.0
            # Per-kernel normalization (numerical stability)
            wsum = w.sum()
            if wsum > 0:
                w /= wsum
            kernel_w[k] = w
            pixel_wsum[idx] += w
        # ===================== COMPUTE GAMMA & M-STEP =====================
        # gamma_k(i) = w_k(i) / sum_n w_n(i)   [per-pixel normalization]
        new_mu = np.zeros((K, 2), dtype=np.float64)
        new_Sigma = np.zeros((K, 2, 2), dtype=np.float64)
        new_nu = np.zeros((K, 3), dtype=np.float64)
        # Also store gamma for the shape-constraint overlap check below.
        kernel_gamma = [None] * K
        for k in range(K):
            idx = R[k]
            w = kernel_w[k]
            pi = coords_flat[idx]
            ci = nlab_flat[idx]
            # Per-pixel normalization (guard against all-zero columns)
            denom = pixel_wsum[idx]
            denom = np.where(denom > 0, denom, 1.0)
            gamma = w / denom
            kernel_gamma[k] = gamma
            gamma_sum = gamma.sum()
            if gamma_sum < 1e-12:
                # Degenerate kernel: reset toward its grid cell, keep state.
                new_mu[k] = centers[k]
                new_Sigma[k] = Sigma[k]
                new_nu[k] = nu[k]
                continue
            # M-step: responsibility-weighted mean, covariance, color
            new_mu[k] = (gamma[:, None] * pi).sum(axis=0) / gamma_sum
            diff_s = pi - new_mu[k]
            new_Sigma[k] = (gamma[:, None, None] *
                            (diff_s[:, :, None] * diff_s[:, None, :])).sum(axis=0) / gamma_sum
            new_nu[k] = (gamma[:, None] * ci).sum(axis=0) / gamma_sum
        mu = new_mu
        Sigma = new_Sigma
        nu = new_nu
        # ===================== CORRECTION STEP =====================
        # 1. Spatial bias: blend mu toward the 4-neighbor average, then clamp
        #    each mean into a box around its own grid cell center.
        mu_grid = mu.reshape(out_h, out_w, 2)
        neighbor_sum = np.zeros_like(mu_grid)
        neighbor_cnt = np.zeros((out_h, out_w, 1), dtype=np.float64)
        neighbor_sum[1:, :] += mu_grid[:-1, :]
        neighbor_cnt[1:, :] += 1
        neighbor_sum[:-1, :] += mu_grid[1:, :]
        neighbor_cnt[:-1, :] += 1
        neighbor_sum[:, 1:] += mu_grid[:, :-1]
        neighbor_cnt[:, 1:] += 1
        neighbor_sum[:, :-1] += mu_grid[:, 1:]
        neighbor_cnt[:, :-1] += 1
        neighbor_cnt = np.maximum(neighbor_cnt, 1)
        mu_bar = neighbor_sum / neighbor_cnt
        mu_grid = 0.5 * mu_grid + 0.5 * mu_bar
        centers_grid = centers.reshape(out_h, out_w, 2)
        mu_grid[..., 0] = np.clip(mu_grid[..., 0],
                                  centers_grid[..., 0] - rx / 4.0,
                                  centers_grid[..., 0] + rx / 4.0)
        mu_grid[..., 1] = np.clip(mu_grid[..., 1],
                                  centers_grid[..., 1] - ry / 4.0,
                                  centers_grid[..., 1] + ry / 4.0)
        mu = mu_grid.reshape(K, 2)
        # 2. Constrain spatial covariance via SVD eigenvalue clamping
        for k in range(K):
            U, s, Vt = np.linalg.svd(Sigma[k])
            s = np.clip(s, ev_min, ev_max)
            Sigma[k] = U @ np.diag(s) @ Vt
        # 3. Shape constraint: check directional variance AND kernel overlap
        #    against each of the 8 grid neighbors; a violation widens the
        #    color bandwidth of both kernels (sigma_c grows multiplicatively).
        ky_all = np.arange(K) // out_w
        kx_all = np.arange(K) % out_w
        for k in range(K):
            ky, kx = ky_all[k], kx_all[k]
            gamma_k = kernel_gamma[k]
            idx_k = R[k]
            for dy in range(-1, 2):
                for dx in range(-1, 2):
                    if dy == 0 and dx == 0:
                        continue
                    ny, nx = ky + dy, kx + dx
                    if not (0 <= ny < out_h and 0 <= nx < out_w):
                        continue
                    nk = ny * out_w + nx
                    # Directional variance toward the neighbor's mean
                    d = mu[nk] - mu[k]
                    d_norm = np.linalg.norm(d)
                    if d_norm > 1e-8:
                        d_hat = d / d_norm
                        # Weighted variance of the positive projections only
                        pi = coords_flat[idx_k]
                        proj = np.maximum(0, (pi - mu[k]) @ d_hat)
                        s = (gamma_k * proj * proj).sum()
                    else:
                        s = 0.0
                    # Kernel overlap: shared responsibility mass over the
                    # pixels common to R(k) and R(nk).
                    idx_n = R[nk]
                    gamma_n = kernel_gamma[nk]
                    common, k_pos, n_pos = np.intersect1d(idx_k, idx_n,
                                                          return_indices=True)
                    if len(common) > 0:
                        f = (gamma_k[k_pos] * gamma_n[n_pos]).sum()
                    else:
                        f = 0.0
                    if s > 0.2 * rx or f < 0.08:
                        sigma_c[k] *= 1.1
                        sigma_c[nk] *= 1.1
        # Convergence check: only begins after iteration index 30.
        # NOTE(review): with the signature default n_iters=30 this branch
        # never fires (it runs 0..29); the CLI passes 50 — confirm intent.
        if prev_nu is not None and it >= 30:
            max_delta = np.max(np.abs(nu - prev_nu))
            if verbose:
                print(f" iter {it+1}/{n_iters}: max color delta = {max_delta:.6f}")
            if max_delta < 0.002:
                if verbose:
                    print(f" Converged at iteration {it+1}")
                break
        elif verbose:
            mu_shift = np.linalg.norm(mu - centers, axis=-1).mean()
            print(f" iter {it+1}/{n_iters}: mean spatial shift = {mu_shift:.4f} px")
        prev_nu = nu.copy()
    # --- Build output ---
    # Each kernel's color mean becomes one output pixel.
    output_nlab = nu.reshape(out_h, out_w, 3)
    # Transparency: a cell is transparent when fewer than 30% of the input
    # pixels under it are opaque.
    output_alpha = np.ones((out_h, out_w), dtype=bool)
    for k in range(K):
        ky, kx = ky_all[k], kx_all[k]
        cx, cy = centers[k]
        x0 = max(0, int(cx - rx / 2))
        x1 = min(in_w, int(cx + rx / 2) + 1)
        y0 = max(0, int(cy - ry / 2))
        y1 = min(in_h, int(cy + ry / 2) + 1)
        patch_alpha = alpha_mask[y0:y1, x0:x1]
        total = patch_alpha.size
        opaque = patch_alpha.sum()
        if total > 0 and (opaque / total) < 0.3:
            output_alpha[ky, kx] = False
    return output_nlab, output_alpha
# ---------------------------------------------------------------------------
# Palette assignment
# ---------------------------------------------------------------------------
def assign_to_palette(output_nlab, output_alpha, palette_rgb):
    """Assign each output pixel to the nearest color in the provided palette.

    Matching is done in normalized CIELAB space; the palette is used exactly
    as-is (no quantization of its entries).  Transparent pixels always map to
    palette index 0 (the Genesis transparent entry).

    Args:
        output_nlab: (H, W, 3) normalized-CIELAB output colors.
        output_alpha: (H, W) bool opacity mask.
        palette_rgb: (P, 3) uint8 RGB palette (P=16 for Genesis).

    Returns:
        (H, W) uint8 array of palette indices.
    """
    # Convert palette to normalized CIELAB for comparison.
    pal_rgb01 = palette_rgb.astype(np.float64) / 255.0
    n_colors = palette_rgb.shape[0]
    pal_lab = rgb_to_lab(pal_rgb01.reshape(1, -1, 3)).reshape(n_colors, 3)
    pal_nlab = normalize_lab(pal_lab)
    # Vectorized nearest-neighbor: squared LAB distance from every pixel to
    # every palette entry, then argmin over the palette axis.  (argmin picks
    # the first minimum on ties, matching the previous per-pixel loop.)
    dists = np.sum((output_nlab[:, :, None, :] - pal_nlab[None, None, :, :]) ** 2,
                   axis=-1)
    indices = np.argmin(dists, axis=-1).astype(np.uint8)
    # Transparent pixels are forced to index 0.
    indices[~output_alpha] = 0
    return indices
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def parse_args(argv=None):
    """Build the CLI parser and parse *argv* (None = sys.argv)."""
    parser = argparse.ArgumentParser(
        description="Content-adaptive sprite downscaler for Genesis pixel art.",
    )
    parser.add_argument("input", help="Input PNG path")
    parser.add_argument("output", help="Output indexed PNG path")
    parser.add_argument("--size", required=True,
                        help="Output size WxH (multiples of 8)")
    parser.add_argument("--palette", required=True,
                        help="Indexed PNG to use as the 16-color palette")
    parser.add_argument("--iters", type=int, default=50,
                        help="Max EM iterations (default 50, converges early)")
    parser.add_argument("--bg-color",
                        help="Treat this R,G,B color as transparent (for RGB inputs)")
    parser.add_argument("--seed", type=int, default=None,
                        help="RNG seed for reproducibility")
    parser.add_argument("--verbose", action="store_true",
                        help="Print per-iteration convergence stats")
    return parser.parse_args(argv)
def _fail(message):
    """Print *message* to stderr and exit with status 1."""
    print(message, file=sys.stderr)
    sys.exit(1)


def main(argv=None):
    """CLI entry point: validate arguments, load the input and palette, run
    the EM downscale, snap to the palette, and write the indexed PNG."""
    args = parse_args(argv)
    # --- Output size: "WxH", both dimensions multiples of 8 (tile size) ---
    try:
        width_txt, height_txt = args.size.lower().split("x")
        out_w, out_h = int(width_txt), int(height_txt)
    except ValueError:
        _fail(f"Error: --size must be WxH, got '{args.size}'")
    if out_w % 8 != 0 or out_h % 8 != 0:
        _fail("Error: output dimensions must be multiples of 8")
    # --- Palette ---
    if not Path(args.palette).exists():
        _fail(f"Error: palette file not found: {args.palette}")
    ext_palette = load_palette(args.palette)
    # --- Optional background color "R,G,B" (transparency for RGB inputs) ---
    bg_color = None
    if args.bg_color:
        try:
            channels = [int(part.strip()) for part in args.bg_color.split(",")]
            if len(channels) != 3:
                raise ValueError
            bg_color = tuple(channels)
        except ValueError:
            _fail("Error: --bg-color must be R,G,B")
    # --- Input image ---
    if not Path(args.input).exists():
        _fail(f"Error: input file not found: {args.input}")
    print(f"Loading {args.input}...")
    nlab_image, alpha_mask = load_input(args.input, bg_color)
    in_h, in_w = nlab_image.shape[:2]
    # Downscaling only: output must be strictly smaller in both dimensions.
    if out_w >= in_w or out_h >= in_h:
        _fail(f"Error: output ({out_w}x{out_h}) must be smaller than input ({in_w}x{in_h})")
    print(f"Input: {in_w}x{in_h}")
    print(f"Output: {out_w}x{out_h}")
    print(f"Palette: {args.palette}")
    print(f"Max EM iterations: {args.iters}")
    # --- EM downscale ---
    print("Running EM downscale...")
    output_nlab, output_alpha = run_downscale(
        nlab_image, alpha_mask, out_h, out_w,
        n_iters=args.iters, verbose=args.verbose,
    )
    # --- Snap to the provided palette and save ---
    print("Assigning to palette...")
    indices = assign_to_palette(output_nlab, output_alpha, ext_palette)
    print(f"Saving {args.output}...")
    save_indexed_png(args.output, indices, ext_palette)
    # --- Summary ---
    n_opaque = output_alpha.sum()
    n_transparent = (~output_alpha).sum()
    colors_used = len(np.unique(indices[output_alpha])) if n_opaque > 0 else 0
    print(f"\nDone!")
    print(f" Opaque pixels: {n_opaque}")
    print(f" Transparent pixels: {n_transparent}")
    print(f" Colors used: {colors_used} / 15")


if __name__ == "__main__":
    main()