#!/usr/bin/env python3 """Content-adaptive sprite downscaler (Kopf-Shamir-Peers 2013). Downscales hand-drawn sprites into Genesis-compatible indexed-color pixel art using bilateral EM kernels that respect edges and color boundaries. Dependencies: pip install numpy scikit-image Pillow """ import argparse import sys import numpy as np from pathlib import Path # --------------------------------------------------------------------------- # Constants # --------------------------------------------------------------------------- GENESIS_LEVELS = np.array([0, 36, 72, 109, 145, 182, 218, 255], dtype=np.uint8) # --------------------------------------------------------------------------- # Color utilities # --------------------------------------------------------------------------- def rgb_to_lab(rgb: np.ndarray) -> np.ndarray: """Convert float RGB [0,1] image to CIELAB. Uses skimage.""" from skimage.color import rgb2lab return rgb2lab(rgb) def lab_to_rgb(lab: np.ndarray) -> np.ndarray: """Convert CIELAB image to float RGB [0,1]. Uses skimage.""" from skimage.color import lab2rgb return lab2rgb(lab) def normalize_lab(lab: np.ndarray) -> np.ndarray: """Normalize CIELAB to roughly [0,1] per channel (matching paper).""" out = np.empty_like(lab) out[..., 0] = lab[..., 0] / 100.0 out[..., 1] = (lab[..., 1] + 87.0) / 186.0 out[..., 2] = (lab[..., 2] + 108.0) / 203.0 return out def denormalize_lab(nlab: np.ndarray) -> np.ndarray: """Undo normalize_lab.""" out = np.empty_like(nlab) out[..., 0] = nlab[..., 0] * 100.0 out[..., 1] = nlab[..., 1] * 186.0 - 87.0 out[..., 2] = nlab[..., 2] * 203.0 - 108.0 return out # --------------------------------------------------------------------------- # Image I/O # --------------------------------------------------------------------------- def load_input(path: str, bg_color=None): """Load PNG, return (normalized_lab, alpha_mask).""" from PIL import Image img = Image.open(path) if img.mode == "RGBA": arr = np.array(img, dtype=np.float64) alpha_mask = arr[:, :, 3] > 127 rgb01 = arr[:, :, :3] / 255.0 elif img.mode == "RGB": arr = np.array(img, dtype=np.float64) rgb01 = arr / 255.0 if bg_color is not None: bg = np.array(bg_color, dtype=np.float64) / 255.0 alpha_mask = ~np.all(np.abs(rgb01 - bg) < 1e-3, axis=-1) else: alpha_mask = np.ones(rgb01.shape[:2], dtype=bool) else: img = img.convert("RGBA") arr = np.array(img, dtype=np.float64) alpha_mask = arr[:, :, 3] > 127 rgb01 = arr[:, :, :3] / 255.0 lab = rgb_to_lab(rgb01) nlab = normalize_lab(lab) return nlab, alpha_mask def load_palette(path: str): """Load a 16-color palette from an indexed PNG. Returns (16, 3) uint8 RGB array. """ from PIL import Image img = Image.open(path) if img.mode != "P": print(f"Error: palette image must be indexed-color (mode P), got {img.mode}", file=sys.stderr) sys.exit(1) raw_pal = img.getpalette() if raw_pal is None: print("Error: palette image has no palette data", file=sys.stderr) sys.exit(1) pal = np.array(raw_pal[:48], dtype=np.uint8).reshape(16, 3) return pal def save_indexed_png(path: str, pixels: np.ndarray, palette: np.ndarray): """Write indexed-color PNG with the palette exactly as provided.""" from PIL import Image img = Image.fromarray(pixels, mode="P") flat_pal = palette.flatten().tolist() flat_pal.extend([0] * (768 - len(flat_pal))) img.putpalette(flat_pal) img.save(path) # --------------------------------------------------------------------------- # EM core # --------------------------------------------------------------------------- def _invert_2x2(m): """Invert a single 2x2 matrix.""" a, b, c, d = m[0, 0], m[0, 1], m[1, 0], m[1, 1] det = a * d - b * c if abs(det) < 1e-15: det = 1e-15 return np.array([[d, -b], [-c, a]], dtype=np.float64) / det def run_downscale(nlab_image, alpha_mask, out_h, out_w, n_iters=30, verbose=False): """Run full EM downscale with proper per-pixel normalization. nlab_image: (H, W, 3) normalized CIELAB [0,1] alpha_mask: (H, W) bool Returns (output_nlab, output_alpha). """ in_h, in_w = nlab_image.shape[:2] K = out_h * out_w N = in_h * in_w rx = in_w / out_w ry = in_h / out_h # Flatten input for indexing nlab_flat = nlab_image.reshape(N, 3) alpha_flat = alpha_mask.ravel() # Precompute all input pixel coordinates (row, col) in input space grid_y, grid_x = np.mgrid[:in_h, :in_w] coords_flat = np.stack([grid_x.ravel(), grid_y.ravel()], axis=-1).astype(np.float64) # --- Initialization --- # Output pixel centers in input coordinates oy, ox = np.meshgrid( (np.arange(out_h) + 0.5) * ry, (np.arange(out_w) + 0.5) * rx, indexing="ij", ) centers = np.stack([ox.ravel(), oy.ravel()], axis=-1) # (K, 2) mu = centers.copy() # Covariance: diag(rx/3, ry/3) — NOT squared, matching paper Sigma = np.zeros((K, 2, 2), dtype=np.float64) Sigma[:, 0, 0] = rx / 3.0 Sigma[:, 1, 1] = ry / 3.0 # Color mean: local average from input nu = np.full((K, 3), 0.5, dtype=np.float64) for k in range(K): cx, cy = centers[k] x0 = max(0, int(cx - rx / 2)) x1 = min(in_w, int(cx + rx / 2) + 1) y0 = max(0, int(cy - ry / 2)) y1 = min(in_h, int(cy + ry / 2) + 1) patch = nlab_image[y0:y1, x0:x1] mask_patch = alpha_mask[y0:y1, x0:x1] if mask_patch.any(): nu[k] = patch[mask_patch].mean(axis=0) # Color variance (scalar), matching paper init sigma_c = np.full(K, 0.0001, dtype=np.float64) # Precompute R(k): set of input pixel indices within 2*rx of each kernel R = [None] * K for k in range(K): cx, cy = centers[k] x0 = max(0, int(cx - 2 * rx)) x1 = min(in_w, int(cx + 2 * rx)) y0 = max(0, int(cy - 2 * ry)) y1 = min(in_h, int(cy + 2 * ry)) yy, xx = np.mgrid[y0:y1, x0:x1] R[k] = (yy.ravel() * in_w + xx.ravel()).astype(np.int32) # Eigenvalue clamp bounds, scaled with downscale ratio # For rx=2 matches paper's [0.05, 0.1]; scales quadratically ev_min = 0.0125 * rx * rx ev_max = 0.025 * rx * rx # --- EM iterations --- prev_nu = None for it in range(n_iters): # ===================== E-STEP ===================== # Compute bilateral weights w_k(i) and per-pixel sums for normalization # Per-pixel accumulator: sum of w_k(i) across all kernels k pixel_wsum = np.zeros(N, dtype=np.float64) # Store per-kernel weights (sparse: only pixels in R(k)) kernel_w = [None] * K for k in range(K): idx = R[k] pi = coords_flat[idx] # (M, 2) ci = nlab_flat[idx] # (M, 3) ai = alpha_flat[idx] # (M,) bool # Spatial Gaussian: Mahalanobis distance diff_s = pi - mu[k] Si = _invert_2x2(Sigma[k]) mahal = np.sum(diff_s @ Si * diff_s, axis=-1) f_k = np.exp(-0.5 * mahal) # Color Gaussian diff_c = ci - nu[k] color_dist_sq = np.sum(diff_c ** 2, axis=-1) sc2 = max(sigma_c[k] * sigma_c[k], 1e-15) g_k = np.exp(-color_dist_sq / (2.0 * sc2)) # Bilateral weight w = f_k * g_k w[~ai] = 0.0 # Per-kernel normalization (numerical stability) wsum = w.sum() if wsum > 0: w /= wsum kernel_w[k] = w pixel_wsum[idx] += w # ===================== COMPUTE GAMMA & M-STEP ===================== # gamma_k(i) = w_k(i) / sum_n w_n(i) [per-pixel normalization] new_mu = np.zeros((K, 2), dtype=np.float64) new_Sigma = np.zeros((K, 2, 2), dtype=np.float64) new_nu = np.zeros((K, 3), dtype=np.float64) # Also store gamma for shape constraint overlap check kernel_gamma = [None] * K for k in range(K): idx = R[k] w = kernel_w[k] pi = coords_flat[idx] ci = nlab_flat[idx] # Per-pixel normalization denom = pixel_wsum[idx] denom = np.where(denom > 0, denom, 1.0) gamma = w / denom kernel_gamma[k] = gamma gamma_sum = gamma.sum() if gamma_sum < 1e-12: new_mu[k] = centers[k] new_Sigma[k] = Sigma[k] new_nu[k] = nu[k] continue # M-step new_mu[k] = (gamma[:, None] * pi).sum(axis=0) / gamma_sum diff_s = pi - new_mu[k] new_Sigma[k] = (gamma[:, None, None] * (diff_s[:, :, None] * diff_s[:, None, :])).sum(axis=0) / gamma_sum new_nu[k] = (gamma[:, None] * ci).sum(axis=0) / gamma_sum mu = new_mu Sigma = new_Sigma nu = new_nu # ===================== CORRECTION STEP ===================== # 1. Spatial bias: blend mu toward 4-neighbor average, clamp to box mu_grid = mu.reshape(out_h, out_w, 2) neighbor_sum = np.zeros_like(mu_grid) neighbor_cnt = np.zeros((out_h, out_w, 1), dtype=np.float64) neighbor_sum[1:, :] += mu_grid[:-1, :] neighbor_cnt[1:, :] += 1 neighbor_sum[:-1, :] += mu_grid[1:, :] neighbor_cnt[:-1, :] += 1 neighbor_sum[:, 1:] += mu_grid[:, :-1] neighbor_cnt[:, 1:] += 1 neighbor_sum[:, :-1] += mu_grid[:, 1:] neighbor_cnt[:, :-1] += 1 neighbor_cnt = np.maximum(neighbor_cnt, 1) mu_bar = neighbor_sum / neighbor_cnt mu_grid = 0.5 * mu_grid + 0.5 * mu_bar centers_grid = centers.reshape(out_h, out_w, 2) mu_grid[..., 0] = np.clip(mu_grid[..., 0], centers_grid[..., 0] - rx / 4.0, centers_grid[..., 0] + rx / 4.0) mu_grid[..., 1] = np.clip(mu_grid[..., 1], centers_grid[..., 1] - ry / 4.0, centers_grid[..., 1] + ry / 4.0) mu = mu_grid.reshape(K, 2) # 2. Constrain spatial covariance via SVD eigenvalue clamping for k in range(K): U, s, Vt = np.linalg.svd(Sigma[k]) s = np.clip(s, ev_min, ev_max) Sigma[k] = U @ np.diag(s) @ Vt # 3. Shape constraint: check directional variance AND kernel overlap ky_all = np.arange(K) // out_w kx_all = np.arange(K) % out_w for k in range(K): ky, kx = ky_all[k], kx_all[k] gamma_k = kernel_gamma[k] idx_k = R[k] for dy in range(-1, 2): for dx in range(-1, 2): if dy == 0 and dx == 0: continue ny, nx = ky + dy, kx + dx if not (0 <= ny < out_h and 0 <= nx < out_w): continue nk = ny * out_w + nx # Directional variance check d = mu[nk] - mu[k] d_norm = np.linalg.norm(d) if d_norm > 1e-8: d_hat = d / d_norm # Weighted directional variance pi = coords_flat[idx_k] proj = np.maximum(0, (pi - mu[k]) @ d_hat) s = (gamma_k * proj * proj).sum() else: s = 0.0 # Kernel overlap check # Find common pixels between R(k) and R(nk) idx_n = R[nk] gamma_n = kernel_gamma[nk] common, k_pos, n_pos = np.intersect1d(idx_k, idx_n, return_indices=True) if len(common) > 0: f = (gamma_k[k_pos] * gamma_n[n_pos]).sum() else: f = 0.0 if s > 0.2 * rx or f < 0.08: sigma_c[k] *= 1.1 sigma_c[nk] *= 1.1 # Convergence check (after 30 iterations, check color stability) if prev_nu is not None and it >= 30: max_delta = np.max(np.abs(nu - prev_nu)) if verbose: print(f" iter {it+1}/{n_iters}: max color delta = {max_delta:.6f}") if max_delta < 0.002: if verbose: print(f" Converged at iteration {it+1}") break elif verbose: mu_shift = np.linalg.norm(mu - centers, axis=-1).mean() print(f" iter {it+1}/{n_iters}: mean spatial shift = {mu_shift:.4f} px") prev_nu = nu.copy() # --- Build output --- output_nlab = nu.reshape(out_h, out_w, 3) # Transparency: check opaque fraction per cell output_alpha = np.ones((out_h, out_w), dtype=bool) for k in range(K): ky, kx = ky_all[k], kx_all[k] cx, cy = centers[k] x0 = max(0, int(cx - rx / 2)) x1 = min(in_w, int(cx + rx / 2) + 1) y0 = max(0, int(cy - ry / 2)) y1 = min(in_h, int(cy + ry / 2) + 1) patch_alpha = alpha_mask[y0:y1, x0:x1] total = patch_alpha.size opaque = patch_alpha.sum() if total > 0 and (opaque / total) < 0.3: output_alpha[ky, kx] = False return output_nlab, output_alpha # --------------------------------------------------------------------------- # Palette assignment # --------------------------------------------------------------------------- def assign_to_palette(output_nlab, output_alpha, palette_rgb): """Assign each output pixel to the nearest color in the provided palette. Matches in CIELAB space. Palette is used exactly as-is. """ H, W = output_nlab.shape[:2] # Convert palette to normalized CIELAB for comparison pal_rgb01 = palette_rgb.astype(np.float64) / 255.0 pal_lab = rgb_to_lab(pal_rgb01.reshape(1, -1, 3)).reshape(16, 3) pal_nlab = normalize_lab(pal_lab) # Denormalize output for comparison in same space indices = np.zeros((H, W), dtype=np.uint8) for y in range(H): for x in range(W): if not output_alpha[y, x]: indices[y, x] = 0 else: d = np.sum((output_nlab[y, x] - pal_nlab) ** 2, axis=-1) indices[y, x] = np.argmin(d) return indices # --------------------------------------------------------------------------- # CLI # --------------------------------------------------------------------------- def parse_args(argv=None): p = argparse.ArgumentParser( description="Content-adaptive sprite downscaler for Genesis pixel art.", ) p.add_argument("input", help="Input PNG path") p.add_argument("output", help="Output indexed PNG path") p.add_argument("--size", required=True, help="Output size WxH (multiples of 8)") p.add_argument("--palette", required=True, help="Indexed PNG to use as the 16-color palette") p.add_argument("--iters", type=int, default=50, help="Max EM iterations (default 50, converges early)") p.add_argument("--bg-color", help="Treat this R,G,B color as transparent (for RGB inputs)") p.add_argument("--seed", type=int, default=None, help="RNG seed for reproducibility") p.add_argument("--verbose", action="store_true", help="Print per-iteration convergence stats") return p.parse_args(argv) def main(argv=None): args = parse_args(argv) # Parse size try: w_str, h_str = args.size.lower().split("x") out_w, out_h = int(w_str), int(h_str) except ValueError: print(f"Error: --size must be WxH, got '{args.size}'", file=sys.stderr) sys.exit(1) if out_w % 8 != 0 or out_h % 8 != 0: print("Error: output dimensions must be multiples of 8", file=sys.stderr) sys.exit(1) # Load palette palette_path = Path(args.palette) if not palette_path.exists(): print(f"Error: palette file not found: {args.palette}", file=sys.stderr) sys.exit(1) ext_palette = load_palette(args.palette) # Parse bg color bg_color = None if args.bg_color: try: parts = [int(x.strip()) for x in args.bg_color.split(",")] if len(parts) != 3: raise ValueError bg_color = tuple(parts) except ValueError: print("Error: --bg-color must be R,G,B", file=sys.stderr) sys.exit(1) # Load input input_path = Path(args.input) if not input_path.exists(): print(f"Error: input file not found: {args.input}", file=sys.stderr) sys.exit(1) print(f"Loading {args.input}...") nlab_image, alpha_mask = load_input(args.input, bg_color) in_h, in_w = nlab_image.shape[:2] if out_w >= in_w or out_h >= in_h: print(f"Error: output ({out_w}x{out_h}) must be smaller than input ({in_w}x{in_h})", file=sys.stderr) sys.exit(1) print(f"Input: {in_w}x{in_h}") print(f"Output: {out_w}x{out_h}") print(f"Palette: {args.palette}") print(f"Max EM iterations: {args.iters}") # Run EM downscale print("Running EM downscale...") output_nlab, output_alpha = run_downscale( nlab_image, alpha_mask, out_h, out_w, n_iters=args.iters, verbose=args.verbose, ) # Assign to provided palette print("Assigning to palette...") indices = assign_to_palette(output_nlab, output_alpha, ext_palette) # Save print(f"Saving {args.output}...") save_indexed_png(args.output, indices, ext_palette) # Summary n_opaque = output_alpha.sum() n_transparent = (~output_alpha).sum() colors_used = len(np.unique(indices[output_alpha])) if n_opaque > 0 else 0 print(f"\nDone!") print(f" Opaque pixels: {n_opaque}") print(f" Transparent pixels: {n_transparent}") print(f" Colors used: {colors_used} / 15") if __name__ == "__main__": main()