diff --git a/CMakeLists.txt b/CMakeLists.txt index 87e33071..800c3f06 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -174,6 +174,8 @@ file(GLOB_RECURSE SSB64_SRC_WP "src/wp/*.c" "src/wp/*.h") # Port layer (C++ glue) — excludes port/stubs/ which is compiled as part of ssb64_game file(GLOB_RECURSE SSB64_SRC_PORT "port/*.cpp" "port/*.c" "port/*.h") list(FILTER SSB64_SRC_PORT EXCLUDE REGEX "port/stubs/") +# Exclude the standalone frame-interpolation test runner — has its own main(). +list(FILTER SSB64_SRC_PORT EXCLUDE REGEX "port/frame_interpolation_test_main\\.cpp$") # Debug tools (GBI trace + Acmd trace systems) file(GLOB SSB64_DEBUG_TOOLS @@ -382,6 +384,34 @@ target_include_directories(${PROJECT_NAME} PRIVATE add_dependencies(${PROJECT_NAME} libultraship TorchExternal) target_link_libraries(${PROJECT_NAME} PRIVATE libultraship) +################################################################################ +# Frame-interpolation standalone test runner +# +# Build: cmake --build <build-dir> --target ssb64_frame_interp_test +# Run : ./ssb64_frame_interp_test (no args, no env needed) +# Exits with status 0 on PASS, 2 on any FAIL (logged to stdout). +# +# This target links only the recording/lerp layer + selftest + a small main +# with stub matrix builders. It does NOT pull in the game library or +# libultraship, so it builds in seconds and runs without ROM/assets. Perfect +# for CI smoke tests of the recording infrastructure. 
+################################################################################ +add_executable(ssb64_frame_interp_test + port/frame_interpolation.cpp + port/frame_interpolation_selftest.cpp + port/frame_interpolation_test_main.cpp +) +target_include_directories(ssb64_frame_interp_test PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/include + ${CMAKE_CURRENT_SOURCE_DIR}/libultraship/include + ${CMAKE_CURRENT_SOURCE_DIR}/port +) +target_compile_definitions(ssb64_frame_interp_test PRIVATE F3DEX_GBI_2=1) +set_target_properties(ssb64_frame_interp_test PROPERTIES + EXCLUDE_FROM_ALL TRUE + EXCLUDE_FROM_DEFAULT_BUILD TRUE +) + ################################################################################ # Platform-specific settings ################################################################################ diff --git a/port/frame_interpolation.cpp b/port/frame_interpolation.cpp new file mode 100644 index 00000000..a7d8445a --- /dev/null +++ b/port/frame_interpolation.cpp @@ -0,0 +1,594 @@ +/** + * frame_interpolation.cpp — see frame_interpolation.h for design notes. + * + * Two recordings are kept: previous_recording (frame N-1) and current_recording + * (frame N being built). A recording is a tree of Path nodes; each Path has: + * - children: map<(stable_ptr, sub_id), vector> // multiple instances OK + * - ops: ordered list of typed matrix-build records + * - items: interleaving order of children-vs-ops so replay is deterministic + * + * At interpolation time, walk both trees pairwise. For each leaf op pair, lerp + * the inputs (shortest-arc for angles, linear for translates/scales), invoke + * the corresponding *F builder against a temporary Mtx44f, and stash the + * result in the replacement map keyed by the *new* frame's destination Mtx*. + * + * If the old tree has no matching node at a given position, the new op is + * replayed at t=1 (no interpolation). This handles spawning actors gracefully. 
+ */ + +#include "frame_interpolation.h" + +#include +#include +#include +#include +#include +#include +#include + +#include /* MtxF, Mtx (libultraship side) */ + +/* The SSB64 *F matrix builders we replay during interpolation. These are + * defined in src/sys/matrix.c and produce a Mtx44f (= float[4][4]) which has + * the same byte layout as MtxF::mf. We declare the prototypes locally with C + * linkage so we don't have to pull the whole game's header soup into this TU. */ +extern "C" { + typedef float Mtx44f_t[4][4]; + /* LookAt is the GBI lighting struct used by syMatrixLookAtReflectF; for + * matrix-only reconstruction we use syMatrixLookAtF (no LookAt out param) + * which produces the same matrix bytes. */ + + void syMatrixScaF(Mtx44f_t *mf, float x, float y, float z); + void syMatrixTraF(Mtx44f_t *mf, float x, float y, float z); + + /* Camera reconstruction at lerp time. */ + void syMatrixLookAtF(Mtx44f_t *mf, + float eye_x, float eye_y, float eye_z, + float at_x, float at_y, float at_z, + float up_x, float up_y, float up_z); + void syMatrixPerspFastF(Mtx44f_t mf, unsigned short *persp_norm, + float fovy, float aspect, float near_, float far_, float scale); + + void syMatrixRotRF(Mtx44f_t *mf, float a, float x, float y, float z); + void syMatrixRotDF(Mtx44f_t *mf, float a, float x, float y, float z); + void syMatrixRotRpyRF(Mtx44f_t *mf, float r, float p, float y); + void syMatrixRotRpyDF(Mtx44f_t *mf, float r, float p, float y); + void syMatrixRotPyrRF(Mtx44f_t *mf, float r, float p, float y); + void syMatrixRotPyRF(Mtx44f_t *mf, float p, float y); + void syMatrixRotRpRF(Mtx44f_t *mf, float r, float p); + void syMatrixRotYawRF(Mtx44f_t *mf, float y); + void syMatrixRotPitchRF(Mtx44f_t *mf, float p); + + void syMatrixTraRotRF(Mtx44f_t *mf, float tx, float ty, float tz, float angle, float rx, float ry, float rz); + void syMatrixTraRotDF(Mtx44f_t *mf, float tx, float ty, float tz, float angle, float rx, float ry, float rz); + void syMatrixTraRotRScaF(Mtx44f_t 
*mf, float tx, float ty, float tz, float angle, float rx, float ry, float rz, float sx, float sy, float sz); + void syMatrixTraRotRpyRF(Mtx44f_t *mf, float tx, float ty, float tz, float r, float p, float y); + void syMatrixTraRotRpyDF(Mtx44f_t *mf, float tx, float ty, float tz, float r, float p, float y); + void syMatrixTraRotRpyRScaF(Mtx44f_t *mf, float tx, float ty, float tz, float r, float p, float y, float sx, float sy, float sz); + void syMatrixTraRotPyrRF(Mtx44f_t *mf, float tx, float ty, float tz, float r, float p, float y); + void syMatrixTraRotPyrRScaF(Mtx44f_t *mf, float tx, float ty, float tz, float r, float p, float y, float sx, float sy, float sz); + void syMatrixTraRotPyRF(Mtx44f_t *mf, float tx, float ty, float tz, float p, float y); + void syMatrixTraRotRpRF(Mtx44f_t *mf, float tx, float ty, float tz, float r, float p); + void syMatrixTraRotYawRF(Mtx44f_t *mf, float tx, float ty, float tz, float y); + void syMatrixTraRotPitchRF(Mtx44f_t *mf, float tx, float ty, float tz, float p); +} + +namespace { + +enum class Op : uint8_t { + OpenChild, + CloseChild, + + Tra, Sca, + RotR, RotD, + RotRpyR, RotRpyD, + RotPyrR, RotPyR, RotRpR, RotYawR, RotPitchR, + TraRotR, TraRotD, TraRotRSca, + TraRotRpyR, TraRotRpyD, TraRotRpyRSca, + TraRotPyrR, TraRotPyrRSca, + TraRotPyR, TraRotRpR, TraRotYawR, TraRotPitchR, + + F2L, F2LFixedW, + + /* Composite camera op: stored inputs are + * in[0..2] eye_xyz + * in[3..5] at_xyz + * in[6..8] up_xyz + * in[9..13] fovy, aspect, znear, zfar, scale + * 14 slots total — see in[14] sizing. */ + Camera, +}; + +/* All op payloads share a destination pointer (Mtx* cast to void*) and a + * variable-size float input pack. Inputs live in a flat 14-float array + * (in[]); F2L ops use a separate 16-float matrix snapshot (mtx[]). No count + * is stored: replay reads a fixed set of slots per Op, so we don't need + * per-op POD types. */ +struct OpData { + Op op; + void* dest; /* Mtx* destination (Op-specific; null for OpenChild/CloseChild) */ + /* For OpenChild, child_key/child_idx label this node in the parent's children map. 
*/ + const void* child_key_ptr; + int child_key_sub; + int child_idx; /* index within parent->children[key] */ + /* Inputs: at most 14 floats (Camera op uses 9 vector + 5 persp). */ + float in[14]; + /* For F2L ops: 16 floats holding the source Mtx44f. */ + float mtx[16]; +}; + +struct Path { + /* children grouped by (key,sub) label; each may have multiple instances + * (e.g. an actor that pushes the same scope twice in one frame). The key + * is the (id, sub_id) pair passed to RecordOpenChild. */ + std::map<std::pair<const void*, int>, std::vector<Path>> children; + /* All ops/children in order, so replay matches the recording's sequence. */ + std::vector<OpData> items; +}; + +struct Recording { + Path root; +}; + +bool g_is_recording = false; +bool g_dont_interp_camera = false; +Recording g_current; +Recording g_previous; +/* Stack of currently open scopes; back() is the Path that receives new ops. */ +std::vector<Path*> g_path_stack; +int g_last_op_count = 0; +int g_last_child_count = 0; + +/* Append a zeroed OpData of kind `op` to the innermost open Path and return + * it so the caller can fill in the inputs. Requires a non-empty + * g_path_stack. */ +inline OpData& append_op(Op op, void* dest) { + Path* p = g_path_stack.back(); + p->items.emplace_back(); + OpData& d = p->items.back(); + std::memset(&d, 0, sizeof(d)); + d.op = op; + d.dest = dest; + return d; +} + +/* -------------------------------------------------------------------------- */ +/* Lerp helpers */ +/* -------------------------------------------------------------------------- */ + +inline float lerp(float a, float b, float t) { return a + (b - a) * t; } + +/* Shortest-arc lerp on a radian angle. If the difference is more than ~PI/2 + * it's almost certainly a snap (teleport, animation cut), so we hold the new + * value to avoid swirly midpoints. 
*/ +inline float lerp_angle_rad(float a, float b, float t) { + constexpr float PI = 3.14159265358979323846f; + constexpr float TWO_PI = 2.0f * PI; + float d = std::fmod(b - a, TWO_PI); + if (d > PI) d -= TWO_PI; + else if (d < -PI) d += TWO_PI; + if (std::fabs(d) > PI * 0.5f) return b; /* snap */ + return a + d * t; +} + +inline float lerp_angle_deg(float a, float b, float t) { + float d = std::fmod(b - a, 360.0f); + if (d > 180.0f) d -= 360.0f; + else if (d < -180.0f) d += 360.0f; + if (std::fabs(d) > 90.0f) return b; + return a + d * t; +} + +/* -------------------------------------------------------------------------- */ +/* Replay one op into the replacement map at lerp factor t */ +/* -------------------------------------------------------------------------- */ + +inline void store_mtx44f(std::unordered_map& out, void* dest, const Mtx44f_t& mf) { + if (dest == nullptr) return; + MtxF& slot = out[reinterpret_cast(dest)]; + std::memcpy(slot.mf, mf, sizeof(Mtx44f_t)); +} + +void replay_op(std::unordered_map& out, + const OpData& oldOp, const OpData& newOp, float t) { + Mtx44f_t mf; + /* For ops with float inputs, lerp old.in[*] with new.in[*]; for F2L ops, + * lerp old.mtx[*] with new.mtx[*] element-wise. 
*/ + auto L = [&](int i) { return lerp(oldOp.in[i], newOp.in[i], t); }; + auto LR = [&](int i) { return lerp_angle_rad(oldOp.in[i], newOp.in[i], t); }; + auto LD = [&](int i) { return lerp_angle_deg(oldOp.in[i], newOp.in[i], t); }; + + switch (newOp.op) { + case Op::Tra: + syMatrixTraF(&mf, L(0), L(1), L(2)); + store_mtx44f(out, newOp.dest, mf); + break; + case Op::Sca: + syMatrixScaF(&mf, L(0), L(1), L(2)); + store_mtx44f(out, newOp.dest, mf); + break; + case Op::RotR: + syMatrixRotRF(&mf, LR(0), L(1), L(2), L(3)); + store_mtx44f(out, newOp.dest, mf); + break; + case Op::RotD: + syMatrixRotDF(&mf, LD(0), L(1), L(2), L(3)); + store_mtx44f(out, newOp.dest, mf); + break; + case Op::RotRpyR: + syMatrixRotRpyRF(&mf, LR(0), LR(1), LR(2)); + store_mtx44f(out, newOp.dest, mf); + break; + case Op::RotRpyD: + syMatrixRotRpyDF(&mf, LD(0), LD(1), LD(2)); + store_mtx44f(out, newOp.dest, mf); + break; + case Op::RotPyrR: + syMatrixRotPyrRF(&mf, LR(0), LR(1), LR(2)); + store_mtx44f(out, newOp.dest, mf); + break; + case Op::RotPyR: + syMatrixRotPyRF(&mf, LR(0), LR(1)); + store_mtx44f(out, newOp.dest, mf); + break; + case Op::RotRpR: + syMatrixRotRpRF(&mf, LR(0), LR(1)); + store_mtx44f(out, newOp.dest, mf); + break; + case Op::RotYawR: + syMatrixRotYawRF(&mf, LR(0)); + store_mtx44f(out, newOp.dest, mf); + break; + case Op::RotPitchR: + syMatrixRotPitchRF(&mf, LR(0)); + store_mtx44f(out, newOp.dest, mf); + break; + case Op::TraRotR: + syMatrixTraRotRF(&mf, L(0), L(1), L(2), LR(3), L(4), L(5), L(6)); + store_mtx44f(out, newOp.dest, mf); + break; + case Op::TraRotD: + syMatrixTraRotDF(&mf, L(0), L(1), L(2), LD(3), L(4), L(5), L(6)); + store_mtx44f(out, newOp.dest, mf); + break; + case Op::TraRotRSca: + syMatrixTraRotRScaF(&mf, L(0), L(1), L(2), LR(3), L(4), L(5), L(6), L(7), L(8), L(9)); + store_mtx44f(out, newOp.dest, mf); + break; + case Op::TraRotRpyR: + syMatrixTraRotRpyRF(&mf, L(0), L(1), L(2), LR(3), LR(4), LR(5)); + store_mtx44f(out, newOp.dest, mf); + break; + case 
Op::TraRotRpyD: + syMatrixTraRotRpyDF(&mf, L(0), L(1), L(2), LD(3), LD(4), LD(5)); + store_mtx44f(out, newOp.dest, mf); + break; + case Op::TraRotRpyRSca: + syMatrixTraRotRpyRScaF(&mf, L(0), L(1), L(2), LR(3), LR(4), LR(5), L(6), L(7), L(8)); + store_mtx44f(out, newOp.dest, mf); + break; + case Op::TraRotPyrR: + syMatrixTraRotPyrRF(&mf, L(0), L(1), L(2), LR(3), LR(4), LR(5)); + store_mtx44f(out, newOp.dest, mf); + break; + case Op::TraRotPyrRSca: + syMatrixTraRotPyrRScaF(&mf, L(0), L(1), L(2), LR(3), LR(4), LR(5), L(6), L(7), L(8)); + store_mtx44f(out, newOp.dest, mf); + break; + case Op::TraRotPyR: + syMatrixTraRotPyRF(&mf, L(0), L(1), L(2), LR(3), LR(4)); + store_mtx44f(out, newOp.dest, mf); + break; + case Op::TraRotRpR: + syMatrixTraRotRpRF(&mf, L(0), L(1), L(2), LR(3), LR(4)); + store_mtx44f(out, newOp.dest, mf); + break; + case Op::TraRotYawR: + syMatrixTraRotYawRF(&mf, L(0), L(1), L(2), LR(3)); + store_mtx44f(out, newOp.dest, mf); + break; + case Op::TraRotPitchR: + syMatrixTraRotPitchRF(&mf, L(0), L(1), L(2), LR(3)); + store_mtx44f(out, newOp.dest, mf); + break; + case Op::F2L: + case Op::F2LFixedW: { + for (int i = 0; i < 16; i++) { + reinterpret_cast(mf)[i] = lerp(oldOp.mtx[i], newOp.mtx[i], t); + } + store_mtx44f(out, newOp.dest, mf); + break; + } + case Op::Camera: { + /* Lerp inputs in their original domains (linear for everything; + * eye/at/up moves are not angular, even when the camera orbits). + * For the rare hard cut, the calling code calls + * FrameInterpolation_DontInterpolateCamera() and t is clamped to 1 + * so we degrade to "snap to current frame". 
*/ + float ex = lerp(oldOp.in[0], newOp.in[0], t); + float ey = lerp(oldOp.in[1], newOp.in[1], t); + float ez = lerp(oldOp.in[2], newOp.in[2], t); + float ax = lerp(oldOp.in[3], newOp.in[3], t); + float ay = lerp(oldOp.in[4], newOp.in[4], t); + float az = lerp(oldOp.in[5], newOp.in[5], t); + float ux = lerp(oldOp.in[6], newOp.in[6], t); + float uy = lerp(oldOp.in[7], newOp.in[7], t); + float uz = lerp(oldOp.in[8], newOp.in[8], t); + float fovy = lerp(oldOp.in[9], newOp.in[9], t); + float aspect = lerp(oldOp.in[10], newOp.in[10], t); + float znear = lerp(oldOp.in[11], newOp.in[11], t); + float zfar = lerp(oldOp.in[12], newOp.in[12], t); + float scale = lerp(oldOp.in[13], newOp.in[13], t); + + Mtx44f_t persp; + Mtx44f_t view; + unsigned short dummy_norm = 0; + syMatrixPerspFastF(persp, &dummy_norm, fovy, aspect, znear, zfar, scale); + syMatrixLookAtF(&view, ex, ey, ez, ax, ay, az, ux, uy, uz); + + /* composite = view * persp (matches gmcamera.c's guMtxCatF order). */ + Mtx44f_t composite; + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + float s = 0.0f; + for (int k = 0; k < 4; k++) { + s += view[i][k] * persp[k][j]; + } + composite[i][j] = s; + } + } + store_mtx44f(out, newOp.dest, composite); + break; + } + case Op::OpenChild: + case Op::CloseChild: + /* not a leaf */ + break; + } +} + +/* Replay newOp at t=1 (no old counterpart available — fresh actor). */ +void replay_op_solo(std::unordered_map& out, const OpData& newOp) { + /* Use the same op data as both old and new; t=1 collapses to new's inputs. */ + replay_op(out, newOp, newOp, 1.0f); +} + +/* -------------------------------------------------------------------------- */ +/* Tree walk */ +/* -------------------------------------------------------------------------- */ + +void interpolate_branch(std::unordered_map& out, + const Path* old_path, const Path* new_path, float t) { + /* Walk new_path's items and look for the same op at the same position in + * old_path's items. 
If positions match and ops match, lerp; otherwise + * replay solo at t=1. */ + const auto& newItems = new_path->items; + /* Empty stand-in used when there is no old path. Keeping both operands of + * the conditional lvalues of the same type makes the result an lvalue, so + * oldItems binds to old_path->items directly instead of to a temporary + * copy of the whole vector on every call. */ + const decltype(new_path->items) noOldItems{}; + const auto& oldItems = old_path ? old_path->items : noOldItems; + + /* Track per-(key,sub) child instance counter so OpenChild dispatches into + * the right vector slot. */ + std::map<std::pair<const void*, int>, size_t> new_child_cursor; + + for (size_t i = 0; i < newItems.size(); i++) { + const OpData& n = newItems[i]; + + if (n.op == Op::OpenChild) { + auto key = std::make_pair(n.child_key_ptr, n.child_key_sub); + size_t idx = new_child_cursor[key]++; + const Path* new_child = nullptr; + const Path* old_child = nullptr; + if (auto it = new_path->children.find(key); + it != new_path->children.end() && idx < it->second.size()) { + new_child = &it->second[idx]; + } + if (old_path) { + if (auto it = old_path->children.find(key); + it != old_path->children.end() && idx < it->second.size()) { + old_child = &it->second[idx]; + } + } + if (new_child) { + interpolate_branch(out, old_child, new_child, t); + } + /* CloseChild for this OpenChild appears later in items; we skip it + * because the recursive call drained the child's items already. */ + continue; + } + if (n.op == Op::CloseChild) { + continue; + } + + /* Try to find the same op at the same items[] index in the old recording. 
*/ + bool matched = false; + if (i < oldItems.size() && oldItems[i].op == n.op && oldItems[i].dest != nullptr) { + replay_op(out, oldItems[i], n, t); + matched = true; + } + if (!matched) { + replay_op_solo(out, n); + } + } +} + +} /* namespace */ + +/* -------------------------------------------------------------------------- */ +/* C API implementation */ +/* -------------------------------------------------------------------------- */ + +extern "C" { + +void FrameInterpolation_StartRecord(void) { + g_previous = std::move(g_current); + g_current = Recording{}; + g_path_stack.clear(); + g_path_stack.push_back(&g_current.root); + g_dont_interp_camera = false; + g_is_recording = true; +} + +void FrameInterpolation_StopRecord(void) { + g_is_recording = false; + /* Tally for diagnostics. */ + int ops = 0, children = 0; + /* Iterative tree walk. */ + std::vector stack; + stack.push_back(&g_current.root); + while (!stack.empty()) { + const Path* p = stack.back(); + stack.pop_back(); + for (const auto& kv : p->children) { + for (const auto& c : kv.second) { + children++; + stack.push_back(&c); + } + } + for (const auto& it : p->items) { + if (it.op != Op::OpenChild && it.op != Op::CloseChild) ops++; + } + } + g_last_op_count = ops; + g_last_child_count = children; +} + +void FrameInterpolation_DontInterpolateCamera(void) { + g_dont_interp_camera = true; +} + +int FrameInterpolation_IsRecording(void) { + return g_is_recording ? 1 : 0; +} + +int FrameInterpolation_GetLastOpCount(void) { return g_last_op_count; } +int FrameInterpolation_GetLastChildCount(void) { return g_last_child_count; } + +void FrameInterpolation_RecordOpenChild(const void *id, int sub_id) { + if (!g_is_recording) return; + auto key = std::make_pair(id, sub_id); + Path* parent = g_path_stack.back(); + auto& vec = parent->children[key]; + vec.emplace_back(); + Path* child = &vec.back(); + + /* Append a marker into items so replay can dispatch into this child at + * the right interleaving point. 
*/ + OpData& d = append_op(Op::OpenChild, nullptr); + d.child_key_ptr = id; + d.child_key_sub = sub_id; + d.child_idx = static_cast(vec.size() - 1); + + g_path_stack.push_back(child); +} + +void FrameInterpolation_RecordCloseChild(void) { + if (!g_is_recording) return; + if (g_path_stack.size() <= 1) return; /* unbalanced — drop on the floor */ + g_path_stack.pop_back(); + /* Marker in parent for replay-time book-keeping (currently unused but + * cheap to keep, and forms a sanity check). */ + append_op(Op::CloseChild, nullptr); +} + +/* Record helpers — pack the inputs and stash dest. */ + +#define REC1(OP, ...) \ + do { \ + if (!g_is_recording) return; \ + OpData& d = append_op(OP, dest); \ + float vals[] = { __VA_ARGS__ }; \ + for (size_t i = 0; i < sizeof(vals)/sizeof(vals[0]); i++) { \ + d.in[i] = vals[i]; \ + } \ + } while (0) + +void FrameInterpolation_RecordMatrixTra(void *dest, float x, float y, float z) { REC1(Op::Tra, x, y, z); } +void FrameInterpolation_RecordMatrixSca(void *dest, float x, float y, float z) { REC1(Op::Sca, x, y, z); } + +void FrameInterpolation_RecordMatrixRotR(void *dest, float a, float x, float y, float z) { REC1(Op::RotR, a, x, y, z); } +void FrameInterpolation_RecordMatrixRotD(void *dest, float a, float x, float y, float z) { REC1(Op::RotD, a, x, y, z); } +void FrameInterpolation_RecordMatrixRotRpyR(void *dest, float r, float p, float y) { REC1(Op::RotRpyR, r, p, y); } +void FrameInterpolation_RecordMatrixRotRpyD(void *dest, float r, float p, float y) { REC1(Op::RotRpyD, r, p, y); } +void FrameInterpolation_RecordMatrixRotPyrR(void *dest, float r, float p, float y) { REC1(Op::RotPyrR, r, p, y); } +void FrameInterpolation_RecordMatrixRotPyR(void *dest, float p, float y) { REC1(Op::RotPyR, p, y); } +void FrameInterpolation_RecordMatrixRotRpR(void *dest, float r, float p) { REC1(Op::RotRpR, r, p); } +void FrameInterpolation_RecordMatrixRotYawR(void *dest, float y) { REC1(Op::RotYawR, y); } +void 
FrameInterpolation_RecordMatrixRotPitchR(void *dest, float p) { REC1(Op::RotPitchR, p); } + +void FrameInterpolation_RecordMatrixTraRotR(void *dest, float tx, float ty, float tz, float a, float rx, float ry, float rz) { + REC1(Op::TraRotR, tx, ty, tz, a, rx, ry, rz); +} +void FrameInterpolation_RecordMatrixTraRotD(void *dest, float tx, float ty, float tz, float a, float rx, float ry, float rz) { + REC1(Op::TraRotD, tx, ty, tz, a, rx, ry, rz); +} +void FrameInterpolation_RecordMatrixTraRotRSca(void *dest, float tx, float ty, float tz, float a, float rx, float ry, float rz, float sx, float sy, float sz) { + REC1(Op::TraRotRSca, tx, ty, tz, a, rx, ry, rz, sx, sy, sz); +} +void FrameInterpolation_RecordMatrixTraRotRpyR(void *dest, float tx, float ty, float tz, float r, float p, float y) { + REC1(Op::TraRotRpyR, tx, ty, tz, r, p, y); +} +void FrameInterpolation_RecordMatrixTraRotRpyD(void *dest, float tx, float ty, float tz, float r, float p, float y) { + REC1(Op::TraRotRpyD, tx, ty, tz, r, p, y); +} +void FrameInterpolation_RecordMatrixTraRotRpyRSca(void *dest, float tx, float ty, float tz, float r, float p, float y, float sx, float sy, float sz) { + REC1(Op::TraRotRpyRSca, tx, ty, tz, r, p, y, sx, sy, sz); +} +void FrameInterpolation_RecordMatrixTraRotPyrR(void *dest, float tx, float ty, float tz, float r, float p, float y) { + REC1(Op::TraRotPyrR, tx, ty, tz, r, p, y); +} +void FrameInterpolation_RecordMatrixTraRotPyrRSca(void *dest, float tx, float ty, float tz, float r, float p, float y, float sx, float sy, float sz) { + REC1(Op::TraRotPyrRSca, tx, ty, tz, r, p, y, sx, sy, sz); +} +void FrameInterpolation_RecordMatrixTraRotPyR(void *dest, float tx, float ty, float tz, float p, float y) { + REC1(Op::TraRotPyR, tx, ty, tz, p, y); +} +void FrameInterpolation_RecordMatrixTraRotRpR(void *dest, float tx, float ty, float tz, float r, float p) { + REC1(Op::TraRotRpR, tx, ty, tz, r, p); +} +void FrameInterpolation_RecordMatrixTraRotYawR(void *dest, float tx, float ty, float 
tz, float y) { + REC1(Op::TraRotYawR, tx, ty, tz, y); +} +void FrameInterpolation_RecordMatrixTraRotPitchR(void *dest, float tx, float ty, float tz, float p) { + REC1(Op::TraRotPitchR, tx, ty, tz, p); +} + +#undef REC1 + +static void rec_f2l_impl(Op op, const void *src_mtx44f, void *dest) { + if (!g_is_recording) return; + OpData& d = append_op(op, dest); + /* Mtx44f is float[4][4] = 16 floats, same layout as MtxF::mf. */ + if (src_mtx44f != nullptr) { + std::memcpy(d.mtx, src_mtx44f, sizeof(d.mtx)); + } +} +void FrameInterpolation_RecordMatrixF2L(const void *src, void *dest) { rec_f2l_impl(Op::F2L, src, dest); } +void FrameInterpolation_RecordMatrixF2LFixedW(const void *src, void *dest) { rec_f2l_impl(Op::F2LFixedW, src, dest); } + +void FrameInterpolation_RecordCamera(void *dest, + float ex, float ey, float ez, + float ax, float ay, float az, + float ux, float uy, float uz, + float fovy, float aspect, float znear, float zfar, float scale) +{ + if (!g_is_recording) return; + OpData& d = append_op(Op::Camera, dest); + d.in[0] = ex; d.in[1] = ey; d.in[2] = ez; + d.in[3] = ax; d.in[4] = ay; d.in[5] = az; + d.in[6] = ux; d.in[7] = uy; d.in[8] = uz; + d.in[9] = fovy; + d.in[10] = aspect; + d.in[11] = znear; + d.in[12] = zfar; + d.in[13] = scale; +} + +} /* extern "C" */ + +/* -------------------------------------------------------------------------- */ +/* C++ API */ +/* -------------------------------------------------------------------------- */ + +std::unordered_map FrameInterpolation_Interpolate(float t) { + std::unordered_map out; + if (g_dont_interp_camera) { + /* Caller's responsibility to map specific Mtx* to identity replacement + * if needed; for v1 this flag just disables the global lerp by + * clamping t to 1 (no interpolation = current frame). 
*/ + t = 1.0f; + } + interpolate_branch(out, &g_previous.root, &g_current.root, t); + return out; +} diff --git a/port/frame_interpolation.h b/port/frame_interpolation.h new file mode 100644 index 00000000..548ad878 --- /dev/null +++ b/port/frame_interpolation.h @@ -0,0 +1,156 @@ +#pragma once + +/** + * frame_interpolation — display-rate decoupling for the SSB64 PC port. + * + * Adapted from Shipwright's soh/frame_interpolation. Game logic still ticks + * at the original 30 Hz; the renderer ticks at the user's chosen rate, and + * intermediate display frames are produced by interpolating the GBI matrices + * built by syMatrix*() between the previous and current logic ticks. + * + * The recording layer instruments matrix.c primitives to capture inputs into + * a tree of Path nodes. Tree nodes are scoped by (stable_ptr, sub_id) labels + * via FrameInterpolation_RecordOpenChild/CloseChild — these are typically + * inserted around per-DObj/per-particle/per-camera draw scopes so the diff + * between two recordings can match logically-equivalent matrices even when + * the destination Mtx* pointer is recycled or the actor list changes. + * + * The C API takes void* for matrix pointers so the header has no + * game-type dependency and can be included from any .c file. + */ + +#ifdef __cplusplus +#include +#include // MtxF +typedef MtxF FrameInterpReplacementMtxF; +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* -------------------------------------------------------------------------- */ +/* Recording lifecycle */ +/* -------------------------------------------------------------------------- */ + +/* Begin recording a new frame's matrix-build tree. Called by the port harness + * just before the game's display-list build begins (frame N's logic). The + * previous recording is moved aside as the "old" reference for interpolation. */ +void FrameInterpolation_StartRecord(void); + +/* End recording. Called after the game's DL build is complete. 
*/ +void FrameInterpolation_StopRecord(void); + +/* Hint the harness that the camera was just cut/teleported — interpolation + * should not lerp the camera matrix this frame. Mirrors Shipwright's + * DontInterpolateCamera. Optional — call from cutscene transitions. */ +void FrameInterpolation_DontInterpolateCamera(void); + +/* Returns 1 if recording is currently active, 0 otherwise. + * Cheap — used to short-circuit Record* calls from hot paths. */ +int FrameInterpolation_IsRecording(void); + +/* -------------------------------------------------------------------------- */ +/* Tree scoping */ +/* -------------------------------------------------------------------------- */ + +/* Push a labeled child node onto the tree. The (id, sub_id) pair forms the + * node's identity — typically id is a stable pointer (DObj*, GObj*, CObj*) + * and sub_id distinguishes multiple matrix scopes within the same object. */ +void FrameInterpolation_RecordOpenChild(const void *id, int sub_id); +void FrameInterpolation_RecordCloseChild(void); + +/* -------------------------------------------------------------------------- */ +/* Matrix-build ops — one per syMatrix*(Mtx*, ...) primitive in src/sys/matrix.c */ +/* -------------------------------------------------------------------------- */ + +/* dest is Mtx* — the GBI matrix the result lands in. void* in the API to + * keep the header game-type-agnostic. */ + +/* Direct fixed-point writes (no Mtx44f intermediate) */ +void FrameInterpolation_RecordMatrixTra(void *dest, float x, float y, float z); +void FrameInterpolation_RecordMatrixSca(void *dest, float x, float y, float z); + +/* Float-domain rotation builders — recorded so we can lerp the input angles + * (shortest-arc) rather than lerp the resulting basis vectors element-wise. + * The dest is the final Mtx* after the internal F2L. 
*/ +void FrameInterpolation_RecordMatrixRotR(void *dest, float a, float x, float y, float z); +void FrameInterpolation_RecordMatrixRotD(void *dest, float a, float x, float y, float z); +void FrameInterpolation_RecordMatrixRotRpyR(void *dest, float r, float p, float y); +void FrameInterpolation_RecordMatrixRotRpyD(void *dest, float r, float p, float y); +void FrameInterpolation_RecordMatrixRotPyrR(void *dest, float r, float p, float y); +void FrameInterpolation_RecordMatrixRotPyR(void *dest, float p, float y); +void FrameInterpolation_RecordMatrixRotRpR(void *dest, float r, float p); +void FrameInterpolation_RecordMatrixRotYawR(void *dest, float y); +void FrameInterpolation_RecordMatrixRotPitchR(void *dest, float p); + +void FrameInterpolation_RecordMatrixTraRotR(void *dest, float tx, float ty, float tz, float a, float rx, float ry, float rz); +void FrameInterpolation_RecordMatrixTraRotD(void *dest, float tx, float ty, float tz, float a, float rx, float ry, float rz); +void FrameInterpolation_RecordMatrixTraRotRSca(void *dest, float tx, float ty, float tz, float a, float rx, float ry, float rz, float sx, float sy, float sz); +void FrameInterpolation_RecordMatrixTraRotRpyR(void *dest, float tx, float ty, float tz, float r, float p, float y); +void FrameInterpolation_RecordMatrixTraRotRpyD(void *dest, float tx, float ty, float tz, float r, float p, float y); +void FrameInterpolation_RecordMatrixTraRotRpyRSca(void *dest, float tx, float ty, float tz, float r, float p, float y, float sx, float sy, float sz); +void FrameInterpolation_RecordMatrixTraRotPyrR(void *dest, float tx, float ty, float tz, float r, float p, float y); +void FrameInterpolation_RecordMatrixTraRotPyrRSca(void *dest, float tx, float ty, float tz, float r, float p, float y, float sx, float sy, float sz); +void FrameInterpolation_RecordMatrixTraRotPyR(void *dest, float tx, float ty, float tz, float p, float y); +void FrameInterpolation_RecordMatrixTraRotRpR(void *dest, float tx, float ty, float tz, 
float r, float p); +void FrameInterpolation_RecordMatrixTraRotYawR(void *dest, float tx, float ty, float tz, float y); +void FrameInterpolation_RecordMatrixTraRotPitchR(void *dest, float tx, float ty, float tz, float p); + +/* Catch-all: snapshot a Mtx44f source and the Mtx* destination at F2L time. + * Used for non-primitive composition paths. Interpolation lerps the source + * matrix elements; this is incorrect for view/rotation matrices because the + * lerp of two rotation bases isn't itself a rotation. Prefer RecordCamera() + * or input-domain primitives whenever the inputs are available. */ +void FrameInterpolation_RecordMatrixF2L(const void *src_mtx44f, void *dest); +void FrameInterpolation_RecordMatrixF2LFixedW(const void *src_mtx44f, void *dest); + +/* Camera composite — input-domain record for view*projection matrices. + * + * Captures the eye/at/up vectors and perspective parameters that produced + * the final GBI projection matrix. At lerp time we lerp the *inputs* + * (eye/at/up linearly, perspective params linearly), rebuild lookat_F and + * persp_F, multiply them, and write the result as the replacement MtxF. + * + * Compared to RecordMatrixF2L on the composite, this preserves rigid-body + * motion of the camera — no warping, no doubling artefacts when the camera + * is panning or dollying. */ +void FrameInterpolation_RecordCamera(void *dest, + float ex, float ey, float ez, + float ax, float ay, float az, + float ux, float uy, float uz, + float fovy, float aspect, float znear, float zfar, float scale); + +/* -------------------------------------------------------------------------- */ +/* Diagnostics */ +/* -------------------------------------------------------------------------- */ + +/* Returns counts from the most recent finished recording. For self-tests. */ +int FrameInterpolation_GetLastOpCount(void); +int FrameInterpolation_GetLastChildCount(void); + +/* Self-test hooks (see frame_interpolation_selftest.cpp). 
+ *
+ * RunSelfTestIfRequested: invoke at boot. If env SSB64_FRAME_INTERP_UNITTEST
+ * is set, runs offline unit tests against the recording API and exits with
+ * status 2 on failure (so CI catches breakage).
+ *
+ * TelemetryTick: invoke once per game frame. If env SSB64_FRAME_INTERP_TELEMETRY
+ * is set, logs op/child counts every ~60 ticks. No-op otherwise. */
+void FrameInterpolation_RunSelfTestIfRequested(void);
+void FrameInterpolation_TelemetryTick(void);
+
+#ifdef __cplusplus
+} // extern "C"
+
+/* C++-only: produce the matrix-replacement map for an intermediate frame.
+ * t in [0, 1] where 0 = previous game tick, 1 = current game tick.
+ *
+ * Self-test: at t = 1.0 the returned MtxF for any matrix should match the
+ * value that the game's *real* Mtx* would unpack to (modulo fixed-point
+ * round-trip), because the inputs being replayed are the current frame's
+ * inputs. The driver should opt out of replacement on the t=1 frame for
+ * efficiency and to keep this property bit-exact. */
+std::unordered_map FrameInterpolation_Interpolate(float t);
+
+#endif
diff --git a/port/frame_interpolation_selftest.cpp b/port/frame_interpolation_selftest.cpp
new file mode 100644
index 00000000..ab8b70c9
--- /dev/null
+++ b/port/frame_interpolation_selftest.cpp
@@ -0,0 +1,329 @@
+/**
+ * frame_interpolation_selftest.cpp — sanity tests for the recording/lerp layer.
+ *
+ * Two self-test modes:
+ *
+ *   SSB64_FRAME_INTERP_UNITTEST=1
+ *     Run unit tests at boot (before the game starts). They exercise the
+ *     recording API in isolation and assert that lerp produces the expected
+ *     interpolated matrices. Logs PASS/FAIL to port_log; on FAIL, the process
+ *     exits with status 2 so CI can detect breakage.
+ *
+ *   SSB64_FRAME_INTERP_TELEMETRY=1
+ *     Emit periodic telemetry (op count and child count of the most recent
+ *     recording) to port_log so a human can sanity-check that recording is
+ *     live when the game is running. Lightweight (one log line per ~60 frames).
+ *
+ * The tests are written to be hermetic: they don't touch the game's display
+ * list or the renderer. They invoke FrameInterpolation_* directly with stub
+ * matrix pointers and verify the post-Interpolate replacement map.
+ *
+ * Why this lives in its own TU: the unit-test code references syMatrix*F
+ * builders to compute expected reference matrices. Linking them in is fine
+ * for any TU that goes into the final binary, but we don't want to bloat
+ * frame_interpolation.cpp's symbol surface for non-test builds.
+ */
+
+#include "frame_interpolation.h"
+#include "port_log.h"
+
+#include /* MtxF */
+
+#include
+#include
+#include
+#include
+
+extern "C" {
+    typedef float Mtx44f_t[4][4];
+    void syMatrixTraF(Mtx44f_t *mf, float x, float y, float z);
+}
+
+namespace {
+
+/* Tallies for the current test pass; updated only by EXPECT below. */
+bool g_failed = false;
+int g_pass = 0;
+int g_fail = 0;
+
+/* EXPECT(cond, fmt, ...): counts a pass when cond holds. Otherwise it formats
+ * the printf-style message, logs a FAIL line carrying file, line and the
+ * stringified condition, and marks the run as failed. It never aborts, so
+ * the checks that follow still execute. */
+#define EXPECT(cond, ...) \
+    do { \
+        if (!(cond)) { \
+            g_failed = true; \
+            g_fail++; \
+            char _buf[512]; \
+            std::snprintf(_buf, sizeof(_buf), __VA_ARGS__); \
+            port_log("SSB64: FRAME_INTERP_SELFTEST FAIL [%s:%d] %s: %s\n", \
+                     __FILE__, __LINE__, #cond, _buf); \
+        } else { \
+            g_pass++; \
+        } \
+    } while (0)
+
+/* Approximate float equality: true when |a - b| <= eps. */
+bool feq(float a, float b, float eps = 1e-5f) {
+    return std::fabs(a - b) <= eps;
+}
+
+/* ------------------------------------------------------------------------ */
+/* Test 1 — recording structure: op count and child count */
+/* ------------------------------------------------------------------------ */
+
+void test_record_counts() {
+    /* dummy Mtx storage; the API only needs a stable pointer */
+    static int dummy_mtx_a, dummy_mtx_b, dummy_mtx_c;
+
+    FrameInterpolation_StartRecord();
+
+    /* 2 children, 3 ops total (1 in root, 2 inside children) */
+    FrameInterpolation_RecordMatrixTra(&dummy_mtx_a, 1.0f, 2.0f, 3.0f);
+
+    FrameInterpolation_RecordOpenChild((const void *)0x1, 0);
+    FrameInterpolation_RecordMatrixTra(&dummy_mtx_b, 4.0f, 5.0f, 6.0f);
+    FrameInterpolation_RecordCloseChild();
+
+    FrameInterpolation_RecordOpenChild((const void *)0x2, 0);
+    FrameInterpolation_RecordMatrixTra(&dummy_mtx_c, 7.0f, 8.0f, 9.0f);
+    FrameInterpolation_RecordCloseChild();
+
+    FrameInterpolation_StopRecord();
+
+    EXPECT(FrameInterpolation_GetLastOpCount() == 3,
+           "expected 3 ops, got %d", FrameInterpolation_GetLastOpCount());
+    EXPECT(FrameInterpolation_GetLastChildCount() == 2,
+           "expected 2 children, got %d", FrameInterpolation_GetLastChildCount());
+}
+
+/* ------------------------------------------------------------------------ */
+/* Test 2 — identity lerp: same inputs both frames -> same output */
+/* ------------------------------------------------------------------------ */
+
+void test_identity_lerp() {
+    static int dest_storage;
+    Mtx *dest = reinterpret_cast(&dest_storage);
+
+    /* Frame N-1 */
+    FrameInterpolation_StartRecord();
+    FrameInterpolation_RecordMatrixTra(dest, 5.0f, 7.0f, 11.0f);
+    FrameInterpolation_StopRecord();
+
+    /* Frame N — identical inputs, same dest */
+    FrameInterpolation_StartRecord();
+    FrameInterpolation_RecordMatrixTra(dest, 5.0f, 7.0f, 11.0f);
+    FrameInterpolation_StopRecord();
+
+    auto repl = FrameInterpolation_Interpolate(0.5f);
+    EXPECT(repl.size() == 1, "expected 1 replacement, got %zu", repl.size());
+    auto it = repl.find(dest);
+    EXPECT(it != repl.end(), "destination not found in replacement map");
+    if (it != repl.end()) {
+        const float *mf = &it->second.mf[0][0];
+        /* Reference: build the expected translate matrix (5, 7, 11) directly
+         * with the same builder, then compare all 16 elements. */
+        Mtx44f_t ref;
+        syMatrixTraF(&ref, 5.0f, 7.0f, 11.0f);
+        for (int i = 0; i < 16; i++) {
+            EXPECT(feq(mf[i], reinterpret_cast(ref)[i]),
+                   "identity lerp mismatch at element %d: got %f, expected %f",
+                   i, mf[i], reinterpret_cast(ref)[i]);
+        }
+    }
+}
+
+/* ------------------------------------------------------------------------ */
+/* Test 3 — t=0.5 lerp between two distinct translations */
+/* ------------------------------------------------------------------------ */
+
+void test_midpoint_lerp() {
+    static int dest_storage;
+    Mtx *dest = reinterpret_cast(&dest_storage);
+
+    FrameInterpolation_StartRecord();
+    FrameInterpolation_RecordMatrixTra(dest, 0.0f, 0.0f, 0.0f);
+    FrameInterpolation_StopRecord();
+
+    FrameInterpolation_StartRecord();
+    FrameInterpolation_RecordMatrixTra(dest, 10.0f, 20.0f, 40.0f);
+    FrameInterpolation_StopRecord();
+
+    auto repl = FrameInterpolation_Interpolate(0.5f);
+    auto it = repl.find(dest);
+    EXPECT(it != repl.end(), "destination not in replacement map");
+    if (it != repl.end()) {
+        /* Translate row in syMatrixTraF lives at mf[3][0..2]. */
+        EXPECT(feq(it->second.mf[3][0], 5.0f), "tx midpoint: got %f", it->second.mf[3][0]);
+        EXPECT(feq(it->second.mf[3][1], 10.0f), "ty midpoint: got %f", it->second.mf[3][1]);
+        EXPECT(feq(it->second.mf[3][2], 20.0f), "tz midpoint: got %f", it->second.mf[3][2]);
+        EXPECT(feq(it->second.mf[3][3], 1.0f), "homogeneous w must be 1: got %f", it->second.mf[3][3]);
+        /* And the upper-left should be identity (no rotation/scale).
*/
+        EXPECT(feq(it->second.mf[0][0], 1.0f), "diagonal [0,0]");
+        EXPECT(feq(it->second.mf[1][1], 1.0f), "diagonal [1,1]");
+        EXPECT(feq(it->second.mf[2][2], 1.0f), "diagonal [2,2]");
+        EXPECT(feq(it->second.mf[0][1], 0.0f), "off-diagonal [0,1]");
+    }
+}
+
+/* ------------------------------------------------------------------------ */
+/* Test 4 — t=1.0 fidelity: replacement matches new frame's matrix exactly */
+/* ------------------------------------------------------------------------ */
+
+void test_endpoint_fidelity() {
+    static int dest_storage;
+    Mtx *dest = reinterpret_cast(&dest_storage);
+
+    FrameInterpolation_StartRecord();
+    FrameInterpolation_RecordMatrixTra(dest, 1.0f, 1.0f, 1.0f);
+    FrameInterpolation_StopRecord();
+
+    FrameInterpolation_StartRecord();
+    FrameInterpolation_RecordMatrixTra(dest, 2.0f, 4.0f, 8.0f);
+    FrameInterpolation_StopRecord();
+
+    auto repl = FrameInterpolation_Interpolate(1.0f);
+    auto it = repl.find(dest);
+    EXPECT(it != repl.end(), "dest not in replacement map");
+    if (it != repl.end()) {
+        /* Reference is the current frame's translate, built with the same
+         * builder; at t=1 all 16 elements must agree. */
+        Mtx44f_t ref;
+        syMatrixTraF(&ref, 2.0f, 4.0f, 8.0f);
+        const float *got = &it->second.mf[0][0];
+        /* Named `want` rather than `exp` so it does not shadow the <cmath>
+         * exponential function. */
+        const float *want = reinterpret_cast(ref);
+        for (int i = 0; i < 16; i++) {
+            EXPECT(feq(got[i], want[i]),
+                   "t=1.0 fidelity element %d: got %f, expected %f", i, got[i], want[i]);
+        }
+    }
+}
+
+/* ------------------------------------------------------------------------ */
+/* Test 5 — angle wrap: lerp from 350° to 10° should go forward through 0° */
+/* ------------------------------------------------------------------------ */
+
+void test_angle_wrap() {
+    /* Use RotYawR which takes a single radian angle. */
+    static int dest_storage;
+    Mtx *dest = reinterpret_cast(&dest_storage);
+
+    /* 350° = 6.10865238... radians */
+    FrameInterpolation_StartRecord();
+    FrameInterpolation_RecordMatrixRotYawR(dest, 6.108652f);
+    FrameInterpolation_StopRecord();
+
+    /* 10° = 0.17453292... radians */
+    FrameInterpolation_StartRecord();
+    FrameInterpolation_RecordMatrixRotYawR(dest, 0.174533f);
+    FrameInterpolation_StopRecord();
+
+    auto repl = FrameInterpolation_Interpolate(0.5f);
+    auto it = repl.find(dest);
+    EXPECT(it != repl.end(), "dest not in replacement map");
+    if (it != repl.end()) {
+        /* Naive lerp would land near pi (~3.14), short-arc lerp lands near 0
+         * (the matrix should be ~identity for yaw). cos(0)=1 should appear in
+         * the [0][0] and [1][1] of a yaw matrix. */
+        float c = it->second.mf[0][0];
+        EXPECT(c > 0.95f,
+               "short-arc lerp failed: cos(yaw_lerped) = %f, expected near 1.0 "
+               "(naive lerp would give cos(pi) = -1)", c);
+    }
+}
+
+/* ------------------------------------------------------------------------ */
+/* Test 6 — mismatched op count: new actor appears in frame N */
+/* ------------------------------------------------------------------------ */
+
+void test_actor_appears() {
+    static int dest_a_storage, dest_b_storage;
+    Mtx *dest_a = reinterpret_cast(&dest_a_storage);
+    Mtx *dest_b = reinterpret_cast(&dest_b_storage);
+
+    /* Frame N-1: only actor A */
+    FrameInterpolation_StartRecord();
+    FrameInterpolation_RecordOpenChild((const void *)0xA, 0);
+    FrameInterpolation_RecordMatrixTra(dest_a, 1.0f, 2.0f, 3.0f);
+    FrameInterpolation_RecordCloseChild();
+    FrameInterpolation_StopRecord();
+
+    /* Frame N: actor A still there, actor B newly spawned */
+    FrameInterpolation_StartRecord();
+    FrameInterpolation_RecordOpenChild((const void *)0xA, 0);
+    FrameInterpolation_RecordMatrixTra(dest_a, 5.0f, 6.0f, 7.0f);
+    FrameInterpolation_RecordCloseChild();
+    FrameInterpolation_RecordOpenChild((const void *)0xB, 0);
+    FrameInterpolation_RecordMatrixTra(dest_b, 100.0f, 200.0f, 300.0f);
+    FrameInterpolation_RecordCloseChild();
+    FrameInterpolation_StopRecord();
+
+    auto repl = FrameInterpolation_Interpolate(0.5f);
+    EXPECT(repl.size() == 2, "expected 2 replacements, got %zu", repl.size());
+
+    /* Actor A: should be midpoint
of {1,2,3} and {5,6,7} = {3,4,5}. */
+    auto it_a = repl.find(dest_a);
+    EXPECT(it_a != repl.end(), "actor A not in map");
+    if (it_a != repl.end()) {
+        EXPECT(feq(it_a->second.mf[3][0], 3.0f), "A.tx: got %f", it_a->second.mf[3][0]);
+        EXPECT(feq(it_a->second.mf[3][1], 4.0f), "A.ty: got %f", it_a->second.mf[3][1]);
+    }
+    /* Actor B: no old counterpart -> replay solo at t=1, so {100,200,300}. */
+    auto it_b = repl.find(dest_b);
+    EXPECT(it_b != repl.end(), "actor B not in map");
+    if (it_b != repl.end()) {
+        EXPECT(feq(it_b->second.mf[3][0], 100.0f), "B.tx (solo): got %f", it_b->second.mf[3][0]);
+        EXPECT(feq(it_b->second.mf[3][1], 200.0f), "B.ty (solo): got %f", it_b->second.mf[3][1]);
+    }
+}
+
+/* Resets the pass/fail tallies, runs every test in order, then logs a
+ * one-line summary. It does not exit; the caller inspects g_failed. */
+void run_all() {
+    port_log("SSB64: FRAME_INTERP_SELFTEST starting unit tests\n");
+    g_failed = false;
+    g_pass = 0;
+    g_fail = 0;
+
+    test_record_counts();
+    test_identity_lerp();
+    test_midpoint_lerp();
+    test_endpoint_fidelity();
+    test_angle_wrap();
+    test_actor_appears();
+
+    port_log("SSB64: FRAME_INTERP_SELFTEST results: %d passed, %d failed\n",
+             g_pass, g_fail);
+}
+
+} /* namespace */
+
+/* Runs the unit tests when SSB64_FRAME_INTERP_UNITTEST is set to a value that
+ * is non-empty and does not start with '0'. On any failure it closes the log
+ * and exits the process with status 2; otherwise it returns normally. */
+extern "C" void FrameInterpolation_RunSelfTestIfRequested(void)
+{
+    const char *e = std::getenv("SSB64_FRAME_INTERP_UNITTEST");
+    if (e == nullptr || e[0] == '\0' || e[0] == '0') {
+        return;
+    }
+    run_all();
+    if (g_failed) {
+        port_log("SSB64: SELFTEST failed -- exiting with status 2\n");
+        port_log_close();
+        std::exit(2);
+    }
+    port_log("SSB64: SELFTEST passed\n");
+}
+
+/* Lightweight per-frame telemetry for the running game. Called from
+ * gameloop.cpp's PortPushFrame. Logs the last recording's op and child
+ * counts once every 60 calls (about once per second at 60 ticks/s). The
+ * env var is read only on the first call and the result is cached. */
+extern "C" void FrameInterpolation_TelemetryTick(void)
+{
+    static int sEnabled = -1; /* -1 = env var not checked yet */
+    if (sEnabled < 0) {
+        const char *e = std::getenv("SSB64_FRAME_INTERP_TELEMETRY");
+        sEnabled = (e != nullptr && e[0] != '\0' && e[0] != '0') ? 1 : 0;
+        if (sEnabled) {
+            port_log("SSB64: FRAME_INTERP_TELEMETRY enabled\n");
+        }
+    }
+    if (!sEnabled) return;
+
+    static int sCounter = 0;
+    if (++sCounter < 60) return;
+    sCounter = 0;
+    port_log("SSB64: frame_interp telemetry: ops=%d children=%d\n",
+             FrameInterpolation_GetLastOpCount(),
+             FrameInterpolation_GetLastChildCount());
+}
diff --git a/port/frame_interpolation_test_main.cpp b/port/frame_interpolation_test_main.cpp
new file mode 100644
index 00000000..c21033e0
--- /dev/null
+++ b/port/frame_interpolation_test_main.cpp
@@ -0,0 +1,99 @@
+/* Standalone offline runner for FrameInterpolation_RunSelfTestIfRequested.
+ *
+ * Built on demand via the CMake target ssb64_frame_interp_test (excluded from
+ * the default build). The binary runs the unit tests without launching the
+ * game — useful for CI or for hacking on the recording layer in isolation.
+ *
+ * Provides minimal stubs for syMatrix*F builders so we don't have to link
+ * the whole game library. Some builders (e.g. translate, scale, yaw) do real
+ * math; the rest return identity. Flesh those out if test_* functions grow to use them.
+ */
+
+#include
+#include
+#include
+#include
+
+extern "C" {
+    typedef float Mtx44f_t[4][4];
+
+    /* port_log used by frame_interpolation_selftest.cpp. This stand-in
+     * forwards the printf-style arguments to stdout. */
+    void port_log(const char *fmt, ...) {
+        va_list ap;
+        va_start(ap, fmt);
+        std::vprintf(fmt, ap);
+        va_end(ap);
+    }
+    /* Nothing to close in the standalone runner; just flush stdout. */
+    void port_log_close(void) { std::fflush(stdout); }
+
+    /* All syMatrix*F builders the recording layer might invoke. The tests
+     * only exercise a subset; unused stubs are still listed so the linker
+     * resolves any reference inside frame_interpolation.cpp. */
+    static void identity(Mtx44f_t *mf) {
+        for (int i = 0; i < 4; i++)
+            for (int j = 0; j < 4; j++)
+                (*mf)[i][j] = (i == j) ?
1.0f : 0.0f;
+    }
+
+    /* Translate stub: identity with (x, y, z) written to row 3. */
+    void syMatrixTraF(Mtx44f_t *mf, float x, float y, float z) {
+        identity(mf);
+        (*mf)[3][0] = x; (*mf)[3][1] = y; (*mf)[3][2] = z;
+    }
+    /* Scale stub: identity with (x, y, z) on the diagonal. */
+    void syMatrixScaF(Mtx44f_t *mf, float x, float y, float z) {
+        identity(mf);
+        (*mf)[0][0] = x; (*mf)[1][1] = y; (*mf)[2][2] = z;
+    }
+    /* Axis-angle stub: the axis arguments are ignored; the angle is always
+     * applied to the upper-left 2x2. Good enough for tests that only vary
+     * the angle. */
+    void syMatrixRotRF(Mtx44f_t *mf, float a, float, float, float) {
+        identity(mf);
+        float c = std::cos(a), s = std::sin(a);
+        (*mf)[0][0] = c; (*mf)[0][1] = s;
+        (*mf)[1][0] = -s; (*mf)[1][1] = c;
+    }
+    /* Degree variant: converts to radians and defers to the RotR stub. */
+    void syMatrixRotDF(Mtx44f_t *mf, float a, float x, float y, float z) {
+        syMatrixRotRF(mf, a * 0.0174533f, x, y, z);
+    }
+    void syMatrixRotRpyRF(Mtx44f_t *mf, float, float, float) { identity(mf); }
+    void syMatrixRotRpyDF(Mtx44f_t *mf, float, float, float) { identity(mf); }
+    void syMatrixRotPyrRF(Mtx44f_t *mf, float, float, float) { identity(mf); }
+    void syMatrixRotPyRF(Mtx44f_t *mf, float, float) { identity(mf); }
+    void syMatrixRotRpRF(Mtx44f_t *mf, float, float) { identity(mf); }
+    /* RotYawR stub: writes cos/sin into the upper-left 2x2 (rows/cols 0-1,
+     * i.e. the X-Y plane), like the RotR stub above. The angle-wrap test
+     * inspects mf[0][0], which equals cos(yaw).
+     * NOTE(review): confirm this layout against the real syMatrixRotYawRF. */
+    void syMatrixRotYawRF(Mtx44f_t *mf, float y) {
+        identity(mf);
+        float c = std::cos(y), s = std::sin(y);
+        (*mf)[0][0] = c; (*mf)[0][1] = s;
+        (*mf)[1][0] = -s; (*mf)[1][1] = c;
+    }
+    void syMatrixRotPitchRF(Mtx44f_t *mf, float) { identity(mf); }
+
+    /* Composite builders: placeholders that ignore every input and return
+     * identity. They exist only so the link succeeds. */
+    void syMatrixTraRotRF(Mtx44f_t *mf, float, float, float, float, float, float, float) { identity(mf); }
+    void syMatrixTraRotDF(Mtx44f_t *mf, float, float, float, float, float, float, float) { identity(mf); }
+    void syMatrixTraRotRScaF(Mtx44f_t *mf, float, float, float, float, float, float, float, float, float, float) { identity(mf); }
+    void syMatrixTraRotRpyRF(Mtx44f_t *mf, float, float, float, float, float, float) { identity(mf); }
+    void syMatrixTraRotRpyDF(Mtx44f_t *mf, float, float, float, float, float, float) { identity(mf); }
+    void syMatrixTraRotRpyRScaF(Mtx44f_t *mf, float, float, float, float, float, float, float, float, float) { identity(mf); }
+    void syMatrixTraRotPyrRF(Mtx44f_t *mf, float, float, float, float, float, float) { identity(mf); }
+    void syMatrixTraRotPyrRScaF(Mtx44f_t *mf, float, float, float, float, float, float, float, float, float) { identity(mf); }
+    void syMatrixTraRotPyRF(Mtx44f_t *mf, float, float, float, float, float) { identity(mf); }
+    void syMatrixTraRotRpRF(Mtx44f_t *mf, float, float, float, float, float) { identity(mf); }
+    void syMatrixTraRotYawRF(Mtx44f_t *mf, float, float, float, float) { identity(mf); }
+    void syMatrixTraRotPitchRF(Mtx44f_t *mf, float, float, float, float) { identity(mf); }
+
+    /* Public test entry point declared in frame_interpolation.h. */
+    void FrameInterpolation_RunSelfTestIfRequested(void);
+}
+
+#include
+
+int main(int argc, char **argv) {
+    /* Force the env var so the runner enters the test path. */
+#ifdef _WIN32
+    _putenv_s("SSB64_FRAME_INTERP_UNITTEST", "1");
+#else
+    setenv("SSB64_FRAME_INTERP_UNITTEST", "1", 1);
+#endif
+    FrameInterpolation_RunSelfTestIfRequested();
+    /* If we reach here, all tests passed (failures call exit(2)).
*/
+    return 0;
+}
diff --git a/port/gameloop.cpp b/port/gameloop.cpp
index 0b29ea9c..64f56366 100644
--- a/port/gameloop.cpp
+++ b/port/gameloop.cpp
@@ -23,8 +23,11 @@
 #include "coroutine.h"
 #include "port.h"
 #include "port_watchdog.h"
+#include "frame_interpolation.h"
+#include "xr/xr_runtime.h"
 
 #include
+#include
 #include
 #include
 
@@ -150,6 +153,47 @@ extern "C" int port_get_display_submit_count(void)
     return sDLSubmitCount;
 }
 
+/* Frame-interpolation multiplier — CVar (gSettings.FrameInterpolationMult);
+ * env SSB64_INTERP_MULT is the CVar lookup's default; clamped to [1, MAX].
+ *
+ * SSB64 ticks at 60 Hz natively (one tick per VRETRACE, posted every
+ * PortPushFrame, paced by vsync on a 60 Hz monitor). So mult only helps on
+ * higher-refresh displays:
+ *   N=1: disabled, identical to historical behavior.
+ *   N=2: 60 Hz logic + 120 Hz display (useful on 120 Hz monitor).
+ *   N=3: 60 Hz logic + 180 Hz display.
+ *   N=4: 60 Hz logic + 240 Hz display.
+ *
+ * Resolved every frame so the slider in Settings → Graphics → Frame
+ * Interpolation Mult takes effect immediately, no relaunch needed.
+ *
+ * On a 60 Hz monitor mult>1 is essentially a no-op visually: the
+ * intermediate lerped frames tear (vsync toggled off below) and are
+ * immediately overwritten by the equivalent un-lerped frame at the next
+ * vsync. No benefit, just tearing — the menu tooltip says so. */
+#define SSB64_INTERP_MULT_MAX 8
+#define CVAR_FRAME_INTERP_MULT "gSettings.FrameInterpolationMult"
+
+static int port_get_interp_mult(void)
+{
+    int env_default = 1;
+    if (const char *e = std::getenv("SSB64_INTERP_MULT")) {
+        const long v = std::strtol(e, nullptr, 10); /* not atoi: UB when out of int range */
+        if (v >= 1 && v <= SSB64_INTERP_MULT_MAX) env_default = static_cast<int>(v);
+    }
+    int n = CVarGetInteger(CVAR_FRAME_INTERP_MULT, env_default);
+    if (n < 1) n = 1;
+    if (n > SSB64_INTERP_MULT_MAX) n = SSB64_INTERP_MULT_MAX;
+
+    /* Log only on transitions so a stable session doesn't spam the log.
*/ + static int s_last_logged = -1; + if (n != s_last_logged) { + port_log("SSB64: frame interpolation mult = %d (1 = disabled)\n", n); + s_last_logged = n; + } + return n; +} + extern "C" void port_submit_display_list(void *dl) { sDLSubmitCount++; @@ -183,27 +227,93 @@ extern "C" void port_submit_display_list(void *dl) return; } + /* The game just finished building this frame's display list and all + * matrices it touched are now sitting in g_current. Stop recording so + * the replay loop sees a stable snapshot. */ + FrameInterpolation_StopRecord(); + + const int mult = port_get_interp_mult(); + /* Begin trace frame before Fast3D processes the display list */ gbi_trace_begin_frame(); - std::unordered_map mtxReplacements; - try { - window->DrawAndRunGraphicsCommands(static_cast(dl), mtxReplacements); - } catch (long hr) { - port_log("SSB64: CAUGHT DX shader exception HRESULT=0x%08lX on DL #%d\n", hr, sDLSubmitCount); - gbi_trace_end_frame(); - return; - } catch (...) { - port_log("SSB64: CAUGHT unknown exception on DL #%d\n", sDLSubmitCount); - gbi_trace_end_frame(); - return; + /* Slow-motion fix: when mult>1, the DXGI backend throttles BOTH on the + * vsync interval (each Present() blocks ~1/60s) AND on a CPU-side wait + * loop targeting `mTargetFps` (default 60, gfx_dxgi.cpp:43). Together + * they cap presents at 60 Hz, so doing mult presents per game tick + * stretches each tick to mult/60s and slows logic to 60/mult Hz. + * + * Workaround: + * 1. Disable vsync for intermediate presents by flipping + * gVsyncEnabled (DXGI re-reads it at SwapBuffersBegin). + * 2. Bump target FPS to 60*mult so the wait loop completes faster. + * Both restored before the final present so it runs with the user's + * chosen vsync setting and tear-free. + * + * Intermediate presents will tear (no vsync), but they're transient — + * the display will hit a clean vsync on the final present each tick. + * + * No-op when mult == 1. 
*/ + const bool needsTimingToggle = (mult > 1); + int origVsync = 1; + int origTargetFps = 60; + if (needsTimingToggle) { + auto cv = context->GetConsoleVariables(); + origVsync = cv ? cv->GetInteger("gVsyncEnabled", 1) : 1; + origTargetFps = window->GetTargetFps(); + window->SetTargetFps(60 * mult); } + auto restore_timing = [&]() { + if (!needsTimingToggle) return; + auto cv = context->GetConsoleVariables(); + if (cv) cv->SetInteger("gVsyncEnabled", origVsync); + window->SetTargetFps(origTargetFps); + }; + + for (int i = 1; i <= mult; i++) { + std::unordered_map mtxReplacements; + bool isIntermediate = (mult > 1 && i < mult); + if (isIntermediate) { + /* Intermediate frames: build a replacement map for lerp factor t. + * t in (0, 1); the final pass (i==mult, t==1) uses an empty map + * so the GBI's actual matrices flow through unchanged — this + * keeps the last display frame bit-exact with the un-instrumented + * path and serves as the self-test invariant. */ + float t = static_cast(i) / static_cast(mult); + mtxReplacements = FrameInterpolation_Interpolate(t); + } + + /* Vsync toggle: OFF on intermediate, restored on final. */ + if (needsTimingToggle) { + auto cv = context->GetConsoleVariables(); + if (cv) { + cv->SetInteger("gVsyncEnabled", isIntermediate ? 0 : origVsync); + } + } + + try { + window->DrawAndRunGraphicsCommands(static_cast(dl), mtxReplacements); + } catch (long hr) { + port_log("SSB64: CAUGHT DX shader exception HRESULT=0x%08lX on DL #%d (interp pass %d/%d)\n", + hr, sDLSubmitCount, i, mult); + gbi_trace_end_frame(); + restore_timing(); + return; + } catch (...) 
{ + port_log("SSB64: CAUGHT unknown exception on DL #%d (interp pass %d/%d)\n", + sDLSubmitCount, i, mult); + gbi_trace_end_frame(); + restore_timing(); + return; + } + } + restore_timing(); /* End trace frame after processing */ gbi_trace_end_frame(); if (sDLSubmitCount <= 60) { - port_log("SSB64: DrawAndRunGraphicsCommands returned OK\n"); + port_log("SSB64: DrawAndRunGraphicsCommands returned OK (mult=%d)\n", mult); } } @@ -215,6 +325,18 @@ void PortGameInit(void) { port_log("SSB64: PortGameInit — initializing coroutine system\n"); + /* Run frame-interpolation unit tests if SSB64_FRAME_INTERP_UNITTEST is set. + * Exits the process with status 2 on failure. No-op otherwise. */ + FrameInterpolation_RunSelfTestIfRequested(); + + /* OpenXR session init (opt-in via SSB64_XR_ENABLE=1). When the build is + * not configured with -DSSB64_ENABLE_OPENXR=ON, this logs a stub message + * and returns non-zero, leaving xr_runtime_is_active() == 0 so the + * per-frame hooks no-op. See port/xr/xr_runtime.h for completion notes. */ + if (xr_runtime_init() == 0) { + port_log("SSB64: XR runtime active: %s\n", xr_runtime_status()); + } + /* Convert the main thread to a fiber so it can participate in * coroutine switching. */ port_coroutine_init_main(); @@ -378,6 +500,19 @@ void PortPushFrame(void) * `(OSMesg)INTR_VRETRACE` here. */ osSendMesg(&gSYSchedulerTaskMesgQueue, port_make_os_mesg_int(INTR_VRETRACE), OS_MESG_NOBLOCK); + /* Begin a fresh recording for this frame's matrix builds. The current + * recording becomes the "previous" snapshot for the next frame's lerp. + * This MUST happen before threads run (they call syMatrix* which records + * into g_current). It MUST happen after the previous frame's render + * completed, otherwise we'd lose ops still being replayed. + * + * The corresponding StopRecord lives in port_submit_display_list, which + * fires synchronously inside port_resume_service_threads below. 
*/ + FrameInterpolation_StartRecord(); + + /* OpenXR per-frame begin (no-op when XR inactive). */ + xr_runtime_begin_frame(); + /* Resume all service thread coroutines that are waiting for messages. * This runs multiple rounds to handle cascading messages: * Round 1: Scheduler picks up VRETRACE, sends ticks to clients @@ -391,6 +526,13 @@ void PortPushFrame(void) /* Tell the hang watchdog a frame completed. */ port_watchdog_note_frame_end(); + /* OpenXR per-frame end — composites the desktop framebuffer into the XR + * quad-layer swapchain and submits xrEndFrame. No-op when XR inactive. */ + xr_runtime_end_frame(); + + /* Frame-interpolation telemetry (env-var gated, ~60 ticks per log line). */ + FrameInterpolation_TelemetryTick(); + /* Screenshot capture: env-var driven, zero cost when disabled. */ port_screenshot_init_once(); port_screenshot_maybe_capture(sFrameCount); @@ -414,6 +556,7 @@ void PortPushFrame(void) void PortGameShutdown(void) { + xr_runtime_shutdown(); port_watchdog_shutdown(); if (sGameCoroutine != NULL) { port_coroutine_destroy(sGameCoroutine); diff --git a/port/gui/PortMenu.cpp b/port/gui/PortMenu.cpp index bc31476a..ccd4cb27 100644 --- a/port/gui/PortMenu.cpp +++ b/port/gui/PortMenu.cpp @@ -273,6 +273,25 @@ void PortMenu::AddMenuSettings() { .RaceDisable(false) .Options(ComboboxOptions().Tooltip("Sets the active texture filtering mode.").ComboMap(kTextureFilteringMap)); + /* Frame interpolation: render N display frames per game tick by lerping + * the previous frame's recorded matrices toward the current. The slider + * value is read every frame from CVar gSettings.FrameInterpolationMult + * by port_submit_display_list — see port/gameloop.cpp. */ + AddWidget(path, "Frame Interpolation Mult", WIDGET_CVAR_SLIDER_INT) + .CVar("gSettings.FrameInterpolationMult") + .RaceDisable(false) + .Options(IntSliderOptions() + .Tooltip("Render N display frames per game tick. 1 = disabled. 
" + "SSB64 already ticks at 60 Hz natively, so this only " + "helps on 120 Hz+ monitors (mult=2 -> 120 Hz display, " + "mult=4 -> 240 Hz). On a 60 Hz monitor the intermediate " + "lerped frames tear and are immediately overwritten — " + "no visible benefit, just tearing. Camera lerp is " + "input-domain so rebuilt frames are rigid (no doubling).") + .Min(1) + .Max(8) + .DefaultValue(1)); + path.sidebarName = "Gameplay"; path.column = SECTION_COLUMN_1; AddSidebarEntry("Settings", "Gameplay", 1); diff --git a/port/xr/xr_runtime.cpp b/port/xr/xr_runtime.cpp new file mode 100644 index 00000000..2cfa0b13 --- /dev/null +++ b/port/xr/xr_runtime.cpp @@ -0,0 +1,191 @@ +/** + * xr_runtime.cpp — see xr_runtime.h for design notes and TODO list. + * + * Phase 9 SCAFFOLD: this file gets the integration points compile-clean and + * wired into PortPushFrame. The OpenXR session lifecycle itself is stubbed; + * `xr_runtime_init` deliberately reports "not implemented" unless the build + * is configured with -DSSB64_ENABLE_OPENXR=ON, at which point a real + * implementation (TBD) replaces these stubs. + * + * The opt-in env var SSB64_XR_ENABLE=1 turns on the init path; without it, + * xr_runtime_is_active() returns 0 and all the per-frame hooks are no-ops, + * meaning this file adds essentially zero overhead to non-VR sessions. + */ + +#include "xr_runtime.h" +#include "../port_log.h" + +#include +#include + +#ifdef SSB64_ENABLE_OPENXR +/* Real OpenXR includes go here once the dependency is set up. 
Example: + * #define XR_USE_GRAPHICS_API_D3D11 + * #include + * #include + */ +#endif + +namespace { + +bool g_active = false; +const char *g_status = "uninitialized"; + +#ifdef SSB64_ENABLE_OPENXR +/* Real session state goes here once xr_runtime_init is implemented: + * + * XrInstance instance = XR_NULL_HANDLE; + * XrSystemId system_id = XR_NULL_SYSTEM_ID; + * XrSession session = XR_NULL_HANDLE; + * XrSpace local_space = XR_NULL_HANDLE; + * XrSwapchain quad_swapchain = XR_NULL_HANDLE; + * XrFrameState frame_state = {}; + * bool session_running = false; + */ +#endif + +bool env_enabled() { + const char *e = std::getenv("SSB64_XR_ENABLE"); + return e != nullptr && e[0] != '\0' && e[0] != '0'; +} + +} // namespace + +extern "C" { + +int xr_runtime_init(void) { + if (!env_enabled()) { + g_status = "disabled (SSB64_XR_ENABLE not set)"; + return 1; + } + +#ifdef SSB64_ENABLE_OPENXR + /* TODO: real session init. + * + * Sketch (Windows D3D11 path): + * XrApplicationInfo app_info = { "BattleShip", 1, "ssb64-pc-port", 0, XR_CURRENT_API_VERSION }; + * const char *exts[] = { XR_KHR_D3D11_ENABLE_EXTENSION_NAME }; + * XrInstanceCreateInfo ici = { XR_TYPE_INSTANCE_CREATE_INFO }; + * ici.applicationInfo = app_info; + * ici.enabledExtensionCount = 1; + * ici.enabledExtensionNames = exts; + * if (xrCreateInstance(&ici, &instance) != XR_SUCCESS) goto fail; + * + * XrSystemGetInfo sgi = { XR_TYPE_SYSTEM_GET_INFO }; + * sgi.formFactor = XR_FORM_FACTOR_HEAD_MOUNTED_DISPLAY; + * if (xrGetSystem(instance, &sgi, &system_id) != XR_SUCCESS) goto fail; + * + * PFN_xrGetD3D11GraphicsRequirementsKHR pfnGetReq = nullptr; + * xrGetInstanceProcAddr(instance, "xrGetD3D11GraphicsRequirementsKHR", + * (PFN_xrVoidFunction*)&pfnGetReq); + * XrGraphicsRequirementsD3D11KHR reqs = { XR_TYPE_GRAPHICS_REQUIREMENTS_D3D11_KHR }; + * pfnGetReq(instance, system_id, &reqs); + * + * ID3D11Device *device = ...; // pull from libultraship's DX11 backend + * XrGraphicsBindingD3D11KHR binding = { 
XR_TYPE_GRAPHICS_BINDING_D3D11_KHR }; + * binding.device = device; + * + * XrSessionCreateInfo sci = { XR_TYPE_SESSION_CREATE_INFO }; + * sci.next = &binding; + * sci.systemId = system_id; + * if (xrCreateSession(instance, &sci, &session) != XR_SUCCESS) goto fail; + * + * XrReferenceSpaceCreateInfo rsci = { XR_TYPE_REFERENCE_SPACE_CREATE_INFO }; + * rsci.referenceSpaceType = XR_REFERENCE_SPACE_TYPE_LOCAL; + * rsci.poseInReferenceSpace = { {0,0,0,1}, {0,0,0} }; // identity + * xrCreateReferenceSpace(session, &rsci, &local_space); + * + * // Create a swapchain for the quad layer (single image, e.g. 1920x1080) + * ... + * + * g_active = true; + * g_status = "session created"; + * return 0; + * + * fail: + * g_status = "init failed"; + * return 1; + */ + port_log("SSB64: XR_ENABLE set but openxr stubs not implemented yet\n"); + g_status = "stub: SSB64_ENABLE_OPENXR build flag set but session init not implemented"; + return 1; +#else + port_log("SSB64: XR_ENABLE set but binary not built with -DSSB64_ENABLE_OPENXR=ON\n"); + g_status = "build flag SSB64_ENABLE_OPENXR not defined"; + return 1; +#endif +} + +int xr_runtime_shutdown(void) { + if (!g_active) return 0; +#ifdef SSB64_ENABLE_OPENXR + /* TODO: + * if (session) xrDestroySession(session); + * if (instance) xrDestroyInstance(instance); + */ +#endif + g_active = false; + g_status = "shutdown"; + return 0; +} + +int xr_runtime_is_active(void) { + return g_active ? 
1 : 0; +} + +void xr_runtime_begin_frame(void) { + if (!g_active) return; +#ifdef SSB64_ENABLE_OPENXR + /* TODO: + * xrWaitFrame(session, &waitFrameInfo, &frame_state); + * XrFrameBeginInfo bfi = { XR_TYPE_FRAME_BEGIN_INFO }; + * xrBeginFrame(session, &bfi); + */ +#endif +} + +void xr_runtime_end_frame(void) { + if (!g_active) return; +#ifdef SSB64_ENABLE_OPENXR + /* TODO: + * // Acquire/wait/release the quad-layer swapchain image + * uint32_t img_idx; + * xrAcquireSwapchainImage(quad_swapchain, ..., &img_idx); + * xrWaitSwapchainImage(quad_swapchain, ...); + * + * // Copy libultraship's main framebuffer into the swapchain image. + * // Source: Fast3dWindow::GetGfxFrameBuffer() returns a uintptr_t to + * // the platform texture handle; on D3D11 it's an ID3D11Texture2D*. + * ID3D11DeviceContext *ctx = ...; + * ID3D11Texture2D *src = (ID3D11Texture2D *)Fast3dWindow::GetGfxFrameBuffer(); + * ID3D11Texture2D *dst = (ID3D11Texture2D *)swapchain_images[img_idx].texture; + * ctx->CopyResource(dst, src); + * + * xrReleaseSwapchainImage(quad_swapchain, ...); + * + * // Build the quad layer (world-locked, ~2m in front, 3m wide) + * XrCompositionLayerQuad quad = { XR_TYPE_COMPOSITION_LAYER_QUAD }; + * quad.space = local_space; + * quad.eyeVisibility = XR_EYE_VISIBILITY_BOTH; + * quad.subImage.swapchain = quad_swapchain; + * quad.pose = { {0,0,0,1}, {0, 0, -2.0f} }; + * quad.size = { 3.2f, 1.8f }; + * + * const XrCompositionLayerBaseHeader *layers[] = { + * reinterpret_cast<const XrCompositionLayerBaseHeader *>(&quad) + * }; + * XrFrameEndInfo efi = { XR_TYPE_FRAME_END_INFO }; + * efi.displayTime = frame_state.predictedDisplayTime; + * efi.environmentBlendMode = XR_ENVIRONMENT_BLEND_MODE_OPAQUE; + * efi.layerCount = frame_state.shouldRender ?
1 : 0; + * efi.layers = layers; + * xrEndFrame(session, &efi); + */ +#endif +} + +const char *xr_runtime_status(void) { + return g_status; +} + +} // extern "C" diff --git a/port/xr/xr_runtime.h b/port/xr/xr_runtime.h new file mode 100644 index 00000000..7a16124c --- /dev/null +++ b/port/xr/xr_runtime.h @@ -0,0 +1,76 @@ +#pragma once + +/** + * xr_runtime — OpenXR integration for the SSB64 PC port. + * + * Tier 1 ("cinema mode"): the existing game framebuffer is presented as a + * floating quad in front of the user, in stereo. Game rendering is unchanged; + * we just composite the same image to two eyes through OpenXR's quad layer + * support. No gameplay or comfort impact. + * + * Status: SCAFFOLD ONLY. The lifecycle hooks below compile and integrate with + * the existing port loop, but the actual OpenXR session bring-up is stubbed. + * To finish: + * + * 1. Add OpenXR loader as a dependency (Windows: openxr_loader.lib from + * Khronos OpenXR-SDK 1.0.x; Linux: libopenxr_loader.so). Either bundle + * it via vcpkg (`openxr-loader` port) or rely on system install. + * + * 2. Replace the `xr_runtime_init` body with real session init: + * - xrCreateInstance with XR_KHR_D3D11_enable (or _opengl_enable) + * - xrGetSystem(XR_FORM_FACTOR_HEAD_MOUNTED_DISPLAY) + * - xrGetD3D11GraphicsRequirementsKHR + * - xrCreateSession bound to the libultraship D3D11 device + * - xrCreateReferenceSpace(XR_REFERENCE_SPACE_TYPE_LOCAL) + * - xrCreateSwapchain x2 (one per eye) — but for cinema mode we need + * only the quad-layer swapchain (single image) + * + * 3. xr_runtime_begin_frame() / xr_runtime_end_frame(): wrap each game + * frame between xrWaitFrame / xrBeginFrame / xrEndFrame. Submit one quad-layer per eye showing the + * libultraship final framebuffer (Fast3dWindow::GetGfxFrameBuffer()) + * copied into the XR swapchain image. + * + * 4. Mirror logic: when XR is active, optionally suppress the desktop + * window's swap (or run it at lower priority) to avoid double-presenting + * and rendering twice. + * + * 5.
Quad placement: world-locked floating screen ~2 m in front of the user, + * ~16:9 aspect, ~3 m wide (configurable via CVars). + * + * Tier 2/3 (true stereo / first-person VR) reuse the recording layer + * (frame_interpolation.{h,cpp}) — per-eye projection matrices go through + * the same `mtx_replacements` map that frame interpolation already uses. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* Lifecycle. All return 0 on success, non-zero on failure. Failures are + * non-fatal: on any failure xr_runtime_is_active() returns 0 and the rest + * of the port loop runs as if XR were never enabled. */ + +int xr_runtime_init(void); +int xr_runtime_shutdown(void); + +/* True iff a session is live and we should be submitting frames to it. + * Cheap — used as a gate from the port frame loop. */ +int xr_runtime_is_active(void); + +/* Per-frame hooks. Both no-op when xr_runtime_is_active() is 0. + * + * xr_runtime_begin_frame: called from PortPushFrame just before the game + * coroutine runs. Opportunity for xrWaitFrame / xrBeginFrame. + * + * xr_runtime_end_frame: called after the desktop swap has happened. + * Composites the framebuffer to the XR swapchain images, submits the + * quad layer, and calls xrEndFrame. */ +void xr_runtime_begin_frame(void); +void xr_runtime_end_frame(void); + +/* Diagnostics. Returns a static string; never null. 
*/ +const char *xr_runtime_status(void); + +#ifdef __cplusplus +} +#endif diff --git a/src/gm/gmcamera.c b/src/gm/gmcamera.c index b82a387e..1245f6de 100644 --- a/src/gm/gmcamera.c +++ b/src/gm/gmcamera.c @@ -6,6 +6,9 @@ #include #include // #include +#ifdef PORT +#include +#endif // // // // // // // // // // // // // // @@ -990,6 +993,12 @@ sb32 gmCameraLookAtFuncMatrix(Mtx *mtx, CObj *cobj, Gfx **dls) u16 *perspnorm; f32 max; s32 unused; +#ifdef PORT + /* Capture inputs at the very top of the function so the recording + * reflects the inputs for *this* frame's camera, regardless of which + * branch the high-precision rebuild takes below. */ + f32 fi_scale = cobj->projection.persp.scale; +#endif syMatrixAdvanceW(temp_mtx, gSYTaskmanGraphicsHeap); @@ -1012,9 +1021,29 @@ sb32 gmCameraLookAtFuncMatrix(Mtx *mtx, CObj *cobj, Gfx **dls) syMatrixLookAtReflectF(&sp5C, &gGMCameraStruct.look_at, cobj->vec.eye.x, cobj->vec.eye.y, cobj->vec.eye.z, cobj->vec.at.x, cobj->vec.at.y, cobj->vec.at.z, cobj->vec.up.x, cobj->vec.up.y, cobj->vec.up.z); guMtxCatF(sp5C, gGCMatrixPerspF, gGMCameraMatrix); +#ifdef PORT + fi_scale = 32000.0F / max; +#endif } syMatrixF2L(&gGMCameraMatrix, mtx); +#ifdef PORT + /* Input-domain camera record. Replaces the old element-wise F2L hook — + * lerping a view*proj matrix element-wise produces a non-rigid transform + * (visible "doubling" / warping during camera pans). The replay path + * rebuilds lookat_F + persp_F + catF from lerped scalar inputs, which + * is rigid by construction. 
*/ + FrameInterpolation_RecordCamera(mtx, + cobj->vec.eye.x, cobj->vec.eye.y, cobj->vec.eye.z, + cobj->vec.at.x, cobj->vec.at.y, cobj->vec.at.z, + cobj->vec.up.x, cobj->vec.up.y, cobj->vec.up.z, + cobj->projection.persp.fovy, + cobj->projection.persp.aspect, + cobj->projection.persp.near, + cobj->projection.persp.far, + fi_scale); +#endif + return 0; } diff --git a/src/sys/matrix.c b/src/sys/matrix.c index 6748e0b7..1f0cc0a7 100644 --- a/src/sys/matrix.c +++ b/src/sys/matrix.c @@ -1,4 +1,5 @@ #include +#include extern u16 gSYSinTable[0x800]; @@ -736,6 +737,7 @@ void syMatrixScaF(Mtx44f *mf, f32 x, f32 y, f32 z) void syMatrixSca(Mtx *m, f32 x, f32 y, f32 z) { s32 e1, e2; + FrameInterpolation_RecordMatrixSca(m, x, y, z); m->m[0][1] = 0; m->m[2][1] = 0; @@ -818,6 +820,7 @@ void syMatrixTraF(Mtx44f *mf, f32 x, f32 y, f32 z) void syMatrixTra(Mtx *m, f32 x, f32 y, f32 z) { u32 e1, e2; + FrameInterpolation_RecordMatrixTra(m, x, y, z); m->m[0][0] = COMBINE_INTEGRAL(FTOFIX32(1.0F), FTOFIX32(0.0F)); m->m[2][0] = COMBINE_FRACTIONAL(FTOFIX32(1.0F), FTOFIX32(0.0F)); @@ -889,6 +892,7 @@ void syMatrixRotRF(Mtx44f *mf, f32 a, f32 x, f32 y, f32 z) void syMatrixRotR(Mtx *m, f32 a, f32 x, f32 y, f32 z) { Mtx44f mf; + FrameInterpolation_RecordMatrixRotR(m, a, x, y, z); syMatrixRotRF(&mf, a, x, y, z); @@ -906,6 +910,7 @@ void syMatrixTraRotRF(Mtx44f *mf, f32 tx, f32 ty, f32 tz, f32 angle, f32 rx, f32 void syMatrixTraRotR(Mtx *m, f32 tx, f32 ty, f32 tz, f32 angle, f32 rx, f32 ry, f32 rz) { Mtx44f mf; + FrameInterpolation_RecordMatrixTraRotR(m, tx, ty, tz, angle, rx, ry, rz); syMatrixTraRotRF(&mf, tx, ty, tz, angle, rx, ry, rz); syMatrixF2LFixedW(&mf, m); @@ -923,6 +928,7 @@ void syMatrixTraRotRScaF(Mtx44f *mf, f32 tx, f32 ty, f32 tz, f32 angle, f32 rx, void syMatrixTraRotRSca(Mtx *m, f32 tx, f32 ty, f32 tz, f32 angle, f32 rx, f32 ry, f32 rz, f32 sx, f32 sy, f32 sz) { Mtx44f mf; + FrameInterpolation_RecordMatrixTraRotRSca(m, tx, ty, tz, angle, rx, ry, rz, sx, sy, sz); 
syMatrixTraRotRScaF(&mf, tx, ty, tz, angle, rx, ry, rz, sx, sy, sz); syMatrixF2LFixedW(&mf, m); @@ -969,6 +975,7 @@ void syMatrixRotRpyR(Mtx *m, f32 r, f32 p, f32 y) s32 cosr, cosp, cosy; u16 indexr, indexp, indexy; u32 e1, e2; + FrameInterpolation_RecordMatrixRotRpyR(m, r, p, y); syGetSinCosUShort(sinr, cosr, r, indexr); syGetSinCosUShort(sinp, cosp, p, indexp); @@ -1026,6 +1033,7 @@ void syMatrixTraRotRpyR(Mtx *m, f32 tx, f32 ty, f32 tz, f32 r, f32 p, f32 y) s32 cosr, cosp, cosy; u16 indexr, indexp, indexy; u32 e1, e2; + FrameInterpolation_RecordMatrixTraRotRpyR(m, tx, ty, tz, r, p, y); syGetSinCosUShort(sinr, cosr, r, indexr); syGetSinCosUShort(sinp, cosp, p, indexp); @@ -1090,8 +1098,9 @@ void syMatrixTraRotRpyRSca(Mtx *m, f32 tx, f32 ty, f32 tz, f32 r, f32 p, f32 y, s32 scalex, scaley, scalez; u16 indexr, indexp, indexy; u32 e1, e2; + FrameInterpolation_RecordMatrixTraRotRpyRSca(m, tx, ty, tz, r, p, y, sx, sy, sz); - syGetSinCosUShort(sinr, cosr, r, indexr); + syGetSinCosUShort(sinr, cosr, r, indexr); syGetSinCosUShort(sinp, cosp, p, indexp); syGetSinCosUShort(siny, cosy, y, indexy); @@ -1179,6 +1188,7 @@ void syMatrixRotPyrRF(Mtx44f *mf, f32 r, f32 p, f32 y) void syMatrixRotPyrR(Mtx *m, f32 r, f32 p, f32 y) { Mtx44f mf; + FrameInterpolation_RecordMatrixRotPyrR(m, r, p, y); syMatrixRotPyrRF(&mf, r, p, y); syMatrixF2LFixedW(&mf, m); @@ -1195,6 +1205,7 @@ void syMatrixTraRotPyrRF(Mtx44f *mf, f32 tx, f32 ty, f32 tz, f32 r, f32 p, f32 y void syMatrixTraRotPyrR(Mtx *m, f32 tx, f32 ty, f32 tz, f32 r, f32 p, f32 y) { Mtx44f mf; + FrameInterpolation_RecordMatrixTraRotPyrR(m, tx, ty, tz, r, p, y); syMatrixTraRotPyrRF(&mf, tx, ty, tz, r, p, y); syMatrixF2LFixedW(&mf, m); @@ -1212,6 +1223,7 @@ void syMatrixTraRotPyrRScaF(Mtx44f *mf, f32 tx, f32 ty, f32 tz, f32 r, f32 p, f3 void syMatrixTraRotPyrRSca(Mtx *m, f32 tx, f32 ty, f32 tz, f32 r, f32 p, f32 y, f32 sx, f32 sy, f32 sz) { Mtx44f mf; + FrameInterpolation_RecordMatrixTraRotPyrRSca(m, tx, ty, tz, r, p, y, sx, sy, sz); 
syMatrixTraRotPyrRScaF(&mf, tx, ty, tz, r, p, y, sx, sy, sz); syMatrixF2LFixedW(&mf, m); @@ -1248,6 +1260,7 @@ void syMatrixRotPyRF(Mtx44f *mf, f32 p, f32 y) void syMatrixRotPyR(Mtx *m, f32 p, f32 y) { Mtx44f mf; + FrameInterpolation_RecordMatrixRotPyR(m, p, y); syMatrixRotPyRF(&mf, p, y); syMatrixF2LFixedW(&mf, m); @@ -1264,6 +1277,7 @@ void syMatrixTraRotPyRF(Mtx44f *mf, f32 tx, f32 ty, f32 tz, f32 p, f32 y) void syMatrixTraRotPyR(Mtx *m, f32 tx, f32 ty, f32 tz, f32 p, f32 y) { Mtx44f mf; + FrameInterpolation_RecordMatrixTraRotPyR(m, tx, ty, tz, p, y); syMatrixTraRotPyRF(&mf, tx, ty, tz, p, y); syMatrixF2LFixedW(&mf, m); @@ -1300,6 +1314,7 @@ void syMatrixRotRpRF(Mtx44f *mf, f32 r, f32 p) void syMatrixRotRpR(Mtx *m, f32 r, f32 p) { Mtx44f mf; + FrameInterpolation_RecordMatrixRotRpR(m, r, p); syMatrixRotRpRF(&mf, r, p); syMatrixF2LFixedW(&mf, m); @@ -1316,6 +1331,7 @@ void syMatrixTraRotRpRF(Mtx44f *mf, f32 tx, f32 ty, f32 tz, f32 r, f32 p) void syMatrixTraRotRpR(Mtx *m, f32 tx, f32 ty, f32 tz, f32 r, f32 p) { Mtx44f mf; + FrameInterpolation_RecordMatrixTraRotRpR(m, tx, ty, tz, r, p); syMatrixTraRotRpRF(&mf, tx, ty, tz, r, p); syMatrixF2LFixedW(&mf, m); @@ -1356,6 +1372,7 @@ void syMatrixRotYawRF(Mtx44f *mf, f32 y) void syMatrixRotYawR(Mtx *m, f32 y) { Mtx44f mf; + FrameInterpolation_RecordMatrixRotYawR(m, y); syMatrixRotYawRF(&mf, y); syMatrixF2LFixedW(&mf, m); @@ -1372,6 +1389,7 @@ void syMatrixTraRotYawRF(Mtx44f *mf, f32 tx, f32 ty, f32 tz, f32 y) void syMatrixTraRotYawR(Mtx *m, f32 tx, f32 ty, f32 tz, f32 y) { Mtx44f mf; + FrameInterpolation_RecordMatrixTraRotYawR(m, tx, ty, tz, y); syMatrixTraRotYawRF(&mf, tx, ty, tz, y); syMatrixF2LFixedW(&mf, m); @@ -1412,6 +1430,7 @@ void syMatrixRotPitchRF(Mtx44f *mf, f32 p) void syMatrixRotPitchR(Mtx *m, f32 p) { Mtx44f mf; + FrameInterpolation_RecordMatrixRotPitchR(m, p); syMatrixRotPitchRF(&mf, p); syMatrixF2LFixedW(&mf, m); @@ -1428,6 +1447,7 @@ void syMatrixTraRotPitchRF(Mtx44f *mf, f32 tx, f32 ty, f32 tz, f32 p) 
void syMatrixTraRotPitchR(Mtx *m, f32 tx, f32 ty, f32 tz, f32 p) { Mtx44f mf; + FrameInterpolation_RecordMatrixTraRotPitchR(m, tx, ty, tz, p); syMatrixTraRotPitchRF(&mf, tx, ty, tz, p); syMatrixF2LFixedW(&mf, m); @@ -1441,6 +1461,7 @@ void syMatrixRotDF(Mtx44f *mf, f32 a, f32 x, f32 y, f32 z) void syMatrixRotD(Mtx *m, f32 a, f32 x, f32 y, f32 z) { Mtx44f mf; + FrameInterpolation_RecordMatrixRotD(m, a, x, y, z); syMatrixRotRF(&mf, F_CLC_DTOR32(a), x, y, z); syMatrixF2LFixedW(&mf, m); @@ -1454,6 +1475,7 @@ void syMatrixTraRotDF(Mtx44f *mf, f32 tx, f32 ty, f32 tz, f32 a, f32 rx, f32 ry, void syMatrixTraRotD(Mtx *m, f32 tx, f32 ty, f32 tz, f32 a, f32 rx, f32 ry, f32 rz) { Mtx44f mf; + FrameInterpolation_RecordMatrixTraRotD(m, tx, ty, tz, a, rx, ry, rz); syMatrixTraRotRF(&mf, tx, ty, tz, F_CLC_DTOR32(a), rx, ry, rz); syMatrixF2LFixedW(&mf, m); @@ -1467,6 +1489,7 @@ void syMatrixRotRpyDF(Mtx44f *mf, f32 r, f32 p, f32 y) void syMatrixRotRpyD(Mtx *m, f32 r, f32 p, f32 y) { Mtx44f mf; + FrameInterpolation_RecordMatrixRotRpyD(m, r, p, y); syMatrixRotRpyRF(&mf, F_CLC_DTOR32(r), F_CLC_DTOR32(p), F_CLC_DTOR32(y)); syMatrixF2LFixedW(&mf, m); @@ -1480,6 +1503,7 @@ void syMatrixTraRotRpyDF(Mtx44f *mf, f32 tx, f32 ty, f32 tz, f32 r, f32 p, f32 y void syMatrixTraRotRpyD(Mtx *m, f32 tx, f32 ty, f32 tz, f32 r, f32 p, f32 y) { Mtx44f mf; + FrameInterpolation_RecordMatrixTraRotRpyD(m, tx, ty, tz, r, p, y); syMatrixTraRotRpyRF(&mf, tx, ty, tz, F_CLC_DTOR32(r), F_CLC_DTOR32(p), F_CLC_DTOR32(y)); syMatrixF2LFixedW(&mf, m); diff --git a/src/sys/objdisplay.c b/src/sys/objdisplay.c index e94ccef8..570450a2 100644 --- a/src/sys/objdisplay.c +++ b/src/sys/objdisplay.c @@ -8,6 +8,7 @@ #include "libc/math.h" #ifdef PORT #include "port_log.h" +#include #include #include #ifdef _MSC_VER @@ -610,6 +611,12 @@ s32 gcPrepDObjMatrix(Gfx **dl, DObj *dobj) s32 kind; sp2CC = 0; +#ifdef PORT + /* Frame interpolation: scope every matrix this DObj writes by its stable + * pointer. 
Lets the diff between frames N-1 and N match logically- + * equivalent matrices even if the underlying Mtx* slot was recycled. */ + FrameInterpolation_RecordOpenChild(dobj, 0); +#endif if (dobj->vec != NULL) { @@ -1465,6 +1472,9 @@ s32 gcPrepDObjMatrix(Gfx **dl, DObj *dobj) } *dl = current_dl; +#ifdef PORT + FrameInterpolation_RecordCloseChild(); +#endif return sp2CC; } #else