From fc402cfab1eff3b6859316e8d64bde9d42d75154 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Tr=C4=99bacz?= Date: Sun, 28 Dec 2025 12:59:41 +0100 Subject: [PATCH 1/2] Optimized Docker build script for faster rebuilds --- CLAUDE.md | 360 ++++++++++++++++++++++++++++++++++++++++++ tools/docker-build.sh | 19 +++ 2 files changed, 379 insertions(+) create mode 100644 CLAUDE.md create mode 100755 tools/docker-build.sh diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..6b1a440 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,360 @@ +## FF7 Decomp: Working Notes for Agents + +Final Fantasy VII (PS1 USA) decompilation project. Goal: byte-accurate recompilation of game executables. + +## Quick Reference + +### Docker Build Commands (Optimized) +```shell +# Build Docker image (only needed once) +docker build --platform=linux/amd64 --tag ff7-build:latest . + +# Run a build (FAST - uses Go caching, ~12s clean, ~2s incremental) +./tools/docker-build.sh "make build" + +# Format code after changes +./tools/docker-build.sh "make format" + +# Enter container interactively +./tools/docker-build.sh bash +``` + +The `docker-build.sh` wrapper adds Go module and build caching which provides a significant speedup. 
+ +**Legacy commands** - only use if you have issues with the optimized script: +```shell +docker run --rm --platform=linux/amd64 -v "$(pwd)":/ff7 -v ff7_venv:/ff7/.venv -v "$(pwd)/build":/ff7/build ff7-build:latest -lc 'cd /ff7 && make build' +``` + +### Key Make Targets +- `make build` - Build all overlays, verify SHA1 checksums +- `make format` - Run clang-format on code +- `make clean` - Remove build artifacts +- `make submit` - Clean + build + format + stage files for commit + +### Mako Commands (via `./mako.sh`) +- `./mako.sh build` - Build project +- `./mako.sh dec ` - Decompile function, replace INCLUDE_ASM +- `./mako.sh dec --fix-structs` - Also replace D_8009XXXX with Savemap fields +- `./mako.sh rank ` - Rank functions by difficulty (easiest first) +- `./mako.sh symbols add [size]` - Add new symbol +- `./mako.sh format` - Format code + +## Project Structure + +``` +src/ # Decompiled C source (organized by overlay) +asm/us/ # MIPS assembly (original/undecompiled) + └── / + ├── nonmatchings/ # Undecompiled function .s files + └── data/ # Data segment .s files +include/ # Headers (common.h, game.h, psxsdk/) +config/ # Build config and symbol files + ├── us.yaml # Main overlay definitions + ├── symbols.*.txt # Symbol address files + └── sym_*.txt # Import/export symbol files +build/us/ # Build output (.o, .exe, .map files) +tools/ # Build tools (Go builder, Python scripts) +disks/us/ # Extracted game files (from disc image) +``` + +## Overlays + +| Overlay | VRAM Start | Source | Description | +|---------|------------|--------|-------------| +| main | 0x80010000 | `src/main/` | Core engine, initialization | +| battle | 0x800A0000 | `src/battle/` | Battle system | +| batini | 0x801B0000 | `src/battle/batini.c` | Battle initialization | +| field | 0x800A0000 | `src/field/` | Field exploration | +| world | 0x800A0000 | `src/world/` | World map | +| menu | varies | `src/menu/` | Menu systems (savemenu, title, etc.) | + +## Decompilation Workflow + +### 1. 
Find a function to decompile +```shell +# List undecompiled functions, ranked by difficulty (easiest first) +./mako.sh rank src/battle/nonmatchings/battle + +# Or browse asm files directly +ls asm/us/battle/nonmatchings/battle/ +``` + +### 2. Decompile with m2c +```shell +# This replaces INCLUDE_ASM in the .c file with decompiled code and performs struct field replacement +./mako.sh dec func_800A1158 --fix-structs +``` + +### 3. Refine the code +- Fix unknown types (marked with `/*?*/`) +- Match register allocation and instruction order +- Add/reference symbols in `config/symbols.*.txt` +- The goal is to produce a decompiled code that is 1:1 matching the original PSX binaries + +### 4. Build and verify +```shell +make build # Rebuilds and verifies SHA1 match +``` + +### 5. Format and submit +```shell +make format +git add config/ include/ src/ +git commit -m "Decompile func_800A1158" +``` + +## Code Patterns + +### INCLUDE_ASM Macro +Embeds undecompiled assembly: +```c +INCLUDE_ASM("asm/us/battle/nonmatchings/battle", func_800A1158); +``` +Replace with decompiled C code when function matches. + +### Common Types +```c +typedef signed char s8; typedef unsigned char u8; +typedef signed short s16; typedef unsigned short u16; +typedef signed int s32; typedef unsigned int u32; +typedef u8 unk_data; typedef unsigned int* unk_ptr; +``` + +### String Encoding +FF7 uses custom character encoding, not ASCII. 
Use `_S()` macro: +```c +const char* msg = _S("Save game?"); +``` + +## Naming Conventions + +### Functions +- Undecompiled: `func_800XXXXX` (address-based) +- Decompiled: verb-first descriptive names, prefer PSX SDK names when applicable +- Examples: `InitBattle`, `LoadScene`, `DrawSprite` + +### Data/Globals +- Unknown: `D_800XXXXX` (address-based) +- Known game state: `g_` prefix (e.g., `g_BattleState`) +- Module constants: module prefix (e.g., `BATTLE_MAX_ENEMIES`) +- Struct fields: `unkXX` until purpose known + +## Symbol Files + +Located in `config/`: +- `symbols.main.us.txt` - Main overlay symbols +- `symbols.battle.txt` - Battle overlay symbols +- `sym_export.us.txt` - Cross-overlay exports +- `sym_extern.us.txt` - External references + +Format: +``` +function_name = 0x800A1158; +D_800F5BB8 = 0x800F5BB8; // size:0xCC +``` + +Add symbols with: +```shell +./mako.sh symbols add config/symbols.battle.txt MyFunction 0x800A1234 +``` + +## Key Data Structures + +### Savemap (0x8009C6E4) +Game save data. Use `--fix-structs` to auto-replace `D_8009XXXX` references: +- `Savemap.party[9]` - Party member data +- `Savemap.inventory` - Item inventory +- `Savemap.materia` - Materia slots +- `Savemap.gil` - Money +- See `tools/fix_structs.py` for full field list + +### Battle Structures (`src/battle/battle.h`) +- `BattleSetup` - Battle configuration +- `Unk800F83E0` - Battle state (0x68 bytes) +- `BattleSetupType` - Encounter types (preemptive, back attack, etc.) + +## Tips for Matching + +1. **Compiler quirks**: Two PSX compilers available (`cc1-psx-26`, `cc1-psx-272`). Check which one the overlay uses in `config/us.yaml`. + +2. **Register allocation**: Order of operations matters. Sometimes restructuring expressions helps match. + +3. **Global pointer (GP)**: Main overlay uses `gp=0x80062D44`. Variables near this use gp-relative addressing. + +4. **Rodata association**: Battle overlay has `migrate_rodata_to_functions: true` - rodata is bundled with functions. + +5. 
**Use mipsel-linux-gnu-objdump** for line-by-line comparison + +## Comparing Original vs Compiled Assembly + +When a decompiled function doesn't match, compare the original assembly with the compiled output: + +### 1. Check Function Size (Quick Check) +```shell +# Get compiled function size from symbol table +./tools/docker-build.sh "mipsel-linux-gnu-objdump -t build/us/src/main/18B8.c.o" | grep func_80026F44 + +# Output: 00015e8c g F .text 00000128 func_80026F44 +# ^^^^^^^^ ^^^^^^^^ <- size in hex (0x128 = 296 bytes) +``` + +Compare with original size: count bytes from function start to end in the `.s` file, or calculate from addresses (end_addr - start_addr + 4). + +### 2. View Compiled Disassembly +```shell +# Disassemble a specific function from the compiled object +./tools/docker-build.sh "mipsel-linux-gnu-objdump -d build/us/src/main/18B8.c.o" | grep -A100 ':' +``` + +### 3. View Original Assembly +```shell +# Original assembly is in asm/us//nonmatchings// +cat asm/us/main/nonmatchings/18B8/func_80026F44.s +``` + +### 4. Common Size Mismatch Causes +- **Code merged after branches**: Compiler optimizes common code after if/else. Fix: put function calls inside each branch. +- **Delay slot optimization**: Original uses delay slots cleverly. The compiled code may not reproduce this. +- **Sign extension duplication**: Original may have `sll/sra` pairs duplicated in branches; compiler merges them. +- **Register allocation**: Different register choices can cause different instruction sequences. +- **Type differences**: `s8` vs `u8` generates `lb` vs `lbu` instructions. + +### 5. 
Useful Patterns +```shell +# Count instructions in original (each line with glabel excluded, /* */ comments have instructions) +grep -c '/\*' asm/us/main/nonmatchings/18B8/func_80026F44.s + +# Find jr ra (function returns) to identify function boundaries +./tools/docker-build.sh "mipsel-linux-gnu-objdump -d build/us/src/main/18B8.c.o" | grep 'jr.*ra' +``` + +## Decompilation Tips Checklist + +When decompiling a function, follow this checklist: + +- [ ] No prototypes or parameters with '?' as type +- [ ] No 'void*' parameters that should be typed structs +- [ ] No pointer arithmetic with manual offset calculations +- [ ] Use array indices to access arrays, do not use arithmetic calculations +- [ ] All struct field accesses use '->' or '.' operators +- [ ] Struct sizes match the assembly access patterns +- [ ] 'goto loop_*' are converted as 'while' loops +- [ ] 'goto block_*' in 'switch' are inlined, to reverse code optimization + +Alignment is critical. Code and data are aligned by 4-byte. + +## Finding symbol names and applying them to the decompiled code + +Our goal in this project currently is to find out as many symbol names (functions, variables, enums, structs, local variables, function parameters) as possible and apply them to the decompiled code. At the same time we want to ensure the code compiles 1:1 matching the original PSX binaries so we cannot alter the parameter types and change any logic. + +1. First we need to analyse both the decompiled PSX code in the @src/ folder and the PC decompiled code in the @assets/ folder. You usually start with a function in the PSX code that is unique enough and already has some symbols decompiled or its logic is recognizable enough that it can be matched to the PC code. +2. Spawn a sub-agent to look through the PC code in the @assets/ folder (do not read these files directly because they're huge) and find any corresponding code that can help us find new symbol names +3. 
Map out the connections between the PSX code (skip function names defined with INCLUDE_ASM macro, only functions fully defined in C can be renamed) and the PC code and store your findings in @CLAUDE_DECOMP.md for future reference. Make sure you're absolutely certain when creating this mapping - if there is any doubt about a specific symbol you should skip it. +4. Apply the changes. To do this spawn an Apply sub-agent that will take a map of symbol names to update (eg. "func_800A1158 = MyFunction", "D_800F5BB8 = MyVariable") and then the sub-agent will figure out how to efficiently and comprehensively apply these changes to the PSX code in @src/ (remember to update references to the changed symbol names in other source and header files; when changing a function name you should update all function calls to it in the code). The sub-agent should think hard to make sure it does not break anything as some symbol names are used in multiple places. The sub-agent shall not alter parameter types in function signatures. The sub-agent shall not read or update the assembly files in asm/ (these are generated by the disassembler and should not be manually edited). After finishing applying the changes it should return immediately without trying to run the build. +5. In parallel spawn a second sub-agent that will update the symbol maps in @config/ in symbols.main.us.txt and any other *.txt files that contain symbol names (skip sym_export.us.txt and sym_export_battle.us.txt since these are autogenerated by the build system) +6. After the work is done run the build to verify if the changes are correct. The build should succeed without any errors and all the SHA1 sums should match. Fix any issues that arise and repeat the process until the changes are correct. +7. Take one more look at CLAUDE_DECOMP.md and make sure its updated with all the new knowledge you've gained. +8. 
Finally think about what the next steps would be and suggest options to the user + +## Files to Commit +When submitting decompiled code: +```shell +git add config/ # Symbol files +git add include/ # Headers (if modified) +git add src/ # Decompiled source +``` + +## Advanced Matching Techniques + +### Understanding Why Decompiled Code Doesn't Match + +When the size is wrong, the compiler is generating different code structure. Common causes: + +#### 1. Merged Function Calls +**Problem**: Modern compilers merge common code after if/else branches. +```c +// BAD - compiler merges the call after the branches +if (condition) { + setup_a(); +} else { + setup_b(); +} +common_call(); // Compiler optimizes: one call site + +// GOOD - forces separate code paths (matches original) +if (condition) { + setup_a(); + common_call(); // Call inside branch +} else { + setup_b(); + common_call(); // Duplicate call +} +``` + +#### 2. Variable Type Affects Load Instructions +- `s8` generates `lb` (load byte signed) +- `u8` generates `lbu` (load byte unsigned) +- Check if global variables need type changes to match original instructions + +#### 3. Struct vs Separate Variables +**Problem**: Compiler optimizes away stores to local variables if it thinks they're unused. +```c +// BAD - compiler may skip storing to sp1C, sp1E +s16 sp18, sp1A, sp1C, sp1E; +sp1C = 0xFF; // May be optimized away! +sp1E = 0xFF; +sp18 = 0; +sp1A = 0; +SetDrawMode(..., (RECT*)&sp18); + +// GOOD - using proper struct ensures all fields are stored +RECT rect; +rect.w = 0xFF; // Compiler knows RECT fields are accessed +rect.h = 0xFF; +rect.x = 0; +rect.y = 0; +SetDrawMode(..., &rect); +``` + +#### 4. Delay Slot Optimization +Original PSX compiler often put useful instructions in branch delay slots. 
Modern compilers may: +- Put the instruction before the branch instead +- Use NOP in delay slots + +Example: Original saves `y << 16` before loop, uses delay slot for `s2 = 0`: +```asm +beqz v0, check_mode +addu s2, zero, zero # delay slot: s2 = 0 +sll s1, a1, 16 # after branch: save y +``` + +#### 5. Redundant Instructions in Original +The original compiler sometimes generated redundant code: +- `andi t0, v1, 0xFF` after `lbu v1` (already 8-bit) +- Duplicate `sra a1, s1, 16` in both branches + +These are hard to reproduce with modern compilers without tricks. + +### Matching Workflow + +1. **Get size right first** - Restructure code until instruction count matches (note: if the output size is wrong you might see compiler/linker errors in unrelated files - this is a good indicator that the size is wrong) +2. **Fix types second** - Change s8/u8, s16/u16 to match load/store instructions +3. **Reorder operations** - Match the order of stores/loads in original +4. **Check branch structure** - Use goto labels to match original control flow +5. 
Look at other decompiled functions in the same file to see how they handle similar patterns to the ones you're trying to match + +### Debugging Commands +```shell +# Compare function sizes +./tools/docker-build.sh "mipsel-linux-gnu-objdump -t build/us/src/main/18B8.c.o" | grep func_NAME + +# View compiled assembly +./tools/docker-build.sh "mipsel-linux-gnu-objdump -d build/us/src/main/18B8.c.o" | grep -A100 ':' + +# Original is in asm/us//nonmatchings//func_NAME.s +``` + +## Reference Links +- PSX SDK docs: PSY-Q library documentation +- FF7 Scarlet: https://github.com/petfriendamy/ff7-scarlet (game data structures) diff --git a/tools/docker-build.sh b/tools/docker-build.sh new file mode 100755 index 0000000..900f8df --- /dev/null +++ b/tools/docker-build.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# Optimized Docker build wrapper for FF7 decomp +# This script adds Go caching for 3x faster builds + +set -e + +# Create Go cache directories if they don't exist +mkdir -p ~/.cache/ff7-go-mod ~/.cache/ff7-go-build + +# Run Docker with all necessary volume mounts including Go caches +exec docker run --rm --platform=linux/amd64 \ + -v "$(pwd)":/ff7 \ + -v ff7_venv:/ff7/.venv \ + -v "$(pwd)/build":/ff7/build \ + -v "$HOME/.cache/ff7-go-mod":/gocache/mod \ + -v "$HOME/.cache/ff7-go-build":/gocache/build \ + -e "GOMODCACHE=/gocache/mod" \ + -e "GOCACHE=/gocache/build" \ + ff7-build:latest -lc "cd /ff7 && $*" From 02cef32e943c35027a08694e15299153173a3801 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Tr=C4=99bacz?= Date: Mon, 29 Dec 2025 21:37:54 +0100 Subject: [PATCH 2/2] Small changes to Dockerfile and build script --- CLAUDE.md | 360 ------------------------------------------ Dockerfile | 24 +-- tools/docker-build.sh | 15 +- 3 files changed, 20 insertions(+), 379 deletions(-) delete mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md deleted file mode 100644 index 6b1a440..0000000 --- a/CLAUDE.md +++ /dev/null @@ -1,360 +0,0 @@ -## FF7 Decomp: Working Notes for Agents - 
-Final Fantasy VII (PS1 USA) decompilation project. Goal: byte-accurate recompilation of game executables. - -## Quick Reference - -### Docker Build Commands (Optimized) -```shell -# Build Docker image (only needed once) -docker build --platform=linux/amd64 --tag ff7-build:latest . - -# Run a build (FAST - uses Go caching, ~12s clean, ~2s incremental) -./tools/docker-build.sh "make build" - -# Format code after changes -./tools/docker-build.sh "make format" - -# Enter container interactively -./tools/docker-build.sh bash -``` - -The `docker-build.sh` wrapper adds Go module and build caching which provides a significant speedup. - -**Legacy commands** - only use if you have issues with the optimized script: -```shell -docker run --rm --platform=linux/amd64 -v "$(pwd)":/ff7 -v ff7_venv:/ff7/.venv -v "$(pwd)/build":/ff7/build ff7-build:latest -lc 'cd /ff7 && make build' -``` - -### Key Make Targets -- `make build` - Build all overlays, verify SHA1 checksums -- `make format` - Run clang-format on code -- `make clean` - Remove build artifacts -- `make submit` - Clean + build + format + stage files for commit - -### Mako Commands (via `./mako.sh`) -- `./mako.sh build` - Build project -- `./mako.sh dec ` - Decompile function, replace INCLUDE_ASM -- `./mako.sh dec --fix-structs` - Also replace D_8009XXXX with Savemap fields -- `./mako.sh rank ` - Rank functions by difficulty (easiest first) -- `./mako.sh symbols add [size]` - Add new symbol -- `./mako.sh format` - Format code - -## Project Structure - -``` -src/ # Decompiled C source (organized by overlay) -asm/us/ # MIPS assembly (original/undecompiled) - └── / - ├── nonmatchings/ # Undecompiled function .s files - └── data/ # Data segment .s files -include/ # Headers (common.h, game.h, psxsdk/) -config/ # Build config and symbol files - ├── us.yaml # Main overlay definitions - ├── symbols.*.txt # Symbol address files - └── sym_*.txt # Import/export symbol files -build/us/ # Build output (.o, .exe, .map files) -tools/ # 
Build tools (Go builder, Python scripts) -disks/us/ # Extracted game files (from disc image) -``` - -## Overlays - -| Overlay | VRAM Start | Source | Description | -|---------|------------|--------|-------------| -| main | 0x80010000 | `src/main/` | Core engine, initialization | -| battle | 0x800A0000 | `src/battle/` | Battle system | -| batini | 0x801B0000 | `src/battle/batini.c` | Battle initialization | -| field | 0x800A0000 | `src/field/` | Field exploration | -| world | 0x800A0000 | `src/world/` | World map | -| menu | varies | `src/menu/` | Menu systems (savemenu, title, etc.) | - -## Decompilation Workflow - -### 1. Find a function to decompile -```shell -# List undecompiled functions, ranked by difficulty (easiest first) -./mako.sh rank src/battle/nonmatchings/battle - -# Or browse asm files directly -ls asm/us/battle/nonmatchings/battle/ -``` - -### 2. Decompile with m2c -```shell -# This replaces INCLUDE_ASM in the .c file with decompiled code and performs struct field replacement -./mako.sh dec func_800A1158 --fix-structs -``` - -### 3. Refine the code -- Fix unknown types (marked with `/*?*/`) -- Match register allocation and instruction order -- Add/reference symbols in `config/symbols.*.txt` -- The goal is to produce a decompiled code that is 1:1 matching the original PSX binaries - -### 4. Build and verify -```shell -make build # Rebuilds and verifies SHA1 match -``` - -### 5. Format and submit -```shell -make format -git add config/ include/ src/ -git commit -m "Decompile func_800A1158" -``` - -## Code Patterns - -### INCLUDE_ASM Macro -Embeds undecompiled assembly: -```c -INCLUDE_ASM("asm/us/battle/nonmatchings/battle", func_800A1158); -``` -Replace with decompiled C code when function matches. 
- -### Common Types -```c -typedef signed char s8; typedef unsigned char u8; -typedef signed short s16; typedef unsigned short u16; -typedef signed int s32; typedef unsigned int u32; -typedef u8 unk_data; typedef unsigned int* unk_ptr; -``` - -### String Encoding -FF7 uses custom character encoding, not ASCII. Use `_S()` macro: -```c -const char* msg = _S("Save game?"); -``` - -## Naming Conventions - -### Functions -- Undecompiled: `func_800XXXXX` (address-based) -- Decompiled: verb-first descriptive names, prefer PSX SDK names when applicable -- Examples: `InitBattle`, `LoadScene`, `DrawSprite` - -### Data/Globals -- Unknown: `D_800XXXXX` (address-based) -- Known game state: `g_` prefix (e.g., `g_BattleState`) -- Module constants: module prefix (e.g., `BATTLE_MAX_ENEMIES`) -- Struct fields: `unkXX` until purpose known - -## Symbol Files - -Located in `config/`: -- `symbols.main.us.txt` - Main overlay symbols -- `symbols.battle.txt` - Battle overlay symbols -- `sym_export.us.txt` - Cross-overlay exports -- `sym_extern.us.txt` - External references - -Format: -``` -function_name = 0x800A1158; -D_800F5BB8 = 0x800F5BB8; // size:0xCC -``` - -Add symbols with: -```shell -./mako.sh symbols add config/symbols.battle.txt MyFunction 0x800A1234 -``` - -## Key Data Structures - -### Savemap (0x8009C6E4) -Game save data. Use `--fix-structs` to auto-replace `D_8009XXXX` references: -- `Savemap.party[9]` - Party member data -- `Savemap.inventory` - Item inventory -- `Savemap.materia` - Materia slots -- `Savemap.gil` - Money -- See `tools/fix_structs.py` for full field list - -### Battle Structures (`src/battle/battle.h`) -- `BattleSetup` - Battle configuration -- `Unk800F83E0` - Battle state (0x68 bytes) -- `BattleSetupType` - Encounter types (preemptive, back attack, etc.) - -## Tips for Matching - -1. **Compiler quirks**: Two PSX compilers available (`cc1-psx-26`, `cc1-psx-272`). Check which one the overlay uses in `config/us.yaml`. - -2. 
**Register allocation**: Order of operations matters. Sometimes restructuring expressions helps match. - -3. **Global pointer (GP)**: Main overlay uses `gp=0x80062D44`. Variables near this use gp-relative addressing. - -4. **Rodata association**: Battle overlay has `migrate_rodata_to_functions: true` - rodata is bundled with functions. - -5. **Use mipsel-linux-gnu-objdump** for line-by-line comparison - -## Comparing Original vs Compiled Assembly - -When a decompiled function doesn't match, compare the original assembly with the compiled output: - -### 1. Check Function Size (Quick Check) -```shell -# Get compiled function size from symbol table -./tools/docker-build.sh "mipsel-linux-gnu-objdump -t build/us/src/main/18B8.c.o" | grep func_80026F44 - -# Output: 00015e8c g F .text 00000128 func_80026F44 -# ^^^^^^^^ ^^^^^^^^ <- size in hex (0x128 = 296 bytes) -``` - -Compare with original size: count bytes from function start to end in the `.s` file, or calculate from addresses (end_addr - start_addr + 4). - -### 2. View Compiled Disassembly -```shell -# Disassemble a specific function from the compiled object -./tools/docker-build.sh "mipsel-linux-gnu-objdump -d build/us/src/main/18B8.c.o" | grep -A100 ':' -``` - -### 3. View Original Assembly -```shell -# Original assembly is in asm/us//nonmatchings// -cat asm/us/main/nonmatchings/18B8/func_80026F44.s -``` - -### 4. Common Size Mismatch Causes -- **Code merged after branches**: Compiler optimizes common code after if/else. Fix: put function calls inside each branch. -- **Delay slot optimization**: Original uses delay slots cleverly. The compiled code may not reproduce this. -- **Sign extension duplication**: Original may have `sll/sra` pairs duplicated in branches; compiler merges them. -- **Register allocation**: Different register choices can cause different instruction sequences. -- **Type differences**: `s8` vs `u8` generates `lb` vs `lbu` instructions. - -### 5. 
Useful Patterns -```shell -# Count instructions in original (each line with glabel excluded, /* */ comments have instructions) -grep -c '/\*' asm/us/main/nonmatchings/18B8/func_80026F44.s - -# Find jr ra (function returns) to identify function boundaries -./tools/docker-build.sh "mipsel-linux-gnu-objdump -d build/us/src/main/18B8.c.o" | grep 'jr.*ra' -``` - -## Decompilation Tips Checklist - -When decompiling a function, follow this checklist: - -- [ ] No prototypes or parameters with '?' as type -- [ ] No 'void*' parameters that should be typed structs -- [ ] No pointer arithmetic with manual offset calculations -- [ ] Use array indices to access arrays, do not use arithmetic calculations -- [ ] All struct field accesses use '->' or '.' operators -- [ ] Struct sizes match the assembly access patterns -- [ ] 'goto loop_*' are converted as 'while' loops -- [ ] 'goto block_*' in 'switch' are inlined, to reverse code optimization - -Alignment is critical. Code and data are aligned by 4-byte. - -## Finding symbol names and applying them to the decompiled code - -Our goal in this project currently is to find out as many symbol names (functions, variables, enums, structs, local variables, function parameters) as possible and apply them to the decompiled code. At the same time we want to ensure the code compiles 1:1 matching the original PSX binaries so we cannot alter the parameter types and change any logic. - -1. First we need to analyse both the decompiled PSX code in the @src/ folder and the PC decompiled code in the @assets/ folder. You usually start with a function in the PSX code that is unique enough and already has some symbols decompiled or its logic is recognizable enough that it can be matched to the PC code. -2. Spawn a sub-agent to look through the PC code in the @assets/ folder (do not read these files directly because they're huge) and find any corresponding code that can help us find new symbol names -3. 
Map out the connections between the PSX code (skip function names defined with INCLUDE_ASM macro, only functions fully defined in C can be renamed) and the PC code and store your findings in @CLAUDE_DECOMP.md for future reference. Make sure you're absolutely certain when creating this mapping - if there is any doubt about a specific symbol you should skip it. -4. Apply the changes. To do this spawn an Apply sub-agent that will take a map of symbol names to update (eg. "func_800A1158 = MyFunction", "D_800F5BB8 = MyVariable") and then the sub-agent will figure out how to efficiently and comprehensively apply these changes to the PSX code in @src/ (remember to update references to the changed symbol names in other source and header files; when changing a function name you should update all function calls to it in the code). The sub-agent should think hard to make sure it does not break anything as some symbol names are used in multiple places. The sub-agent shall not alter parameter types in function signatures. The sub-agent shall not read or update the assembly files in asm/ (these are generated by the disassembler and should not be manually edited). After finishing applying the changes it should return immediately without trying to run the build. -5. In parallel spawn a second sub-agent that will update the symbol maps in @config/ in symbols.main.us.txt and any other *.txt files that contain symbol names (skip sym_export.us.txt and sym_export_battle.us.txt since these are autogenerated by the build system) -6. After the work is done run the build to verify if the changes are correct. The build should succeed without any errors and all the SHA1 sums should match. Fix any issues that arise and repeat the process until the changes are correct. -7. Take one more look at CLAUDE_DECOMP.md and make sure its updated with all the new knowledge you've gained. -8. 
Finally think about what the next steps would be and suggest options to the user - -## Files to Commit -When submitting decompiled code: -```shell -git add config/ # Symbol files -git add include/ # Headers (if modified) -git add src/ # Decompiled source -``` - -## Advanced Matching Techniques - -### Understanding Why Decompiled Code Doesn't Match - -When the size is wrong, the compiler is generating different code structure. Common causes: - -#### 1. Merged Function Calls -**Problem**: Modern compilers merge common code after if/else branches. -```c -// BAD - compiler merges the call after the branches -if (condition) { - setup_a(); -} else { - setup_b(); -} -common_call(); // Compiler optimizes: one call site - -// GOOD - forces separate code paths (matches original) -if (condition) { - setup_a(); - common_call(); // Call inside branch -} else { - setup_b(); - common_call(); // Duplicate call -} -``` - -#### 2. Variable Type Affects Load Instructions -- `s8` generates `lb` (load byte signed) -- `u8` generates `lbu` (load byte unsigned) -- Check if global variables need type changes to match original instructions - -#### 3. Struct vs Separate Variables -**Problem**: Compiler optimizes away stores to local variables if it thinks they're unused. -```c -// BAD - compiler may skip storing to sp1C, sp1E -s16 sp18, sp1A, sp1C, sp1E; -sp1C = 0xFF; // May be optimized away! -sp1E = 0xFF; -sp18 = 0; -sp1A = 0; -SetDrawMode(..., (RECT*)&sp18); - -// GOOD - using proper struct ensures all fields are stored -RECT rect; -rect.w = 0xFF; // Compiler knows RECT fields are accessed -rect.h = 0xFF; -rect.x = 0; -rect.y = 0; -SetDrawMode(..., &rect); -``` - -#### 4. Delay Slot Optimization -Original PSX compiler often put useful instructions in branch delay slots. 
Modern compilers may: -- Put the instruction before the branch instead -- Use NOP in delay slots - -Example: Original saves `y << 16` before loop, uses delay slot for `s2 = 0`: -```asm -beqz v0, check_mode -addu s2, zero, zero # delay slot: s2 = 0 -sll s1, a1, 16 # after branch: save y -``` - -#### 5. Redundant Instructions in Original -The original compiler sometimes generated redundant code: -- `andi t0, v1, 0xFF` after `lbu v1` (already 8-bit) -- Duplicate `sra a1, s1, 16` in both branches - -These are hard to reproduce with modern compilers without tricks. - -### Matching Workflow - -1. **Get size right first** - Restructure code until instruction count matches (note: if the output size is wrong you might see compiler/linker errors in unrelated files - this is a good indicator that the size is wrong) -2. **Fix types second** - Change s8/u8, s16/u16 to match load/store instructions -3. **Reorder operations** - Match the order of stores/loads in original -4. **Check branch structure** - Use goto labels to match original control flow -5. Look at other decompiled functions in the same file to see how they handle similar patterns to the ones you're trying to match - -### Debugging Commands -```shell -# Compare function sizes -./tools/docker-build.sh "mipsel-linux-gnu-objdump -t build/us/src/main/18B8.c.o" | grep func_NAME - -# View compiled assembly -./tools/docker-build.sh "mipsel-linux-gnu-objdump -d build/us/src/main/18B8.c.o" | grep -A100 ':' - -# Original is in asm/us//nonmatchings//func_NAME.s -``` - -## Reference Links -- PSX SDK docs: PSY-Q library documentation -- FF7 Scarlet: https://github.com/petfriendamy/ff7-scarlet (game data structures) diff --git a/Dockerfile b/Dockerfile index 353dfb3..228d4fe 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,16 +1,18 @@ # HOW TO USE THIS DOCKERFILE # -# 1. build image and tag it as ff7-build:latest -# docker build --tag ff7-build:latest . +# 1. 
Build image (one time): +# docker build --platform=linux/amd64 --tag ff7-build:latest . # -# 2. launch container and mount current directory under /ff7 -# docker run --name ff7-work -it -v $(pwd):/ff7 -v ff7_venv:/ff7/.venv -v ff7_build:/ff7/build ff7-build +# 2. Run builds using the wrapper script (recommended): +# ./tools/docker-build.sh "make build" +# ./tools/docker-build.sh "make format" +# ./tools/docker-build.sh bash # interactive shell # -# 3. you are now ready to build and work on FF7 -# make expected -# -# 4. from now on, to re-use the same container execute the following: -# docker start -ai ff7-work +# 3. Or run interactively and reuse the container: +# docker run --name ff7-work -it --platform=linux/amd64 \ +# -v "$(pwd)":/ff7 -v ff7_venv:/ff7/.venv -v ff7_build:/ff7/build \ +# -v go_cache:/gocache ff7-build +# # Then reattach with: docker start -ai ff7-work FROM ubuntu:noble @@ -27,6 +29,10 @@ RUN echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ questing main unive COPY --from=golang:1.25-bookworm /usr/local/go/ /usr/local/go/ ENV PATH="${PATH}:/usr/local/go/bin" +ENV GOMODCACHE=/gocache/mod +ENV GOCACHE=/gocache/build + +RUN mkdir -p /gocache/mod /gocache/build && chmod -R 777 /gocache USER ubuntu WORKDIR /ff7 diff --git a/tools/docker-build.sh b/tools/docker-build.sh index 900f8df..119e21b 100755 --- a/tools/docker-build.sh +++ b/tools/docker-build.sh @@ -1,19 +1,14 @@ #!/bin/bash # Optimized Docker build wrapper for FF7 decomp -# This script adds Go caching for 3x faster builds +# Uses Docker volumes for Go caching (faster on macOS than host mounts) set -e -# Create Go cache directories if they don't exist -mkdir -p ~/.cache/ff7-go-mod ~/.cache/ff7-go-build - -# Run Docker with all necessary volume mounts including Go caches +# Run Docker with volume mounts for source, venv, build output, and Go cache; the PID-suffixed container name keeps concurrent runs apart and avoids clashing with the long-lived ff7-work container from the Dockerfile instructions exec docker run --rm --platform=linux/amd64 \ + --name "ff7-build-$$" \ -v "$(pwd)":/ff7 \ -v ff7_venv:/ff7/.venv \ -v "$(pwd)/build":/ff7/build \
- -v "$HOME/.cache/ff7-go-mod":/gocache/mod \ - -v "$HOME/.cache/ff7-go-build":/gocache/build \ - -e "GOMODCACHE=/gocache/mod" \ - -e "GOCACHE=/gocache/build" \ + -v ff7_build:/ff7/build \ + -v go_cache:/gocache \ ff7-build:latest -lc "cd /ff7 && $*"