diff --git a/callsite_test.go b/callsite_test.go index 6a78b27..0f6df61 100644 --- a/callsite_test.go +++ b/callsite_test.go @@ -1,7 +1,6 @@ package resurgo_test import ( - "bytes" "debug/elf" "os" "os/exec" @@ -29,77 +28,70 @@ func TestDetectCallSitesAMD64_Call(t *testing.T) { wantConf resurgo.Confidence wantSource uint64 wantTarget uint64 - }{ - { - name: "pc-relative-call", - // call $+0x10 (rel32 = 0x0000000B, instruction length = 5) - // Target = 0 + 5 + 0x0B = 0x10 - code: []byte{0xE8, 0x0B, 0x00, 0x00, 0x00}, - baseAddr: 0, - wantCount: 1, - wantType: resurgo.CallSiteCall, - wantMode: resurgo.AddressingModePCRelative, - wantConf: resurgo.ConfidenceHigh, - wantSource: 0, - wantTarget: 0x10, - }, - { - name: "pc-relative-call-negative-offset", - // call $-0x20 (rel32 = 0xFFFFFFE0, two's complement -32) - // At address 0x100, target = 0x100 + 5 + (-32) = 0xE5 - code: []byte{0xE8, 0xE0, 0xFF, 0xFF, 0xFF}, - baseAddr: 0x100, - wantCount: 1, - wantType: resurgo.CallSiteCall, - wantMode: resurgo.AddressingModePCRelative, - wantConf: resurgo.ConfidenceHigh, - wantSource: 0x100, - wantTarget: 0xE5, - }, - { - name: "register-indirect-call", - // call rax = FF D0 - code: []byte{0xFF, 0xD0}, - baseAddr: 0x200, - wantCount: 1, - wantType: resurgo.CallSiteCall, - wantMode: resurgo.AddressingModeRegisterIndirect, - wantConf: resurgo.ConfidenceNone, - wantSource: 0x200, - wantTarget: 0, - }, - { - name: "rip-relative-call", - // call [rip+0x1234] = FF 15 34 12 00 00 (6 bytes) - // At address 0x1000, target = 0x1000 + 6 + 0x1234 = 0x223A - code: []byte{0xFF, 0x15, 0x34, 0x12, 0x00, 0x00}, - baseAddr: 0x1000, - wantCount: 1, - wantType: resurgo.CallSiteCall, - wantMode: resurgo.AddressingModePCRelative, - wantConf: resurgo.ConfidenceMedium, - wantSource: 0x1000, - wantTarget: 0x223A, - }, - { - name: "memory-call-with-base-register", - // call [rbx+0x10] = FF 53 10 - code: []byte{0xFF, 0x53, 0x10}, - baseAddr: 0x300, - wantCount: 1, - wantType: resurgo.CallSiteCall, - 
wantMode: resurgo.AddressingModeRegisterIndirect, - wantConf: resurgo.ConfidenceNone, - wantSource: 0x300, - wantTarget: 0, - }, - { - name: "no-call-instructions", - code: []byte{0x90, 0x90, 0x90}, // nop, nop, nop - baseAddr: 0, - wantCount: 0, - }, - } + }{{ + name: "pc-relative-call", + // call $+0x10 (rel32 = 0x0000000B, instruction length = 5) + // Target = 0 + 5 + 0x0B = 0x10 + code: []byte{0xE8, 0x0B, 0x00, 0x00, 0x00}, + baseAddr: 0, + wantCount: 1, + wantType: resurgo.CallSiteCall, + wantMode: resurgo.AddressingModePCRelative, + wantConf: resurgo.ConfidenceHigh, + wantSource: 0, + wantTarget: 0x10, + }, { + name: "pc-relative-call-negative-offset", + // call $-0x20 (rel32 = 0xFFFFFFE0, two's complement -32) + // At address 0x100, target = 0x100 + 5 + (-32) = 0xE5 + code: []byte{0xE8, 0xE0, 0xFF, 0xFF, 0xFF}, + baseAddr: 0x100, + wantCount: 1, + wantType: resurgo.CallSiteCall, + wantMode: resurgo.AddressingModePCRelative, + wantConf: resurgo.ConfidenceHigh, + wantSource: 0x100, + wantTarget: 0xE5, + }, { + name: "register-indirect-call", + // call rax = FF D0 + code: []byte{0xFF, 0xD0}, + baseAddr: 0x200, + wantCount: 1, + wantType: resurgo.CallSiteCall, + wantMode: resurgo.AddressingModeRegisterIndirect, + wantConf: resurgo.ConfidenceNone, + wantSource: 0x200, + wantTarget: 0, + }, { + name: "rip-relative-call", + // call [rip+0x1234] = FF 15 34 12 00 00 (6 bytes) + // At address 0x1000, target = 0x1000 + 6 + 0x1234 = 0x223A + code: []byte{0xFF, 0x15, 0x34, 0x12, 0x00, 0x00}, + baseAddr: 0x1000, + wantCount: 1, + wantType: resurgo.CallSiteCall, + wantMode: resurgo.AddressingModePCRelative, + wantConf: resurgo.ConfidenceMedium, + wantSource: 0x1000, + wantTarget: 0x223A, + }, { + name: "memory-call-with-base-register", + // call [rbx+0x10] = FF 53 10 + code: []byte{0xFF, 0x53, 0x10}, + baseAddr: 0x300, + wantCount: 1, + wantType: resurgo.CallSiteCall, + wantMode: resurgo.AddressingModeRegisterIndirect, + wantConf: resurgo.ConfidenceNone, + wantSource: 
0x300, + wantTarget: 0, + }, { + name: "no-call-instructions", + code: []byte{0x90, 0x90, 0x90}, // nop, nop, nop + baseAddr: 0, + wantCount: 0, + }} for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -150,43 +142,39 @@ func TestDetectCallSitesAMD64_Jump(t *testing.T) { wantMode resurgo.AddressingMode wantConf resurgo.Confidence wantTarget uint64 - }{ - { - name: "unconditional-jmp-rel32", - // jmp $+0x20 (rel32 = 0x0000001B, instruction length = 5) - // Target = 0 + 5 + 0x1B = 0x20 - code: []byte{0xE9, 0x1B, 0x00, 0x00, 0x00}, - baseAddr: 0, - wantCount: 1, - wantType: resurgo.CallSiteJump, - wantMode: resurgo.AddressingModePCRelative, - wantConf: resurgo.ConfidenceMedium, // Unconditional = medium - wantTarget: 0x20, - }, - { - name: "unconditional-jmp-rel8", - // jmp $+0x10 (rel8 = 0x0E, instruction length = 2) - // Target = 0 + 2 + 0x0E = 0x10 - code: []byte{0xEB, 0x0E}, - baseAddr: 0, - wantCount: 1, - wantType: resurgo.CallSiteJump, - wantMode: resurgo.AddressingModePCRelative, - wantConf: resurgo.ConfidenceMedium, - wantTarget: 0x10, - }, - { - name: "register-indirect-jmp", - // jmp rax = FF E0 - code: []byte{0xFF, 0xE0}, - baseAddr: 0x400, - wantCount: 1, - wantType: resurgo.CallSiteJump, - wantMode: resurgo.AddressingModeRegisterIndirect, - wantConf: resurgo.ConfidenceNone, - wantTarget: 0, - }, - } + }{{ + name: "unconditional-jmp-rel32", + // jmp $+0x20 (rel32 = 0x0000001B, instruction length = 5) + // Target = 0 + 5 + 0x1B = 0x20 + code: []byte{0xE9, 0x1B, 0x00, 0x00, 0x00}, + baseAddr: 0, + wantCount: 1, + wantType: resurgo.CallSiteJump, + wantMode: resurgo.AddressingModePCRelative, + wantConf: resurgo.ConfidenceMedium, // Unconditional = medium + wantTarget: 0x20, + }, { + name: "unconditional-jmp-rel8", + // jmp $+0x10 (rel8 = 0x0E, instruction length = 2) + // Target = 0 + 2 + 0x0E = 0x10 + code: []byte{0xEB, 0x0E}, + baseAddr: 0, + wantCount: 1, + wantType: resurgo.CallSiteJump, + wantMode: resurgo.AddressingModePCRelative, + 
wantConf: resurgo.ConfidenceMedium, + wantTarget: 0x10, + }, { + name: "register-indirect-jmp", + // jmp rax = FF E0 + code: []byte{0xFF, 0xE0}, + baseAddr: 0x400, + wantCount: 1, + wantType: resurgo.CallSiteJump, + wantMode: resurgo.AddressingModeRegisterIndirect, + wantConf: resurgo.ConfidenceNone, + wantTarget: 0, + }} for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -232,31 +220,28 @@ func TestDetectCallSitesARM64_BL(t *testing.T) { wantType resurgo.CallSiteType wantConf resurgo.Confidence wantTarget uint64 - }{ - { - name: "bl-forward", - // BL +0x1000 (offset = 0x1000, encoded as 0x1000/4 = 0x400) - // Instruction: 0x94000400 - code: arm64Insn(0x94000400), - baseAddr: 0x1000, - wantCount: 1, - wantType: resurgo.CallSiteCall, - wantConf: resurgo.ConfidenceHigh, - wantTarget: 0x2000, // 0x1000 + 0x1000 - }, - { - name: "bl-backward", - // BL -0x100 (offset = -0x100, encoded as (-0x100/4) & 0x3FFFFFF) - // Two's complement: 0x3FFFFFC0 - // Instruction: 0x97FFFFC0 - code: arm64Insn(0x97FFFFC0), - baseAddr: 0x2000, - wantCount: 1, - wantType: resurgo.CallSiteCall, - wantConf: resurgo.ConfidenceHigh, - wantTarget: 0x1F00, // 0x2000 - 0x100 - }, - } + }{{ + name: "bl-forward", + // BL +0x1000 (offset = 0x1000, encoded as 0x1000/4 = 0x400) + // Instruction: 0x94000400 + code: arm64Insn(0x94000400), + baseAddr: 0x1000, + wantCount: 1, + wantType: resurgo.CallSiteCall, + wantConf: resurgo.ConfidenceHigh, + wantTarget: 0x2000, // 0x1000 + 0x1000 + }, { + name: "bl-backward", + // BL -0x100 (offset = -0x100, encoded as (-0x100/4) & 0x3FFFFFF) + // Two's complement: 0x3FFFFC0 + // Instruction: 0x97FFFFC0 + code: arm64Insn(0x97FFFFC0), + baseAddr: 0x2000, + wantCount: 1, + wantType: resurgo.CallSiteCall, + wantConf: resurgo.ConfidenceHigh, + wantTarget: 0x1F00, // 0x2000 - 0x100 + }} for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -299,19 +284,17 @@ func TestDetectCallSitesARM64_B(t *testing.T) { wantType resurgo.CallSiteType wantConf 
resurgo.Confidence wantTarget uint64 - }{ - { - name: "b-unconditional", - // B +0x100 (offset = 0x100, encoded as 0x100/4 = 0x40) - // Instruction: 0x14000040 - code: arm64Insn(0x14000040), - baseAddr: 0x1000, - wantCount: 1, - wantType: resurgo.CallSiteJump, - wantConf: resurgo.ConfidenceMedium, - wantTarget: 0x1100, // 0x1000 + 0x100 - }, - } + }{{ + name: "b-unconditional", + // B +0x100 (offset = 0x100, encoded as 0x100/4 = 0x40) + // Instruction: 0x14000040 + code: arm64Insn(0x14000040), + baseAddr: 0x1000, + wantCount: 1, + wantType: resurgo.CallSiteJump, + wantConf: resurgo.ConfidenceMedium, + wantTarget: 0x1100, // 0x1000 + 0x100 + }} for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -383,24 +366,28 @@ func TestDetectCallSites_EmptyInput(t *testing.T) { tests := []struct { name string code []byte - }{ - {name: "nil", code: nil}, - {name: "empty", code: []byte{}}, - } + arch resurgo.Arch + }{{ + name: "nil/amd64", + code: nil, + arch: resurgo.ArchAMD64, + }, { + name: "nil/arm64", + code: nil, + arch: resurgo.ArchARM64, + }, { + name: "empty/amd64", + code: []byte{}, + arch: resurgo.ArchAMD64, + }, { + name: "empty/arm64", + code: []byte{}, + arch: resurgo.ArchARM64, + }} for _, tt := range tests { - t.Run(tt.name+"/amd64", func(t *testing.T) { - edges, err := resurgo.DetectCallSites(tt.code, 0, resurgo.ArchAMD64) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if len(edges) != 0 { - t.Errorf("expected 0 edges, got %d", len(edges)) - } - }) - - t.Run(tt.name+"/arm64", func(t *testing.T) { - edges, err := resurgo.DetectCallSites(tt.code, 0, resurgo.ArchARM64) + t.Run(tt.name, func(t *testing.T) { + edges, err := resurgo.DetectCallSites(tt.code, 0, tt.arch) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -440,103 +427,6 @@ func TestDetectCallSites_JumpTarget(t *testing.T) { } } -func TestDetectFunctionsFromELF(t *testing.T) { - binPath := filepath.Join(t.TempDir(), "demo-app") - args := []string{"build", "-o", 
binPath, "testdata/demo-app.go"} - - cmd := exec.Command("go", args...) - cmd.Env = append(os.Environ(), "CGO_ENABLED=0", "GOARCH=amd64") - if out, err := cmd.CombinedOutput(); err != nil { - t.Fatalf("failed to compile demo-app: %v\n%s", err, out) - } - - f, err := elf.Open(binPath) - if err != nil { - t.Fatalf("failed to open ELF binary: %v", err) - } - defer f.Close() - - candidates, err := resurgo.DetectFunctionsFromELF(f) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if len(candidates) == 0 { - t.Fatal("expected at least one function candidate, got none") - } - - counts := make(map[resurgo.DetectionType]int) - for _, c := range candidates { - counts[c.DetectionType]++ - } - t.Logf("total candidates: %d, by type: %v", len(candidates), counts) - - if counts[resurgo.DetectionPrologueOnly] == 0 { - t.Error("expected at least one prologue-only candidate") - } -} - -// TestDisasmDetector verifies that DisasmDetector, when run against a real ELF -// binary, produces candidates with the expected detection types and that -// functions both called and matching a prologue pattern are promoted to -// DetectionPrologueCallSite with ConfidenceHigh. 
-func TestDisasmDetector(t *testing.T) { - binPath := filepath.Join(t.TempDir(), "demo-app") - cmd := exec.Command("go", "build", "-o", binPath, "testdata/demo-app.go") - cmd.Env = append(os.Environ(), "CGO_ENABLED=0", "GOARCH=amd64") - if out, err := cmd.CombinedOutput(); err != nil { - t.Fatalf("failed to compile demo-app: %v\n%s", err, out) - } - - f, err := elf.Open(binPath) - if err != nil { - t.Fatalf("failed to open ELF binary: %v", err) - } - defer f.Close() - - candidates, err := resurgo.DisasmDetector(f) - if err != nil { - t.Fatalf("DisasmDetector: %v", err) - } - - if len(candidates) == 0 { - t.Fatal("expected at least one candidate, got none") - } - - counts := make(map[resurgo.DetectionType]int) - for _, c := range candidates { - counts[c.DetectionType]++ - } - t.Logf("total candidates: %d, by type: %v", len(candidates), counts) - - // Disasm must find functions via prologue pattern. - if counts[resurgo.DetectionPrologueOnly] == 0 && counts[resurgo.DetectionPrologueCallSite] == 0 { - t.Error("expected prologue-based candidates, got none") - } - - // Functions confirmed by both prologue and call-site must be ConfidenceHigh - // and must carry at least one caller address. 
- for _, c := range candidates { - if c.DetectionType == resurgo.DetectionPrologueCallSite { - if c.Confidence != resurgo.ConfidenceHigh { - t.Errorf("0x%x: expected ConfidenceHigh for prologue-callsite, got %s", c.Address, c.Confidence) - } - if len(c.CalledFrom) == 0 && len(c.JumpedFrom) == 0 { - t.Errorf("0x%x: prologue-callsite candidate has no caller or jump source", c.Address) - } - } - } -} - -func TestDetectFunctionsFromELF_InvalidELF(t *testing.T) { - r := bytes.NewReader([]byte{0x00, 0x01, 0x02, 0x03}) - f, err := elf.NewFile(r) - if err == nil { - f.Close() - t.Fatal("expected elf.NewFile to fail on invalid data") - } -} - func TestDetectCallSitesARM64_BConditional(t *testing.T) { // ARM64 B.EQ (conditional branch): // B.cond has encoding 0x54000000 | (imm19 << 5) | cond diff --git a/convergence_test.go b/convergence_test.go deleted file mode 100644 index 9bc7601..0000000 --- a/convergence_test.go +++ /dev/null @@ -1,340 +0,0 @@ -package resurgo_test - -import ( - "encoding/binary" - "testing" - - "github.com/maxgio92/resurgo" -) - -// encodeCallRel32 writes an AMD64 CALL rel32 instruction at code[offset:]. -func encodeCallRel32(code []byte, offset int, baseAddr, target uint64) { - source := baseAddr + uint64(offset) - rel := int32(int64(target) - int64(source+5)) - code[offset] = 0xE8 - binary.LittleEndian.PutUint32(code[offset+1:], uint32(rel)) -} - -// encodeJmpRel32 writes an AMD64 JMP rel32 instruction at code[offset:]. -func encodeJmpRel32(code []byte, offset int, baseAddr, target uint64) { - source := baseAddr + uint64(offset) - rel := int32(int64(target) - int64(source+5)) - code[offset] = 0xE9 - binary.LittleEndian.PutUint32(code[offset+1:], uint32(rel)) -} - -// arm64BranchInsn encodes an ARM64 BL or B instruction word. -// opBase is 0x94000000 for BL or 0x14000000 for B. 
-func arm64BranchInsn(opBase uint32, source, target uint64) uint32 { - off := int64(target) - int64(source) - imm26 := uint32(off/4) & 0x03FFFFFF - return opBase | imm26 -} - -// assertConvergence checks convergence between prologue and call-site detection -// by running both independently and counting addresses found by both signals. -// minTotal is the minimum number of candidates expected, minBoth the minimum -// number of addresses confirmed by both signals, and minRatio the minimum -// convergence ratio (both / total). -func assertConvergence(t *testing.T, code []byte, baseAddr uint64, arch resurgo.Arch, minTotal, minBoth int, minRatio float64) { - t.Helper() - - prologues, err := resurgo.DetectPrologues(code, baseAddr, arch) - if err != nil { - t.Fatalf("DetectPrologues: %v", err) - } - edges, err := resurgo.DetectCallSites(code, baseAddr, arch) - if err != nil { - t.Fatalf("DetectCallSites: %v", err) - } - - prologueSet := make(map[uint64]resurgo.PrologueType, len(prologues)) - for _, p := range prologues { - prologueSet[p.Address] = p.Type - } - callSet := make(map[uint64]struct{}, len(edges)) - for _, e := range edges { - callSet[e.TargetAddr] = struct{}{} - } - - allAddrs := make(map[uint64]struct{}) - for _, p := range prologues { - allAddrs[p.Address] = struct{}{} - } - for _, e := range edges { - allAddrs[e.TargetAddr] = struct{}{} - } - - var bothCount, prologueOnly, callTarget int - for addr := range allAddrs { - _, hasPrologue := prologueSet[addr] - _, hasCall := callSet[addr] - switch { - case hasPrologue && hasCall: - bothCount++ - t.Logf(" 0x%x: %-15s (prologue: %s)", addr, resurgo.DetectionPrologueCallSite, prologueSet[addr]) - case hasPrologue: - prologueOnly++ - t.Logf(" 0x%x: %-15s (prologue: %s)", addr, resurgo.DetectionPrologueOnly, prologueSet[addr]) - case hasCall: - callTarget++ - t.Logf(" 0x%x: %-15s", addr, resurgo.DetectionCallTarget) - } - } - - total := len(allAddrs) - ratio := float64(bothCount) / float64(total) - - 
t.Logf("total=%d both=%d prologue-only=%d call-target=%d ratio=%.3f", - total, bothCount, prologueOnly, callTarget, ratio) - - if total < minTotal { - t.Errorf("expected >= %d candidates, got %d", minTotal, total) - } - if bothCount < minBoth { - t.Errorf("expected >= %d 'both' candidates, got %d", minBoth, bothCount) - } - if ratio < minRatio { - t.Errorf("convergence ratio %.3f < %.3f", ratio, minRatio) - } - if prologueOnly < 1 { - t.Error("expected at least one prologue-only candidate") - } - if callTarget < 1 { - t.Error("expected at least one call-target candidate") - } -} - -// buildSyntheticAMD64 builds a synthetic AMD64 .text section with 12 functions -// exercising multiple prologue styles and a realistic call graph. -// -// Layout: 0x300 bytes, base 0x1000, 0x40-byte slots, NOP-filled. -func buildSyntheticAMD64() (code []byte, baseAddr uint64) { - const base = uint64(0x1000) - code = make([]byte, 0x300) - for i := range code { - code[i] = 0x90 // NOP fill - } - - // Function offsets (0x40-byte slots) - const ( - offMain = 0x000 - offFuncA = 0x040 - offFuncB = 0x080 - offFuncC = 0x0C0 - offFuncD = 0x100 - offFuncE = 0x140 - offFuncF = 0x180 - offFuncG = 0x1C0 - offFuncH = 0x200 - offFuncI = 0x240 - offFuncJ = 0x280 - offFuncK = 0x2C0 - ) - - // Classic prologue: push rbp; mov rbp, rsp - classicPrologue := func(off int) { - code[off] = 0x55 // push rbp - code[off+1] = 0x48 // REX.W prefix ┐ - code[off+2] = 0x89 // mov r/m64 ├ mov rbp, rsp - code[off+3] = 0xe5 // ModRM ┘ - } - - // main: classic prologue, calls funcA, funcB, funcC - classicPrologue(offMain) - encodeCallRel32(code, offMain+4, base, base+uint64(offFuncA)) - encodeCallRel32(code, offMain+9, base, base+uint64(offFuncB)) - encodeCallRel32(code, offMain+14, base, base+uint64(offFuncC)) - code[offMain+19] = 0xC3 // ret - - // funcA: classic prologue, calls funcD, funcE, funcI - classicPrologue(offFuncA) - encodeCallRel32(code, offFuncA+4, base, base+uint64(offFuncD)) - encodeCallRel32(code, 
offFuncA+9, base, base+uint64(offFuncE)) - encodeCallRel32(code, offFuncA+14, base, base+uint64(offFuncI)) - code[offFuncA+19] = 0xC3 // ret - - // funcB: classic prologue, calls funcE, funcF - classicPrologue(offFuncB) - encodeCallRel32(code, offFuncB+4, base, base+uint64(offFuncE)) - encodeCallRel32(code, offFuncB+9, base, base+uint64(offFuncF)) - code[offFuncB+14] = 0xC3 // ret - - // funcC: classic prologue, call funcJ, jmp funcK (tail call) - classicPrologue(offFuncC) - encodeCallRel32(code, offFuncC+4, base, base+uint64(offFuncJ)) - encodeJmpRel32(code, offFuncC+9, base, base+uint64(offFuncK)) - - // funcD: classic prologue - classicPrologue(offFuncD) - code[offFuncD+4] = 0xC3 // ret - - // funcE: classic prologue - classicPrologue(offFuncE) - code[offFuncE+4] = 0xC3 // ret - - // funcF: classic prologue, jmp funcG (tail call) - classicPrologue(offFuncF) - encodeJmpRel32(code, offFuncF+4, base, base+uint64(offFuncG)) - - // funcG: classic prologue - classicPrologue(offFuncG) - code[offFuncG+4] = 0xC3 // ret - - // funcH: push-only prologue (push rbx at RET boundary) - code[offFuncH-1] = 0xC3 // ret (boundary marker) - code[offFuncH] = 0x53 // push rbx (callee-saved) - code[offFuncH+1] = 0xC3 // ret - - // funcI: no prologue (call-target only) - code[offFuncI] = 0xC3 // ret - - // funcJ: no-frame-pointer prologue (sub rsp, imm at RET boundary) - code[offFuncJ-1] = 0xC3 // ret (boundary marker) - code[offFuncJ] = 0x48 // REX.W prefix ┐ - code[offFuncJ+1] = 0x83 // sub r/m64 ├ sub rsp, 0x20 - code[offFuncJ+2] = 0xec // ModRM: rsp │ - code[offFuncJ+3] = 0x20 // imm8: 0x20 ┘ - code[offFuncJ+4] = 0xC3 // ret - - // funcK: no prologue (jump-target only) - code[offFuncK] = 0xC3 // ret - - return code, base -} - -// buildSyntheticARM64 builds a synthetic ARM64 .text section with 12 functions -// exercising multiple prologue styles and a realistic call graph. -// -// Layout: 0x300 bytes, base 0x10000, 0x40-byte slots, NOP-filled. 
-func buildSyntheticARM64() (code []byte, baseAddr uint64) { - const base = uint64(0x10000) - code = make([]byte, 0x300) - // Fill with ARM64 NOPs (0xd503201f = nop) - for i := 0; i < len(code); i += 4 { - binary.LittleEndian.PutUint32(code[i:], 0xd503201f) // nop - } - - putInsn := func(off int, insn uint32) { - binary.LittleEndian.PutUint32(code[off:], insn) - } - - const ( - stpX29X30 = uint32(0xa9bf7bfd) // stp x29, x30, [sp, #-16]! - movX29SP = uint32(0x910003fd) // mov x29, sp - subSPImm = uint32(0xd10083ff) // sub sp, sp, #0x20 - arm64RET = uint32(0xd65f03c0) // ret - blOp = uint32(0x94000000) // BL base opcode - bOp = uint32(0x14000000) // B base opcode - ) - - // Function offsets (0x40-byte slots) - const ( - offMain = 0x000 - offFuncA = 0x040 - offFuncB = 0x080 - offFuncC = 0x0C0 - offFuncD = 0x100 - offFuncE = 0x140 - offFuncF = 0x180 - offFuncG = 0x1C0 - offFuncH = 0x200 - offFuncI = 0x240 - offFuncJ = 0x280 - offFuncK = 0x2C0 - ) - - // STP frame pair prologue: stp x29, x30, [sp, #-16]!; mov x29, sp - stpPrologue := func(off int) { - putInsn(off, stpX29X30) - putInsn(off+4, movX29SP) - } - - bl := func(srcOff, dstOff int) uint32 { - return arm64BranchInsn(blOp, base+uint64(srcOff), base+uint64(dstOff)) - } - b := func(srcOff, dstOff int) uint32 { - return arm64BranchInsn(bOp, base+uint64(srcOff), base+uint64(dstOff)) - } - - // main: STP frame pair, BL funcA, BL funcB, BL funcC - stpPrologue(offMain) - putInsn(offMain+8, bl(offMain+8, offFuncA)) - putInsn(offMain+12, bl(offMain+12, offFuncB)) - putInsn(offMain+16, bl(offMain+16, offFuncC)) - putInsn(offMain+20, arm64RET) - - // funcA: STP frame pair, BL funcD, BL funcE, BL funcI - stpPrologue(offFuncA) - putInsn(offFuncA+8, bl(offFuncA+8, offFuncD)) - putInsn(offFuncA+12, bl(offFuncA+12, offFuncE)) - putInsn(offFuncA+16, bl(offFuncA+16, offFuncI)) - putInsn(offFuncA+20, arm64RET) - - // funcB: STP frame pair, BL funcE, BL funcF - stpPrologue(offFuncB) - putInsn(offFuncB+8, bl(offFuncB+8, offFuncE)) - 
putInsn(offFuncB+12, bl(offFuncB+12, offFuncF)) - putInsn(offFuncB+16, arm64RET) - - // funcC: STP frame pair, BL funcJ, B funcK (tail jump) - stpPrologue(offFuncC) - putInsn(offFuncC+8, bl(offFuncC+8, offFuncJ)) - putInsn(offFuncC+12, b(offFuncC+12, offFuncK)) - - // funcD: STP frame pair - stpPrologue(offFuncD) - putInsn(offFuncD+8, arm64RET) - - // funcE: STP frame pair - stpPrologue(offFuncE) - putInsn(offFuncE+8, arm64RET) - - // funcF: STP frame pair, B funcG (tail jump) - stpPrologue(offFuncF) - putInsn(offFuncF+8, b(offFuncF+8, offFuncG)) - - // funcG: STP-only (stp x29, x30 followed by NOP, not mov x29, sp) - putInsn(offFuncG, stpX29X30) - // Next slot (offFuncG+4) already has NOP → STP-only fires - putInsn(offFuncG+8, arm64RET) - - // funcH: STP frame pair (not called/jumped to) - stpPrologue(offFuncH) - putInsn(offFuncH+8, arm64RET) - - // funcI: no prologue (call-target only) - putInsn(offFuncI, arm64RET) - - // funcJ: sub-sp prologue (needs RET before it) - putInsn(offFuncJ-4, arm64RET) // boundary marker - putInsn(offFuncJ, subSPImm) - putInsn(offFuncJ+4, arm64RET) - - // funcK: no prologue (jump-target only) - putInsn(offFuncK, arm64RET) - - return code, base -} - -func TestDetectFunctionsFromELF_Convergence(t *testing.T) { - // Call graph (both architectures): - // main → funcA, funcB, funcC (calls) - // funcA → funcD, funcE, funcI (calls) - // funcB → funcE, funcF (calls) - // funcC → funcJ, funcK (call + tail-jump) - // funcF → funcG (tail-jump) - // funcH (prologue only, not called) - // - // 12 functions, expected 8 "prologue-callsite" / 12 total = 0.667 convergence. 
- - t.Run("amd64", func(t *testing.T) { - code, base := buildSyntheticAMD64() - assertConvergence(t, code, base, resurgo.ArchAMD64, 10, 7, 0.6) - }) - - t.Run("arm64", func(t *testing.T) { - code, base := buildSyntheticARM64() - assertConvergence(t, code, base, resurgo.ArchARM64, 10, 7, 0.6) - }) -} diff --git a/detector_test.go b/detector_test.go index d18c285..ee4c6a6 100644 --- a/detector_test.go +++ b/detector_test.go @@ -1,13 +1,11 @@ package resurgo_test import ( + "bytes" "debug/elf" - "encoding/binary" "os" "os/exec" "path/filepath" - "strconv" - "strings" "testing" "github.com/maxgio92/resurgo" @@ -18,452 +16,231 @@ const ( demoAppBinary = "demo-app" ) -func TestDetectProloguesAMD64(t *testing.T) { - // AMD64 instruction encodings: - // nop = 0x90 - // push rbp = 0x55 - // mov rbp, rsp = 0x48 0x89 0xe5 - // sub rsp, 0x20 = 0x48 0x83 0xec 0x20 - +// TestDetectFunctionsFromELF verifies that DetectFunctionsFromELF runs the +// full detector and filter pipeline and produces the expected detection types +// for both Go and C binaries. +func TestDetectFunctionsFromELF(t *testing.T) { tests := []struct { name string - code []byte - baseAddr uint64 - wantCount int - wantType resurgo.PrologueType - wantAddr uint64 - }{ - { - // nop; push rbp; mov rbp, rsp - // The leading nop ensures push rbp is not at start-of-input, - // so only the classic pattern fires. - name: string(resurgo.PrologueClassic), - code: []byte{0x90, 0x55, 0x48, 0x89, 0xe5}, - baseAddr: 0, - wantCount: 1, - wantType: resurgo.PrologueClassic, - wantAddr: 1, - }, - { - // sub rsp, 0x20 at start of code (no preceding instruction) - name: string(resurgo.PrologueNoFramePointer), - code: []byte{0x48, 0x83, 0xec, 0x20}, - baseAddr: 0, - wantCount: 1, - wantType: resurgo.PrologueNoFramePointer, - wantAddr: 0, - }, - { - // nop; push rbx (0x53); sub rsp, 0x20 - push not at boundary, - // only the sub rsp is detected as NoFramePointer. 
- name: "no-frame-pointer-after-push", - code: []byte{0x90, 0x53, 0x48, 0x83, 0xec, 0x20}, - baseAddr: 0, - wantCount: 1, - wantType: resurgo.PrologueNoFramePointer, - wantAddr: 2, - }, - { - // push rbp; nop - push rbp at start, not followed by mov rbp, rsp - name: string(resurgo.ProloguePushOnly), - code: []byte{0x55, 0x90}, - baseAddr: 0, - wantCount: 1, - wantType: resurgo.ProloguePushOnly, - wantAddr: 0, - }, - { - name: "EmptyNil", - code: nil, - wantCount: 0, - }, - { - name: "EmptySlice", - code: []byte{}, - wantCount: 0, - }, - { - // Garbage bytes that should not match any prologue pattern. - name: "InvalidBytes", - code: []byte{0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe}, - wantCount: 0, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - prologues, err := resurgo.DetectPrologues(tt.code, tt.baseAddr, resurgo.ArchAMD64) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if len(prologues) != tt.wantCount { - t.Fatalf("expected %d prologue(s), got %d: %+v", tt.wantCount, len(prologues), prologues) - } - if tt.wantCount == 0 { - return - } - if prologues[0].Type != tt.wantType { - t.Errorf("expected type %s, got %s", tt.wantType, prologues[0].Type) - } - if prologues[0].Address != tt.wantAddr { - t.Errorf("expected address 0x%x, got 0x%x", tt.wantAddr, prologues[0].Address) + build func(t *testing.T, dir string) string + wantTypes []resurgo.DetectionType + }{{ + name: "go", + build: func(t *testing.T, dir string) string { + t.Helper() + binPath := filepath.Join(dir, demoAppBinary) + cmd := exec.Command("go", "build", "-o", binPath, demoAppSource) + cmd.Env = append(os.Environ(), "CGO_ENABLED=0", "GOARCH=amd64") + if out, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("failed to compile demo-app: %v\n%s", err, out) } - }) - } -} - -func TestDetectProloguesARM64(t *testing.T) { - // ARM64 instruction encodings (little-endian): - // stp x29, x30, [sp, #-16]! 
= 0xa9bf7bfd - // mov x29, sp = 0x910003fd - // sub sp, sp, #0x20 = 0xd10083ff - // nop = 0xd503201f - // ret = 0xd65f03c0 - - stpX29X30 := uint32(0xa9bf7bfd) // stp x29, x30, [sp, #-16]! - movX29SP := uint32(0x910003fd) // mov x29, sp - subSP := uint32(0xd10083ff) // sub sp, sp, #0x20 - strX30 := uint32(0xf81e0ffe) // str x30, [sp, #-32]! - nop := uint32(0xd503201f) // nop - - tests := []struct { - name string - code []byte - baseAddr uint64 - wantCount int - wantType resurgo.PrologueType - wantAddr uint64 - }{ - { - name: string(resurgo.PrologueSTPFramePair), - code: arm64Insn(stpX29X30, movX29SP), - baseAddr: 0, - wantCount: 1, - wantType: resurgo.PrologueSTPFramePair, - wantAddr: 0, - }, - { - name: string(resurgo.PrologueSTRLRPreIndex), - code: arm64Insn(strX30), - baseAddr: 0, - wantCount: 1, - wantType: resurgo.PrologueSTRLRPreIndex, - wantAddr: 0, - }, - { - name: string(resurgo.PrologueSubSP), - code: arm64Insn(subSP), - baseAddr: 0, - wantCount: 1, - wantType: resurgo.PrologueSubSP, - wantAddr: 0, - }, - { - // stp x29, x30, [sp, #-16]! followed by nop (not mov x29, sp) - name: string(resurgo.PrologueSTPOnly), - code: arm64Insn(stpX29X30, nop), - baseAddr: 0, - wantCount: 1, - wantType: resurgo.PrologueSTPOnly, - wantAddr: 0, + return binPath }, - { - name: "ARM64_EmptyNil", - code: nil, - wantCount: 0, + // Go binaries use .gopclntab instead of .eh_frame; no DetectionCFI expected. 
+ wantTypes: []resurgo.DetectionType{ + resurgo.DetectionPrologueOnly, + resurgo.DetectionPrologueCallSite, }, - { - name: "ARM64_EmptySlice", - code: []byte{}, - wantCount: 0, - }, - { - name: "ARM64_InvalidBytes", - code: []byte{0xde, 0xad, 0xbe, 0xef}, - wantCount: 0, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - prologues, err := resurgo.DetectPrologues(tt.code, tt.baseAddr, resurgo.ArchARM64) - if err != nil { - t.Fatalf("unexpected error: %v", err) + }, { + name: "c", + build: func(t *testing.T, dir string) string { + t.Helper() + if _, err := exec.LookPath("gcc"); err != nil { + t.Skip("gcc not found, skipping") } - - if len(prologues) != tt.wantCount { - t.Fatalf("expected %d prologue(s), got %d: %+v", tt.wantCount, len(prologues), prologues) - } - if tt.wantCount == 0 { - return - } - if prologues[0].Type != tt.wantType { - t.Errorf("expected type %s, got %s", tt.wantType, prologues[0].Type) - } - if prologues[0].Address != tt.wantAddr { - t.Errorf("expected address 0x%x, got 0x%x", tt.wantAddr, prologues[0].Address) + outPath := filepath.Join(dir, "demo-app-c") + cmd := exec.Command("gcc", "-O0", "-o", outPath, "testdata/demo-app.c") + if out, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("failed to compile demo-app.c: %v\n%s", err, out) } - }) - } -} - -func TestDetectPrologues_UnsupportedArch(t *testing.T) { - _, err := resurgo.DetectPrologues([]byte{0x00}, 0, resurgo.Arch("mips")) - if err == nil { - t.Fatal("expected error for unsupported architecture, got nil") - } -} - -// arm64Insn encodes ARM64 instructions as little-endian bytes. 
-func arm64Insn(insns ...uint32) []byte { - buf := make([]byte, 4*len(insns)) - for i, insn := range insns { - binary.LittleEndian.PutUint32(buf[i*4:], insn) - } - return buf -} - -func TestDetectPrologues_Go(t *testing.T) { - tests := []struct { - name string - goarch string - buildArgs []string - minCounts map[resurgo.PrologueType]int - }{ - { - name: "amd64/optimized", - goarch: "amd64", - buildArgs: nil, - minCounts: map[resurgo.PrologueType]int{ - resurgo.PrologueClassic: 1, - resurgo.PrologueNoFramePointer: 1, - }, - }, - { - name: "amd64/unoptimized", - goarch: "amd64", - buildArgs: []string{"-gcflags=all=-N -l"}, - minCounts: map[resurgo.PrologueType]int{ - resurgo.PrologueClassic: 1, - }, - }, - { - name: "arm64/optimized", - goarch: "arm64", - buildArgs: nil, - minCounts: map[resurgo.PrologueType]int{ - resurgo.PrologueSTRLRPreIndex: 1, - }, + return outPath }, - { - name: "arm64/unoptimized", - goarch: "arm64", - buildArgs: []string{"-gcflags=all=-N -l"}, - minCounts: map[resurgo.PrologueType]int{ - resurgo.PrologueSTRLRPreIndex: 1, - }, + // C binaries carry .eh_frame FDE records; expect CFI candidates. + wantTypes: []resurgo.DetectionType{ + resurgo.DetectionCFI, }, - } + }} for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - binPath := filepath.Join(t.TempDir(), demoAppBinary) - args := append([]string{"build", "-o", binPath}, tt.buildArgs...) - args = append(args, demoAppSource) - - cmd := exec.Command("go", args...) 
- cmd.Env = append(os.Environ(), "CGO_ENABLED=0", "GOARCH="+tt.goarch) - if out, err := cmd.CombinedOutput(); err != nil { - t.Fatalf("failed to compile demo-app: %v\n%s", err, out) - } + binPath := tt.build(t, t.TempDir()) f, err := elf.Open(binPath) if err != nil { - t.Fatalf("failed to open ELF: %v", err) + t.Fatalf("failed to open ELF binary: %v", err) } defer f.Close() - textSec := f.Section(".text") - if textSec == nil { - t.Fatal("no .text section") - } - code, err := textSec.Data() - if err != nil { - t.Fatalf("failed to read .text: %v", err) - } - - arch := resurgo.ArchAMD64 - if tt.goarch == "arm64" { - arch = resurgo.ArchARM64 - } - - prologues, err := resurgo.DetectPrologues(code, textSec.Addr, arch) + candidates, err := resurgo.DetectFunctionsFromELF(f) if err != nil { t.Fatalf("unexpected error: %v", err) } - if len(prologues) == 0 { - t.Fatal("expected at least one prologue, got none") + + if len(candidates) == 0 { + t.Fatal("expected at least one function candidate, got none") } - counts := make(map[resurgo.PrologueType]int) - for _, p := range prologues { - counts[p.Type]++ + counts := make(map[resurgo.DetectionType]int) + for _, c := range candidates { + counts[c.DetectionType]++ } - t.Logf("total prologues: %d, by type: %v", len(prologues), counts) + t.Logf("total candidates: %d, by type: %v", len(candidates), counts) - for typ, min := range tt.minCounts { - if counts[typ] < min { - t.Errorf("expected at least %d %s prologue(s), got %d", min, typ, counts[typ]) + for _, typ := range tt.wantTypes { + if counts[typ] == 0 { + t.Errorf("expected at least one %s candidate, got none", typ) } } }) } } -func TestDetectPrologues_C(t *testing.T) { - const cSource = "testdata/demo-app.c" - - tests := []struct { - name string - compiler string - args []string - minCounts map[resurgo.PrologueType]int - }{ - { - name: "amd64/gcc/optimized", - compiler: "gcc", - args: []string{"-O2"}, - }, - { - name: "amd64/gcc/unoptimized", - compiler: "gcc", - args: 
[]string{"-O0", "-fno-omit-frame-pointer"}, - minCounts: map[resurgo.PrologueType]int{ - resurgo.PrologueClassic: 1, - }, - }, - { - name: "arm64/clang/optimized", - compiler: "clang", - args: []string{"--target=aarch64-linux-gnu", "-c", "-O2"}, - minCounts: map[resurgo.PrologueType]int{ - resurgo.PrologueSTPFramePair: 1, - }, - }, - { - name: "arm64/clang/unoptimized", - compiler: "clang", - args: []string{"--target=aarch64-linux-gnu", "-c", "-O0"}, - minCounts: map[resurgo.PrologueType]int{ - resurgo.PrologueSubSP: 1, - }, - }, +// TestWithDetectors verifies that a detector registered via WithDetectors is +// actually invoked by the pipeline. +func TestWithDetectors(t *testing.T) { + exe, err := os.Executable() + if err != nil { + t.Fatalf("os.Executable: %v", err) } + f, err := elf.Open(exe) + if err != nil { + t.Fatalf("elf.Open: %v", err) + } + defer f.Close() - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - minCounts := tt.minCounts - if tt.name == "amd64/gcc/optimized" { - minCounts = gccOptimizedExpectations(t) - } - prologues := compileAndDetectPrologues(t, tt.compiler, tt.args, cSource) - assertPrologues(t, prologues, minCounts) - }) + called := false + fake := func(*elf.File) ([]resurgo.FunctionCandidate, error) { + called = true + return nil, nil + } + if _, err := resurgo.DetectFunctionsFromELF(f, resurgo.WithDetectors(fake)); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !called { + t.Error("WithDetectors: detector was not called") } } -// gccMajorVersion returns the major version of the GCC compiler at the given -// path, or 0 if it cannot be determined. -func gccMajorVersion(compiler string) int { - out, err := exec.Command(compiler, "-dumpversion").Output() +// TestWithFilters verifies that a filter registered via WithFilters is +// actually invoked by the pipeline. 
+func TestWithFilters(t *testing.T) { + exe, err := os.Executable() if err != nil { - return 0 + t.Fatalf("os.Executable: %v", err) } - parts := strings.SplitN(strings.TrimSpace(string(out)), ".", 2) - v, err := strconv.Atoi(parts[0]) + f, err := elf.Open(exe) if err != nil { - return 0 + t.Fatalf("elf.Open: %v", err) + } + defer f.Close() + + called := false + fakeDetector := func(*elf.File) ([]resurgo.FunctionCandidate, error) { + return []resurgo.FunctionCandidate{{Address: 0x1000}}, nil + } + fakeFilter := func(cs []resurgo.FunctionCandidate, _ *elf.File) ([]resurgo.FunctionCandidate, error) { + called = true + return cs, nil + } + if _, err := resurgo.DetectFunctionsFromELF(f, + resurgo.WithDetectors(fakeDetector), + resurgo.WithFilters(fakeFilter), + ); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !called { + t.Error("WithFilters: filter was not called") } - return v } -// gccOptimizedExpectations returns the expected prologue types for GCC -O2 -// output based on the installed GCC version. -func gccOptimizedExpectations(t *testing.T) map[resurgo.PrologueType]int { - t.Helper() - v := gccMajorVersion("gcc") - switch { - case v >= 13: - return map[resurgo.PrologueType]int{ - resurgo.ProloguePushOnly: 1, - } - default: - t.Logf("gcc %d: no version-specific prologue expectation", v) - return map[resurgo.PrologueType]int{} +func TestDetectFunctionsFromELF_InvalidELF(t *testing.T) { + r := bytes.NewReader([]byte{0x00, 0x01, 0x02, 0x03}) + f, err := elf.NewFile(r) + if err == nil { + f.Close() + t.Fatal("expected elf.NewFile to fail on invalid data") } } -// compileAndDetectPrologues compiles cSource with the given compiler and flags, -// extracts the .text section, and returns prologues detected on the raw bytes. 
-func compileAndDetectPrologues(t *testing.T, compiler string, args []string, cSource string) []resurgo.Prologue { - t.Helper() - if _, err := exec.LookPath(compiler); err != nil { - t.Skipf("%s not found, skipping", compiler) +// TestDetectors verifies that each detector, when run against a C ELF binary, +// returns non-empty results that include at least one candidate of the expected +// detection type. +func TestDetectors(t *testing.T) { + if _, err := exec.LookPath("gcc"); err != nil { + t.Skip("gcc not found, skipping") } outPath := filepath.Join(t.TempDir(), "demo-app-c") - buildArgs := append(args, "-o", outPath, cSource) - - cmd := exec.Command(compiler, buildArgs...) + cmd := exec.Command("gcc", "-O0", "-o", outPath, "testdata/demo-app.c") if out, err := cmd.CombinedOutput(); err != nil { - t.Fatalf("failed to compile %s: %v\n%s", cSource, err, out) + t.Fatalf("failed to compile demo-app.c: %v\n%s", err, out) } f, err := elf.Open(outPath) if err != nil { - t.Fatalf("failed to open ELF: %v", err) + t.Fatalf("failed to open ELF binary: %v", err) } defer f.Close() - textSec := f.Section(".text") - if textSec == nil { - t.Fatal("no .text section") - } - code, err := textSec.Data() - if err != nil { - t.Fatalf("failed to read .text: %v", err) - } - - arch := resurgo.ArchAMD64 - if f.Machine == elf.EM_AARCH64 { - arch = resurgo.ArchARM64 - } + tests := []struct { + name string + detector resurgo.CandidateDetector + wantType resurgo.DetectionType + check func(t *testing.T, candidates []resurgo.FunctionCandidate) + }{{ + name: "disasm", + detector: resurgo.DisasmDetector, + wantType: resurgo.DetectionPrologueOnly, + }, { + // Verify that functions confirmed by both prologue and call-site signals + // are merged into DetectionPrologueCallSite with ConfidenceHigh and at + // least one caller or jump source. 
+ name: "disasm/merge", + detector: resurgo.DisasmDetector, + wantType: resurgo.DetectionPrologueCallSite, + check: func(t *testing.T, candidates []resurgo.FunctionCandidate) { + for _, c := range candidates { + if c.DetectionType != resurgo.DetectionPrologueCallSite { + continue + } + if c.Confidence != resurgo.ConfidenceHigh { + t.Errorf("0x%x: expected ConfidenceHigh, got %s", c.Address, c.Confidence) + } + if len(c.CalledFrom) == 0 && len(c.JumpedFrom) == 0 { + t.Errorf("0x%x: no CalledFrom or JumpedFrom", c.Address) + } + } + }, + }, { + name: "ehframe", + detector: resurgo.EhFrameDetector, + wantType: resurgo.DetectionCFI, + }} - prologues, err := resurgo.DetectPrologues(code, textSec.Addr, arch) - if err != nil { - t.Fatalf("unexpected error: %v", err) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + candidates, err := tt.detector(f) + if err != nil { + t.Fatalf("%s: %v", tt.name, err) + } + if len(candidates) == 0 { + t.Fatalf("%s: expected at least one candidate, got none", tt.name) + } + found := false + for _, c := range candidates { + if c.DetectionType == tt.wantType { + found = true + break + } + } + if !found { + t.Errorf("%s: expected at least one %s candidate", tt.name, tt.wantType) + } + if tt.check != nil { + tt.check(t, candidates) + } + }) } - return prologues } -// assertPrologues verifies that prologues is non-empty and that the -// per-type counts meet the specified minimums. 
-func assertPrologues(t *testing.T, prologues []resurgo.Prologue, minCounts map[resurgo.PrologueType]int) { - t.Helper() - if len(prologues) == 0 { - t.Fatal("expected at least one prologue, got none") - } - counts := make(map[resurgo.PrologueType]int) - for _, p := range prologues { - counts[p.Type]++ - } - t.Logf("total prologues: %d, by type: %v", len(prologues), counts) - - for typ, count := range minCounts { - if counts[typ] < count { - t.Errorf("expected at least %d %s prologue(s), got %d", count, typ, counts[typ]) - } - } -} diff --git a/filter.go b/filter.go index 05692d6..0394be6 100644 --- a/filter.go +++ b/filter.go @@ -26,7 +26,7 @@ func PLTFilter(candidates []FunctionCandidate, f *elf.File) ([]FunctionCandidate pltRanges = append(pltRanges, [2]uint64{sec.Addr, sec.Addr + sec.Size}) } } - return filterCandidatesInRanges(candidates, pltRanges), nil + return FilterCandidatesInRanges(candidates, pltRanges), nil } // CETFilter filters candidates using the CET-aware ENDBR64 heuristic, reading @@ -46,10 +46,10 @@ func CETFilter(candidates []FunctionCandidate, f *elf.File) ([]FunctionCandidate if err != nil { return nil, err } - return filterAlignedEntriesCETAMD64(candidates, textBytes, textSec.Addr, f.Entry), nil + return FilterAlignedEntriesCETAMD64(candidates, textBytes, textSec.Addr, f.Entry), nil } -// filterAlignedEntriesCETAMD64 drops aligned-entry candidates lacking ENDBR64 +// FilterAlignedEntriesCETAMD64 drops aligned-entry candidates lacking ENDBR64 // on CET-enabled AMD64 binaries. On CET binaries every indirect-branch-target // function entry carries ENDBR64; an aligned address inside a function body // (reached by a jump or NOP padding) never does, making it a reliable @@ -62,7 +62,7 @@ func CETFilter(candidates []FunctionCandidate, f *elf.File) ([]FunctionCandidate // avoids false triggering on non-CET binaries that may have a few incidental // ENDBR64 hits from CRT helpers. Non-CET binaries are returned unchanged. 
// Only DetectionAlignedEntry candidates are affected. -func filterAlignedEntriesCETAMD64(candidates []FunctionCandidate, textBytes []byte, textVA, entryVA uint64) []FunctionCandidate { +func FilterAlignedEntriesCETAMD64(candidates []FunctionCandidate, textBytes []byte, textVA, entryVA uint64) []FunctionCandidate { hasENDBR64 := func(va uint64) bool { if va < textVA { return false @@ -103,13 +103,13 @@ func filterAlignedEntriesCETAMD64(candidates []FunctionCandidate, textBytes []by return result } -// filterCandidatesInRanges removes candidates whose addresses fall within any +// FilterCandidatesInRanges removes candidates whose addresses fall within any // of the given address ranges. Each range is a [lo, hi) pair. // // Used to discard candidates that land inside linker-generated sections (e.g. // PLT stubs) that the call-site scanner can detect as CALL/JMP targets even // though they are not real function entries in the binary under analysis. -func filterCandidatesInRanges(candidates []FunctionCandidate, ranges [][2]uint64) []FunctionCandidate { +func FilterCandidatesInRanges(candidates []FunctionCandidate, ranges [][2]uint64) []FunctionCandidate { if len(ranges) == 0 { return candidates } diff --git a/filter_test.go b/filter_test.go index cd5c883..afd449f 100644 --- a/filter_test.go +++ b/filter_test.go @@ -1,7 +1,13 @@ -package resurgo +package resurgo_test import ( + "debug/elf" + "os/exec" + "path/filepath" "testing" + + "github.com/maxgio92/resurgo" + ) func TestFilterAlignedEntriesCETAMD64(t *testing.T) { @@ -9,7 +15,7 @@ func TestFilterAlignedEntriesCETAMD64(t *testing.T) { endbr64 := []byte{0xf3, 0x0f, 0x1e, 0xfa} - addrs := func(cs []FunctionCandidate) []uint64 { + addrs := func(cs []resurgo.FunctionCandidate) []uint64 { out := make([]uint64, len(cs)) for i, c := range cs { out[i] = c.Address @@ -17,13 +23,13 @@ func TestFilterAlignedEntriesCETAMD64(t *testing.T) { return out } - // 4 ENDBR64 hits at 0x00–0x30; zeroes at 0x40 and 0x50. 
+ // 4 ENDBR64 hits at 0x00-0x30; zeroes at 0x40 and 0x50. text1 := make([]byte, 0x60) for _, off := range []int{0x00, 0x10, 0x20, 0x30} { copy(text1[off:], endbr64) } - // 5 ENDBR64 hits at 0x00–0x40 (triggers CET) and one more at 0x80; + // 5 ENDBR64 hits at 0x00-0x40 (triggers CET) and one more at 0x80; // zeroes at 0x50, 0x60, 0x70. text2 := make([]byte, 0x90) for _, off := range []int{0x00, 0x10, 0x20, 0x30, 0x40, 0x80} { @@ -33,43 +39,40 @@ func TestFilterAlignedEntriesCETAMD64(t *testing.T) { tests := []struct { name string text []byte - input []FunctionCandidate + input []resurgo.FunctionCandidate wantAddrs []uint64 - }{ - { - name: "non-CET binary returns all candidates unchanged", - text: text1, - input: []FunctionCandidate{ - {Address: 0x1000, DetectionType: DetectionAlignedEntry}, - {Address: 0x1010, DetectionType: DetectionAlignedEntry}, - {Address: 0x1020, DetectionType: DetectionAlignedEntry}, - {Address: 0x1030, DetectionType: DetectionAlignedEntry}, // 4 ENDBR64: below threshold - {Address: 0x1040, DetectionType: DetectionAlignedEntry}, - {Address: 0x1050, DetectionType: DetectionAlignedEntry}, - }, - wantAddrs: []uint64{0x1000, 0x1010, 0x1020, 0x1030, 0x1040, 0x1050}, + }{{ + name: "non-CET binary returns all candidates unchanged", + text: text1, + input: []resurgo.FunctionCandidate{ + {Address: 0x1000, DetectionType: resurgo.DetectionAlignedEntry}, + {Address: 0x1010, DetectionType: resurgo.DetectionAlignedEntry}, + {Address: 0x1020, DetectionType: resurgo.DetectionAlignedEntry}, + {Address: 0x1030, DetectionType: resurgo.DetectionAlignedEntry}, // 4 ENDBR64: below threshold + {Address: 0x1040, DetectionType: resurgo.DetectionAlignedEntry}, + {Address: 0x1050, DetectionType: resurgo.DetectionAlignedEntry}, }, - { - name: "CET binary drops aligned-entry candidates without ENDBR64", - text: text2, - input: []FunctionCandidate{ - {Address: 0x1000, DetectionType: DetectionAlignedEntry}, // ENDBR64 - kept - {Address: 0x1010, DetectionType: 
DetectionAlignedEntry}, // ENDBR64 - kept - {Address: 0x1020, DetectionType: DetectionAlignedEntry}, // ENDBR64 - kept - {Address: 0x1030, DetectionType: DetectionAlignedEntry}, // ENDBR64 - kept - {Address: 0x1040, DetectionType: DetectionAlignedEntry}, // ENDBR64 - kept (5th, triggers CET) - {Address: 0x1050, DetectionType: DetectionAlignedEntry}, // no ENDBR64 - dropped - {Address: 0x1060, DetectionType: DetectionAlignedEntry}, // no ENDBR64 - dropped - {Address: 0x1070, DetectionType: DetectionPrologueOnly}, // not AlignedEntry - kept - {Address: 0x1080, DetectionType: DetectionAlignedEntry}, // ENDBR64, after threshold - kept - }, - wantAddrs: []uint64{0x1000, 0x1010, 0x1020, 0x1030, 0x1040, 0x1070, 0x1080}, + wantAddrs: []uint64{0x1000, 0x1010, 0x1020, 0x1030, 0x1040, 0x1050}, + }, { + name: "CET binary drops aligned-entry candidates without ENDBR64", + text: text2, + input: []resurgo.FunctionCandidate{ + {Address: 0x1000, DetectionType: resurgo.DetectionAlignedEntry}, // ENDBR64 - kept + {Address: 0x1010, DetectionType: resurgo.DetectionAlignedEntry}, // ENDBR64 - kept + {Address: 0x1020, DetectionType: resurgo.DetectionAlignedEntry}, // ENDBR64 - kept + {Address: 0x1030, DetectionType: resurgo.DetectionAlignedEntry}, // ENDBR64 - kept + {Address: 0x1040, DetectionType: resurgo.DetectionAlignedEntry}, // ENDBR64 - kept (5th, triggers CET) + {Address: 0x1050, DetectionType: resurgo.DetectionAlignedEntry}, // no ENDBR64 - dropped + {Address: 0x1060, DetectionType: resurgo.DetectionAlignedEntry}, // no ENDBR64 - dropped + {Address: 0x1070, DetectionType: resurgo.DetectionPrologueOnly}, // not AlignedEntry - kept + {Address: 0x1080, DetectionType: resurgo.DetectionAlignedEntry}, // ENDBR64, after threshold - kept }, - } + wantAddrs: []uint64{0x1000, 0x1010, 0x1020, 0x1030, 0x1040, 0x1070, 0x1080}, + }} for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := filterAlignedEntriesCETAMD64(tt.input, tt.text, textVA, 0) + got := 
resurgo.FilterAlignedEntriesCETAMD64(tt.input, tt.text, textVA, 0) gotAddrs := addrs(got) if len(gotAddrs) != len(tt.wantAddrs) { t.Fatalf("len=%d want=%d: got %v want %v", @@ -85,15 +88,15 @@ func TestFilterAlignedEntriesCETAMD64(t *testing.T) { } func TestFilterCandidatesInRanges(t *testing.T) { - cands := func(addrs ...uint64) []FunctionCandidate { - out := make([]FunctionCandidate, len(addrs)) + cands := func(addrs ...uint64) []resurgo.FunctionCandidate { + out := make([]resurgo.FunctionCandidate, len(addrs)) for i, a := range addrs { - out[i] = FunctionCandidate{Address: a} + out[i] = resurgo.FunctionCandidate{Address: a} } return out } - addrs := func(cs []FunctionCandidate) []uint64 { + addrs := func(cs []resurgo.FunctionCandidate) []uint64 { out := make([]uint64, len(cs)) for i, c := range cs { out[i] = c.Address @@ -103,68 +106,60 @@ func TestFilterCandidatesInRanges(t *testing.T) { tests := []struct { name string - input []FunctionCandidate + input []resurgo.FunctionCandidate ranges [][2]uint64 want []uint64 - }{ - { - name: "no ranges keeps all", - input: cands(0x100, 0x200, 0x300), - ranges: nil, - want: []uint64{0x100, 0x200, 0x300}, + }{{ + name: "no ranges keeps all", + input: cands(0x100, 0x200, 0x300), // three candidates, no exclusion ranges + ranges: nil, + want: []uint64{0x100, 0x200, 0x300}, + }, { + name: "empty input", + input: cands(), + ranges: [][2]uint64{{0x100, 0x200}}, // range present but nothing to filter + want: []uint64{}, + }, { + name: "removes candidate inside range", + input: cands(0x100, 0x150, 0x200), // 0x150 falls inside [0x140, 0x160) + ranges: [][2]uint64{ + {0x140, 0x160}, }, - { - name: "empty input", - input: cands(), - ranges: [][2]uint64{{0x100, 0x200}}, - want: []uint64{}, + want: []uint64{0x100, 0x200}, // 0x150 removed + }, { + name: "lo boundary included hi boundary excluded", + input: cands(0x100, 0x140, 0x160, 0x200), // 0x140 == lo (removed), 0x160 == hi (kept) + ranges: [][2]uint64{ + {0x140, 0x160}, // 
half-open interval [lo, hi) }, - { - name: "removes candidate inside range", - input: cands(0x100, 0x150, 0x200), - ranges: [][2]uint64{ - {0x140, 0x160}, - }, - want: []uint64{0x100, 0x200}, + want: []uint64{0x100, 0x160, 0x200}, + }, { + name: "multiple ranges", + input: cands(0x100, 0x200, 0x300, 0x400, 0x500), // 0x200 in [0x180,0x220), 0x400 in [0x380,0x420) + ranges: [][2]uint64{ + {0x180, 0x220}, + {0x380, 0x420}, }, - { - name: "lo boundary included hi boundary excluded", - input: cands(0x100, 0x140, 0x160, 0x200), - ranges: [][2]uint64{ - {0x140, 0x160}, - }, - want: []uint64{0x100, 0x160, 0x200}, + want: []uint64{0x100, 0x300, 0x500}, // 0x200 and 0x400 removed + }, { + name: "all candidates removed", + input: cands(0x100, 0x110, 0x120), // all three fall inside [0x100, 0x130) + ranges: [][2]uint64{ + {0x100, 0x130}, }, - { - name: "multiple ranges", - input: cands(0x100, 0x200, 0x300, 0x400, 0x500), - ranges: [][2]uint64{ - {0x180, 0x220}, - {0x380, 0x420}, - }, - want: []uint64{0x100, 0x300, 0x500}, + want: []uint64{}, + }, { + name: "no candidates in range", + input: cands(0x100, 0x200), // candidates below range [0x300, 0x400) + ranges: [][2]uint64{ + {0x300, 0x400}, }, - { - name: "all candidates removed", - input: cands(0x100, 0x110, 0x120), - ranges: [][2]uint64{ - {0x100, 0x130}, - }, - want: []uint64{}, - }, - { - name: "no candidates in range", - input: cands(0x100, 0x200), - ranges: [][2]uint64{ - {0x300, 0x400}, - }, - want: []uint64{0x100, 0x200}, - }, - } + want: []uint64{0x100, 0x200}, // nothing removed + }} for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := filterCandidatesInRanges(tt.input, tt.ranges) + got := resurgo.FilterCandidatesInRanges(tt.input, tt.ranges) gotAddrs := addrs(got) if len(gotAddrs) != len(tt.want) { t.Fatalf("len=%d want=%d: got %v want %v", @@ -178,3 +173,106 @@ func TestFilterCandidatesInRanges(t *testing.T) { }) } } + +// TestFilters verifies the behavioral contract of each exported filter 
against +// a real C ELF binary. +func TestFilters(t *testing.T) { + if _, err := exec.LookPath("gcc"); err != nil { + t.Skip("gcc not found, skipping") + } + + outPath := filepath.Join(t.TempDir(), "demo-app-c") + cmd := exec.Command("gcc", "-O0", "-o", outPath, "testdata/demo-app.c") + if out, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("failed to compile demo-app.c: %v\n%s", err, out) + } + + f, err := elf.Open(outPath) + if err != nil { + t.Fatalf("failed to open ELF: %v", err) + } + defer f.Close() + + input, err := resurgo.DisasmDetector(f) + if err != nil { + t.Fatalf("resurgo.DisasmDetector: %v", err) + } + + tests := []struct { + name string + filter resurgo.CandidateFilter + check func(t *testing.T, result []resurgo.FunctionCandidate) + }{{ + name: "cet", + filter: resurgo.CETFilter, + // resurgo.CETFilter must never drop the ELF entry point. + check: func(t *testing.T, result []resurgo.FunctionCandidate) { + inputHasEntry := false + for _, c := range input { + if c.Address == f.Entry { + inputHasEntry = true + break + } + } + if !inputHasEntry { + return + } + for _, c := range result { + if c.Address == f.Entry { + return + } + } + t.Errorf("entry point 0x%x was dropped", f.Entry) + }, + }, { + name: "plt", + filter: resurgo.PLTFilter, + // resurgo.PLTFilter must remove all candidates inside the .plt section. + check: func(t *testing.T, result []resurgo.FunctionCandidate) { + plt := f.Section(".plt") + if plt == nil { + t.Skip("no .plt section") + } + for _, c := range result { + if c.Address >= plt.Addr && c.Address < plt.Addr+plt.Size { + t.Errorf("candidate 0x%x inside PLT [0x%x, 0x%x) was not removed", + c.Address, plt.Addr, plt.Addr+plt.Size) + } + } + }, + }, { + name: "ehframe", + filter: resurgo.EhFrameFilter, + // resurgo.EhFrameFilter must retain only FDE-confirmed candidates. 
+ check: func(t *testing.T, result []resurgo.FunctionCandidate) { + fde, err := resurgo.EhFrameDetector(f) + if err != nil { + t.Fatalf("resurgo.EhFrameDetector: %v", err) + } + fdeSet := make(map[uint64]struct{}, len(fde)) + for _, c := range fde { + fdeSet[c.Address] = struct{}{} + } + for _, c := range result { + if _, ok := fdeSet[c.Address]; !ok { + t.Errorf("candidate 0x%x kept but not FDE-confirmed", c.Address) + } + } + }, + }} + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := tt.filter(input, f) + if err != nil { + t.Fatalf("%v", err) + } + if len(result) > len(input) { + t.Errorf("added candidates: input=%d output=%d", len(input), len(result)) + } + tt.check(t, result) + }) + } +} + + diff --git a/prologue_test.go b/prologue_test.go new file mode 100644 index 0000000..76b0a23 --- /dev/null +++ b/prologue_test.go @@ -0,0 +1,438 @@ +package resurgo_test + +import ( + "debug/elf" + "encoding/binary" + "os" + "os/exec" + "path/filepath" + "strconv" + "strings" + "testing" + + "github.com/maxgio92/resurgo" +) + +func TestDetectProloguesAMD64(t *testing.T) { + // AMD64 instruction encodings: + // nop = 0x90 + // push rbp = 0x55 + // mov rbp, rsp = 0x48 0x89 0xe5 + // sub rsp, 0x20 = 0x48 0x83 0xec 0x20 + + tests := []struct { + name string + code []byte + baseAddr uint64 + wantCount int + wantType resurgo.PrologueType + wantAddr uint64 + }{{ + // nop; push rbp; mov rbp, rsp + // The leading nop ensures push rbp is not at start-of-input, + // so only the classic pattern fires. 
+ name: string(resurgo.PrologueClassic), + code: []byte{0x90, 0x55, 0x48, 0x89, 0xe5}, + baseAddr: 0, + wantCount: 1, + wantType: resurgo.PrologueClassic, + wantAddr: 1, + }, { + // sub rsp, 0x20 at start of code (no preceding instruction) + name: string(resurgo.PrologueNoFramePointer), + code: []byte{0x48, 0x83, 0xec, 0x20}, + baseAddr: 0, + wantCount: 1, + wantType: resurgo.PrologueNoFramePointer, + wantAddr: 0, + }, { + // nop; push rbx (0x53); sub rsp, 0x20 - push not at boundary, + // only the sub rsp is detected as NoFramePointer. + name: "no-frame-pointer-after-push", + code: []byte{0x90, 0x53, 0x48, 0x83, 0xec, 0x20}, + baseAddr: 0, + wantCount: 1, + wantType: resurgo.PrologueNoFramePointer, + wantAddr: 2, + }, { + // push rbp; nop - push rbp at start, not followed by mov rbp, rsp + name: string(resurgo.ProloguePushOnly), + code: []byte{0x55, 0x90}, + baseAddr: 0, + wantCount: 1, + wantType: resurgo.ProloguePushOnly, + wantAddr: 0, + }, { + name: "EmptyNil", + code: nil, + wantCount: 0, + }, { + name: "EmptySlice", + code: []byte{}, + wantCount: 0, + }, { + // Garbage bytes that should not match any prologue pattern. 
+ name: "InvalidBytes", + code: []byte{0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe}, + wantCount: 0, + }} + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + prologues, err := resurgo.DetectPrologues(tt.code, tt.baseAddr, resurgo.ArchAMD64) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if len(prologues) != tt.wantCount { + t.Fatalf("expected %d prologue(s), got %d: %+v", tt.wantCount, len(prologues), prologues) + } + if tt.wantCount == 0 { + return + } + if prologues[0].Type != tt.wantType { + t.Errorf("expected type %s, got %s", tt.wantType, prologues[0].Type) + } + if prologues[0].Address != tt.wantAddr { + t.Errorf("expected address 0x%x, got 0x%x", tt.wantAddr, prologues[0].Address) + } + }) + } +} + +func TestDetectProloguesARM64(t *testing.T) { + // ARM64 instruction encodings (little-endian): + // stp x29, x30, [sp, #-16]! = 0xa9bf7bfd + // mov x29, sp = 0x910003fd + // sub sp, sp, #0x20 = 0xd10083ff + // nop = 0xd503201f + // ret = 0xd65f03c0 + + stpX29X30 := uint32(0xa9bf7bfd) // stp x29, x30, [sp, #-16]! + movX29SP := uint32(0x910003fd) // mov x29, sp + subSP := uint32(0xd10083ff) // sub sp, sp, #0x20 + strX30 := uint32(0xf81e0ffe) // str x30, [sp, #-32]! + nop := uint32(0xd503201f) // nop + + tests := []struct { + name string + code []byte + baseAddr uint64 + wantCount int + wantType resurgo.PrologueType + wantAddr uint64 + }{{ + name: string(resurgo.PrologueSTPFramePair), + code: arm64Insn(stpX29X30, movX29SP), + baseAddr: 0, + wantCount: 1, + wantType: resurgo.PrologueSTPFramePair, + wantAddr: 0, + }, { + name: string(resurgo.PrologueSTRLRPreIndex), + code: arm64Insn(strX30), + baseAddr: 0, + wantCount: 1, + wantType: resurgo.PrologueSTRLRPreIndex, + wantAddr: 0, + }, { + name: string(resurgo.PrologueSubSP), + code: arm64Insn(subSP), + baseAddr: 0, + wantCount: 1, + wantType: resurgo.PrologueSubSP, + wantAddr: 0, + }, { + // stp x29, x30, [sp, #-16]! 
followed by nop (not mov x29, sp) + name: string(resurgo.PrologueSTPOnly), + code: arm64Insn(stpX29X30, nop), + baseAddr: 0, + wantCount: 1, + wantType: resurgo.PrologueSTPOnly, + wantAddr: 0, + }, { + name: "ARM64_EmptyNil", + code: nil, + wantCount: 0, + }, { + name: "ARM64_EmptySlice", + code: []byte{}, + wantCount: 0, + }, { + name: "ARM64_InvalidBytes", + code: []byte{0xde, 0xad, 0xbe, 0xef}, + wantCount: 0, + }} + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + prologues, err := resurgo.DetectPrologues(tt.code, tt.baseAddr, resurgo.ArchARM64) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if len(prologues) != tt.wantCount { + t.Fatalf("expected %d prologue(s), got %d: %+v", tt.wantCount, len(prologues), prologues) + } + if tt.wantCount == 0 { + return + } + if prologues[0].Type != tt.wantType { + t.Errorf("expected type %s, got %s", tt.wantType, prologues[0].Type) + } + if prologues[0].Address != tt.wantAddr { + t.Errorf("expected address 0x%x, got 0x%x", tt.wantAddr, prologues[0].Address) + } + }) + } +} + +func TestDetectPrologues_UnsupportedArch(t *testing.T) { + _, err := resurgo.DetectPrologues([]byte{0x00}, 0, resurgo.Arch("mips")) + if err == nil { + t.Fatal("expected error for unsupported architecture, got nil") + } +} + +// arm64Insn encodes ARM64 instructions as little-endian bytes. 
+func arm64Insn(insns ...uint32) []byte { + buf := make([]byte, 4*len(insns)) + for i, insn := range insns { + binary.LittleEndian.PutUint32(buf[i*4:], insn) + } + return buf +} + +func TestDetectPrologues_Go(t *testing.T) { + tests := []struct { + name string + goarch string + buildArgs []string + minCounts map[resurgo.PrologueType]int + }{{ + name: "amd64/optimized", + goarch: "amd64", + buildArgs: nil, + minCounts: map[resurgo.PrologueType]int{ + resurgo.PrologueClassic: 1, + resurgo.PrologueNoFramePointer: 1, + }, + }, { + name: "amd64/unoptimized", + goarch: "amd64", + buildArgs: []string{"-gcflags=all=-N -l"}, + minCounts: map[resurgo.PrologueType]int{ + resurgo.PrologueClassic: 1, + }, + }, { + name: "arm64/optimized", + goarch: "arm64", + buildArgs: nil, + minCounts: map[resurgo.PrologueType]int{ + resurgo.PrologueSTRLRPreIndex: 1, + }, + }, { + name: "arm64/unoptimized", + goarch: "arm64", + buildArgs: []string{"-gcflags=all=-N -l"}, + minCounts: map[resurgo.PrologueType]int{ + resurgo.PrologueSTRLRPreIndex: 1, + }, + }} + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + binPath := filepath.Join(t.TempDir(), demoAppBinary) + args := append([]string{"build", "-o", binPath}, tt.buildArgs...) + args = append(args, demoAppSource) + + cmd := exec.Command("go", args...) 
+ cmd.Env = append(os.Environ(), "CGO_ENABLED=0", "GOARCH="+tt.goarch) + if out, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("failed to compile demo-app: %v\n%s", err, out) + } + + f, err := elf.Open(binPath) + if err != nil { + t.Fatalf("failed to open ELF: %v", err) + } + defer f.Close() + + textSec := f.Section(".text") + if textSec == nil { + t.Fatal("no .text section") + } + code, err := textSec.Data() + if err != nil { + t.Fatalf("failed to read .text: %v", err) + } + + arch := resurgo.ArchAMD64 + if tt.goarch == "arm64" { + arch = resurgo.ArchARM64 + } + + prologues, err := resurgo.DetectPrologues(code, textSec.Addr, arch) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(prologues) == 0 { + t.Fatal("expected at least one prologue, got none") + } + + counts := make(map[resurgo.PrologueType]int) + for _, p := range prologues { + counts[p.Type]++ + } + t.Logf("total prologues: %d, by type: %v", len(prologues), counts) + + for typ, min := range tt.minCounts { + if counts[typ] < min { + t.Errorf("expected at least %d %s prologue(s), got %d", min, typ, counts[typ]) + } + } + }) + } +} + +func TestDetectPrologues_C(t *testing.T) { + const cSource = "testdata/demo-app.c" + + tests := []struct { + name string + compiler string + args []string + minCounts map[resurgo.PrologueType]int + }{{ + name: "amd64/gcc/optimized", + compiler: "gcc", + args: []string{"-O2"}, + }, { + name: "amd64/gcc/unoptimized", + compiler: "gcc", + args: []string{"-O0", "-fno-omit-frame-pointer"}, + minCounts: map[resurgo.PrologueType]int{ + resurgo.PrologueClassic: 1, + }, + }, { + name: "arm64/clang/optimized", + compiler: "clang", + args: []string{"--target=aarch64-linux-gnu", "-c", "-O2"}, + minCounts: map[resurgo.PrologueType]int{ + resurgo.PrologueSTPFramePair: 1, + }, + }, { + name: "arm64/clang/unoptimized", + compiler: "clang", + args: []string{"--target=aarch64-linux-gnu", "-c", "-O0"}, + minCounts: map[resurgo.PrologueType]int{ + resurgo.PrologueSubSP: 
1, + }, + }} + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + minCounts := tt.minCounts + if tt.name == "amd64/gcc/optimized" { + minCounts = gccOptimizedExpectations(t) + } + prologues := compileAndDetectPrologues(t, tt.compiler, tt.args, cSource) + assertPrologues(t, prologues, minCounts) + }) + } +} + +// gccMajorVersion returns the major version of the GCC compiler at the given +// path, or 0 if it cannot be determined. +func gccMajorVersion(compiler string) int { + out, err := exec.Command(compiler, "-dumpversion").Output() + if err != nil { + return 0 + } + parts := strings.SplitN(strings.TrimSpace(string(out)), ".", 2) + v, err := strconv.Atoi(parts[0]) + if err != nil { + return 0 + } + return v +} + +// gccOptimizedExpectations returns the expected prologue types for GCC -O2 +// output based on the installed GCC version. +func gccOptimizedExpectations(t *testing.T) map[resurgo.PrologueType]int { + t.Helper() + v := gccMajorVersion("gcc") + switch { + case v >= 13: + return map[resurgo.PrologueType]int{ + resurgo.ProloguePushOnly: 1, + } + default: + t.Logf("gcc %d: no version-specific prologue expectation", v) + return map[resurgo.PrologueType]int{} + } +} + +// compileAndDetectPrologues compiles cSource with the given compiler and flags, +// extracts the .text section, and returns prologues detected on the raw bytes. +func compileAndDetectPrologues(t *testing.T, compiler string, args []string, cSource string) []resurgo.Prologue { + t.Helper() + if _, err := exec.LookPath(compiler); err != nil { + t.Skipf("%s not found, skipping", compiler) + } + + outPath := filepath.Join(t.TempDir(), "demo-app-c") + buildArgs := append(args, "-o", outPath, cSource) + + cmd := exec.Command(compiler, buildArgs...) 
+ if out, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("failed to compile %s: %v\n%s", cSource, err, out) + } + + f, err := elf.Open(outPath) + if err != nil { + t.Fatalf("failed to open ELF: %v", err) + } + defer f.Close() + + textSec := f.Section(".text") + if textSec == nil { + t.Fatal("no .text section") + } + code, err := textSec.Data() + if err != nil { + t.Fatalf("failed to read .text: %v", err) + } + + arch := resurgo.ArchAMD64 + if f.Machine == elf.EM_AARCH64 { + arch = resurgo.ArchARM64 + } + + prologues, err := resurgo.DetectPrologues(code, textSec.Addr, arch) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + return prologues +} + +// assertPrologues verifies that prologues is non-empty and that the +// per-type counts meet the specified minimums. +func assertPrologues(t *testing.T, prologues []resurgo.Prologue, minCounts map[resurgo.PrologueType]int) { + t.Helper() + if len(prologues) == 0 { + t.Fatal("expected at least one prologue, got none") + } + + counts := make(map[resurgo.PrologueType]int) + for _, p := range prologues { + counts[p.Type]++ + } + t.Logf("total prologues: %d, by type: %v", len(prologues), counts) + + for typ, count := range minCounts { + if counts[typ] < count { + t.Errorf("expected at least %d %s prologue(s), got %d", count, typ, counts[typ]) + } + } +}