From 169ecf5ebd6ad85dbda6a1aa9d0435934dd8fe23 Mon Sep 17 00:00:00 2001 From: Raffael Campos Date: Wed, 24 Sep 2025 11:47:18 -0300 Subject: [PATCH 01/13] feat: add leaderwatch and tn_vacuum extensions - Introduced the `leaderwatch` extension to manage leader callbacks during block processing, including functionality for acquiring and losing leadership. - Implemented the `tn_vacuum` extension with various triggers (block interval, cron, manual, and digest coupled) to manage vacuum operations based on configurable parameters. - Added configuration loading and initialization for both extensions, ensuring they integrate with the existing hooks and service infrastructure. - Included comprehensive tests for the `leaderwatch` and `tn_vacuum` functionalities to ensure reliability and correctness. This update enhances the system's ability to manage leadership transitions and perform vacuum operations efficiently, contributing to overall performance improvements. --- extensions/leaderwatch/constants.go | 3 + extensions/leaderwatch/leaderwatch.go | 170 ++++++++++++++ extensions/leaderwatch/leaderwatch_test.go | 106 +++++++++ extensions/register.go | 4 + extensions/tn_digest/README.md | 2 +- extensions/tn_vacuum/config.go | 108 +++++++++ extensions/tn_vacuum/constants.go | 17 ++ extensions/tn_vacuum/extension.go | 213 ++++++++++++++++++ extensions/tn_vacuum/mechanism.go | 53 +++++ extensions/tn_vacuum/runner.go | 42 ++++ extensions/tn_vacuum/tn_vacuum.go | 125 ++++++++++ extensions/tn_vacuum/trigger.go | 38 ++++ .../tn_vacuum/trigger_block_interval.go | 78 +++++++ extensions/tn_vacuum/trigger_cron.go | 92 ++++++++ extensions/tn_vacuum/trigger_digest.go | 62 +++++ extensions/tn_vacuum/trigger_manual.go | 62 +++++ extensions/tn_vacuum/vacuum_test.go | 111 +++++++++ 17 files changed, 1285 insertions(+), 1 deletion(-) create mode 100644 extensions/leaderwatch/constants.go create mode 100644 extensions/leaderwatch/leaderwatch.go create mode 100644 
extensions/leaderwatch/leaderwatch_test.go create mode 100644 extensions/tn_vacuum/config.go create mode 100644 extensions/tn_vacuum/constants.go create mode 100644 extensions/tn_vacuum/extension.go create mode 100644 extensions/tn_vacuum/mechanism.go create mode 100644 extensions/tn_vacuum/runner.go create mode 100644 extensions/tn_vacuum/tn_vacuum.go create mode 100644 extensions/tn_vacuum/trigger.go create mode 100644 extensions/tn_vacuum/trigger_block_interval.go create mode 100644 extensions/tn_vacuum/trigger_cron.go create mode 100644 extensions/tn_vacuum/trigger_digest.go create mode 100644 extensions/tn_vacuum/trigger_manual.go create mode 100644 extensions/tn_vacuum/vacuum_test.go diff --git a/extensions/leaderwatch/constants.go b/extensions/leaderwatch/constants.go new file mode 100644 index 000000000..5a758880c --- /dev/null +++ b/extensions/leaderwatch/constants.go @@ -0,0 +1,3 @@ +package leaderwatch + +const ExtensionName = "leaderwatch" diff --git a/extensions/leaderwatch/leaderwatch.go b/extensions/leaderwatch/leaderwatch.go new file mode 100644 index 000000000..bc83b09ab --- /dev/null +++ b/extensions/leaderwatch/leaderwatch.go @@ -0,0 +1,170 @@ +package leaderwatch + +import ( + "context" + "fmt" + "sync" + + "github.com/trufnetwork/kwil-db/common" + "github.com/trufnetwork/kwil-db/core/log" + "github.com/trufnetwork/kwil-db/extensions/hooks" +) + +type Callbacks struct { + OnAcquire func(ctx context.Context, app *common.App, block *common.BlockContext) + OnLose func(ctx context.Context, app *common.App, block *common.BlockContext) + OnEndBlock func(ctx context.Context, app *common.App, block *common.BlockContext) +} + +type watcher struct { + callbacks Callbacks + isLeader bool +} + +type extension struct { + mu sync.RWMutex + logger log.Logger + service *common.Service + watchers map[string]*watcher + order []string +} + +var ( + extOnce sync.Once + extInst *extension +) + +func getExtension() *extension { + extOnce.Do(func() { + extInst = 
&extension{ + logger: log.New(log.WithLevel(log.LevelInfo)).New(ExtensionName), + watchers: make(map[string]*watcher), + } + }) + return extInst +} + +func InitializeExtension() { + if err := hooks.RegisterEngineReadyHook(ExtensionName+"_engine_ready", engineReadyHook); err != nil { + panic(fmt.Sprintf("failed to register %s engine ready hook: %v", ExtensionName, err)) + } + if err := hooks.RegisterEndBlockHook(ExtensionName+"_end_block", endBlockHook); err != nil { + panic(fmt.Sprintf("failed to register %s end block hook: %v", ExtensionName, err)) + } +} + +func engineReadyHook(ctx context.Context, app *common.App) error { + ext := getExtension() + if app != nil && app.Service != nil { + ext.mu.Lock() + ext.logger = app.Service.Logger.New(ExtensionName) + ext.service = app.Service + ext.mu.Unlock() + } + return nil +} + +func endBlockHook(ctx context.Context, app *common.App, block *common.BlockContext) error { + ext := getExtension() + + isLeader := determineLeader(app, block) + + ext.mu.Lock() + var svc *common.Service + var logger log.Logger + if app != nil { + svc = app.Service + } + if svc != nil { + logger = svc.Logger.New(ExtensionName) + } else { + logger = ext.logger + } + ext.service = svc + ext.logger = logger + + updates := make([]struct { + callbacks Callbacks + change int + }, 0, len(ext.order)) + + for _, name := range ext.order { + w, ok := ext.watchers[name] + if !ok { + continue + } + change := 0 + if w.isLeader != isLeader { + w.isLeader = isLeader + if isLeader { + change = 1 + } else { + change = -1 + } + } + updates = append(updates, struct { + callbacks Callbacks + change int + }{callbacks: w.callbacks, change: change}) + } + callbacks := make([]Callbacks, len(updates)) + changes := make([]int, len(updates)) + for i, u := range updates { + callbacks[i] = u.callbacks + changes[i] = u.change + } + ext.mu.Unlock() + + for i, cb := range callbacks { + switch changes[i] { + case 1: + if cb.OnAcquire != nil { + cb.OnAcquire(ctx, app, block) + } + 
case -1: + if cb.OnLose != nil { + cb.OnLose(ctx, app, block) + } + } + if cb.OnEndBlock != nil { + cb.OnEndBlock(ctx, app, block) + } + } + + return nil +} + +func determineLeader(app *common.App, block *common.BlockContext) bool { + if app == nil || app.Service == nil || block == nil || block.ChainContext == nil || block.ChainContext.NetworkParameters == nil { + return false + } + nodeID := app.Service.Identity + leader := block.ChainContext.NetworkParameters.Leader + if len(nodeID) == 0 || leader.PublicKey == nil { + return false + } + return string(nodeID) == string(leader.PublicKey.Bytes()) +} + +func Register(name string, callbacks Callbacks) error { + ext := getExtension() + ext.mu.Lock() + defer ext.mu.Unlock() + if name == "" { + return fmt.Errorf("leaderwatch: name cannot be empty") + } + if _, exists := ext.watchers[name]; exists { + return fmt.Errorf("leaderwatch: watcher %q already registered", name) + } + ext.watchers[name] = &watcher{callbacks: callbacks} + ext.order = append(ext.order, name) + return nil +} + +func ResetForTest() { + ext := getExtension() + ext.mu.Lock() + ext.watchers = make(map[string]*watcher) + ext.order = nil + ext.mu.Unlock() +} diff --git a/extensions/leaderwatch/leaderwatch_test.go b/extensions/leaderwatch/leaderwatch_test.go new file mode 100644 index 000000000..5c654511c --- /dev/null +++ b/extensions/leaderwatch/leaderwatch_test.go @@ -0,0 +1,106 @@ +package leaderwatch + +import ( + "context" + "testing" + + "github.com/stretchr/testify/require" + "github.com/trufnetwork/kwil-db/common" + "github.com/trufnetwork/kwil-db/core/crypto" + "github.com/trufnetwork/kwil-db/core/log" + coretypes "github.com/trufnetwork/kwil-db/core/types" +) + +func makeBlock(height int64, leader []byte) *common.BlockContext { + pk := &fakePubKey{b: leader} + return &common.BlockContext{ + Height: height, + ChainContext: &common.ChainContext{ + NetworkParameters: &coretypes.NetworkParameters{ + Leader: coretypes.PublicKey{PublicKey: pk}, + }, + 
}, + } +} + +type fakePubKey struct { + b []byte +} + +func (f *fakePubKey) Equals(other crypto.Key) bool { + if other == nil { + return false + } + return string(f.Bytes()) == string(other.Bytes()) && f.Type() == other.Type() +} +func (f *fakePubKey) Bytes() []byte { return f.b } +func (f *fakePubKey) Type() crypto.KeyType { return crypto.KeyTypeEd25519 } +func (f *fakePubKey) Verify(data []byte, sig []byte) (bool, error) { return true, nil } + +func TestLeaderwatch_CallbackOrderingAndTransitions(t *testing.T) { + ResetForTest() + app := &common.App{Service: &common.Service{Identity: []byte("nodeA"), Logger: log.New()}} + ctx := context.Background() + + var events []string + + err := Register("first", Callbacks{ + OnAcquire: func(ctx context.Context, app *common.App, block *common.BlockContext) { + events = append(events, "first_acquire") + }, + OnLose: func(ctx context.Context, app *common.App, block *common.BlockContext) { + events = append(events, "first_lose") + }, + OnEndBlock: func(ctx context.Context, app *common.App, block *common.BlockContext) { + events = append(events, "first_end") + }, + }) + require.NoError(t, err) + + err = Register("second", Callbacks{ + OnAcquire: func(ctx context.Context, app *common.App, block *common.BlockContext) { + events = append(events, "second_acquire") + }, + OnLose: func(ctx context.Context, app *common.App, block *common.BlockContext) { + events = append(events, "second_lose") + }, + OnEndBlock: func(ctx context.Context, app *common.App, block *common.BlockContext) { + events = append(events, "second_end") + }, + }) + require.NoError(t, err) + + // become leader + require.NoError(t, endBlockHook(ctx, app, makeBlock(1, []byte("nodeA")))) + require.Equal(t, []string{ + "first_acquire", "first_end", + "second_acquire", "second_end", + }, events) + + // stay leader - only end callbacks fire + events = nil + require.NoError(t, endBlockHook(ctx, app, makeBlock(2, []byte("nodeA")))) + require.Equal(t, []string{"first_end", 
"second_end"}, events) + + // lose leadership + events = nil + require.NoError(t, endBlockHook(ctx, app, makeBlock(3, []byte("nodeB")))) + require.Equal(t, []string{ + "first_lose", "first_end", + "second_lose", "second_end", + }, events) +} + +func TestLeaderwatch_DuplicateRegistrationFails(t *testing.T) { + ResetForTest() + require.NoError(t, Register("dup", Callbacks{})) + err := Register("dup", Callbacks{}) + require.Error(t, err) +} + +func TestLeaderwatch_ResetClearsState(t *testing.T) { + ResetForTest() + require.NoError(t, Register("one", Callbacks{})) + ResetForTest() + require.NoError(t, Register("one", Callbacks{})) +} diff --git a/extensions/register.go b/extensions/register.go index dc36dd354..9cbece86d 100644 --- a/extensions/register.go +++ b/extensions/register.go @@ -1,11 +1,15 @@ package extensions import ( + "github.com/trufnetwork/node/extensions/leaderwatch" "github.com/trufnetwork/node/extensions/tn_cache" "github.com/trufnetwork/node/extensions/tn_digest" + "github.com/trufnetwork/node/extensions/tn_vacuum" ) func init() { + leaderwatch.InitializeExtension() tn_cache.InitializeExtension() tn_digest.InitializeExtension() + tn_vacuum.InitializeExtension() } diff --git a/extensions/tn_digest/README.md b/extensions/tn_digest/README.md index 3006ea6af..0c185a785 100644 --- a/extensions/tn_digest/README.md +++ b/extensions/tn_digest/README.md @@ -29,7 +29,7 @@ Minimal SQL to adjust: ```sql -- First-time setup: ensure the single row exists INSERT INTO main.digest_config (id, enabled, digest_schedule) -VALUES (1, true, '*/10 * * * *'); +VALUES (1, true, '0 9 * * *'); -- Subsequent changes UPDATE main.digest_config SET enabled = true, digest_schedule = '*/10 * * * *' WHERE id = 1; diff --git a/extensions/tn_vacuum/config.go b/extensions/tn_vacuum/config.go new file mode 100644 index 000000000..6e0b6ebb3 --- /dev/null +++ b/extensions/tn_vacuum/config.go @@ -0,0 +1,108 @@ +package tn_vacuum + +import ( + "fmt" + "strconv" + "strings" + + 
"github.com/trufnetwork/kwil-db/common" +) + +type Config struct { + Enabled bool + + Trigger TriggerConfig + ReloadIntervalBlocks int64 +} + +type TriggerConfig struct { + Kind string + BlockInterval int64 + CronSchedule string +} + +func LoadConfig(service *common.Service) (Config, error) { + cfg := Config{ + Trigger: TriggerConfig{ + Kind: triggerFromString(""), + }, + ReloadIntervalBlocks: defaultReloadBlocks, + } + + if service == nil || service.LocalConfig == nil { + return cfg, nil + } + + raw, ok := service.LocalConfig.Extensions[ExtensionName] + if !ok { + return cfg, nil + } + + if v, ok := raw["enabled"]; ok { + boolVal, err := parseBool(v) + if err != nil { + return cfg, fmt.Errorf("parse enabled: %w", err) + } + cfg.Enabled = boolVal + } + + if v, ok := raw["trigger"]; ok { + cfg.Trigger.Kind = triggerFromString(v) + } + + if v, ok := raw["block_interval"]; ok { + val, err := strconv.ParseInt(strings.TrimSpace(v), 10, 64) + if err != nil { + return cfg, fmt.Errorf("parse block_interval: %w", err) + } + if val < minBlockInterval { + val = minBlockInterval + } + cfg.Trigger.BlockInterval = val + } + + if v, ok := raw["cron_schedule"]; ok { + cfg.Trigger.CronSchedule = strings.TrimSpace(v) + } + + if v, ok := raw["reload_interval_blocks"]; ok { + val, err := strconv.ParseInt(strings.TrimSpace(v), 10, 64) + if err != nil { + return cfg, fmt.Errorf("parse reload_interval_blocks: %w", err) + } + if val <= 0 { + val = defaultReloadBlocks + } + cfg.ReloadIntervalBlocks = val + } + + return cfg, nil +} + +func parseBool(in string) (bool, error) { + switch strings.ToLower(strings.TrimSpace(in)) { + case "true", "1", "yes", "y", "on": + return true, nil + case "false", "0", "no", "n", "off", "": + return false, nil + default: + return false, fmt.Errorf("invalid bool %q", in) + } +} + +func triggerFromString(in string) string { + switch strings.ToLower(strings.TrimSpace(in)) { + case TriggerDigestCoupled: + return TriggerDigestCoupled + case TriggerBlockInterval: 
+ return TriggerBlockInterval + case TriggerCron: + return TriggerCron + case TriggerManual: + return TriggerManual + case "": + return defaultTrigger + default: + return defaultTrigger + } +} diff --git a/extensions/tn_vacuum/constants.go b/extensions/tn_vacuum/constants.go new file mode 100644 index 000000000..a3b2d6783 --- /dev/null +++ b/extensions/tn_vacuum/constants.go @@ -0,0 +1,17 @@ +package tn_vacuum + +const ( + // ExtensionName is used for hook registration and config namespace. + ExtensionName = "tn_vacuum" + + TriggerDigestCoupled = "digest_coupled" + TriggerBlockInterval = "block_interval" + TriggerCron = "cron" + TriggerManual = "manual" +) + +const ( + defaultTrigger = TriggerManual + defaultReloadBlocks = int64(1000) + minBlockInterval = int64(1) +) diff --git a/extensions/tn_vacuum/extension.go b/extensions/tn_vacuum/extension.go new file mode 100644 index 000000000..058c1d628 --- /dev/null +++ b/extensions/tn_vacuum/extension.go @@ -0,0 +1,213 @@ +package tn_vacuum + +import ( + "context" + "sync" + + "github.com/trufnetwork/kwil-db/common" + "github.com/trufnetwork/kwil-db/core/log" +) + +type Extension struct { + mu sync.RWMutex + logger log.Logger + service *common.Service + config Config + trigger Trigger + mechanism Mechanism + runner *Runner + isLeader bool + reloadIntervalBlocks int64 + lastConfigHeight int64 +} + +var ( + extInstance *Extension + once sync.Once +) + +func GetExtension() *Extension { + once.Do(func() { + extInstance = &Extension{ + logger: log.New(log.WithLevel(log.LevelInfo)), + reloadIntervalBlocks: defaultReloadBlocks, + } + }) + return extInstance +} + +func SetExtension(e *Extension) { + extInstance = e +} + +func ResetForTest() { + once = sync.Once{} + extInstance = nil +} + +func (e *Extension) Logger() log.Logger { + e.mu.RLock() + defer e.mu.RUnlock() + return e.logger +} + +func (e *Extension) setLogger(l log.Logger) { + e.mu.Lock() + defer e.mu.Unlock() + e.logger = l +} + +func (e *Extension) Service() 
*common.Service { + e.mu.RLock() + defer e.mu.RUnlock() + return e.service +} + +func (e *Extension) setService(s *common.Service) { + e.mu.Lock() + defer e.mu.Unlock() + e.service = s +} + +func (e *Extension) Config() Config { + e.mu.RLock() + defer e.mu.RUnlock() + return e.config +} + +func (e *Extension) setConfig(cfg Config) { + e.mu.Lock() + defer e.mu.Unlock() + e.config = cfg +} + +func (e *Extension) setMechanism(m Mechanism) { + e.mu.Lock() + defer e.mu.Unlock() + e.mechanism = m +} + +func (e *Extension) Mechanism() Mechanism { + e.mu.RLock() + defer e.mu.RUnlock() + return e.mechanism +} + +func (e *Extension) setTrigger(t Trigger) { + e.mu.Lock() + defer e.mu.Unlock() + e.trigger = t +} + +func (e *Extension) Trigger() Trigger { + e.mu.RLock() + defer e.mu.RUnlock() + return e.trigger +} + +func (e *Extension) ensureRunner() *Runner { + e.mu.Lock() + defer e.mu.Unlock() + if e.runner == nil { + e.runner = &Runner{logger: e.logger} + } + return e.runner +} + +func (e *Extension) setLeader(v bool) { + e.mu.Lock() + defer e.mu.Unlock() + e.isLeader = v +} + +func (e *Extension) IsLeader() bool { + e.mu.RLock() + defer e.mu.RUnlock() + return e.isLeader +} + +func (e *Extension) SetReloadIntervalBlocks(v int64) { + e.mu.Lock() + defer e.mu.Unlock() + e.reloadIntervalBlocks = v +} + +func (e *Extension) ReloadIntervalBlocks() int64 { + e.mu.RLock() + defer e.mu.RUnlock() + return e.reloadIntervalBlocks +} + +func (e *Extension) SetLastConfigHeight(h int64) { + e.mu.Lock() + defer e.mu.Unlock() + e.lastConfigHeight = h +} + +func (e *Extension) LastConfigHeight() int64 { + e.mu.RLock() + defer e.mu.RUnlock() + return e.lastConfigHeight +} + +func (e *Extension) Close(ctx context.Context) { + e.mu.Lock() + defer e.mu.Unlock() + if e.trigger != nil { + _ = e.trigger.Stop(ctx) + e.trigger = nil + } + if e.mechanism != nil { + _ = e.mechanism.Close(ctx) + e.mechanism = nil + } + e.runner = nil +} + +func (e *Extension) reconfigure(ctx context.Context, cfg 
Config, deps MechanismDeps) error {
+	// Builds a replacement mechanism and trigger for cfg and swaps them in under
+	// e.mu. The replacements are fully configured before the current pair is
+	// torn down, so a failed reload leaves the existing setup in place.
+	e.mu.Lock()
+	defer e.mu.Unlock()
+
+	// Snapshot the logger while the lock is held: the fire closure below runs
+	// later, outside e.mu, and must not read e.logger unsynchronized.
+	logger := e.logger
+
+	mech := newMechanism()
+	if err := mech.Prepare(ctx, deps); err != nil {
+		return err
+	}
+
+	trig, err := newTrigger(cfg.Trigger.Kind)
+	if err != nil {
+		_ = mech.Close(ctx) // best effort: this mechanism will never be used
+		return err
+	}
+	fire := func(callCtx context.Context, opts FireOpts) error {
+		return e.ensureRunner().Execute(callCtx, RunnerArgs{Mechanism: mech, Trigger: trig, Logger: logger, Reason: opts.Reason})
+	}
+	if err := trig.Configure(ctx, cfg.Trigger, fire); err != nil {
+		_ = mech.Close(ctx) // best effort: this mechanism will never be used
+		return err
+	}
+
+	// The new pair is ready, so retire the old one. Stop/Close errors are
+	// ignored because the old instances are discarded either way.
+	if e.trigger != nil {
+		_ = e.trigger.Stop(ctx)
+	}
+	if e.mechanism != nil {
+		_ = e.mechanism.Close(ctx)
+	}
+
+	e.reloadIntervalBlocks = cfg.ReloadIntervalBlocks
+	e.config = cfg
+	e.mechanism = mech
+	e.trigger = trig
+	e.runner = &Runner{logger: logger}
+
+	return nil
+}
+
+// startTriggerIfLeader starts the current trigger when this node is marked as
+// leader and the stored config is enabled. The fields are copied under the
+// read lock so Start runs without holding e.mu; its error is discarded.
+func (e *Extension) startTriggerIfLeader(ctx context.Context) {
+	e.mu.RLock()
+	trig := e.trigger
+	cfg := e.config
+	leader := e.isLeader
+	e.mu.RUnlock()
+	if !leader || trig == nil || !cfg.Enabled {
+		return
+	}
+	_ = trig.Start(ctx)
+}
diff --git a/extensions/tn_vacuum/mechanism.go b/extensions/tn_vacuum/mechanism.go
new file mode 100644
index 000000000..0f9388ea5
--- /dev/null
+++ b/extensions/tn_vacuum/mechanism.go
@@ -0,0 +1,53 @@
+package tn_vacuum
+
+import (
+	"context"
+
+	"github.com/trufnetwork/kwil-db/core/log"
+)
+
+// Mechanism is the pluggable operation that is run when a trigger fires.
+type Mechanism interface {
+	Name() string
+	Prepare(ctx context.Context, deps MechanismDeps) error
+	Run(ctx context.Context, req RunRequest) (*RunReport, error)
+	Close(ctx context.Context) error
+}
+
+// MechanismDeps carries the dependencies handed to Mechanism.Prepare.
+type MechanismDeps struct {
+	Logger log.Logger
+}
+
+// RunRequest describes a single Mechanism.Run invocation.
+type RunRequest struct {
+	Reason string
+}
+
+// RunReport is the result returned by Mechanism.Run.
+type RunReport struct {
+	Mechanism string
+	Status    string
+}
+
+// newMechanism returns the mechanism implementation used by the extension,
+// currently the logging-only stub.
+func newMechanism() Mechanism {
+	return &vacuumStubMechanism{}
+}
+
+// vacuumStubMechanism is a placeholder Mechanism that only writes log lines.
+type vacuumStubMechanism struct {
+	logger log.Logger
+}
+
+func (m *vacuumStubMechanism) Name() string { return "vacuum_stub" }
+
+func (m *vacuumStubMechanism) Prepare(ctx context.Context, deps MechanismDeps)
error {
+	m.logger = deps.Logger.New("mechanism.vacuum_stub")
+	m.logger.Info("vacuum stub prepared")
+	return nil
+}
+
+func (m *vacuumStubMechanism) Run(ctx context.Context, req RunRequest) (*RunReport, error) {
+	m.logger.Info("vacuum stub run", "reason", req.Reason)
+	return &RunReport{Mechanism: m.Name(), Status: "ok"}, nil
+}
+
+func (m *vacuumStubMechanism) Close(ctx context.Context) error {
+	m.logger.Info("vacuum stub closed")
+	return nil
+}
diff --git a/extensions/tn_vacuum/runner.go b/extensions/tn_vacuum/runner.go
new file mode 100644
index 000000000..bd241d76a
--- /dev/null
+++ b/extensions/tn_vacuum/runner.go
@@ -0,0 +1,42 @@
+package tn_vacuum
+
+import (
+	"context"
+
+	"github.com/trufnetwork/kwil-db/core/log"
+)
+
+// Runner executes a Mechanism and logs the start, failure or completion of
+// the run.
+type Runner struct {
+	logger log.Logger
+}
+
+// RunnerArgs bundles the inputs of a single Runner.Execute call.
+type RunnerArgs struct {
+	Mechanism Mechanism
+	Trigger   Trigger
+	Logger    log.Logger
+	Reason    string
+}
+
+// Execute runs args.Mechanism once and returns its error. A nil mechanism is
+// a no-op. The runner's own logger is preferred; args.Logger is the fallback.
+func (r *Runner) Execute(ctx context.Context, args RunnerArgs) error {
+	if args.Mechanism == nil {
+		return nil
+	}
+	logger := r.logger
+	if logger == nil {
+		logger = args.Logger
+	}
+	if logger != nil {
+		logger.Info("vacuum runner executing", "mechanism", args.Mechanism.Name(), "reason", args.Reason)
+	}
+	_, err := args.Mechanism.Run(ctx, RunRequest{Reason: args.Reason})
+	if err != nil {
+		if logger != nil {
+			logger.Warn("vacuum runner failed", "error", err)
+		}
+		return err
+	}
+	if logger != nil {
+		logger.Info("vacuum runner completed", "mechanism", args.Mechanism.Name())
+	}
+	return nil
+}
diff --git a/extensions/tn_vacuum/tn_vacuum.go b/extensions/tn_vacuum/tn_vacuum.go
new file mode 100644
index 000000000..db45370fb
--- /dev/null
+++ b/extensions/tn_vacuum/tn_vacuum.go
@@ -0,0 +1,125 @@
+package tn_vacuum
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/trufnetwork/kwil-db/common"
+	"github.com/trufnetwork/kwil-db/extensions/hooks"
+	"github.com/trufnetwork/kwil-db/extensions/precompiles"
+	sql "github.com/trufnetwork/kwil-db/node/types/sql"
+	"github.com/trufnetwork/node/extensions/leaderwatch"
+)
+
+// InitializeExtension registers the precompile initializer, the engine-ready
+// and end-block hooks, and the leaderwatch callbacks for tn_vacuum. It panics
+// when any registration fails.
+func InitializeExtension() {
+	if err := precompiles.RegisterInitializer(ExtensionName, initializePrecompile); err != nil {
+		panic(fmt.Sprintf("failed to register %s initializer: %v", ExtensionName, err))
+	}
+	if err := hooks.RegisterEngineReadyHook(ExtensionName+"_engine_ready", engineReadyHook); err != nil {
+		panic(fmt.Sprintf("failed to register %s engine ready hook: %v", ExtensionName, err))
+	}
+	if err := hooks.RegisterEndBlockHook(ExtensionName+"_end_block", endBlockHook); err != nil {
+		panic(fmt.Sprintf("failed to register %s end block hook: %v", ExtensionName, err))
+	}
+	if err := leaderwatch.Register(ExtensionName, leaderwatch.Callbacks{
+		OnAcquire:  leaderAcquire,
+		OnLose:     leaderLose,
+		OnEndBlock: leaderEndBlock,
+	}); err != nil {
+		panic(fmt.Sprintf("failed to register %s leader watcher: %v", ExtensionName, err))
+	}
+}
+
+// initializePrecompile stores the service and a namespaced logger on the
+// extension singleton and returns an empty Precompile.
+func initializePrecompile(ctx context.Context, service *common.Service, db sql.DB, alias string, metadata map[string]any) (precompiles.Precompile, error) {
+	ext := GetExtension()
+	if service != nil {
+		ext.setLogger(service.Logger.New(ExtensionName))
+		ext.setService(service)
+	}
+	return precompiles.Precompile{}, nil
+}
+
+// engineReadyHook records the service and logger, loads the tn_vacuum config
+// and builds the initial trigger/mechanism pair. Configuration problems are
+// logged and never returned, so the hook always reports success.
+func engineReadyHook(ctx context.Context, app *common.App) error {
+	ext := GetExtension()
+
+	// Resolve the service once so a nil app is safe for every use below;
+	// LoadConfig returns the defaults when it is given a nil service.
+	var svc *common.Service
+	if app != nil {
+		svc = app.Service
+	}
+	if svc != nil {
+		ext.setLogger(svc.Logger.New(ExtensionName))
+		ext.setService(svc)
+	}
+
+	cfg, err := LoadConfig(svc)
+	if err != nil {
+		ext.Logger().Warn("failed to load tn_vacuum config", "error", err)
+		cfg.Enabled = false
+	}
+
+	deps := MechanismDeps{Logger: ext.Logger()}
+	if err := ext.reconfigure(ctx, cfg, deps); err != nil {
+		ext.Logger().Warn("failed to configure tn_vacuum", "error", err)
+		return nil
+	}
+
+	ext.SetReloadIntervalBlocks(cfg.ReloadIntervalBlocks)
+	ext.SetLastConfigHeight(0)
+
+	if cfg.Enabled {
+		ext.startTriggerIfLeader(ctx)
+	}
+
+	return nil
+}
+
+func endBlockHook(ctx
context.Context, app *common.App, block *common.BlockContext) error {
+	return nil
+}
+
+// leaderAcquire marks this node as leader, notifies the current trigger and
+// starts it when the extension is enabled.
+func leaderAcquire(ctx context.Context, app *common.App, block *common.BlockContext) {
+	ext := GetExtension()
+	ext.setLeader(true)
+	if trig := ext.Trigger(); trig != nil {
+		_ = trig.OnLeaderChange(ctx, true)
+	}
+	ext.startTriggerIfLeader(ctx)
+}
+
+// leaderLose clears the leader flag, then notifies and stops the current
+// trigger.
+func leaderLose(ctx context.Context, app *common.App, block *common.BlockContext) {
+	ext := GetExtension()
+	ext.setLeader(false)
+	if trig := ext.Trigger(); trig != nil {
+		_ = trig.OnLeaderChange(ctx, false)
+		_ = trig.Stop(ctx)
+	}
+}
+
+// leaderEndBlock forwards the end-of-block event to the current trigger and,
+// once ReloadIntervalBlocks blocks have passed since the last reload, reloads
+// the configuration. Nothing runs while the stored config is disabled.
+func leaderEndBlock(ctx context.Context, app *common.App, block *common.BlockContext) {
+	ext := GetExtension()
+	cfg := ext.Config()
+	if !cfg.Enabled {
+		return
+	}
+
+	if trig := ext.Trigger(); trig != nil {
+		_ = trig.OnEndBlock(ctx, block)
+	}
+
+	if block == nil {
+		return
+	}
+
+	reload := ext.ReloadIntervalBlocks()
+	if reload <= 0 || block.Height-ext.LastConfigHeight() < reload {
+		return
+	}
+
+	var svc *common.Service
+	if app != nil {
+		svc = app.Service
+	}
+	newCfg, err := LoadConfig(svc)
+	switch {
+	case err != nil:
+		ext.Logger().Warn("failed to reload tn_vacuum config", "error", err)
+	case newCfg == cfg:
+		// Unchanged: keep the running trigger. Rebuilding it would stop it and
+		// reset its state, e.g. the block-interval trigger's last-fired height.
+	default:
+		deps := MechanismDeps{Logger: ext.Logger()}
+		if err := ext.reconfigure(ctx, newCfg, deps); err != nil {
+			ext.Logger().Warn("failed to apply tn_vacuum config", "error", err)
+		} else {
+			// reconfigure stops the previous trigger and installs one that has
+			// not been started; start it so firing continues while leading.
+			ext.startTriggerIfLeader(ctx)
+		}
+	}
+	ext.SetLastConfigHeight(block.Height)
+}
diff --git a/extensions/tn_vacuum/trigger.go b/extensions/tn_vacuum/trigger.go
new file mode 100644
index 000000000..bbabd594d
--- /dev/null
+++ b/extensions/tn_vacuum/trigger.go
@@ -0,0 +1,38 @@
+package tn_vacuum
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/trufnetwork/kwil-db/common"
+	"github.com/trufnetwork/kwil-db/core/log"
+)
+
+// FireOpts is passed to the fire callback each time a trigger fires.
+type FireOpts struct {
+	Reason string
+}
+
+// Trigger decides when the fire callback given to Configure is invoked.
+type Trigger interface {
+	Configure(ctx context.Context, cfg TriggerConfig, fire func(context.Context, FireOpts) error) error
+	Start(ctx context.Context) error
+	Stop(ctx context.Context) error +
OnLeaderChange(ctx context.Context, isLeader bool) error
+	OnEndBlock(ctx context.Context, block *common.BlockContext) error
+	Kind() string
+}
+
+// newTrigger returns an unconfigured trigger for the given kind, or an error
+// for an unknown kind. Each trigger receives a freshly created info-level
+// logger.
+func newTrigger(kind string) (Trigger, error) {
+	baseLogger := log.New(log.WithLevel(log.LevelInfo))
+	switch kind {
+	case TriggerDigestCoupled:
+		return newDigestTrigger(baseLogger), nil
+	case TriggerBlockInterval:
+		return newBlockIntervalTrigger(baseLogger), nil
+	case TriggerCron:
+		return newCronTrigger(baseLogger), nil
+	case TriggerManual:
+		return newManualTrigger(baseLogger), nil
+	default:
+		return nil, fmt.Errorf("unsupported trigger %q", kind)
+	}
+}
diff --git a/extensions/tn_vacuum/trigger_block_interval.go b/extensions/tn_vacuum/trigger_block_interval.go
new file mode 100644
index 000000000..7c582ff9d
--- /dev/null
+++ b/extensions/tn_vacuum/trigger_block_interval.go
@@ -0,0 +1,78 @@
+package tn_vacuum
+
+import (
+	"context"
+	"fmt"
+	"sync"
+
+	"github.com/trufnetwork/kwil-db/common"
+	"github.com/trufnetwork/kwil-db/core/log"
+)
+
+// blockIntervalTrigger fires from OnEndBlock once at least interval blocks
+// have passed since the height at which it last fired.
+type blockIntervalTrigger struct {
+	mu        sync.Mutex
+	fire      func(context.Context, FireOpts) error
+	logger    log.Logger
+	interval  int64
+	lastFired int64
+}
+
+// newBlockIntervalTrigger returns an unconfigured trigger that logs through
+// logger.
+func newBlockIntervalTrigger(logger log.Logger) *blockIntervalTrigger {
+	return &blockIntervalTrigger{logger: logger}
+}
+
+func (t *blockIntervalTrigger) Kind() string { return TriggerBlockInterval }
+
+// Configure installs the fire callback and the firing interval and resets the
+// last-fired height. An unset (zero) interval falls back to 100 blocks; any
+// other value below minBlockInterval is raised to minBlockInterval.
+func (t *blockIntervalTrigger) Configure(ctx context.Context, cfg TriggerConfig, fire func(context.Context, FireOpts) error) error {
+	t.mu.Lock()
+	defer t.mu.Unlock()
+	t.logger = t.logger.New("trigger.block_interval")
+	// The zero check must run before the clamp: clamping first turns 0 into
+	// minBlockInterval and makes the default unreachable.
+	if cfg.BlockInterval == 0 {
+		cfg.BlockInterval = 100
+	}
+	if cfg.BlockInterval < minBlockInterval {
+		cfg.BlockInterval = minBlockInterval
+	}
+	t.interval = cfg.BlockInterval
+	t.fire = fire
+	t.lastFired = 0
+	t.logger.Info("block interval trigger configured", "interval", t.interval)
+	return nil
+}
+
+func (t *blockIntervalTrigger) Start(ctx context.Context) error {
+	t.logger.Info("block
interval trigger active") + return nil +} + +func (t *blockIntervalTrigger) Stop(ctx context.Context) error { + t.logger.Info("block interval trigger stopped") + return nil +} + +func (t *blockIntervalTrigger) OnLeaderChange(ctx context.Context, isLeader bool) error { + if !isLeader { + t.logger.Debug("block interval trigger paused") + } + return nil +} + +func (t *blockIntervalTrigger) OnEndBlock(ctx context.Context, block *common.BlockContext) error { + if block == nil { + return nil + } + t.mu.Lock() + defer t.mu.Unlock() + if t.fire == nil { + return nil + } + height := block.Height + if t.lastFired == 0 || height-t.lastFired >= t.interval { + if err := t.fire(ctx, FireOpts{Reason: fmt.Sprintf("block_interval:%d", height)}); err != nil { + t.logger.Warn("block interval trigger failed", "error", err) + } else { + t.lastFired = height + } + } + return nil +} diff --git a/extensions/tn_vacuum/trigger_cron.go b/extensions/tn_vacuum/trigger_cron.go new file mode 100644 index 000000000..316d0e7ba --- /dev/null +++ b/extensions/tn_vacuum/trigger_cron.go @@ -0,0 +1,92 @@ +package tn_vacuum + +import ( + "context" + "sync" + "time" + + "github.com/trufnetwork/kwil-db/common" + "github.com/trufnetwork/kwil-db/core/log" +) + +type cronTrigger struct { + mu sync.Mutex + fire func(context.Context, FireOpts) error + logger log.Logger + running bool + cancel context.CancelFunc +} + +func newCronTrigger(logger log.Logger) *cronTrigger { + return &cronTrigger{logger: logger} +} + +func (t *cronTrigger) Kind() string { return TriggerCron } + +func (t *cronTrigger) Configure(ctx context.Context, cfg TriggerConfig, fire func(context.Context, FireOpts) error) error { + t.mu.Lock() + defer t.mu.Unlock() + t.logger = t.logger.New("trigger.cron") + t.fire = fire + t.logger.Info("cron trigger configured", "schedule", cfg.CronSchedule) + return nil +} + +func (t *cronTrigger) Start(ctx context.Context) error { + t.mu.Lock() + defer t.mu.Unlock() + if t.running { + return nil + } + 
loopCtx, cancel := context.WithCancel(ctx) + t.cancel = cancel + t.running = true + go t.loop(loopCtx) + t.logger.Info("cron trigger loop started (stub)") + return nil +} + +func (t *cronTrigger) loop(ctx context.Context) { + ticker := time.NewTicker(1 * time.Hour) + defer ticker.Stop() + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + t.mu.Lock() + fire := t.fire + t.mu.Unlock() + if fire != nil { + if err := fire(ctx, FireOpts{Reason: "cron_stub"}); err != nil { + t.logger.Warn("cron trigger fire failed", "error", err) + } + } + } + } +} + +func (t *cronTrigger) Stop(ctx context.Context) error { + t.mu.Lock() + defer t.mu.Unlock() + if !t.running { + return nil + } + if t.cancel != nil { + t.cancel() + } + t.running = false + t.logger.Info("cron trigger stopped") + return nil +} + +func (t *cronTrigger) OnLeaderChange(ctx context.Context, isLeader bool) error { + if !isLeader { + t.logger.Debug("cron trigger paused") + } + return nil +} + +func (t *cronTrigger) OnEndBlock(ctx context.Context, block *common.BlockContext) error { + return nil +} diff --git a/extensions/tn_vacuum/trigger_digest.go b/extensions/tn_vacuum/trigger_digest.go new file mode 100644 index 000000000..c41a4ee96 --- /dev/null +++ b/extensions/tn_vacuum/trigger_digest.go @@ -0,0 +1,62 @@ +package tn_vacuum + +import ( + "context" + "fmt" + "sync" + + "github.com/trufnetwork/kwil-db/common" + "github.com/trufnetwork/kwil-db/core/log" +) + +type digestCoupledTrigger struct { + mu sync.RWMutex + fire func(context.Context, FireOpts) error + logger log.Logger +} + +func newDigestTrigger(logger log.Logger) *digestCoupledTrigger { + return &digestCoupledTrigger{logger: logger} +} + +func (t *digestCoupledTrigger) Kind() string { return TriggerDigestCoupled } + +func (t *digestCoupledTrigger) Configure(ctx context.Context, cfg TriggerConfig, fire func(context.Context, FireOpts) error) error { + t.mu.Lock() + defer t.mu.Unlock() + t.logger = t.logger.New("trigger.digest") + t.fire = 
fire + t.logger.Info("digest trigger configured") + return nil +} + +func (t *digestCoupledTrigger) Start(ctx context.Context) error { + t.logger.Info("digest trigger start - waiting for notifications") + return nil +} + +func (t *digestCoupledTrigger) Stop(ctx context.Context) error { + t.logger.Info("digest trigger stop") + return nil +} + +func (t *digestCoupledTrigger) OnLeaderChange(ctx context.Context, isLeader bool) error { + if !isLeader { + t.logger.Debug("digest trigger paused - not leader") + } + return nil +} + +func (t *digestCoupledTrigger) OnEndBlock(ctx context.Context, block *common.BlockContext) error { + return nil +} + +func (t *digestCoupledTrigger) NotifyDigestComplete(ctx context.Context, reason string) error { + t.mu.RLock() + fire := t.fire + t.mu.RUnlock() + if fire == nil { + return fmt.Errorf("digest trigger not configured") + } + return fire(ctx, FireOpts{Reason: reason}) +} diff --git a/extensions/tn_vacuum/trigger_manual.go b/extensions/tn_vacuum/trigger_manual.go new file mode 100644 index 000000000..76082f890 --- /dev/null +++ b/extensions/tn_vacuum/trigger_manual.go @@ -0,0 +1,62 @@ +package tn_vacuum + +import ( + "context" + "fmt" + "sync" + + "github.com/trufnetwork/kwil-db/common" + "github.com/trufnetwork/kwil-db/core/log" +) + +type manualTrigger struct { + mu sync.RWMutex + fire func(context.Context, FireOpts) error + logger log.Logger +} + +func newManualTrigger(logger log.Logger) *manualTrigger { + return &manualTrigger{logger: logger} +} + +func (t *manualTrigger) Kind() string { return TriggerManual } + +func (t *manualTrigger) Configure(ctx context.Context, cfg TriggerConfig, fire func(context.Context, FireOpts) error) error { + t.mu.Lock() + defer t.mu.Unlock() + t.logger = t.logger.New("trigger.manual") + t.fire = fire + t.logger.Info("manual trigger configured") + return nil +} + +func (t *manualTrigger) Start(ctx context.Context) error { + t.logger.Info("manual trigger ready") + return nil +} + +func (t 
*manualTrigger) Stop(ctx context.Context) error { + t.logger.Info("manual trigger stopped") + return nil +} + +func (t *manualTrigger) OnLeaderChange(ctx context.Context, isLeader bool) error { + if !isLeader { + t.logger.Debug("manual trigger idle - not leader") + } + return nil +} + +func (t *manualTrigger) OnEndBlock(ctx context.Context, block *common.BlockContext) error { + return nil +} + +func (t *manualTrigger) Fire(ctx context.Context, reason string) error { + t.mu.RLock() + fire := t.fire + t.mu.RUnlock() + if fire == nil { + return fmt.Errorf("manual trigger not configured") + } + return fire(ctx, FireOpts{Reason: reason}) +} diff --git a/extensions/tn_vacuum/vacuum_test.go b/extensions/tn_vacuum/vacuum_test.go new file mode 100644 index 000000000..0433f1f88 --- /dev/null +++ b/extensions/tn_vacuum/vacuum_test.go @@ -0,0 +1,111 @@ +package tn_vacuum + +import ( + "context" + "testing" + + "github.com/stretchr/testify/require" + "github.com/trufnetwork/kwil-db/common" + "github.com/trufnetwork/kwil-db/config" + "github.com/trufnetwork/kwil-db/core/log" +) + +type fakeTrigger struct { + started int + stopped int + endCalls int + lastCfg TriggerConfig +} + +func (f *fakeTrigger) Configure(ctx context.Context, cfg TriggerConfig, fire func(context.Context, FireOpts) error) error { + f.lastCfg = cfg + return nil +} +func (f *fakeTrigger) Start(ctx context.Context) error { + f.started++ + return nil +} +func (f *fakeTrigger) Stop(ctx context.Context) error { + f.stopped++ + return nil +} +func (f *fakeTrigger) OnLeaderChange(ctx context.Context, isLeader bool) error { return nil } +func (f *fakeTrigger) OnEndBlock(ctx context.Context, block *common.BlockContext) error { + f.endCalls++ + return nil +} +func (f *fakeTrigger) Kind() string { return "fake" } + +func TestLeaderCallbacksRespectEnabledFlag(t *testing.T) { + ctx := context.Background() + ResetForTest() + ext := GetExtension() + + fake := &fakeTrigger{} + mech := newMechanism() + require.NoError(t, 
mech.Prepare(ctx, MechanismDeps{Logger: log.New()})) + + ext.mu.Lock() + ext.logger = log.New() + ext.config = Config{Enabled: false, Trigger: TriggerConfig{Kind: TriggerManual}} + ext.trigger = fake + ext.mechanism = mech + ext.reloadIntervalBlocks = defaultReloadBlocks + ext.mu.Unlock() + + leaderAcquire(ctx, nil, nil) + require.Equal(t, 0, fake.started, "should not start when disabled") + + ext.mu.Lock() + ext.config.Enabled = true + ext.mu.Unlock() + + leaderAcquire(ctx, nil, nil) + require.Equal(t, 1, fake.started) + + leaderLose(ctx, nil, nil) + require.Equal(t, 1, fake.stopped) +} + +func TestLeaderEndBlockTriggersReload(t *testing.T) { + ctx := context.Background() + ResetForTest() + ext := GetExtension() + + fake := &fakeTrigger{} + mech := newMechanism() + require.NoError(t, mech.Prepare(ctx, MechanismDeps{Logger: log.New()})) + + ext.mu.Lock() + ext.logger = log.New() + ext.config = Config{Enabled: true, Trigger: TriggerConfig{Kind: TriggerManual}, ReloadIntervalBlocks: 3} + ext.trigger = fake + ext.mechanism = mech + ext.reloadIntervalBlocks = 3 + ext.lastConfigHeight = 1 + ext.mu.Unlock() + + // end block before reload threshold -> only end callback + leaderEndBlock(ctx, nil, &common.BlockContext{Height: 2}) + require.Equal(t, 1, fake.endCalls) + require.Equal(t, TriggerManual, ext.Config().Trigger.Kind) + + // prepare service config to swap trigger kind on reload + svc := &common.Service{ + Logger: log.New(), + LocalConfig: &config.Config{Extensions: map[string]map[string]string{ + ExtensionName: { + "enabled": "true", + "trigger": TriggerBlockInterval, + "block_interval": "5", + }, + }}, + } + app := &common.App{Service: svc} + + leaderEndBlock(ctx, app, &common.BlockContext{Height: 5}) + require.GreaterOrEqual(t, ext.LastConfigHeight(), int64(5)) + require.Equal(t, 2, fake.endCalls) // old trigger still sees callback before reload + require.Equal(t, TriggerBlockInterval, ext.Config().Trigger.Kind) + require.NotEqual(t, fake, ext.Trigger()) +} From 
5c662cd8519106e4325d83244f22e563d1714f02 Mon Sep 17 00:00:00 2001 From: Raffael Campos Date: Wed, 24 Sep 2025 12:06:36 -0300 Subject: [PATCH 02/13] refactor: migrate leader handling to leaderwatch extension - Removed the `leader.go` file and integrated leader acquisition, loss, and end block handling into the `leaderwatch` extension. - Updated tests in `leader_reload_test.go` to utilize the new leader handling functions. - Ensured compatibility with existing hooks while enhancing the management of leadership transitions. This refactor improves code organization and maintains the functionality of leadership management within the system. --- extensions/tn_digest/leader.go | 21 --- extensions/tn_digest/leader_reload_test.go | 25 ++-- extensions/tn_digest/tn_digest.go | 163 +++++++++++++-------- 3 files changed, 115 insertions(+), 94 deletions(-) delete mode 100644 extensions/tn_digest/leader.go diff --git a/extensions/tn_digest/leader.go b/extensions/tn_digest/leader.go deleted file mode 100644 index 4f168faa0..000000000 --- a/extensions/tn_digest/leader.go +++ /dev/null @@ -1,21 +0,0 @@ -package tn_digest - -import ( - "bytes" - - "github.com/trufnetwork/kwil-db/common" -) - -// isCurrentLeader compares the chain leader in block context with this node's identity. 
-func isCurrentLeader(app *common.App, block *common.BlockContext) bool { - if block == nil || block.ChainContext == nil || block.ChainContext.NetworkParameters == nil || app.Service == nil || app.Service.Identity == nil { - return false - } - leaderBytes := block.ChainContext.NetworkParameters.Leader.Bytes() - if len(leaderBytes) == 0 { - // warn that leader is not set - app.Service.Logger.Warn("leader is not set") - return false - } - return bytes.Equal(leaderBytes, app.Service.Identity) -} diff --git a/extensions/tn_digest/leader_reload_test.go b/extensions/tn_digest/leader_reload_test.go index ddb8c0c9d..5f7ae4c3b 100644 --- a/extensions/tn_digest/leader_reload_test.go +++ b/extensions/tn_digest/leader_reload_test.go @@ -147,10 +147,10 @@ func TestDigest_DefaultDisabled_NoSchedulerOnLeaderAcquire(t *testing.T) { ext.SetReloadIntervalBlocks(1000) identity := []byte("nodeA") app := &common.App{Service: makeService(identity, "1000")} + ext.SetService(app.Service) block := makeBlock(1, identity) - err := endBlockHook(context.Background(), app, block) - require.NoError(t, err) + digestLeaderAcquire(context.Background(), app, block) assert.Nil(t, ext.Scheduler()) } @@ -160,9 +160,10 @@ func TestDigest_LeaderAcquire_StartsScheduler_WhenEnabled(t *testing.T) { ext.SetReloadIntervalBlocks(1000) identity := []byte("nodeB") app := &common.App{Service: makeService(identity, "1000")} + ext.SetService(app.Service) block := makeBlock(1, identity) - require.NoError(t, endBlockHook(context.Background(), app, block)) + digestLeaderAcquire(context.Background(), app, block) require.NotNil(t, ext.Scheduler()) // cleanup _ = ext.Scheduler().Stop() @@ -174,14 +175,15 @@ func TestDigest_LoseLeadership_StopsScheduler(t *testing.T) { ext.SetReloadIntervalBlocks(1000) identity := []byte("nodeC") app := &common.App{Service: makeService(identity, "1000")} + ext.SetService(app.Service) // acquire leadership - require.NoError(t, endBlockHook(context.Background(), app, makeBlock(1, 
identity))) + digestLeaderAcquire(context.Background(), app, makeBlock(1, identity)) require.NotNil(t, ext.Scheduler()) // lose leadership other := []byte("other") - require.NoError(t, endBlockHook(context.Background(), app, makeBlock(2, other))) + digestLeaderLose(context.Background(), app, makeBlock(2, other)) // idempotent stop _ = ext.Scheduler().Stop() @@ -196,17 +198,18 @@ func TestDigest_Reload_EnablesAndStarts_WhenBecomesEnabled(t *testing.T) { ext.SetLastCheckedHeight(1) identity := []byte("nodeD") app := &common.App{Service: makeService(identity, "1")} + ext.SetService(app.Service) // attach EngineOps with fake DB that returns enabled on reload BEFORE first hook fdb := &fakeDB{enabled: true, schedule: "*/5 * * * *"} ext.SetEngineOps(digestinternal.NewEngineOperations(&fakeEngine{}, fdb, &fakeAccounts{}, log.New())) // leader at height 1: disabled, no scheduler - require.NoError(t, endBlockHook(context.Background(), app, makeBlock(1, identity))) + digestLeaderAcquire(context.Background(), app, makeBlock(1, identity)) assert.Nil(t, ext.Scheduler()) // height 2 triggers reload -> should enable and start - require.NoError(t, endBlockHook(context.Background(), app, makeBlock(2, identity))) + digestLeaderEndBlock(context.Background(), app, makeBlock(2, identity)) require.NotNil(t, ext.Scheduler()) _ = ext.Scheduler().Stop() } @@ -219,15 +222,16 @@ func TestDigest_Reload_DisablesAndStops_WhenBecomesDisabled(t *testing.T) { ext.SetLastCheckedHeight(1) identity := []byte("nodeE") app := &common.App{Service: makeService(identity, "1")} + ext.SetService(app.Service) // start as leader enabled (no reload yet) - require.NoError(t, endBlockHook(context.Background(), app, makeBlock(1, identity))) + digestLeaderAcquire(context.Background(), app, makeBlock(1, identity)) require.NotNil(t, ext.Scheduler()) // reload returns disabled fdb := &fakeDB{enabled: false, schedule: "*/5 * * * *"} ext.SetEngineOps(digestinternal.NewEngineOperations(&fakeEngine{}, fdb, 
&fakeAccounts{}, log.New())) - require.NoError(t, endBlockHook(context.Background(), app, makeBlock(2, identity))) + digestLeaderEndBlock(context.Background(), app, makeBlock(2, identity)) // stop should be idempotent _ = ext.Scheduler().Stop() @@ -239,9 +243,10 @@ func TestDigest_LeaderDetection_UsesNetworkParametersLeader(t *testing.T) { ext.SetReloadIntervalBlocks(1000) identity := []byte("nodeF") app := &common.App{Service: makeService(identity, "1000")} + ext.SetService(app.Service) // Proposer would be different, but we use NetworkParameters.Leader in makeBlock - require.NoError(t, endBlockHook(context.Background(), app, makeBlock(1, identity))) + digestLeaderAcquire(context.Background(), app, makeBlock(1, identity)) require.True(t, ext.IsLeader()) require.NotNil(t, ext.Scheduler()) _ = ext.Scheduler().Stop() diff --git a/extensions/tn_digest/tn_digest.go b/extensions/tn_digest/tn_digest.go index 92d1be2fb..4e438b3e2 100644 --- a/extensions/tn_digest/tn_digest.go +++ b/extensions/tn_digest/tn_digest.go @@ -9,6 +9,7 @@ import ( "github.com/trufnetwork/kwil-db/extensions/hooks" "github.com/trufnetwork/kwil-db/extensions/precompiles" sql "github.com/trufnetwork/kwil-db/node/types/sql" + "github.com/trufnetwork/node/extensions/leaderwatch" "github.com/trufnetwork/node/extensions/tn_digest/internal" ) @@ -24,10 +25,17 @@ func InitializeExtension() { if err := hooks.RegisterEngineReadyHook(ExtensionName+"_engine_ready", engineReadyHook); err != nil { panic(fmt.Sprintf("failed to register %s engine ready hook: %v", ExtensionName, err)) } - // Register end-block hook for leader gating + // Register end-block hook (kept for compatibility; actual leader handling via leaderwatch) if err := hooks.RegisterEndBlockHook(ExtensionName+"_end_block", endBlockHook); err != nil { panic(fmt.Sprintf("failed to register %s end block hook: %v", ExtensionName, err)) } + if err := leaderwatch.Register(ExtensionName, leaderwatch.Callbacks{ + OnAcquire: digestLeaderAcquire, + OnLose: 
digestLeaderLose, + OnEndBlock: digestLeaderEndBlock, + }); err != nil { + panic(fmt.Sprintf("failed to register %s leader watcher: %v", ExtensionName, err)) + } } // InitializeDigestPrecompile makes the extension visible in logs @@ -91,77 +99,106 @@ func engineReadyHook(ctx context.Context, app *common.App) error { // endBlockHook toggles scheduler based on leader status and config func endBlockHook(ctx context.Context, app *common.App, block *common.BlockContext) error { + return nil +} + +func digestLeaderAcquire(ctx context.Context, app *common.App, block *common.BlockContext) { ext := GetExtension() if ext == nil { - return nil + return } - - // Determine leader: compare NetworkParameters.Leader with node identity - isLeader := isCurrentLeader(app, block) - - prev := ext.IsLeader() - if !prev && isLeader { - // became leader: start scheduler if enabled - if ext.ConfigEnabled() { - // lazily create scheduler if missing using app.Service (tests may not set ext.Service) - if ext.ensureSchedulerWithService(app.Service) { - // created scheduler, continue to start below - } - if ext.Scheduler() == nil { // still missing due to missing prereqs - ext.Logger().Debug("tn_digest: prerequisites missing; deferring start until broadcaster/signer/engine/service are available") - } else if err := ext.startScheduler(ctx); err != nil { - ext.Logger().Warn("failed to start tn_digest scheduler on leader acquire", "error", err) - } else { - ext.Logger().Info("tn_digest started (leader)", "schedule", ext.Schedule()) - } + ext.setLeader(true) + if !ext.ConfigEnabled() { + return + } + service := ext.Service() + if app != nil && app.Service != nil { + service = app.Service + if ext.Service() == nil { + ext.SetService(service) } } - if prev && !isLeader { - // lost leadership: stop scheduler if running - ext.stopSchedulerIfRunning() + if ext.ensureSchedulerWithService(service) { + // scheduler created; fall through to start + } + if ext.Scheduler() == nil { + 
ext.Logger().Debug("tn_digest: prerequisites missing; deferring start until broadcaster/signer/engine/service are available") + return + } + if err := ext.startScheduler(ctx); err != nil { + ext.Logger().Warn("failed to start tn_digest scheduler on leader acquire", "error", err) + } else { + ext.Logger().Info("tn_digest started (leader)", "schedule", ext.Schedule()) + } +} + +func digestLeaderLose(ctx context.Context, app *common.App, block *common.BlockContext) { + ext := GetExtension() + if ext == nil { + return + } + ext.setLeader(false) + ext.stopSchedulerIfRunning() + if ext.Logger() != nil { ext.Logger().Info("tn_digest stopped (lost leadership)") } - ext.setLeader(isLeader) - - // Periodic config reload - if block != nil && ext.ReloadIntervalBlocks() > 0 { - if block.Height-ext.LastCheckedHeight() >= ext.ReloadIntervalBlocks() { - if ext.EngineOps() != nil { - enabled, schedule, _ := ext.EngineOps().LoadDigestConfig(ctx) - // Only act if changed - if enabled != ext.ConfigEnabled() || (schedule != "" && schedule != ext.Schedule()) { - // fallback to default if schedule empty - if schedule == "" { - schedule = DefaultDigestSchedule - } - ext.SetConfig(enabled, schedule) - // reconcile based on new config and current leadership - if !enabled { - // disabled -> stop if running - ext.stopSchedulerIfRunning() - ext.Logger().Info("tn_digest stopped due to config disabled") - } else if isLeader { - // enabled and leader -> (re)start with new schedule - if ext.Scheduler() == nil && !ext.ensureSchedulerWithService(app.Service) { - ext.Logger().Debug("tn_digest: prerequisites missing; deferring (re)start after config update") - } else if err := func() error { - // stop if existing, then start - if ext.Scheduler() != nil { - ext.stopSchedulerIfRunning() - } - return ext.startScheduler(ctx) - }(); err != nil { - ext.Logger().Warn("failed to (re)start tn_digest scheduler after config update", "error", err) - } else { - ext.Logger().Info("tn_digest (re)started with new 
schedule", "schedule", ext.Schedule()) - } - } +} + +func digestLeaderEndBlock(ctx context.Context, app *common.App, block *common.BlockContext) { + ext := GetExtension() + if ext == nil { + return + } + + if block == nil { + return + } + + reload := ext.ReloadIntervalBlocks() + if reload <= 0 { + return + } + + if block.Height-ext.LastCheckedHeight() < reload { + return + } + + if ext.EngineOps() == nil { + ext.Logger().Debug("tn_digest: skip reload; EngineOps not ready") + ext.SetLastCheckedHeight(block.Height) + return + } + + enabled, schedule, _ := ext.EngineOps().LoadDigestConfig(ctx) + if schedule == "" { + schedule = DefaultDigestSchedule + } + + if enabled != ext.ConfigEnabled() || schedule != ext.Schedule() { + ext.SetConfig(enabled, schedule) + if !enabled { + ext.stopSchedulerIfRunning() + ext.Logger().Info("tn_digest stopped due to config disabled") + } else if ext.IsLeader() { + service := ext.Service() + if app != nil && app.Service != nil { + service = app.Service + if ext.Service() == nil { + ext.SetService(service) + } + } + if ext.Scheduler() == nil && !ext.ensureSchedulerWithService(service) { + ext.Logger().Debug("tn_digest: prerequisites missing; deferring (re)start after config update") + } else if ext.Scheduler() != nil { + ext.stopSchedulerIfRunning() + if err := ext.startScheduler(ctx); err != nil { + ext.Logger().Warn("failed to (re)start tn_digest scheduler after config update", "error", err) + } else { + ext.Logger().Info("tn_digest (re)started with new schedule", "schedule", ext.Schedule()) } - } else { - ext.Logger().Debug("tn_digest: skip reload; EngineOps not ready") } - ext.SetLastCheckedHeight(block.Height) } } - return nil + + ext.SetLastCheckedHeight(block.Height) } From d0bddb96a11b88b41f6d9e0d757dbd0f46a5c202 Mon Sep 17 00:00:00 2001 From: Raffael Campos Date: Wed, 24 Sep 2025 12:33:18 -0300 Subject: [PATCH 03/13] feat: implement pg_repack mechanism for vacuum operations - Added a new `pgRepackMechanism` to the `tn_vacuum` 
extension, which utilizes the `pg_repack` binary for vacuum operations. - Updated the Dockerfile to include the `pg_repack` binary installation alongside the PostgreSQL client tools. - Modified the `compose.yaml` to allow dynamic image tagging for the `tn-db` service. - Enhanced tests to verify the availability of the `pg_repack` binary and its integration within the vacuum mechanism. This addition improves the vacuuming capabilities of the system, ensuring more efficient database maintenance. --- compose.yaml | 8 ++-- deployments/Dockerfile | 4 +- extensions/tn_vacuum/mechanism.go | 26 ++++-------- extensions/tn_vacuum/mechanism_repack.go | 50 ++++++++++++++++++++++++ extensions/tn_vacuum/vacuum_test.go | 39 ++++++++++++++++++ 5 files changed, 102 insertions(+), 25 deletions(-) create mode 100644 extensions/tn_vacuum/mechanism_repack.go diff --git a/compose.yaml b/compose.yaml index f7bfa1023..89db4aebc 100644 --- a/compose.yaml +++ b/compose.yaml @@ -28,7 +28,7 @@ services: tn-db: container_name: tn-db hostname: tn-db - image: "ghcr.io/trufnetwork/node:local" + image: "ghcr.io/trufnetwork/node:${TN_IMAGE:-local}" restart: unless-stopped build: context: . 
@@ -37,9 +37,9 @@ services: CONFIG_PATH: /root/.kwild # app.pg-db-host KWILD_DB_HOST: kwil-postgres - # Optionally supply SETUP_DB_OWNER to override the owner derived from the generated node key - SETUP_DB_OWNER: ${SETUP_DB_OWNER:-} - SETUP_CHAIN_ID: ${SETUP_CHAIN_ID:-trufnetwork-dev} + # Optionally supply SETUP_DB_OWNER to override the owner derived from the generated node key + SETUP_DB_OWNER: ${SETUP_DB_OWNER:-} + SETUP_CHAIN_ID: ${SETUP_CHAIN_ID:-trufnetwork-dev} ports: - "50051:50051" - "${TN_RPC_PORT:-8484}:8484" diff --git a/deployments/Dockerfile b/deployments/Dockerfile index d7b8906c8..902819ff2 100644 --- a/deployments/Dockerfile +++ b/deployments/Dockerfile @@ -29,8 +29,8 @@ ENV CONFIG_PATH=/root/.kwild WORKDIR /app -# add postgres client tools -RUN apk add --no-cache postgresql16-client +# add postgres client tools and pg_repack binary used by tn_vacuum +RUN apk add --no-cache postgresql16-client pg_repack # move .build content to /app COPY --from=build /app/.build/* /app/ diff --git a/extensions/tn_vacuum/mechanism.go b/extensions/tn_vacuum/mechanism.go index 0f9388ea5..43ebd0404 100644 --- a/extensions/tn_vacuum/mechanism.go +++ b/extensions/tn_vacuum/mechanism.go @@ -26,28 +26,16 @@ type RunReport struct { Status string } -func newMechanism() Mechanism { - return &vacuumStubMechanism{} -} - -type vacuumStubMechanism struct { - logger log.Logger -} - -func (m *vacuumStubMechanism) Name() string { return "vacuum_stub" } +var mechanismFactory = func() Mechanism { return NewPgRepackMechanism() } -func (m *vacuumStubMechanism) Prepare(ctx context.Context, deps MechanismDeps) error { - m.logger = deps.Logger.New("mechanism.vacuum_stub") - m.logger.Info("vacuum stub prepared") - return nil +func newMechanism() Mechanism { + return mechanismFactory() } -func (m *vacuumStubMechanism) Run(ctx context.Context, req RunRequest) (*RunReport, error) { - m.logger.Info("vacuum stub run", "reason", req.Reason) - return &RunReport{Mechanism: m.Name(), Status: "ok"}, 
nil +func setMechanismFactoryForTest(f func() Mechanism) { + mechanismFactory = f } -func (m *vacuumStubMechanism) Close(ctx context.Context) error { - m.logger.Info("vacuum stub closed") - return nil +func resetMechanismFactory() { + mechanismFactory = func() Mechanism { return NewPgRepackMechanism() } } diff --git a/extensions/tn_vacuum/mechanism_repack.go b/extensions/tn_vacuum/mechanism_repack.go new file mode 100644 index 000000000..021d0e4b3 --- /dev/null +++ b/extensions/tn_vacuum/mechanism_repack.go @@ -0,0 +1,50 @@ +package tn_vacuum + +import ( + "context" + "errors" + "fmt" + "os/exec" + + "github.com/trufnetwork/kwil-db/core/log" +) + +var ErrPgRepackUnavailable = errors.New("pg_repack binary not found in PATH") + +type pgRepackMechanism struct { + logger log.Logger + binaryPath string +} + +func NewPgRepackMechanism() Mechanism { + return &pgRepackMechanism{} +} + +func (m *pgRepackMechanism) Name() string { return "pg_repack" } + +func (m *pgRepackMechanism) Prepare(ctx context.Context, deps MechanismDeps) error { + m.logger = deps.Logger.New("mechanism.pg_repack") + path, err := exec.LookPath("pg_repack") + if err != nil { + m.logger.Warn("pg_repack binary not found; vacuum runs will fail until available", "error", err) + return ErrPgRepackUnavailable + } + m.binaryPath = path + m.logger.Info("pg_repack binary detected", "path", path) + return nil +} + +func (m *pgRepackMechanism) Run(ctx context.Context, req RunRequest) (*RunReport, error) { + if m.binaryPath == "" { + return nil, fmt.Errorf("pg_repack unavailable: %w", ErrPgRepackUnavailable) + } + m.logger.Info("pg_repack stub run", "reason", req.Reason, "binary", m.binaryPath) + return &RunReport{Mechanism: m.Name(), Status: "ok"}, nil +} + +func (m *pgRepackMechanism) Close(ctx context.Context) error { + if m.logger != nil { + m.logger.Info("pg_repack mechanism closed") + } + return nil +} diff --git a/extensions/tn_vacuum/vacuum_test.go b/extensions/tn_vacuum/vacuum_test.go index 
0433f1f88..4b3168f01 100644 --- a/extensions/tn_vacuum/vacuum_test.go +++ b/extensions/tn_vacuum/vacuum_test.go @@ -2,6 +2,7 @@ package tn_vacuum import ( "context" + "os" "testing" "github.com/stretchr/testify/require" @@ -17,6 +18,20 @@ type fakeTrigger struct { lastCfg TriggerConfig } +type stubMechanism struct { + prepared bool +} + +func (s *stubMechanism) Name() string { return "stub" } +func (s *stubMechanism) Prepare(ctx context.Context, deps MechanismDeps) error { + s.prepared = true + return nil +} +func (s *stubMechanism) Run(ctx context.Context, req RunRequest) (*RunReport, error) { + return &RunReport{Mechanism: s.Name(), Status: "ok"}, nil +} +func (s *stubMechanism) Close(ctx context.Context) error { return nil } + func (f *fakeTrigger) Configure(ctx context.Context, cfg TriggerConfig, fire func(context.Context, FireOpts) error) error { f.lastCfg = cfg return nil @@ -40,6 +55,8 @@ func TestLeaderCallbacksRespectEnabledFlag(t *testing.T) { ctx := context.Background() ResetForTest() ext := GetExtension() + setMechanismFactoryForTest(func() Mechanism { return &stubMechanism{} }) + defer resetMechanismFactory() fake := &fakeTrigger{} mech := newMechanism() @@ -71,6 +88,8 @@ func TestLeaderEndBlockTriggersReload(t *testing.T) { ctx := context.Background() ResetForTest() ext := GetExtension() + setMechanismFactoryForTest(func() Mechanism { return &stubMechanism{} }) + defer resetMechanismFactory() fake := &fakeTrigger{} mech := newMechanism() @@ -109,3 +128,23 @@ func TestLeaderEndBlockTriggersReload(t *testing.T) { require.Equal(t, TriggerBlockInterval, ext.Config().Trigger.Kind) require.NotEqual(t, fake, ext.Trigger()) } + +func TestPgRepackMechanismRequiresBinary(t *testing.T) { + ctx := context.Background() + resetMechanismFactory() + mech := newMechanism() + pr, ok := mech.(*pgRepackMechanism) + require.True(t, ok, "mechanism should be pgRepackMechanism") + + oldPath := os.Getenv("PATH") + require.NoError(t, os.Setenv("PATH", "")) + defer 
os.Setenv("PATH", oldPath) + + err := mech.Prepare(ctx, MechanismDeps{Logger: log.New()}) + require.ErrorIs(t, err, ErrPgRepackUnavailable) + + _, runErr := mech.Run(ctx, RunRequest{Reason: "test"}) + require.ErrorIs(t, runErr, ErrPgRepackUnavailable) + + require.NoError(t, pr.Close(ctx)) +} From 754bd44d77631995cc3596f163f2f30758820add Mon Sep 17 00:00:00 2001 From: Raffael Campos Date: Wed, 24 Sep 2025 12:58:30 -0300 Subject: [PATCH 04/13] feat: enhance pg_repack mechanism with database connection handling - Introduced a new `dbConnFromService` function to extract database connection details from the service configuration. - Updated the `pgRepackMechanism` to utilize the database connection information during preparation and execution. - Modified the `RunnerArgs` structure to include `DBConnConfig`, ensuring that the database connection is passed through the execution flow. - Enhanced tests to validate the integration of database connection handling within the `pg_repack` mechanism. These changes improve the robustness of the vacuum operations by ensuring proper database connectivity and configuration management. 
--- extensions/tn_vacuum/extension.go | 23 +++++- extensions/tn_vacuum/mechanism.go | 22 ++++-- extensions/tn_vacuum/mechanism_repack.go | 90 +++++++++++++++++++++++- extensions/tn_vacuum/runner.go | 3 +- extensions/tn_vacuum/vacuum_test.go | 6 +- 5 files changed, 132 insertions(+), 12 deletions(-) diff --git a/extensions/tn_vacuum/extension.go b/extensions/tn_vacuum/extension.go index 058c1d628..6515f59f5 100644 --- a/extensions/tn_vacuum/extension.go +++ b/extensions/tn_vacuum/extension.go @@ -176,6 +176,7 @@ func (e *Extension) reconfigure(ctx context.Context, cfg Config, deps MechanismD } mech := newMechanism() + deps.DB = dbConnFromService(e.service) if err := mech.Prepare(ctx, deps); err != nil { return err } @@ -185,7 +186,13 @@ func (e *Extension) reconfigure(ctx context.Context, cfg Config, deps MechanismD return err } fire := func(callCtx context.Context, opts FireOpts) error { - return e.ensureRunner().Execute(callCtx, RunnerArgs{Mechanism: mech, Trigger: trig, Logger: e.logger, Reason: opts.Reason}) + return e.ensureRunner().Execute(callCtx, RunnerArgs{ + Mechanism: mech, + Trigger: trig, + Logger: e.logger, + Reason: opts.Reason, + DB: dbConnFromService(e.service), + }) } if err := trig.Configure(ctx, cfg.Trigger, fire); err != nil { return err @@ -211,3 +218,17 @@ func (e *Extension) startTriggerIfLeader(ctx context.Context) { } _ = trig.Start(ctx) } + +func dbConnFromService(service *common.Service) DBConnConfig { + if service == nil || service.LocalConfig == nil { + return DBConnConfig{} + } + db := service.LocalConfig.DB + return DBConnConfig{ + Host: db.Host, + Port: db.Port, + User: db.User, + Password: db.Pass, + Database: db.DBName, + } +} diff --git a/extensions/tn_vacuum/mechanism.go b/extensions/tn_vacuum/mechanism.go index 43ebd0404..24fee38b7 100644 --- a/extensions/tn_vacuum/mechanism.go +++ b/extensions/tn_vacuum/mechanism.go @@ -7,18 +7,20 @@ import ( ) type Mechanism interface { - Name() string - Prepare(ctx context.Context, deps 
MechanismDeps) error - Run(ctx context.Context, req RunRequest) (*RunReport, error) - Close(ctx context.Context) error + Name() string + Prepare(ctx context.Context, deps MechanismDeps) error + Run(ctx context.Context, req RunRequest) (*RunReport, error) + Close(ctx context.Context) error } type MechanismDeps struct { - Logger log.Logger + Logger log.Logger + DB DBConnConfig } type RunRequest struct { - Reason string + Reason string + DB DBConnConfig } type RunReport struct { @@ -26,6 +28,14 @@ type RunReport struct { Status string } +type DBConnConfig struct { + Host string + Port string + User string + Password string + Database string +} + var mechanismFactory = func() Mechanism { return NewPgRepackMechanism() } func newMechanism() Mechanism { diff --git a/extensions/tn_vacuum/mechanism_repack.go b/extensions/tn_vacuum/mechanism_repack.go index 021d0e4b3..b41e54cea 100644 --- a/extensions/tn_vacuum/mechanism_repack.go +++ b/extensions/tn_vacuum/mechanism_repack.go @@ -1,11 +1,15 @@ package tn_vacuum import ( + "bytes" "context" "errors" "fmt" + "os" "os/exec" + "strings" + "github.com/jackc/pgx/v5" "github.com/trufnetwork/kwil-db/core/log" ) @@ -14,6 +18,7 @@ var ErrPgRepackUnavailable = errors.New("pg_repack binary not found in PATH") type pgRepackMechanism struct { logger log.Logger binaryPath string + db DBConnConfig } func NewPgRepackMechanism() Mechanism { @@ -24,6 +29,7 @@ func (m *pgRepackMechanism) Name() string { return "pg_repack" } func (m *pgRepackMechanism) Prepare(ctx context.Context, deps MechanismDeps) error { m.logger = deps.Logger.New("mechanism.pg_repack") + m.db = deps.DB path, err := exec.LookPath("pg_repack") if err != nil { m.logger.Warn("pg_repack binary not found; vacuum runs will fail until available", "error", err) @@ -31,6 +37,9 @@ func (m *pgRepackMechanism) Prepare(ctx context.Context, deps MechanismDeps) err } m.binaryPath = path m.logger.Info("pg_repack binary detected", "path", path) + if err := ensurePgRepackExtension(ctx, 
deps.DB, m.logger); err != nil { + return fmt.Errorf("ensure pg_repack extension: %w", err) + } return nil } @@ -38,7 +47,42 @@ func (m *pgRepackMechanism) Run(ctx context.Context, req RunRequest) (*RunReport if m.binaryPath == "" { return nil, fmt.Errorf("pg_repack unavailable: %w", ErrPgRepackUnavailable) } - m.logger.Info("pg_repack stub run", "reason", req.Reason, "binary", m.binaryPath) + db := req.DB + if db.Database == "" { + db = m.db + } + if db.Database == "" { + return nil, fmt.Errorf("pg_repack requires database name") + } + + args := []string{fmt.Sprintf("--dbname=%s", db.Database), "--all"} + if db.Host != "" { + args = append(args, fmt.Sprintf("--host=%s", db.Host)) + } + if db.Port != "" { + args = append(args, fmt.Sprintf("--port=%s", db.Port)) + } + if db.User != "" { + args = append(args, fmt.Sprintf("--username=%s", db.User)) + } + + cmd := exec.CommandContext(ctx, m.binaryPath, args...) + env := os.Environ() + if db.Password != "" { + env = append(env, fmt.Sprintf("PGPASSWORD=%s", db.Password)) + } + cmd.Env = env + + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + m.logger.Info("pg_repack starting", "args", args) + if err := cmd.Run(); err != nil { + m.logger.Warn("pg_repack failed", "error", err, "stderr", stderr.String()) + return nil, fmt.Errorf("pg_repack execution failed: %w", err) + } + m.logger.Info("pg_repack completed", "stdout", stdout.String()) return &RunReport{Mechanism: m.Name(), Status: "ok"}, nil } @@ -48,3 +92,47 @@ func (m *pgRepackMechanism) Close(ctx context.Context) error { } return nil } + +func ensurePgRepackExtension(ctx context.Context, db DBConnConfig, logger log.Logger) error { + if db.Database == "" { + return fmt.Errorf("missing database name for pg_repack extension setup") + } + connStr := buildConnString(db) + conn, err := pgx.Connect(ctx, connStr) + if err != nil { + logger.Warn("failed to connect to database for pg_repack extension", "error", err) + return fmt.Errorf("pg_repack 
extension connection: %w", err) + } + defer conn.Close(ctx) + + if _, err := conn.Exec(ctx, "CREATE EXTENSION IF NOT EXISTS pg_repack"); err != nil { + logger.Warn("failed to create pg_repack extension", "error", err) + return fmt.Errorf("create pg_repack extension: %w", err) + } + logger.Info("pg_repack extension ensured") + return nil +} + +func buildConnString(db DBConnConfig) string { + host := db.Host + if host == "" { + host = "127.0.0.1" + } + port := db.Port + if port == "" { + port = "5432" + } + parts := []string{ + fmt.Sprintf("host=%s", host), + fmt.Sprintf("port=%s", port), + fmt.Sprintf("dbname=%s", db.Database), + "sslmode=disable", + } + if db.User != "" { + parts = append(parts, fmt.Sprintf("user=%s", db.User)) + } + if db.Password != "" { + parts = append(parts, fmt.Sprintf("password=%s", db.Password)) + } + return strings.Join(parts, " ") +} diff --git a/extensions/tn_vacuum/runner.go b/extensions/tn_vacuum/runner.go index bd241d76a..95583bc67 100644 --- a/extensions/tn_vacuum/runner.go +++ b/extensions/tn_vacuum/runner.go @@ -15,6 +15,7 @@ type RunnerArgs struct { Trigger Trigger Logger log.Logger Reason string + DB DBConnConfig } func (r *Runner) Execute(ctx context.Context, args RunnerArgs) error { @@ -28,7 +29,7 @@ func (r *Runner) Execute(ctx context.Context, args RunnerArgs) error { if logger != nil { logger.Info("vacuum runner executing", "mechanism", args.Mechanism.Name(), "reason", args.Reason) } - _, err := args.Mechanism.Run(ctx, RunRequest{Reason: args.Reason}) + _, err := args.Mechanism.Run(ctx, RunRequest{Reason: args.Reason, DB: args.DB}) if err != nil { if logger != nil { logger.Warn("vacuum runner failed", "error", err) diff --git a/extensions/tn_vacuum/vacuum_test.go b/extensions/tn_vacuum/vacuum_test.go index 4b3168f01..e5f7d529a 100644 --- a/extensions/tn_vacuum/vacuum_test.go +++ b/extensions/tn_vacuum/vacuum_test.go @@ -60,7 +60,7 @@ func TestLeaderCallbacksRespectEnabledFlag(t *testing.T) { fake := &fakeTrigger{} mech := 
newMechanism() - require.NoError(t, mech.Prepare(ctx, MechanismDeps{Logger: log.New()})) + require.NoError(t, mech.Prepare(ctx, MechanismDeps{Logger: log.New(), DB: DBConnConfig{Database: "kwild"}})) ext.mu.Lock() ext.logger = log.New() @@ -93,7 +93,7 @@ func TestLeaderEndBlockTriggersReload(t *testing.T) { fake := &fakeTrigger{} mech := newMechanism() - require.NoError(t, mech.Prepare(ctx, MechanismDeps{Logger: log.New()})) + require.NoError(t, mech.Prepare(ctx, MechanismDeps{Logger: log.New(), DB: DBConnConfig{Database: "kwild"}})) ext.mu.Lock() ext.logger = log.New() @@ -140,7 +140,7 @@ func TestPgRepackMechanismRequiresBinary(t *testing.T) { require.NoError(t, os.Setenv("PATH", "")) defer os.Setenv("PATH", oldPath) - err := mech.Prepare(ctx, MechanismDeps{Logger: log.New()}) + err := mech.Prepare(ctx, MechanismDeps{Logger: log.New(), DB: DBConnConfig{Database: "kwild"}}) require.ErrorIs(t, err, ErrPgRepackUnavailable) _, runErr := mech.Run(ctx, RunRequest{Reason: "test"}) From fcf6fd2fdfc55579422a0d94a140a378e6fb707b Mon Sep 17 00:00:00 2001 From: Raffael Campos Date: Mon, 29 Sep 2025 09:41:38 -0300 Subject: [PATCH 05/13] refactor: simplify tn_vacuum configuration and remove unused triggers - Refactored the `Config` structure in `config.go` to streamline configuration loading by removing the `TriggerConfig` and related fields. - Updated the `LoadConfig` function to directly handle block interval configuration. - Removed unused trigger implementations (`trigger_block_interval.go`, `trigger_cron.go`, `trigger_digest.go`, `trigger_manual.go`, and `trigger.go`) to clean up the codebase. - Adjusted the `Extension` struct and related methods to reflect the removal of triggers and ensure proper configuration handling. - Enhanced tests to validate the new configuration logic and ensure the mechanism operates correctly without the removed triggers. 
These changes improve the maintainability of the `tn_vacuum` extension by simplifying its configuration and reducing unnecessary complexity. --- extensions/tn_vacuum/config.go | 58 +---- extensions/tn_vacuum/constants.go | 10 +- extensions/tn_vacuum/extension.go | 198 +++++------------- extensions/tn_vacuum/mechanism.go | 26 +-- extensions/tn_vacuum/runner.go | 1 - extensions/tn_vacuum/tn_vacuum.go | 83 ++------ extensions/tn_vacuum/trigger.go | 38 ---- .../tn_vacuum/trigger_block_interval.go | 78 ------- extensions/tn_vacuum/trigger_cron.go | 92 -------- extensions/tn_vacuum/trigger_digest.go | 62 ------ extensions/tn_vacuum/trigger_manual.go | 62 ------ extensions/tn_vacuum/vacuum_test.go | 149 +++++-------- 12 files changed, 147 insertions(+), 710 deletions(-) delete mode 100644 extensions/tn_vacuum/trigger.go delete mode 100644 extensions/tn_vacuum/trigger_block_interval.go delete mode 100644 extensions/tn_vacuum/trigger_cron.go delete mode 100644 extensions/tn_vacuum/trigger_digest.go delete mode 100644 extensions/tn_vacuum/trigger_manual.go diff --git a/extensions/tn_vacuum/config.go b/extensions/tn_vacuum/config.go index 6e0b6ebb3..a8f3a7146 100644 --- a/extensions/tn_vacuum/config.go +++ b/extensions/tn_vacuum/config.go @@ -9,25 +9,12 @@ import ( ) type Config struct { - Enabled bool - - Trigger TriggerConfig - ReloadIntervalBlocks int64 -} - -type TriggerConfig struct { - Kind string + Enabled bool BlockInterval int64 - CronSchedule string } func LoadConfig(service *common.Service) (Config, error) { - cfg := Config{ - Trigger: TriggerConfig{ - Kind: triggerFromString(""), - }, - ReloadIntervalBlocks: defaultReloadBlocks, - } + cfg := Config{Enabled: true, BlockInterval: defaultBlockInterval} if service == nil || service.LocalConfig == nil { return cfg, nil @@ -46,34 +33,18 @@ func LoadConfig(service *common.Service) (Config, error) { cfg.Enabled = boolVal } - if v, ok := raw["trigger"]; ok { - cfg.Trigger.Kind = triggerFromString(v) - } - if v, ok := 
raw["block_interval"]; ok { val, err := strconv.ParseInt(strings.TrimSpace(v), 10, 64) if err != nil { return cfg, fmt.Errorf("parse block_interval: %w", err) } + if val <= 0 { + val = defaultBlockInterval + } if val < minBlockInterval { val = minBlockInterval } - cfg.Trigger.BlockInterval = val - } - - if v, ok := raw["cron_schedule"]; ok { - cfg.Trigger.CronSchedule = strings.TrimSpace(v) - } - - if v, ok := raw["reload_interval_blocks"]; ok { - val, err := strconv.ParseInt(strings.TrimSpace(v), 10, 64) - if err != nil { - return cfg, fmt.Errorf("parse reload_interval_blocks: %w", err) - } - if val <= 0 { - val = defaultReloadBlocks - } - cfg.ReloadIntervalBlocks = val + cfg.BlockInterval = val } return cfg, nil @@ -89,20 +60,3 @@ func parseBool(in string) (bool, error) { return false, fmt.Errorf("invalid bool %q", in) } } - -func triggerFromString(in string) string { - switch strings.ToLower(strings.TrimSpace(in)) { - case TriggerDigestCoupled: - return TriggerDigestCoupled - case TriggerBlockInterval: - return TriggerBlockInterval - case TriggerCron: - return TriggerCron - case TriggerManual: - return TriggerManual - case "": - return defaultTrigger - default: - return defaultTrigger - } -} diff --git a/extensions/tn_vacuum/constants.go b/extensions/tn_vacuum/constants.go index a3b2d6783..45513dbf1 100644 --- a/extensions/tn_vacuum/constants.go +++ b/extensions/tn_vacuum/constants.go @@ -3,15 +3,9 @@ package tn_vacuum const ( // ExtensionName is used for hook registration and config namespace. 
ExtensionName = "tn_vacuum" - - TriggerDigestCoupled = "digest_coupled" - TriggerBlockInterval = "block_interval" - TriggerCron = "cron" - TriggerManual = "manual" ) const ( - defaultTrigger = TriggerManual - defaultReloadBlocks = int64(1000) - minBlockInterval = int64(1) + defaultBlockInterval = int64(50000) + minBlockInterval = int64(1) ) diff --git a/extensions/tn_vacuum/extension.go b/extensions/tn_vacuum/extension.go index 6515f59f5..ea3205206 100644 --- a/extensions/tn_vacuum/extension.go +++ b/extensions/tn_vacuum/extension.go @@ -2,6 +2,7 @@ package tn_vacuum import ( "context" + "fmt" "sync" "github.com/trufnetwork/kwil-db/common" @@ -9,16 +10,13 @@ import ( ) type Extension struct { - mu sync.RWMutex - logger log.Logger - service *common.Service - config Config - trigger Trigger - mechanism Mechanism - runner *Runner - isLeader bool - reloadIntervalBlocks int64 - lastConfigHeight int64 + mu sync.RWMutex + logger log.Logger + service *common.Service + config Config + mechanism Mechanism + runner *Runner + lastRunHeight int64 } var ( @@ -29,8 +27,7 @@ var ( func GetExtension() *Extension { once.Do(func() { extInstance = &Extension{ - logger: log.New(log.WithLevel(log.LevelInfo)), - reloadIntervalBlocks: defaultReloadBlocks, + logger: log.New(log.WithLevel(log.LevelInfo)), } }) return extInstance @@ -57,166 +54,87 @@ func (e *Extension) setLogger(l log.Logger) { e.logger = l } -func (e *Extension) Service() *common.Service { - e.mu.RLock() - defer e.mu.RUnlock() - return e.service -} - func (e *Extension) setService(s *common.Service) { e.mu.Lock() defer e.mu.Unlock() e.service = s } -func (e *Extension) Config() Config { - e.mu.RLock() - defer e.mu.RUnlock() - return e.config -} - -func (e *Extension) setConfig(cfg Config) { - e.mu.Lock() - defer e.mu.Unlock() - e.config = cfg -} - -func (e *Extension) setMechanism(m Mechanism) { +func (e *Extension) configure(ctx context.Context, cfg Config) error { e.mu.Lock() defer e.mu.Unlock() - e.mechanism = m -} - 
-func (e *Extension) Mechanism() Mechanism { - e.mu.RLock() - defer e.mu.RUnlock() - return e.mechanism -} - -func (e *Extension) setTrigger(t Trigger) { - e.mu.Lock() - defer e.mu.Unlock() - e.trigger = t -} - -func (e *Extension) Trigger() Trigger { - e.mu.RLock() - defer e.mu.RUnlock() - return e.trigger -} - -func (e *Extension) ensureRunner() *Runner { - e.mu.Lock() - defer e.mu.Unlock() - if e.runner == nil { - e.runner = &Runner{logger: e.logger} - } - return e.runner -} - -func (e *Extension) setLeader(v bool) { - e.mu.Lock() - defer e.mu.Unlock() - e.isLeader = v -} - -func (e *Extension) IsLeader() bool { - e.mu.RLock() - defer e.mu.RUnlock() - return e.isLeader -} - -func (e *Extension) SetReloadIntervalBlocks(v int64) { - e.mu.Lock() - defer e.mu.Unlock() - e.reloadIntervalBlocks = v -} - -func (e *Extension) ReloadIntervalBlocks() int64 { - e.mu.RLock() - defer e.mu.RUnlock() - return e.reloadIntervalBlocks -} - -func (e *Extension) SetLastConfigHeight(h int64) { - e.mu.Lock() - defer e.mu.Unlock() - e.lastConfigHeight = h -} -func (e *Extension) LastConfigHeight() int64 { - e.mu.RLock() - defer e.mu.RUnlock() - return e.lastConfigHeight -} - -func (e *Extension) Close(ctx context.Context) { - e.mu.Lock() - defer e.mu.Unlock() - if e.trigger != nil { - _ = e.trigger.Stop(ctx) - e.trigger = nil - } if e.mechanism != nil { _ = e.mechanism.Close(ctx) e.mechanism = nil } - e.runner = nil -} -func (e *Extension) reconfigure(ctx context.Context, cfg Config, deps MechanismDeps) error { - e.mu.Lock() - defer e.mu.Unlock() + e.config = cfg + e.lastRunHeight = 0 - if e.trigger != nil { - _ = e.trigger.Stop(ctx) - } - if e.mechanism != nil { - _ = e.mechanism.Close(ctx) + if !cfg.Enabled { + return nil } mech := newMechanism() - deps.DB = dbConnFromService(e.service) + deps := MechanismDeps{Logger: e.logger, DB: dbConnFromService(e.service)} if err := mech.Prepare(ctx, deps); err != nil { return err } - trig, err := newTrigger(cfg.Trigger.Kind) - if err != nil { 
- return err - } - fire := func(callCtx context.Context, opts FireOpts) error { - return e.ensureRunner().Execute(callCtx, RunnerArgs{ - Mechanism: mech, - Trigger: trig, - Logger: e.logger, - Reason: opts.Reason, - DB: dbConnFromService(e.service), - }) - } - if err := trig.Configure(ctx, cfg.Trigger, fire); err != nil { - return err - } - - e.reloadIntervalBlocks = cfg.ReloadIntervalBlocks - e.config = cfg e.mechanism = mech - e.trigger = trig e.runner = &Runner{logger: e.logger} - return nil } -func (e *Extension) startTriggerIfLeader(ctx context.Context) { +func (e *Extension) maybeRun(ctx context.Context, blockHeight int64) { + if blockHeight <= 0 { + return + } + e.mu.RLock() - trig := e.trigger cfg := e.config - leader := e.isLeader + mech := e.mechanism + runner := e.runner + last := e.lastRunHeight + logger := e.logger + svc := e.service e.mu.RUnlock() - if !leader || trig == nil || !cfg.Enabled { + + if !cfg.Enabled || mech == nil || runner == nil { + return + } + + if last != 0 && blockHeight-last < cfg.BlockInterval { return } - _ = trig.Start(ctx) + + reason := fmt.Sprintf("block_interval:%d", blockHeight) + err := runner.Execute(ctx, RunnerArgs{ + Mechanism: mech, + Logger: logger, + Reason: reason, + DB: dbConnFromService(svc), + }) + if err != nil { + return + } + + e.mu.Lock() + if blockHeight > e.lastRunHeight { + e.lastRunHeight = blockHeight + } + e.mu.Unlock() +} + +func (e *Extension) Close(ctx context.Context) { + e.mu.Lock() + defer e.mu.Unlock() + if e.mechanism != nil { + _ = e.mechanism.Close(ctx) + e.mechanism = nil + } + e.runner = nil } func dbConnFromService(service *common.Service) DBConnConfig { diff --git a/extensions/tn_vacuum/mechanism.go b/extensions/tn_vacuum/mechanism.go index 24fee38b7..ae1c6bad1 100644 --- a/extensions/tn_vacuum/mechanism.go +++ b/extensions/tn_vacuum/mechanism.go @@ -7,20 +7,20 @@ import ( ) type Mechanism interface { - Name() string - Prepare(ctx context.Context, deps MechanismDeps) error - Run(ctx 
context.Context, req RunRequest) (*RunReport, error) - Close(ctx context.Context) error + Name() string + Prepare(ctx context.Context, deps MechanismDeps) error + Run(ctx context.Context, req RunRequest) (*RunReport, error) + Close(ctx context.Context) error } type MechanismDeps struct { - Logger log.Logger - DB DBConnConfig + Logger log.Logger + DB DBConnConfig } type RunRequest struct { - Reason string - DB DBConnConfig + Reason string + DB DBConnConfig } type RunReport struct { @@ -29,11 +29,11 @@ type RunReport struct { } type DBConnConfig struct { - Host string - Port string - User string - Password string - Database string + Host string + Port string + User string + Password string + Database string } var mechanismFactory = func() Mechanism { return NewPgRepackMechanism() } diff --git a/extensions/tn_vacuum/runner.go b/extensions/tn_vacuum/runner.go index 95583bc67..f59e4e267 100644 --- a/extensions/tn_vacuum/runner.go +++ b/extensions/tn_vacuum/runner.go @@ -12,7 +12,6 @@ type Runner struct { type RunnerArgs struct { Mechanism Mechanism - Trigger Trigger Logger log.Logger Reason string DB DBConnConfig diff --git a/extensions/tn_vacuum/tn_vacuum.go b/extensions/tn_vacuum/tn_vacuum.go index db45370fb..ebe6c8bb9 100644 --- a/extensions/tn_vacuum/tn_vacuum.go +++ b/extensions/tn_vacuum/tn_vacuum.go @@ -8,7 +8,6 @@ import ( "github.com/trufnetwork/kwil-db/extensions/hooks" "github.com/trufnetwork/kwil-db/extensions/precompiles" sql "github.com/trufnetwork/kwil-db/node/types/sql" - "github.com/trufnetwork/node/extensions/leaderwatch" ) func InitializeExtension() { @@ -21,13 +20,6 @@ func InitializeExtension() { if err := hooks.RegisterEndBlockHook(ExtensionName+"_end_block", endBlockHook); err != nil { panic(fmt.Sprintf("failed to register %s end block hook: %v", ExtensionName, err)) } - if err := leaderwatch.Register(ExtensionName, leaderwatch.Callbacks{ - OnAcquire: leaderAcquire, - OnLose: leaderLose, - OnEndBlock: leaderEndBlock, - }); err != nil { - 
panic(fmt.Sprintf("failed to register %s leader watcher: %v", ExtensionName, err)) - } } func initializePrecompile(ctx context.Context, service *common.Service, db sql.DB, alias string, metadata map[string]any) (precompiles.Precompile, error) { @@ -46,80 +38,31 @@ func engineReadyHook(ctx context.Context, app *common.App) error { ext.setService(app.Service) } - cfg, err := LoadConfig(app.Service) + svc := (*common.Service)(nil) + if app != nil { + svc = app.Service + } + + cfg, err := LoadConfig(svc) if err != nil { ext.Logger().Warn("failed to load tn_vacuum config", "error", err) cfg.Enabled = false } - deps := MechanismDeps{Logger: ext.Logger()} - if err := ext.reconfigure(ctx, cfg, deps); err != nil { + if err := ext.configure(ctx, cfg); err != nil { ext.Logger().Warn("failed to configure tn_vacuum", "error", err) - return nil } - - ext.SetReloadIntervalBlocks(cfg.ReloadIntervalBlocks) - ext.SetLastConfigHeight(0) - - if cfg.Enabled { - ext.startTriggerIfLeader(ctx) - } - return nil } func endBlockHook(ctx context.Context, app *common.App, block *common.BlockContext) error { - return nil -} - -func leaderAcquire(ctx context.Context, app *common.App, block *common.BlockContext) { - ext := GetExtension() - ext.setLeader(true) - if trig := ext.Trigger(); trig != nil { - _ = trig.OnLeaderChange(ctx, true) - } - ext.startTriggerIfLeader(ctx) -} - -func leaderLose(ctx context.Context, app *common.App, block *common.BlockContext) { - ext := GetExtension() - ext.setLeader(false) - if trig := ext.Trigger(); trig != nil { - _ = trig.OnLeaderChange(ctx, false) - _ = trig.Stop(ctx) - } -} - -func leaderEndBlock(ctx context.Context, app *common.App, block *common.BlockContext) { - ext := GetExtension() - cfg := ext.Config() - if !cfg.Enabled { - return - } - - if trig := ext.Trigger(); trig != nil { - _ = trig.OnEndBlock(ctx, block) - } - if block == nil { - return + return nil } - - reload := ext.ReloadIntervalBlocks() - if reload > 0 && block.Height-ext.LastConfigHeight() 
>= reload { - var svc *common.Service - if app != nil { - svc = app.Service - } - cfg, err := LoadConfig(svc) - if err != nil { - ext.Logger().Warn("failed to reload tn_vacuum config", "error", err) - } else { - deps := MechanismDeps{Logger: ext.Logger()} - if err := ext.reconfigure(ctx, cfg, deps); err != nil { - ext.Logger().Warn("failed to apply tn_vacuum config", "error", err) - } - } - ext.SetLastConfigHeight(block.Height) + ext := GetExtension() + if app != nil && app.Service != nil { + ext.setService(app.Service) } + ext.maybeRun(ctx, block.Height) + return nil } diff --git a/extensions/tn_vacuum/trigger.go b/extensions/tn_vacuum/trigger.go deleted file mode 100644 index bbabd594d..000000000 --- a/extensions/tn_vacuum/trigger.go +++ /dev/null @@ -1,38 +0,0 @@ -package tn_vacuum - -import ( - "context" - "fmt" - - "github.com/trufnetwork/kwil-db/common" - "github.com/trufnetwork/kwil-db/core/log" -) - -type FireOpts struct { - Reason string -} - -type Trigger interface { - Configure(ctx context.Context, cfg TriggerConfig, fire func(context.Context, FireOpts) error) error - Start(ctx context.Context) error - Stop(ctx context.Context) error - OnLeaderChange(ctx context.Context, isLeader bool) error - OnEndBlock(ctx context.Context, block *common.BlockContext) error - Kind() string -} - -func newTrigger(kind string) (Trigger, error) { - baseLogger := log.New(log.WithLevel(log.LevelInfo)) - switch kind { - case TriggerDigestCoupled: - return newDigestTrigger(baseLogger), nil - case TriggerBlockInterval: - return newBlockIntervalTrigger(baseLogger), nil - case TriggerCron: - return newCronTrigger(baseLogger), nil - case TriggerManual: - return newManualTrigger(baseLogger), nil - default: - return nil, fmt.Errorf("unsupported trigger %q", kind) - } -} diff --git a/extensions/tn_vacuum/trigger_block_interval.go b/extensions/tn_vacuum/trigger_block_interval.go deleted file mode 100644 index 7c582ff9d..000000000 --- a/extensions/tn_vacuum/trigger_block_interval.go +++ 
/dev/null @@ -1,78 +0,0 @@ -package tn_vacuum - -import ( - "context" - "fmt" - "sync" - - "github.com/trufnetwork/kwil-db/common" - "github.com/trufnetwork/kwil-db/core/log" -) - -type blockIntervalTrigger struct { - mu sync.Mutex - fire func(context.Context, FireOpts) error - logger log.Logger - interval int64 - lastFired int64 -} - -func newBlockIntervalTrigger(logger log.Logger) *blockIntervalTrigger { - return &blockIntervalTrigger{logger: logger} -} - -func (t *blockIntervalTrigger) Kind() string { return TriggerBlockInterval } - -func (t *blockIntervalTrigger) Configure(ctx context.Context, cfg TriggerConfig, fire func(context.Context, FireOpts) error) error { - t.mu.Lock() - defer t.mu.Unlock() - t.logger = t.logger.New("trigger.block_interval") - if cfg.BlockInterval < minBlockInterval { - cfg.BlockInterval = minBlockInterval - } - if cfg.BlockInterval == 0 { - cfg.BlockInterval = 100 - } - t.interval = cfg.BlockInterval - t.fire = fire - t.lastFired = 0 - t.logger.Info("block interval trigger configured", "interval", t.interval) - return nil -} - -func (t *blockIntervalTrigger) Start(ctx context.Context) error { - t.logger.Info("block interval trigger active") - return nil -} - -func (t *blockIntervalTrigger) Stop(ctx context.Context) error { - t.logger.Info("block interval trigger stopped") - return nil -} - -func (t *blockIntervalTrigger) OnLeaderChange(ctx context.Context, isLeader bool) error { - if !isLeader { - t.logger.Debug("block interval trigger paused") - } - return nil -} - -func (t *blockIntervalTrigger) OnEndBlock(ctx context.Context, block *common.BlockContext) error { - if block == nil { - return nil - } - t.mu.Lock() - defer t.mu.Unlock() - if t.fire == nil { - return nil - } - height := block.Height - if t.lastFired == 0 || height-t.lastFired >= t.interval { - if err := t.fire(ctx, FireOpts{Reason: fmt.Sprintf("block_interval:%d", height)}); err != nil { - t.logger.Warn("block interval trigger failed", "error", err) - } else { - 
t.lastFired = height - } - } - return nil -} diff --git a/extensions/tn_vacuum/trigger_cron.go b/extensions/tn_vacuum/trigger_cron.go deleted file mode 100644 index 316d0e7ba..000000000 --- a/extensions/tn_vacuum/trigger_cron.go +++ /dev/null @@ -1,92 +0,0 @@ -package tn_vacuum - -import ( - "context" - "sync" - "time" - - "github.com/trufnetwork/kwil-db/common" - "github.com/trufnetwork/kwil-db/core/log" -) - -type cronTrigger struct { - mu sync.Mutex - fire func(context.Context, FireOpts) error - logger log.Logger - running bool - cancel context.CancelFunc -} - -func newCronTrigger(logger log.Logger) *cronTrigger { - return &cronTrigger{logger: logger} -} - -func (t *cronTrigger) Kind() string { return TriggerCron } - -func (t *cronTrigger) Configure(ctx context.Context, cfg TriggerConfig, fire func(context.Context, FireOpts) error) error { - t.mu.Lock() - defer t.mu.Unlock() - t.logger = t.logger.New("trigger.cron") - t.fire = fire - t.logger.Info("cron trigger configured", "schedule", cfg.CronSchedule) - return nil -} - -func (t *cronTrigger) Start(ctx context.Context) error { - t.mu.Lock() - defer t.mu.Unlock() - if t.running { - return nil - } - loopCtx, cancel := context.WithCancel(ctx) - t.cancel = cancel - t.running = true - go t.loop(loopCtx) - t.logger.Info("cron trigger loop started (stub)") - return nil -} - -func (t *cronTrigger) loop(ctx context.Context) { - ticker := time.NewTicker(1 * time.Hour) - defer ticker.Stop() - for { - select { - case <-ctx.Done(): - return - case <-ticker.C: - t.mu.Lock() - fire := t.fire - t.mu.Unlock() - if fire != nil { - if err := fire(ctx, FireOpts{Reason: "cron_stub"}); err != nil { - t.logger.Warn("cron trigger fire failed", "error", err) - } - } - } - } -} - -func (t *cronTrigger) Stop(ctx context.Context) error { - t.mu.Lock() - defer t.mu.Unlock() - if !t.running { - return nil - } - if t.cancel != nil { - t.cancel() - } - t.running = false - t.logger.Info("cron trigger stopped") - return nil -} - -func (t 
*cronTrigger) OnLeaderChange(ctx context.Context, isLeader bool) error { - if !isLeader { - t.logger.Debug("cron trigger paused") - } - return nil -} - -func (t *cronTrigger) OnEndBlock(ctx context.Context, block *common.BlockContext) error { - return nil -} diff --git a/extensions/tn_vacuum/trigger_digest.go b/extensions/tn_vacuum/trigger_digest.go deleted file mode 100644 index c41a4ee96..000000000 --- a/extensions/tn_vacuum/trigger_digest.go +++ /dev/null @@ -1,62 +0,0 @@ -package tn_vacuum - -import ( - "context" - "fmt" - "sync" - - "github.com/trufnetwork/kwil-db/common" - "github.com/trufnetwork/kwil-db/core/log" -) - -type digestCoupledTrigger struct { - mu sync.RWMutex - fire func(context.Context, FireOpts) error - logger log.Logger -} - -func newDigestTrigger(logger log.Logger) *digestCoupledTrigger { - return &digestCoupledTrigger{logger: logger} -} - -func (t *digestCoupledTrigger) Kind() string { return TriggerDigestCoupled } - -func (t *digestCoupledTrigger) Configure(ctx context.Context, cfg TriggerConfig, fire func(context.Context, FireOpts) error) error { - t.mu.Lock() - defer t.mu.Unlock() - t.logger = t.logger.New("trigger.digest") - t.fire = fire - t.logger.Info("digest trigger configured") - return nil -} - -func (t *digestCoupledTrigger) Start(ctx context.Context) error { - t.logger.Info("digest trigger start - waiting for notifications") - return nil -} - -func (t *digestCoupledTrigger) Stop(ctx context.Context) error { - t.logger.Info("digest trigger stop") - return nil -} - -func (t *digestCoupledTrigger) OnLeaderChange(ctx context.Context, isLeader bool) error { - if !isLeader { - t.logger.Debug("digest trigger paused - not leader") - } - return nil -} - -func (t *digestCoupledTrigger) OnEndBlock(ctx context.Context, block *common.BlockContext) error { - return nil -} - -func (t *digestCoupledTrigger) NotifyDigestComplete(ctx context.Context, reason string) error { - t.mu.RLock() - fire := t.fire - t.mu.RUnlock() - if fire == nil { - 
return fmt.Errorf("digest trigger not configured") - } - return fire(ctx, FireOpts{Reason: reason}) -} diff --git a/extensions/tn_vacuum/trigger_manual.go b/extensions/tn_vacuum/trigger_manual.go deleted file mode 100644 index 76082f890..000000000 --- a/extensions/tn_vacuum/trigger_manual.go +++ /dev/null @@ -1,62 +0,0 @@ -package tn_vacuum - -import ( - "context" - "fmt" - "sync" - - "github.com/trufnetwork/kwil-db/common" - "github.com/trufnetwork/kwil-db/core/log" -) - -type manualTrigger struct { - mu sync.RWMutex - fire func(context.Context, FireOpts) error - logger log.Logger -} - -func newManualTrigger(logger log.Logger) *manualTrigger { - return &manualTrigger{logger: logger} -} - -func (t *manualTrigger) Kind() string { return TriggerManual } - -func (t *manualTrigger) Configure(ctx context.Context, cfg TriggerConfig, fire func(context.Context, FireOpts) error) error { - t.mu.Lock() - defer t.mu.Unlock() - t.logger = t.logger.New("trigger.manual") - t.fire = fire - t.logger.Info("manual trigger configured") - return nil -} - -func (t *manualTrigger) Start(ctx context.Context) error { - t.logger.Info("manual trigger ready") - return nil -} - -func (t *manualTrigger) Stop(ctx context.Context) error { - t.logger.Info("manual trigger stopped") - return nil -} - -func (t *manualTrigger) OnLeaderChange(ctx context.Context, isLeader bool) error { - if !isLeader { - t.logger.Debug("manual trigger idle - not leader") - } - return nil -} - -func (t *manualTrigger) OnEndBlock(ctx context.Context, block *common.BlockContext) error { - return nil -} - -func (t *manualTrigger) Fire(ctx context.Context, reason string) error { - t.mu.RLock() - fire := t.fire - t.mu.RUnlock() - if fire == nil { - return fmt.Errorf("manual trigger not configured") - } - return fire(ctx, FireOpts{Reason: reason}) -} diff --git a/extensions/tn_vacuum/vacuum_test.go b/extensions/tn_vacuum/vacuum_test.go index e5f7d529a..da47809a2 100644 --- a/extensions/tn_vacuum/vacuum_test.go +++ 
b/extensions/tn_vacuum/vacuum_test.go @@ -2,7 +2,7 @@ package tn_vacuum import ( "context" - "os" + "errors" "testing" "github.com/stretchr/testify/require" @@ -11,140 +11,101 @@ import ( "github.com/trufnetwork/kwil-db/core/log" ) -type fakeTrigger struct { - started int - stopped int - endCalls int - lastCfg TriggerConfig -} - type stubMechanism struct { - prepared bool + prepared int + runs []RunRequest + closeCnt int } func (s *stubMechanism) Name() string { return "stub" } + func (s *stubMechanism) Prepare(ctx context.Context, deps MechanismDeps) error { - s.prepared = true + s.prepared++ return nil } + func (s *stubMechanism) Run(ctx context.Context, req RunRequest) (*RunReport, error) { + s.runs = append(s.runs, req) return &RunReport{Mechanism: s.Name(), Status: "ok"}, nil } -func (s *stubMechanism) Close(ctx context.Context) error { return nil } -func (f *fakeTrigger) Configure(ctx context.Context, cfg TriggerConfig, fire func(context.Context, FireOpts) error) error { - f.lastCfg = cfg +func (s *stubMechanism) Close(ctx context.Context) error { + s.closeCnt++ return nil } -func (f *fakeTrigger) Start(ctx context.Context) error { - f.started++ - return nil -} -func (f *fakeTrigger) Stop(ctx context.Context) error { - f.stopped++ - return nil + +type failingMechanism struct{} + +func (f *failingMechanism) Name() string { return "fail" } +func (f *failingMechanism) Prepare(ctx context.Context, deps MechanismDeps) error { + return errors.New("prepare failed") } -func (f *fakeTrigger) OnLeaderChange(ctx context.Context, isLeader bool) error { return nil } -func (f *fakeTrigger) OnEndBlock(ctx context.Context, block *common.BlockContext) error { - f.endCalls++ - return nil +func (f *failingMechanism) Run(ctx context.Context, req RunRequest) (*RunReport, error) { + return nil, errors.New("should not run") } -func (f *fakeTrigger) Kind() string { return "fake" } +func (f *failingMechanism) Close(ctx context.Context) error { return nil } -func 
TestLeaderCallbacksRespectEnabledFlag(t *testing.T) { +func TestConfigureDisabledSkipsMechanism(t *testing.T) { ctx := context.Background() ResetForTest() ext := GetExtension() - setMechanismFactoryForTest(func() Mechanism { return &stubMechanism{} }) - defer resetMechanismFactory() - - fake := &fakeTrigger{} - mech := newMechanism() - require.NoError(t, mech.Prepare(ctx, MechanismDeps{Logger: log.New(), DB: DBConnConfig{Database: "kwild"}})) - - ext.mu.Lock() - ext.logger = log.New() - ext.config = Config{Enabled: false, Trigger: TriggerConfig{Kind: TriggerManual}} - ext.trigger = fake - ext.mechanism = mech - ext.reloadIntervalBlocks = defaultReloadBlocks - ext.mu.Unlock() + ext.setLogger(log.New()) - leaderAcquire(ctx, nil, nil) - require.Equal(t, 0, fake.started, "should not start when disabled") - - ext.mu.Lock() - ext.config.Enabled = true - ext.mu.Unlock() - - leaderAcquire(ctx, nil, nil) - require.Equal(t, 1, fake.started) + stub := &stubMechanism{} + setMechanismFactoryForTest(func() Mechanism { return stub }) + defer resetMechanismFactory() - leaderLose(ctx, nil, nil) - require.Equal(t, 1, fake.stopped) + require.NoError(t, ext.configure(ctx, Config{Enabled: false, BlockInterval: 5})) + require.Equal(t, 0, stub.prepared) } -func TestLeaderEndBlockTriggersReload(t *testing.T) { +func TestEngineReadyPreparesMechanism(t *testing.T) { ctx := context.Background() ResetForTest() - ext := GetExtension() - setMechanismFactoryForTest(func() Mechanism { return &stubMechanism{} }) + + stub := &stubMechanism{} + setMechanismFactoryForTest(func() Mechanism { return stub }) defer resetMechanismFactory() - fake := &fakeTrigger{} - mech := newMechanism() - require.NoError(t, mech.Prepare(ctx, MechanismDeps{Logger: log.New(), DB: DBConnConfig{Database: "kwild"}})) - - ext.mu.Lock() - ext.logger = log.New() - ext.config = Config{Enabled: true, Trigger: TriggerConfig{Kind: TriggerManual}, ReloadIntervalBlocks: 3} - ext.trigger = fake - ext.mechanism = mech - 
ext.reloadIntervalBlocks = 3 - ext.lastConfigHeight = 1 - ext.mu.Unlock() - - // end block before reload threshold -> only end callback - leaderEndBlock(ctx, nil, &common.BlockContext{Height: 2}) - require.Equal(t, 1, fake.endCalls) - require.Equal(t, TriggerManual, ext.Config().Trigger.Kind) - - // prepare service config to swap trigger kind on reload svc := &common.Service{ Logger: log.New(), LocalConfig: &config.Config{Extensions: map[string]map[string]string{ ExtensionName: { "enabled": "true", - "trigger": TriggerBlockInterval, - "block_interval": "5", + "block_interval": "3", }, }}, } + app := &common.App{Service: svc} + require.NoError(t, engineReadyHook(ctx, app)) + require.Equal(t, 1, stub.prepared) - leaderEndBlock(ctx, app, &common.BlockContext{Height: 5}) - require.GreaterOrEqual(t, ext.LastConfigHeight(), int64(5)) - require.Equal(t, 2, fake.endCalls) // old trigger still sees callback before reload - require.Equal(t, TriggerBlockInterval, ext.Config().Trigger.Kind) - require.NotEqual(t, fake, ext.Trigger()) + block := &common.BlockContext{Height: 1} + require.NoError(t, endBlockHook(ctx, app, block)) + require.Len(t, stub.runs, 1) + require.Equal(t, "block_interval:1", stub.runs[0].Reason) + + require.NoError(t, endBlockHook(ctx, app, &common.BlockContext{Height: 2})) + require.Len(t, stub.runs, 1) + + require.NoError(t, endBlockHook(ctx, app, &common.BlockContext{Height: 4})) + require.Len(t, stub.runs, 2) } -func TestPgRepackMechanismRequiresBinary(t *testing.T) { +func TestConfigureFailureLeavesMechanismNil(t *testing.T) { ctx := context.Background() - resetMechanismFactory() - mech := newMechanism() - pr, ok := mech.(*pgRepackMechanism) - require.True(t, ok, "mechanism should be pgRepackMechanism") - - oldPath := os.Getenv("PATH") - require.NoError(t, os.Setenv("PATH", "")) - defer os.Setenv("PATH", oldPath) + ResetForTest() + ext := GetExtension() + ext.setLogger(log.New()) - err := mech.Prepare(ctx, MechanismDeps{Logger: log.New(), DB: 
DBConnConfig{Database: "kwild"}}) - require.ErrorIs(t, err, ErrPgRepackUnavailable) + setMechanismFactoryForTest(func() Mechanism { return &failingMechanism{} }) + defer resetMechanismFactory() - _, runErr := mech.Run(ctx, RunRequest{Reason: "test"}) - require.ErrorIs(t, runErr, ErrPgRepackUnavailable) + err := ext.configure(ctx, Config{Enabled: true, BlockInterval: 10}) + require.Error(t, err) - require.NoError(t, pr.Close(ctx)) + ext.mu.RLock() + defer ext.mu.RUnlock() + require.Nil(t, ext.mechanism) } From f24b9b96f2ba893bc2d8915b2d9c760fd07eea3c Mon Sep 17 00:00:00 2001 From: Raffael Campos Date: Fri, 26 Sep 2025 14:43:20 -0300 Subject: [PATCH 06/13] fix: update ERC20 bridge test to handle nil recipient This commit modifies the `TestERC20BridgeEndToEnd` function in the ERC20 bridge end-to-end test to allow for a nil recipient parameter when calling the bridge action. This change ensures that the test accurately reflects the intended behavior of bridging tokens to the sender's address, improving the reliability of the test case. 
--- .../erc20/meta_extension_deposit_test.go | 81 +++++++++++++++++++ .../erc20/erc20_bridge_admin_authz_test.go | 2 +- .../erc20/erc20_bridge_end_to_end_test.go | 4 +- .../erc20/erc20_bridge_epoch_test.go | 2 +- .../erc20/erc20_bridge_injection_test.go | 31 ++++++- .../erc20/erc20_bridge_multi_instance_test.go | 2 +- .../erc20_bridge_transfer_actions_test.go | 6 +- .../erc20/erc20_bridge_transfer_test.go | 2 +- tests/streams/utils/erc20/helper.go | 6 +- tests/streams/utils/erc20/inject.go | 30 ++++--- tests/streams/utils/utils.go | 2 + 11 files changed, 143 insertions(+), 25 deletions(-) create mode 100644 node/exts/erc20-bridge/erc20/meta_extension_deposit_test.go diff --git a/node/exts/erc20-bridge/erc20/meta_extension_deposit_test.go b/node/exts/erc20-bridge/erc20/meta_extension_deposit_test.go new file mode 100644 index 000000000..dbc591080 --- /dev/null +++ b/node/exts/erc20-bridge/erc20/meta_extension_deposit_test.go @@ -0,0 +1,81 @@ +//go:build kwiltest + +package erc20 + +import ( + "context" + "math/big" + "testing" + + ethcommon "github.com/ethereum/go-ethereum/common" + ethtypes "github.com/ethereum/go-ethereum/core/types" + "github.com/stretchr/testify/require" + + "github.com/trufnetwork/kwil-db/core/types" + "github.com/trufnetwork/kwil-db/node/exts/evm-sync/chains" + orderedsync "github.com/trufnetwork/kwil-db/node/exts/ordered-sync" +) + +// TestApplyDepositLog verifies that applyDepositLog credits the deposit recipient. 
+func TestApplyDepositLog(t *testing.T) { + ctx := context.Background() + db, err := newTestDB() + require.NoError(t, err) + defer db.Close() + + tx, err := db.BeginTx(ctx) + require.NoError(t, err) + defer tx.Rollback(ctx) + + app := setup(t, tx) + + id := newUUID() + chainInfo, ok := chains.GetChainInfoByID("11155111") + if !ok { + t.Fatalf("missing chain info for test chain") + } + + upd := &userProvidedData{ + ID: id, + ChainInfo: &chainInfo, + EscrowAddress: ethcommon.HexToAddress("0x00000000000000000000000000000000000000aa"), + DistributionPeriod: 3600, + } + + require.NoError(t, createNewRewardInstance(ctx, app, upd)) + + require.NoError(t, setRewardSynced(ctx, app, id, 1, &syncedRewardData{ + Erc20Address: ethcommon.HexToAddress("0x00000000000000000000000000000000000000bb"), + Erc20Decimals: 18, + })) + + recipient := ethcommon.HexToAddress("0x00000000000000000000000000000000000000cc") + amount := big.NewInt(1_500_000_000_000_000_000) + + var data [64]byte + copy(data[32-len(recipient.Bytes()):32], recipient.Bytes()) + copy(data[64-len(amount.Bytes()):], amount.Bytes()) + + depositLog := ethtypes.Log{ + Address: upd.EscrowAddress, + Topics: []ethcommon.Hash{ + ethcommon.HexToHash("0xe1fffcc4923d04b559f4d29a8bfc6cda04eb5b0d3c460751c2402c5c5cc9109c"), + }, + Data: data[:], + } + + require.NoError(t, applyDepositLog(ctx, app, id, depositLog)) + + balRecipient, err := balanceOf(ctx, app, id, recipient) + require.NoError(t, err) + require.NotNil(t, balRecipient) + require.Equal(t, types.MustParseDecimal(amount.String()), balRecipient) + + other := ethcommon.HexToAddress("0x00000000000000000000000000000000000000dd") + balOther, err := balanceOf(ctx, app, id, other) + require.NoError(t, err) + require.Nil(t, balOther) + + orderedsync.ForTestingReset() +} + diff --git a/tests/extensions/erc20/erc20_bridge_admin_authz_test.go b/tests/extensions/erc20/erc20_bridge_admin_authz_test.go index 70d0cf67b..12f9f9801 100644 --- 
a/tests/extensions/erc20/erc20_bridge_admin_authz_test.go +++ b/tests/extensions/erc20/erc20_bridge_admin_authz_test.go @@ -32,7 +32,7 @@ func TestERC20BridgeAdminAuthz(t *testing.T) { require.NoError(t, err) // Step 1: Inject deposit so user has balance - err = testerc20.InjectERC20Transfer(ctx, platform, TestChain, TestEscrowA, TestERC20, TestUserA, TestEscrowA, TestAmount2, 10, nil) + err = testerc20.InjectERC20Transfer(ctx, platform, TestChain, TestEscrowA, TestERC20, TestUserA, TestUserA, TestAmount2, 10, nil) require.NoError(t, err) // Verify user has the balance diff --git a/tests/extensions/erc20/erc20_bridge_end_to_end_test.go b/tests/extensions/erc20/erc20_bridge_end_to_end_test.go index 8a172d2f5..a7517928e 100644 --- a/tests/extensions/erc20/erc20_bridge_end_to_end_test.go +++ b/tests/extensions/erc20/erc20_bridge_end_to_end_test.go @@ -52,7 +52,7 @@ func TestERC20BridgeEndToEnd(t *testing.T) { require.True(t, enabledResult, "instance should be enabled before bridge") // Step 1: Inject deposit to give user a balance - err := testerc20.InjectERC20Transfer(ctx, platform, TestChain, TestEscrowA, TestERC20, TestUserA, TestEscrowA, TestAmount1, 10, nil) + err := testerc20.InjectERC20Transfer(ctx, platform, TestChain, TestEscrowA, TestERC20, TestUserA, TestUserA, TestAmount1, 10, nil) require.NoError(t, err) // Verify user has the balance @@ -134,7 +134,7 @@ func TestERC20BridgeCustomRecipient(t *testing.T) { require.NoError(t, erc20shim.ForTestingSeedAndActivateInstance(ctx, platform, TestChain, TestEscrowA, TestERC20, 18, 1, TestExtensionAlias)) // Give user A balance to bridge - require.NoError(t, testerc20.InjectERC20Transfer(ctx, platform, TestChain, TestEscrowA, TestERC20, TestUserA, TestEscrowA, TestAmount1, 10, nil)) + require.NoError(t, testerc20.InjectERC20Transfer(ctx, platform, TestChain, TestEscrowA, TestERC20, TestUserA, TestUserA, TestAmount1, 10, nil)) engineCtx := engCtx(ctx, platform, TestUserA, 2, false) amtDec, err := 
types.ParseDecimalExplicit(TestAmount1, 78, 0) diff --git a/tests/extensions/erc20/erc20_bridge_epoch_test.go b/tests/extensions/erc20/erc20_bridge_epoch_test.go index 94f17f85b..24a51d7f3 100644 --- a/tests/extensions/erc20/erc20_bridge_epoch_test.go +++ b/tests/extensions/erc20/erc20_bridge_epoch_test.go @@ -31,7 +31,7 @@ func TestERC20BridgeEpochFlow(t *testing.T) { require.NoError(t, erc20shim.ForTestingSeedAndActivateInstance(ctx, platform, chain, escrow, erc20, 18, 1, TestExtensionAlias)) // Credit balance via injected transfer (simulates inbound deposit) - require.NoError(t, testerc20.InjectERC20Transfer(ctx, platform, chain, escrow, erc20, user, escrow, value, 10, nil)) + require.NoError(t, testerc20.InjectERC20Transfer(ctx, platform, chain, escrow, erc20, user, user, value, 10, nil)) // Lock and issue directly into epoch (simulate bridge request) require.NoError(t, erc20shim.ForTestingLockAndIssueDirect(ctx, platform, chain, escrow, user, value)) diff --git a/tests/extensions/erc20/erc20_bridge_injection_test.go b/tests/extensions/erc20/erc20_bridge_injection_test.go index f22d4962c..b25cdc906 100644 --- a/tests/extensions/erc20/erc20_bridge_injection_test.go +++ b/tests/extensions/erc20/erc20_bridge_injection_test.go @@ -35,7 +35,7 @@ func TestERC20BridgeInjectedTransferAffectsBalance(t *testing.T) { }) // Inject a transfer: from user to escrow (lock/credit path) - err = testerc20.InjectERC20Transfer(ctx, platform, chain, escrow, erc20, user, escrow, value, 1, nil) + err = testerc20.InjectERC20Transfer(ctx, platform, chain, escrow, erc20, user, user, value, 1, nil) require.NoError(t, err) // Query balance via the test alias @@ -66,3 +66,32 @@ func TestERC20BridgeInjectedTransferAffectsBalance(t *testing.T) { return nil }) } + +// TestERC20BridgeInjectedDepositCreditsRecipient ensures deposits can credit a recipient distinct from the depositor. 
+func TestERC20BridgeInjectedDepositCreditsRecipient(t *testing.T) { + seedAndRun(t, "erc20_bridge_injected_deposit_recipient", func(ctx context.Context, platform *kwilTesting.Platform) error { + chain := "sepolia" + escrow := "0xeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee" + erc20 := "0x2222222222222222222222222222222222222222" + depositor := "0xabc0000000000000000000000000000000000004" + recipient := "0xabc0000000000000000000000000000000000005" + value := "1500000000000000000" + + require.NoError(t, erc20shim.ForTestingSeedAndActivateInstance(ctx, platform, chain, escrow, erc20, 18, 60, TestExtensionAlias)) + t.Cleanup(func() { + erc20shim.ForTestingDisableInstance(ctx, platform, chain, escrow, TestExtensionAlias) + }) + + require.NoError(t, testerc20.InjectERC20Transfer(ctx, platform, chain, escrow, erc20, depositor, recipient, value, 2, nil)) + + balanceRecipient, err := testerc20.GetUserBalance(ctx, platform, TestExtensionAlias, recipient) + require.NoError(t, err) + require.Equal(t, value, balanceRecipient, "recipient should receive credited deposit") + + balanceDepositor, err := testerc20.GetUserBalance(ctx, platform, TestExtensionAlias, depositor) + require.NoError(t, err) + require.Equal(t, "0", balanceDepositor, "depositor should not be credited by deposit") + + return nil + }) +} diff --git a/tests/extensions/erc20/erc20_bridge_multi_instance_test.go b/tests/extensions/erc20/erc20_bridge_multi_instance_test.go index a4783116a..ca5d7a0b6 100644 --- a/tests/extensions/erc20/erc20_bridge_multi_instance_test.go +++ b/tests/extensions/erc20/erc20_bridge_multi_instance_test.go @@ -40,7 +40,7 @@ func TestERC20BridgeMultiInstanceIsolation(t *testing.T) { // Step 1: Inject deposit only for escrowA, userX // This simulates a deposit to escrowA only - err = testerc20.InjectERC20Transfer(ctx, platform, TestChain, escrowA, TestERC20, TestUserA, escrowA, TestAmount1, 10, nil) + err = testerc20.InjectERC20Transfer(ctx, platform, TestChain, escrowA, TestERC20, TestUserA, 
TestUserA, TestAmount1, 10, nil) require.NoError(t, err) // Step 2: Verify isolation - aliasA should have balance, aliasB should not diff --git a/tests/extensions/erc20/erc20_bridge_transfer_actions_test.go b/tests/extensions/erc20/erc20_bridge_transfer_actions_test.go index 08009b997..39d9b4ca5 100644 --- a/tests/extensions/erc20/erc20_bridge_transfer_actions_test.go +++ b/tests/extensions/erc20/erc20_bridge_transfer_actions_test.go @@ -31,7 +31,7 @@ func TestSepoliaTransferActions(t *testing.T) { // Credit initial balance to TestUserA using configured escrow err = testerc20.InjectERC20Transfer(ctx, platform, - TestChain, configuredEscrow, TestERC20, TestUserA, configuredEscrow, TestAmount2, 10, nil) + TestChain, configuredEscrow, TestERC20, TestUserA, TestUserA, TestAmount2, 10, nil) require.NoError(t, err) // Verify initial balance via sepolia_wallet_balance action @@ -122,7 +122,7 @@ func TestTransferActionValidation(t *testing.T) { // Give TestUserA a small balance (half of what they'll try to transfer) smallAmount := "500000000000000000" // 0.5 tokens (half of TestAmount1 which is 1.0) err = testerc20.InjectERC20Transfer(ctx, platform, - TestChain, configuredEscrow, TestERC20, TestUserA, configuredEscrow, smallAmount, 10, nil) + TestChain, configuredEscrow, TestERC20, TestUserA, TestUserA, smallAmount, 10, nil) require.NoError(t, err) // Verify they have the small balance @@ -171,7 +171,7 @@ func TestMultipleTransferActions(t *testing.T) { // Credit large initial balance to userA initialAmount := "10000000000000000000" // 10.0 tokens err = testerc20.InjectERC20Transfer(ctx, platform, - TestChain, configuredEscrow, TestERC20, userA, configuredEscrow, initialAmount, 10, nil) + TestChain, configuredEscrow, TestERC20, userA, userA, initialAmount, 10, nil) require.NoError(t, err) // Transfer A -> B (3 tokens) diff --git a/tests/extensions/erc20/erc20_bridge_transfer_test.go b/tests/extensions/erc20/erc20_bridge_transfer_test.go index d771c88ae..87e76e1a3 100644 
--- a/tests/extensions/erc20/erc20_bridge_transfer_test.go +++ b/tests/extensions/erc20/erc20_bridge_transfer_test.go @@ -31,7 +31,7 @@ func TestERC20BridgeTransferBalances(t *testing.T) { require.NoError(t, err) // Step 1: Inject deposit for userA - err = testerc20.InjectERC20Transfer(ctx, platform, TestChain, TestEscrowA, TestERC20, TestUserA, TestEscrowA, TestAmount2, 10, nil) + err = testerc20.InjectERC20Transfer(ctx, platform, TestChain, TestEscrowA, TestERC20, TestUserA, TestUserA, TestAmount2, 10, nil) require.NoError(t, err) // Verify userA received the full deposit diff --git a/tests/streams/utils/erc20/helper.go b/tests/streams/utils/erc20/helper.go index 0e8f394da..5ba6e3c40 100644 --- a/tests/streams/utils/erc20/helper.go +++ b/tests/streams/utils/erc20/helper.go @@ -37,12 +37,12 @@ func WithERC20TestSetup(chain, alias string, escrowAddr string) func(t *testing. } } -// CreditUserBalance injects a realistic ERC-20 transfer to credit the user's balance. -// This simulates a user depositing tokens into the bridge. +// CreditUserBalance injects a synthetic escrow deposit to credit the user's balance. +// This simulates a user depositing tokens into the bridge via the RewardDistributor contract. func CreditUserBalance(ctx context.Context, platform *kwilTesting.Platform, extensionAlias, escrowAddr, userAddr, amount string) error { // Use the platform's DB and Engine (could be transaction-scoped) return InjectERC20Transfer( - ctx, platform, extensionAlias, escrowAddr, "0x2222222222222222222222222222222222222222", userAddr, escrowAddr, amount, 10, nil) + ctx, platform, extensionAlias, escrowAddr, "0x2222222222222222222222222222222222222222", userAddr, userAddr, amount, 10, nil) } // GetUserBalance queries the user's current balance via the extension. 
diff --git a/tests/streams/utils/erc20/inject.go b/tests/streams/utils/erc20/inject.go index 94d6d5938..356969b9e 100644 --- a/tests/streams/utils/erc20/inject.go +++ b/tests/streams/utils/erc20/inject.go @@ -20,7 +20,7 @@ import ( kwilTesting "github.com/trufnetwork/kwil-db/testing" ) -// InjectERC20Transfer forces an instance synced and injects a synthetic Transfer log that credits balance. +// InjectERC20Transfer forces an instance synced and injects a synthetic Deposit log that credits balance. func InjectERC20Transfer(ctx context.Context, platform *kwilTesting.Platform, chain, escrow, erc20Addr, fromHex, toHex string, valueStr string, point int64, prev *int64) error { // 1) Ensure instance exists and is synced id, err := erc20bridge.ForTestingForceSyncInstance(ctx, platform, chain, escrow, erc20Addr, 18) @@ -31,26 +31,32 @@ func InjectERC20Transfer(ctx context.Context, platform *kwilTesting.Platform, ch // 2) Compute ordered-sync topic topic := erc20bridge.ForTestingTransferListenerTopic(*id) - // 3) Build a synthetic transfer log - from := ethcommon.HexToAddress(fromHex) + // 3) Build a synthetic deposit log + if !ethcommon.IsHexAddress(fromHex) { + return fmt.Errorf("invalid address: %s", fromHex) + } + if !ethcommon.IsHexAddress(toHex) { + return fmt.Errorf("invalid address: %s", toHex) + } to := ethcommon.HexToAddress(toHex) - erc20Address := ethcommon.HexToAddress(erc20Addr) + escrowAddress := ethcommon.HexToAddress(escrow) var bn big.Int if _, ok := bn.SetString(valueStr, 10); !ok { return fmt.Errorf("invalid value: %s", valueStr) } - // topics: signature + from + to + // topics: only signature (no indexed params) topics := []ethcommon.Hash{ - ethcommon.HexToHash("0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef"), - ethcommon.BytesToHash(from.Bytes()), - ethcommon.BytesToHash(to.Bytes()), + ethcommon.HexToHash("0xe1fffcc4923d04b559f4d29a8bfc6cda04eb5b0d3c460751c2402c5c5cc9109c"), } - // data: 32-byte big-endian value + + var 
recipientWord [32]byte + copy(recipientWord[32-len(to.Bytes()):], to.Bytes()) val32 := types.BigIntToHash32(&bn) + data := append(recipientWord[:], val32[:]...) lg := &ethtypes.Log{ - Address: erc20Address, + Address: escrowAddress, Topics: topics, - Data: val32[:], + Data: data, BlockNumber: uint64(point), TxHash: ethcommon.Hash{}, TxIndex: 0, @@ -58,7 +64,7 @@ func InjectERC20Transfer(ctx context.Context, platform *kwilTesting.Platform, ch Index: 0, Removed: false, } - ethLog := &evmsync.EthLog{Metadata: []byte("e20trsnfr"), Log: lg} + ethLog := &evmsync.EthLog{Metadata: []byte("rcpdepst"), Log: lg} // 4) Serialize like production and store via ordered-sync logsData, err := serializeEthLogsLocal([]*evmsync.EthLog{ethLog}) diff --git a/tests/streams/utils/utils.go b/tests/streams/utils/utils.go index 040673152..927092823 100644 --- a/tests/streams/utils/utils.go +++ b/tests/streams/utils/utils.go @@ -1,3 +1,5 @@ +//go:build kwiltest + // Package testutils provides utilities for testing Kwil schemas and extensions. // This package maintains backward compatibility while organizing functionality into focused subpackages. // From 7b8397d0158465b1df48a0d2fff860c6b696b45b Mon Sep 17 00:00:00 2001 From: Raffael Campos Date: Mon, 29 Sep 2025 17:17:36 -0300 Subject: [PATCH 07/13] feat: enhance tn_vacuum extension with automated maintenance and metrics This commit introduces the `tn_vacuum` extension, which automates database maintenance through periodic vacuuming operations using `pg_repack`. Key changes include: - Added configuration options for enabling the vacuum extension and setting the block interval. - Implemented metrics recording for vacuum operations, including counters for starts, completions, errors, and skipped operations. - Enhanced the `pgRepackMechanism` to provide detailed run reports, including duration and tables processed. - Updated installation instructions in the documentation to include `pg_repack` requirements for various platforms.
- Added tests to validate the new functionality and ensure metrics are recorded correctly. These improvements enhance database performance and observability, making maintenance more efficient. --- docs/node-operator-guide.md | 33 ++- extensions/tn_vacuum/README.md | 107 ++++++++++ extensions/tn_vacuum/config.go | 13 +- extensions/tn_vacuum/constants.go | 19 ++ extensions/tn_vacuum/extension.go | 16 +- extensions/tn_vacuum/mechanism.go | 8 +- extensions/tn_vacuum/mechanism_repack.go | 30 ++- extensions/tn_vacuum/metrics/metrics.go | 78 ++++++++ extensions/tn_vacuum/metrics/metrics_test.go | 94 +++++++++ extensions/tn_vacuum/metrics/noop.go | 23 +++ extensions/tn_vacuum/metrics/otel.go | 143 +++++++++++++ extensions/tn_vacuum/runner.go | 35 +++- extensions/tn_vacuum/vacuum_test.go | 199 ++++++++++++++++++- go.mod | 4 +- go.sum | 4 + 15 files changed, 776 insertions(+), 30 deletions(-) create mode 100644 extensions/tn_vacuum/README.md create mode 100644 extensions/tn_vacuum/metrics/metrics.go create mode 100644 extensions/tn_vacuum/metrics/metrics_test.go create mode 100644 extensions/tn_vacuum/metrics/noop.go create mode 100644 extensions/tn_vacuum/metrics/otel.go diff --git a/docs/node-operator-guide.md b/docs/node-operator-guide.md index a243a48e6..cf4638fe1 100644 --- a/docs/node-operator-guide.md +++ b/docs/node-operator-guide.md @@ -54,7 +54,7 @@ The PostgreSQL client (`psql`) is required for database operations, and the `pg_ #### For Linux (Ubuntu/Debian) ```bash -sudo apt-get install -y postgresql-client-16 +sudo apt-get install -y postgresql-client-16 postgresql-16-repack ``` #### For macOS @@ -62,7 +62,7 @@ sudo apt-get install -y postgresql-client-16 You can install the PostgreSQL client using [Homebrew](https://brew.sh/). If you don't have Homebrew, install it first by following the instructions on their website. ```bash -brew install postgresql@16 +brew install postgresql@16 pg_repack ``` To use it from any terminal, you may need to add it to your `PATH`. 
For `zsh` (the default in modern macOS): @@ -485,6 +485,26 @@ For complete configuration options (stream lists, schedules, metrics, troublesho [extensions/tn_cache/README.md#operations--monitoring](../extensions/tn_cache/README.md#operations--monitoring) +### Vacuum Extension (tn_vacuum) + +The `tn_vacuum` extension provides **automated database maintenance** through periodic vacuuming operations. It helps reclaim disk space and optimize database performance by removing dead tuples using `pg_repack`. + +> **Note:** If you're using the official TrufNetwork node image, `pg_repack` is already included. For custom installations, see the installation guide in the extension documentation. + +**Quick enable** + +```toml +[extensions.tn_vacuum] +enabled = true +block_interval = 50000 # runs vacuum every 50k blocks +``` + +After editing `config.toml`, restart `kwild` for the change to take effect. + +For tuning guidance, metrics, and troubleshooting, see the full documentation: + +[extensions/tn_vacuum/README.md](../extensions/tn_vacuum/README.md) + ### 7. Become a Validator (Optional) To upgrade your node to a validator: @@ -575,19 +595,19 @@ For Ubuntu/Debian: ```bash sudo apt-get update -sudo apt-get install postgresql-client-16 +sudo apt-get install postgresql-client-16 postgresql-16-repack ``` For CentOS/RHEL: ```bash -sudo yum install postgresql16 +sudo yum install postgresql16 pg_repack_16 ``` For macOS (using Homebrew): ```bash -brew install postgresql@16 +brew install postgresql@16 pg_repack ``` Verify the installation: @@ -983,6 +1003,3 @@ private = true ``` For more details, see the [Kwil Private RPC documentation](http://docs.kwil.com/docs/node/private-rpc). - - - diff --git a/extensions/tn_vacuum/README.md b/extensions/tn_vacuum/README.md new file mode 100644 index 000000000..c43bdaa3e --- /dev/null +++ b/extensions/tn_vacuum/README.md @@ -0,0 +1,107 @@ +# TN Vacuum Extension + +Automated database maintenance through periodic vacuuming operations. 
Reclaims disk space and optimizes database performance by removing dead tuples using `pg_repack`. + +## Configuration + +Add to your node's configuration file: + +```toml +[extensions.tn_vacuum] +enabled = true +block_interval = 50000 +``` + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `enabled` | boolean | `true` | Enable/disable the vacuum extension (`true` or `false`) | +| `block_interval` | integer | `50000` | Number of blocks between vacuum runs | + +**Tuning `block_interval`:** +- High-write environments: `25000` - `50000` blocks +- Read-heavy environments: `75000` - `100000` blocks +- Minimum: `1` block (enforced) + +## Prerequisites + +Requires `pg_repack` binary installed and in system PATH. + +> **Note:** If you're using the official TrufNetwork node image, `pg_repack` is already included. Skip installation. + +**For custom installations:** + +**Ubuntu/Debian:** +```bash +sudo apt-get install postgresql-client-16 postgresql-16-repack +``` + +**RHEL/CentOS:** +```bash +sudo yum install postgresql16 pg_repack_16 +``` + +The extension automatically creates the `pg_repack` PostgreSQL extension on first run. + +## Metrics + +When OpenTelemetry is enabled, the extension provides: + +**Counters:** +- `tn_vacuum.vacuum_start_total` - Vacuum operations started +- `tn_vacuum.vacuum_complete_total` - Successful completions +- `tn_vacuum.vacuum_error_total` - Errors encountered +- `tn_vacuum.vacuum_skipped_total` - Operations skipped + +**Histograms:** +- `tn_vacuum.vacuum_duration_seconds` - Duration of operations +- `tn_vacuum.tables_processed` - Tables processed per run + +**Gauges:** +- `tn_vacuum.last_run_height` - Block height of last run + +## Troubleshooting + +### pg_repack Not Found + +**Symptom:** Logs show "pg_repack binary not found" + +**Solution:** +1. Install pg_repack (see [Prerequisites](#prerequisites)) +2. Ensure it's in system PATH +3. 
Restart the node + +### Permission Errors + +**Symptom:** Vacuum fails with permission denied + +**Solution:** Ensure database user has superuser privileges OR grant explicit permissions: +```sql +GRANT EXECUTE ON FUNCTION pg_repack.* TO your_user; +``` + +### High Memory Usage + +**Symptom:** Node memory spikes during vacuum + +**Solution:** Increase `block_interval` to run less frequently + +## Performance Impact + +Vacuum operations are non-blocking but consume resources: +- **CPU**: Moderate during operation +- **Memory**: Proportional to table sizes +- **Disk I/O**: Significant but spread over time + +**Best Practices:** +- Start with default interval (50k blocks) +- Monitor for 24-48 hours before adjusting +- Consider database size and growth rate + +## Security Note + +Database credentials are passed via environment variables to `pg_repack`. Requires elevated database privileges (superuser or pg_repack role). + +## Related Documentation + +- [pg_repack Documentation](https://reorg.github.io/pg_repack/) +- [PostgreSQL VACUUM](https://www.postgresql.org/docs/current/sql-vacuum.html) diff --git a/extensions/tn_vacuum/config.go b/extensions/tn_vacuum/config.go index a8f3a7146..088771275 100644 --- a/extensions/tn_vacuum/config.go +++ b/extensions/tn_vacuum/config.go @@ -25,7 +25,7 @@ func LoadConfig(service *common.Service) (Config, error) { return cfg, nil } - if v, ok := raw["enabled"]; ok { + if v, ok := raw[ConfigKeyEnabled]; ok { boolVal, err := parseBool(v) if err != nil { return cfg, fmt.Errorf("parse enabled: %w", err) @@ -33,7 +33,7 @@ func LoadConfig(service *common.Service) (Config, error) { cfg.Enabled = boolVal } - if v, ok := raw["block_interval"]; ok { + if v, ok := raw[ConfigKeyBlockInterval]; ok { val, err := strconv.ParseInt(strings.TrimSpace(v), 10, 64) if err != nil { return cfg, fmt.Errorf("parse block_interval: %w", err) @@ -51,12 +51,13 @@ func LoadConfig(service *common.Service) (Config, error) { } func parseBool(in string) (bool, error) 
{ - switch strings.ToLower(strings.TrimSpace(in)) { - case "true", "1", "yes", "y", "on": + val := strings.TrimSpace(in) + switch val { + case "true": return true, nil - case "false", "0", "no", "n", "off", "": + case "false", "": return false, nil default: - return false, fmt.Errorf("invalid bool %q", in) + return false, fmt.Errorf("invalid bool %q, expected 'true' or 'false'", in) } } diff --git a/extensions/tn_vacuum/constants.go b/extensions/tn_vacuum/constants.go index 45513dbf1..5537a2d03 100644 --- a/extensions/tn_vacuum/constants.go +++ b/extensions/tn_vacuum/constants.go @@ -9,3 +9,22 @@ const ( defaultBlockInterval = int64(50000) minBlockInterval = int64(1) ) + +// Configuration keys +const ( + ConfigKeyEnabled = "enabled" + ConfigKeyBlockInterval = "block_interval" +) + +// Database connection defaults +const ( + DefaultPostgresHost = "127.0.0.1" + DefaultPostgresPort = "5432" + DefaultSSLMode = "sslmode=disable" +) + +// Report status values +const ( + StatusOK = "ok" + StatusFailed = "failed" +) diff --git a/extensions/tn_vacuum/extension.go b/extensions/tn_vacuum/extension.go index ea3205206..fd89972d2 100644 --- a/extensions/tn_vacuum/extension.go +++ b/extensions/tn_vacuum/extension.go @@ -7,6 +7,7 @@ import ( "github.com/trufnetwork/kwil-db/common" "github.com/trufnetwork/kwil-db/core/log" + "github.com/trufnetwork/node/extensions/tn_vacuum/metrics" ) type Extension struct { @@ -17,6 +18,7 @@ type Extension struct { mechanism Mechanism runner *Runner lastRunHeight int64 + metrics metrics.MetricsRecorder } var ( @@ -26,8 +28,10 @@ var ( func GetExtension() *Extension { once.Do(func() { + logger := log.New(log.WithLevel(log.LevelInfo)) extInstance = &Extension{ - logger: log.New(log.WithLevel(log.LevelInfo)), + logger: logger, + metrics: metrics.NewMetricsRecorder(logger), } }) return extInstance @@ -52,6 +56,7 @@ func (e *Extension) setLogger(l log.Logger) { e.mu.Lock() defer e.mu.Unlock() e.logger = l + e.metrics = metrics.NewMetricsRecorder(l) } 
func (e *Extension) setService(s *common.Service) { @@ -99,6 +104,7 @@ func (e *Extension) maybeRun(ctx context.Context, blockHeight int64) { last := e.lastRunHeight logger := e.logger svc := e.service + metricsRecorder := e.metrics e.mu.RUnlock() if !cfg.Enabled || mech == nil || runner == nil { @@ -106,6 +112,9 @@ func (e *Extension) maybeRun(ctx context.Context, blockHeight int64) { } if last != 0 && blockHeight-last < cfg.BlockInterval { + if metricsRecorder != nil { + metricsRecorder.RecordVacuumSkipped(ctx, "block_interval_not_met") + } return } @@ -115,14 +124,19 @@ func (e *Extension) maybeRun(ctx context.Context, blockHeight int64) { Logger: logger, Reason: reason, DB: dbConnFromService(svc), + Metrics: metricsRecorder, }) if err != nil { + logger.Warn("vacuum run failed", "error", err, "height", blockHeight, "reason", reason) return } e.mu.Lock() if blockHeight > e.lastRunHeight { e.lastRunHeight = blockHeight + if metricsRecorder != nil { + metricsRecorder.RecordLastRunHeight(ctx, blockHeight) + } } e.mu.Unlock() } diff --git a/extensions/tn_vacuum/mechanism.go b/extensions/tn_vacuum/mechanism.go index ae1c6bad1..72d539864 100644 --- a/extensions/tn_vacuum/mechanism.go +++ b/extensions/tn_vacuum/mechanism.go @@ -2,6 +2,7 @@ package tn_vacuum import ( "context" + "time" "github.com/trufnetwork/kwil-db/core/log" ) @@ -24,8 +25,11 @@ type RunRequest struct { } type RunReport struct { - Mechanism string - Status string + Mechanism string + Status string + Duration time.Duration + TablesProcessed int + Error string } type DBConnConfig struct { diff --git a/extensions/tn_vacuum/mechanism_repack.go b/extensions/tn_vacuum/mechanism_repack.go index b41e54cea..485ce0dde 100644 --- a/extensions/tn_vacuum/mechanism_repack.go +++ b/extensions/tn_vacuum/mechanism_repack.go @@ -8,6 +8,7 @@ import ( "os" "os/exec" "strings" + "time" "github.com/jackc/pgx/v5" "github.com/trufnetwork/kwil-db/core/log" @@ -44,6 +45,12 @@ func (m *pgRepackMechanism) Prepare(ctx 
context.Context, deps MechanismDeps) err } func (m *pgRepackMechanism) Run(ctx context.Context, req RunRequest) (*RunReport, error) { + startTime := time.Now() + report := &RunReport{ + Mechanism: m.Name(), + Status: StatusOK, + } + if m.binaryPath == "" { return nil, fmt.Errorf("pg_repack unavailable: %w", ErrPgRepackUnavailable) } @@ -79,11 +86,20 @@ func (m *pgRepackMechanism) Run(ctx context.Context, req RunRequest) (*RunReport m.logger.Info("pg_repack starting", "args", args) if err := cmd.Run(); err != nil { - m.logger.Warn("pg_repack failed", "error", err, "stderr", stderr.String()) - return nil, fmt.Errorf("pg_repack execution failed: %w", err) + report.Duration = time.Since(startTime) + report.Status = StatusFailed + report.Error = err.Error() + m.logger.Warn("pg_repack failed", "error", err, "stderr", stderr.String(), "duration", report.Duration) + return report, fmt.Errorf("pg_repack execution failed: %w", err) } - m.logger.Info("pg_repack completed", "stdout", stdout.String()) - return &RunReport{Mechanism: m.Name(), Status: "ok"}, nil + + report.Duration = time.Since(startTime) + // Parse stdout to count tables if possible (pg_repack outputs "INFO: repacking table...") + tablesProcessed := strings.Count(stdout.String(), "INFO: repacking table") + report.TablesProcessed = tablesProcessed + + m.logger.Info("pg_repack completed", "stdout", stdout.String(), "duration", report.Duration, "tables", tablesProcessed) + return report, nil } func (m *pgRepackMechanism) Close(ctx context.Context) error { @@ -116,17 +132,17 @@ func ensurePgRepackExtension(ctx context.Context, db DBConnConfig, logger log.Lo func buildConnString(db DBConnConfig) string { host := db.Host if host == "" { - host = "127.0.0.1" + host = DefaultPostgresHost } port := db.Port if port == "" { - port = "5432" + port = DefaultPostgresPort } parts := []string{ fmt.Sprintf("host=%s", host), fmt.Sprintf("port=%s", port), fmt.Sprintf("dbname=%s", db.Database), - "sslmode=disable", + 
DefaultSSLMode, } if db.User != "" { parts = append(parts, fmt.Sprintf("user=%s", db.User)) diff --git a/extensions/tn_vacuum/metrics/metrics.go b/extensions/tn_vacuum/metrics/metrics.go new file mode 100644 index 000000000..da77b06e5 --- /dev/null +++ b/extensions/tn_vacuum/metrics/metrics.go @@ -0,0 +1,78 @@ +// Package metrics provides observability for the tn_vacuum extension. +// It uses a plugin pattern to ensure zero overhead when OpenTelemetry is not available. +package metrics + +import ( + "context" + "strings" + "time" + + "github.com/trufnetwork/kwil-db/core/log" + "go.opentelemetry.io/otel" +) + +// MetricsRecorder defines the interface for recording vacuum metrics. +// This allows for pluggable implementations - either real OTEL metrics or no-op. +type MetricsRecorder interface { + // Vacuum operation metrics + RecordVacuumStart(ctx context.Context, mechanism string) + RecordVacuumComplete(ctx context.Context, mechanism string, duration time.Duration, tablesProcessed int) + RecordVacuumError(ctx context.Context, mechanism string, errType string) + RecordVacuumSkipped(ctx context.Context, reason string) + + // Resource metrics + RecordLastRunHeight(ctx context.Context, height int64) +} + +// NewMetricsRecorder creates a metrics recorder instance. +// It automatically detects if OpenTelemetry is available and returns +// either a real OTEL implementation or a no-op implementation. 
+func NewMetricsRecorder(logger log.Logger) MetricsRecorder { + // Try to get the global meter provider + meter := otel.GetMeterProvider().Meter("github.com/trufnetwork/kwil-db/extensions/tn_vacuum") + + // Try to create a test metric to verify OTEL is functional + _, err := meter.Int64Counter("tn_vacuum.test") + if err != nil { + logger.Debug("OpenTelemetry not available, metrics disabled") + return NewNoOpMetrics() + } + + // OTEL is available, create real metrics recorder + otelMetrics, err := NewOTELMetrics(meter, logger) + if err != nil { + logger.Warn("failed to initialize OTEL metrics, falling back to no-op", "error", err) + return NewNoOpMetrics() + } + + logger.Info("OpenTelemetry metrics initialized successfully") + return otelMetrics +} + +// ClassifyError categorizes errors for metric labels to keep cardinality low +func ClassifyError(err error) string { + if err == nil { + return "none" + } + + errStr := err.Error() + switch { + case strings.Contains(errStr, "context deadline exceeded"): + return "timeout" + case strings.Contains(errStr, "context canceled"): + return "cancelled" + case strings.Contains(errStr, "connection"): + return "connection_error" + case strings.Contains(errStr, "pg_repack unavailable") || strings.Contains(errStr, "not found in PATH"): + return "binary_unavailable" + case strings.Contains(errStr, "execution failed"): + return "execution_failed" + case strings.Contains(errStr, "database") || strings.Contains(errStr, "sql"): + return "database_error" + case strings.Contains(errStr, "permission denied") || strings.Contains(errStr, "unauthorized"): + return "permission_denied" + default: + return "unknown" + } +} + diff --git a/extensions/tn_vacuum/metrics/metrics_test.go b/extensions/tn_vacuum/metrics/metrics_test.go new file mode 100644 index 000000000..b7bfc30a0 --- /dev/null +++ b/extensions/tn_vacuum/metrics/metrics_test.go @@ -0,0 +1,94 @@ +package metrics + +import ( + "context" + "errors" + "testing" + "time" + + 
"github.com/stretchr/testify/require" + "github.com/trufnetwork/kwil-db/core/log" +) + +func TestNoOpMetrics(t *testing.T) { + ctx := context.Background() + m := NewNoOpMetrics() + + // Should not panic + m.RecordVacuumStart(ctx, "test") + m.RecordVacuumComplete(ctx, "test", time.Second, 5) + m.RecordVacuumError(ctx, "test", "error") + m.RecordVacuumSkipped(ctx, "reason") + m.RecordLastRunHeight(ctx, 100) +} + +func TestClassifyError(t *testing.T) { + tests := []struct { + name string + err error + expected string + }{ + { + name: "nil error", + err: nil, + expected: "none", + }, + { + name: "timeout error", + err: errors.New("context deadline exceeded"), + expected: "timeout", + }, + { + name: "cancelled error", + err: errors.New("context canceled"), + expected: "cancelled", + }, + { + name: "connection error", + err: errors.New("connection refused"), + expected: "connection_error", + }, + { + name: "binary unavailable", + err: errors.New("pg_repack unavailable: not found in PATH"), + expected: "binary_unavailable", + }, + { + name: "execution failed", + err: errors.New("pg_repack execution failed"), + expected: "execution_failed", + }, + { + name: "database error", + err: errors.New("sql error: syntax error"), + expected: "database_error", + }, + { + name: "permission denied", + err: errors.New("permission denied"), + expected: "permission_denied", + }, + { + name: "unknown error", + err: errors.New("something went wrong"), + expected: "unknown", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := ClassifyError(tt.err) + require.Equal(t, tt.expected, result) + }) + } +} + +func TestNewMetricsRecorder(t *testing.T) { + logger := log.New() + m := NewMetricsRecorder(logger) + require.NotNil(t, m) + + // Should return a valid metrics recorder (either OTEL or NoOp) + ctx := context.Background() + m.RecordVacuumStart(ctx, "test") +} diff --git a/extensions/tn_vacuum/metrics/noop.go b/extensions/tn_vacuum/metrics/noop.go new file 
mode 100644 index 000000000..ad3329838 --- /dev/null +++ b/extensions/tn_vacuum/metrics/noop.go @@ -0,0 +1,23 @@ +package metrics + +import ( + "context" + "time" +) + +// NoOpMetrics is a no-op implementation of MetricsRecorder. +// It does nothing and has zero overhead. +type NoOpMetrics struct{} + +// NewNoOpMetrics creates a new no-op metrics recorder. +func NewNoOpMetrics() MetricsRecorder { + return &NoOpMetrics{} +} + +func (n *NoOpMetrics) RecordVacuumStart(ctx context.Context, mechanism string) {} +func (n *NoOpMetrics) RecordVacuumComplete(ctx context.Context, mechanism string, duration time.Duration, tablesProcessed int) { +} +func (n *NoOpMetrics) RecordVacuumError(ctx context.Context, mechanism string, errType string) {} +func (n *NoOpMetrics) RecordVacuumSkipped(ctx context.Context, reason string) {} +func (n *NoOpMetrics) RecordLastRunHeight(ctx context.Context, height int64) {} + diff --git a/extensions/tn_vacuum/metrics/otel.go b/extensions/tn_vacuum/metrics/otel.go new file mode 100644 index 000000000..47c68dfed --- /dev/null +++ b/extensions/tn_vacuum/metrics/otel.go @@ -0,0 +1,143 @@ +package metrics + +import ( + "context" + "time" + + "github.com/trufnetwork/kwil-db/core/log" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/metric" +) + +// OTELMetrics implements MetricsRecorder using OpenTelemetry. +type OTELMetrics struct { + logger log.Logger + + // Counters + vacuumStartCounter metric.Int64Counter + vacuumCompleteCounter metric.Int64Counter + vacuumErrorCounter metric.Int64Counter + vacuumSkippedCounter metric.Int64Counter + + // Histograms + vacuumDuration metric.Float64Histogram + tablesProcessed metric.Int64Histogram + + // Gauges + lastRunHeight metric.Int64Gauge +} + +// NewOTELMetrics creates a new OTEL metrics recorder. 
+func NewOTELMetrics(meter metric.Meter, logger log.Logger) (*OTELMetrics, error) { + m := &OTELMetrics{logger: logger} + + var err error + + // Create counters + m.vacuumStartCounter, err = meter.Int64Counter( + "tn_vacuum.vacuum_start_total", + metric.WithDescription("Total number of vacuum operations started"), + metric.WithUnit("{operation}"), + ) + if err != nil { + return nil, err + } + + m.vacuumCompleteCounter, err = meter.Int64Counter( + "tn_vacuum.vacuum_complete_total", + metric.WithDescription("Total number of vacuum operations completed successfully"), + metric.WithUnit("{operation}"), + ) + if err != nil { + return nil, err + } + + m.vacuumErrorCounter, err = meter.Int64Counter( + "tn_vacuum.vacuum_error_total", + metric.WithDescription("Total number of vacuum errors"), + metric.WithUnit("{error}"), + ) + if err != nil { + return nil, err + } + + m.vacuumSkippedCounter, err = meter.Int64Counter( + "tn_vacuum.vacuum_skipped_total", + metric.WithDescription("Total number of vacuum operations skipped"), + metric.WithUnit("{operation}"), + ) + if err != nil { + return nil, err + } + + // Create histograms + m.vacuumDuration, err = meter.Float64Histogram( + "tn_vacuum.vacuum_duration_seconds", + metric.WithDescription("Duration of vacuum operations"), + metric.WithUnit("s"), + ) + if err != nil { + return nil, err + } + + m.tablesProcessed, err = meter.Int64Histogram( + "tn_vacuum.tables_processed", + metric.WithDescription("Number of tables processed during vacuum"), + metric.WithUnit("{table}"), + ) + if err != nil { + return nil, err + } + + // Create gauges + m.lastRunHeight, err = meter.Int64Gauge( + "tn_vacuum.last_run_height", + metric.WithDescription("Block height of the last vacuum run"), + metric.WithUnit("{block}"), + ) + if err != nil { + return nil, err + } + + return m, nil +} + +func (m *OTELMetrics) RecordVacuumStart(ctx context.Context, mechanism string) { + m.vacuumStartCounter.Add(ctx, 1, + metric.WithAttributes( + 
attribute.String("mechanism", mechanism), + ), + ) +} + +func (m *OTELMetrics) RecordVacuumComplete(ctx context.Context, mechanism string, duration time.Duration, tablesProcessed int) { + attrs := metric.WithAttributes( + attribute.String("mechanism", mechanism), + ) + + m.vacuumCompleteCounter.Add(ctx, 1, attrs) + m.vacuumDuration.Record(ctx, duration.Seconds(), attrs) + m.tablesProcessed.Record(ctx, int64(tablesProcessed), attrs) +} + +func (m *OTELMetrics) RecordVacuumError(ctx context.Context, mechanism string, errType string) { + m.vacuumErrorCounter.Add(ctx, 1, + metric.WithAttributes( + attribute.String("mechanism", mechanism), + attribute.String("error_type", errType), + ), + ) +} + +func (m *OTELMetrics) RecordVacuumSkipped(ctx context.Context, reason string) { + m.vacuumSkippedCounter.Add(ctx, 1, + metric.WithAttributes( + attribute.String("reason", reason), + ), + ) +} + +func (m *OTELMetrics) RecordLastRunHeight(ctx context.Context, height int64) { + m.lastRunHeight.Record(ctx, height) +} + diff --git a/extensions/tn_vacuum/runner.go b/extensions/tn_vacuum/runner.go index f59e4e267..b09370424 100644 --- a/extensions/tn_vacuum/runner.go +++ b/extensions/tn_vacuum/runner.go @@ -2,8 +2,10 @@ package tn_vacuum import ( "context" + "time" "github.com/trufnetwork/kwil-db/core/log" + "github.com/trufnetwork/node/extensions/tn_vacuum/metrics" ) type Runner struct { @@ -15,6 +17,7 @@ type RunnerArgs struct { Logger log.Logger Reason string DB DBConnConfig + Metrics metrics.MetricsRecorder } func (r *Runner) Execute(ctx context.Context, args RunnerArgs) error { @@ -25,18 +28,44 @@ func (r *Runner) Execute(ctx context.Context, args RunnerArgs) error { if logger == nil { logger = args.Logger } + + mechanismName := args.Mechanism.Name() + if logger != nil { - logger.Info("vacuum runner executing", "mechanism", args.Mechanism.Name(), "reason", args.Reason) + logger.Info("vacuum runner executing", "mechanism", mechanismName, "reason", args.Reason) + } + if 
args.Metrics != nil { + args.Metrics.RecordVacuumStart(ctx, mechanismName) } - _, err := args.Mechanism.Run(ctx, RunRequest{Reason: args.Reason, DB: args.DB}) + + report, err := args.Mechanism.Run(ctx, RunRequest{Reason: args.Reason, DB: args.DB}) if err != nil { if logger != nil { logger.Warn("vacuum runner failed", "error", err) } + if args.Metrics != nil { + args.Metrics.RecordVacuumError(ctx, mechanismName, metrics.ClassifyError(err)) + } return err } + if logger != nil { - logger.Info("vacuum runner completed", "mechanism", args.Mechanism.Name()) + fields := []any{"mechanism", mechanismName} + if report != nil { + fields = append(fields, "duration", report.Duration, "tables", report.TablesProcessed) + } + logger.Info("vacuum runner completed", fields...) } + + if args.Metrics != nil { + var duration time.Duration + tables := 0 + if report != nil { + duration = report.Duration + tables = report.TablesProcessed + } + args.Metrics.RecordVacuumComplete(ctx, mechanismName, duration, tables) + } + return nil } diff --git a/extensions/tn_vacuum/vacuum_test.go b/extensions/tn_vacuum/vacuum_test.go index da47809a2..ba5ccacfe 100644 --- a/extensions/tn_vacuum/vacuum_test.go +++ b/extensions/tn_vacuum/vacuum_test.go @@ -4,6 +4,7 @@ import ( "context" "errors" "testing" + "time" "github.com/stretchr/testify/require" "github.com/trufnetwork/kwil-db/common" @@ -26,7 +27,12 @@ func (s *stubMechanism) Prepare(ctx context.Context, deps MechanismDeps) error { func (s *stubMechanism) Run(ctx context.Context, req RunRequest) (*RunReport, error) { s.runs = append(s.runs, req) - return &RunReport{Mechanism: s.Name(), Status: "ok"}, nil + return &RunReport{ + Mechanism: s.Name(), + Status: StatusOK, + Duration: 100 * time.Millisecond, + TablesProcessed: 5, + }, nil } func (s *stubMechanism) Close(ctx context.Context) error { @@ -45,6 +51,28 @@ func (f *failingMechanism) Run(ctx context.Context, req RunRequest) (*RunReport, } func (f *failingMechanism) Close(ctx context.Context) 
error { return nil } +type nilReportMechanism struct{} + +func (n *nilReportMechanism) Name() string { return "nil_report" } +func (n *nilReportMechanism) Prepare(ctx context.Context, deps MechanismDeps) error { + return nil +} +func (n *nilReportMechanism) Run(ctx context.Context, req RunRequest) (*RunReport, error) { + return nil, nil +} +func (n *nilReportMechanism) Close(ctx context.Context) error { return nil } + +type errorRunMechanism struct{} + +func (e *errorRunMechanism) Name() string { return "error_run" } +func (e *errorRunMechanism) Prepare(ctx context.Context, deps MechanismDeps) error { + return nil +} +func (e *errorRunMechanism) Run(ctx context.Context, req RunRequest) (*RunReport, error) { + return nil, errors.New("run failed") +} +func (e *errorRunMechanism) Close(ctx context.Context) error { return nil } + func TestConfigureDisabledSkipsMechanism(t *testing.T) { ctx := context.Background() ResetForTest() @@ -109,3 +137,172 @@ func TestConfigureFailureLeavesMechanismNil(t *testing.T) { defer ext.mu.RUnlock() require.Nil(t, ext.mechanism) } + +func TestRunReportEnhancement(t *testing.T) { + ctx := context.Background() + ResetForTest() + + stub := &stubMechanism{} + setMechanismFactoryForTest(func() Mechanism { return stub }) + defer resetMechanismFactory() + + svc := &common.Service{ + Logger: log.New(), + LocalConfig: &config.Config{Extensions: map[string]map[string]string{ + ExtensionName: { + ConfigKeyEnabled: "true", + ConfigKeyBlockInterval: "1", + }, + }}, + } + + app := &common.App{Service: svc} + require.NoError(t, engineReadyHook(ctx, app)) + + block := &common.BlockContext{Height: 1} + require.NoError(t, endBlockHook(ctx, app, block)) + + require.Len(t, stub.runs, 1) + + // Verify the stub returns enhanced report data + ext := GetExtension() + runner := ext.runner + require.NotNil(t, runner) + + report, err := stub.Run(ctx, RunRequest{Reason: "test"}) + require.NoError(t, err) + require.NotNil(t, report) + require.Equal(t, "stub", 
report.Mechanism) + require.Equal(t, StatusOK, report.Status) + require.Equal(t, 100*time.Millisecond, report.Duration) + require.Equal(t, 5, report.TablesProcessed) +} + +func TestVacuumSkippedMetrics(t *testing.T) { + ctx := context.Background() + ResetForTest() + + stub := &stubMechanism{} + setMechanismFactoryForTest(func() Mechanism { return stub }) + defer resetMechanismFactory() + + svc := &common.Service{ + Logger: log.New(), + LocalConfig: &config.Config{Extensions: map[string]map[string]string{ + ExtensionName: { + ConfigKeyEnabled: "true", + ConfigKeyBlockInterval: "10", + }, + }}, + } + + app := &common.App{Service: svc} + require.NoError(t, engineReadyHook(ctx, app)) + + // First run at height 1 + block := &common.BlockContext{Height: 1} + require.NoError(t, endBlockHook(ctx, app, block)) + require.Len(t, stub.runs, 1) + + // Should be skipped at height 5 (interval not met) + block = &common.BlockContext{Height: 5} + require.NoError(t, endBlockHook(ctx, app, block)) + require.Len(t, stub.runs, 1, "should not run - interval not met") + + // Should run at height 11 (interval met) + block = &common.BlockContext{Height: 11} + require.NoError(t, endBlockHook(ctx, app, block)) + require.Len(t, stub.runs, 2, "should run - interval met") +} + +func TestRunnerHandlesNilReport(t *testing.T) { + ctx := context.Background() + runner := &Runner{logger: log.New()} + metricsStub := &stubMetricsRecorder{} + + require.NoError(t, runner.Execute(ctx, RunnerArgs{ + Mechanism: &nilReportMechanism{}, + Logger: log.New(), + Reason: "test", + Metrics: metricsStub, + })) + + require.Equal(t, 1, metricsStub.startCount) + require.Equal(t, 1, metricsStub.completeCount) + require.Zero(t, metricsStub.lastDuration) + require.Zero(t, metricsStub.lastTables) + require.Equal(t, "nil_report", metricsStub.lastMechanism) +} + +func TestMaybeRunRecordsErrorOnce(t *testing.T) { + ctx := context.Background() + ResetForTest() + + setMechanismFactoryForTest(func() Mechanism { return 
&errorRunMechanism{} }) + defer resetMechanismFactory() + + svc := &common.Service{ + Logger: log.New(), + LocalConfig: &config.Config{Extensions: map[string]map[string]string{ + ExtensionName: { + ConfigKeyEnabled: "true", + ConfigKeyBlockInterval: "1", + }, + }}, + } + + app := &common.App{Service: svc} + require.NoError(t, engineReadyHook(ctx, app)) + + metricsStub := &stubMetricsRecorder{} + ext := GetExtension() + ext.mu.Lock() + ext.metrics = metricsStub + ext.mu.Unlock() + + require.NoError(t, endBlockHook(ctx, app, &common.BlockContext{Height: 1})) + require.Equal(t, 1, metricsStub.errorCount) + require.Equal(t, 1, metricsStub.startCount) + require.Equal(t, "error_run", metricsStub.lastErrorMechanism) +} + +type stubMetricsRecorder struct { + startCount int + completeCount int + errorCount int + skippedCount int + lastDuration time.Duration + lastTables int + lastMechanism string + lastErrorType string + lastErrorMechanism string + lastSkipReason string + lastHeight int64 +} + +func (s *stubMetricsRecorder) RecordVacuumStart(ctx context.Context, mechanism string) { + s.startCount++ + s.lastMechanism = mechanism +} + +func (s *stubMetricsRecorder) RecordVacuumComplete(ctx context.Context, mechanism string, duration time.Duration, tablesProcessed int) { + s.completeCount++ + s.lastMechanism = mechanism + s.lastDuration = duration + s.lastTables = tablesProcessed +} + +func (s *stubMetricsRecorder) RecordVacuumError(ctx context.Context, mechanism string, errType string) { + s.errorCount++ + s.lastErrorMechanism = mechanism + s.lastErrorType = errType +} + +func (s *stubMetricsRecorder) RecordVacuumSkipped(ctx context.Context, reason string) { + s.skippedCount++ + s.lastSkipReason = reason +} + +func (s *stubMetricsRecorder) RecordLastRunHeight(ctx context.Context, height int64) { + s.lastHeight = height +} diff --git a/go.mod b/go.mod index 97b511e85..d4bf15f09 100644 --- a/go.mod +++ b/go.mod @@ -19,8 +19,8 @@ require ( github.com/spf13/cobra v1.9.1 
github.com/stretchr/testify v1.10.0 github.com/testcontainers/testcontainers-go v0.37.0 - github.com/trufnetwork/kwil-db v0.10.3-0.20250926181531-88158eb10b64 - github.com/trufnetwork/kwil-db/core v0.4.3-0.20250926181531-88158eb10b64 + github.com/trufnetwork/kwil-db v0.10.3-0.20250929173952-120a6dd2189e + github.com/trufnetwork/kwil-db/core v0.4.3-0.20250929173952-120a6dd2189e github.com/trufnetwork/sdk-go v0.3.2-0.20250630062504-841b40cdb709 go.uber.org/zap v1.27.0 golang.org/x/exp v0.0.0-20250218142911-aa4b98e5adaa diff --git a/go.sum b/go.sum index f00599225..5ba806fc4 100644 --- a/go.sum +++ b/go.sum @@ -1214,8 +1214,12 @@ github.com/tklauser/numcpus v0.9.0 h1:lmyCHtANi8aRUgkckBgoDk1nHCux3n2cgkJLXdQGPD github.com/tklauser/numcpus v0.9.0/go.mod h1:SN6Nq1O3VychhC1npsWostA+oW+VOQTxZrS604NSRyI= github.com/trufnetwork/kwil-db v0.10.3-0.20250926181531-88158eb10b64 h1:FGv9XArb0LrzNBFVhL250+mp30jnnsZkPJChEM2zHgk= github.com/trufnetwork/kwil-db v0.10.3-0.20250926181531-88158eb10b64/go.mod h1:LiBAC48uZl2B0IiLtD2hpOce7RNfpuDdghVAOc3u1Qo= +github.com/trufnetwork/kwil-db v0.10.3-0.20250929173952-120a6dd2189e h1:31DihFBWOrV02Y59DsiHUT3fs0yEr5rnNs/zDZX3EVE= +github.com/trufnetwork/kwil-db v0.10.3-0.20250929173952-120a6dd2189e/go.mod h1:LiBAC48uZl2B0IiLtD2hpOce7RNfpuDdghVAOc3u1Qo= github.com/trufnetwork/kwil-db/core v0.4.3-0.20250926181531-88158eb10b64 h1:+HCpXbJ8sNcoADmBpgzz2ceqFc4JbKvGrVF4G7velsU= github.com/trufnetwork/kwil-db/core v0.4.3-0.20250926181531-88158eb10b64/go.mod h1:HnOsh9+BN13LJCjiH0+XKaJzyjWKf+H9AofFFp90KwQ= +github.com/trufnetwork/kwil-db/core v0.4.3-0.20250929173952-120a6dd2189e h1:ruVB/uBGMVhX7G31Dp2CyNE9XpmJtzX0+3Csw6XTq6s= +github.com/trufnetwork/kwil-db/core v0.4.3-0.20250929173952-120a6dd2189e/go.mod h1:HnOsh9+BN13LJCjiH0+XKaJzyjWKf+H9AofFFp90KwQ= github.com/trufnetwork/openzeppelin-merkle-tree-go v0.0.2 h1:DCq8MzbWH0wZmICNmMVsSzUHUPl+2vqRhluEABjxl88= github.com/trufnetwork/openzeppelin-merkle-tree-go v0.0.2/go.mod 
h1:Y0MJpPp9QXU5vC6Gpoilql2NkgmGNcbHm9HYC2v2N8s= github.com/trufnetwork/sdk-go v0.3.2-0.20250630062504-841b40cdb709 h1:d9EqPXIjbq/atzEncK5dM3Z9oStx1BxCGuL/sjefeCw= From e4c8b30331dc26e7386d307ba80c204b2c4fbd1e Mon Sep 17 00:00:00 2001 From: Raffael Campos Date: Mon, 29 Sep 2025 18:08:48 -0300 Subject: [PATCH 08/13] feat: implement state persistence for tn_vacuum extension This commit introduces state persistence to the `tn_vacuum` extension, allowing it to track the last successful vacuum run's block height and timestamp. Key changes include: - Added a new `runState` struct to encapsulate the last run information. - Implemented a `stateStore` interface and a `pgStateStore` struct for managing state persistence in PostgreSQL. - Enhanced the `Extension` struct to include state management methods for initializing, loading, and saving the run state. - Updated the `README.md` to document the new state persistence feature. - Added tests to validate the state persistence functionality and ensure correct behavior during vacuum operations. These enhancements improve the reliability of the vacuum process by allowing the extension to resume its operations seamlessly after restarts. --- extensions/tn_vacuum/README.md | 8 ++ extensions/tn_vacuum/extension.go | 124 ++++++++++++++++++++++---- extensions/tn_vacuum/state.go | 118 +++++++++++++++++++++++++ extensions/tn_vacuum/tn_vacuum.go | 4 + extensions/tn_vacuum/vacuum_test.go | 132 ++++++++++++++++++++++++++++ 5 files changed, 367 insertions(+), 19 deletions(-) create mode 100644 extensions/tn_vacuum/state.go diff --git a/extensions/tn_vacuum/README.md b/extensions/tn_vacuum/README.md index c43bdaa3e..d53417724 100644 --- a/extensions/tn_vacuum/README.md +++ b/extensions/tn_vacuum/README.md @@ -42,6 +42,14 @@ sudo yum install postgresql16 pg_repack_16 The extension automatically creates the `pg_repack` PostgreSQL extension on first run. 
+## State Persistence + +The extension keeps a lightweight bookkeeping table in your node database at +`ext_tn_vacuum.run_state`. On every successful run it records the block height +and timestamp, allowing restarts to pick up the schedule without re-running +immediately. The schema is prefixed with `ext_`, so it is ignored by consensus +hashing and remains entirely node-local. + ## Metrics When OpenTelemetry is enabled, the extension provides: diff --git a/extensions/tn_vacuum/extension.go b/extensions/tn_vacuum/extension.go index fd89972d2..5e02712dd 100644 --- a/extensions/tn_vacuum/extension.go +++ b/extensions/tn_vacuum/extension.go @@ -4,21 +4,25 @@ import ( "context" "fmt" "sync" + "time" "github.com/trufnetwork/kwil-db/common" "github.com/trufnetwork/kwil-db/core/log" + sql "github.com/trufnetwork/kwil-db/node/types/sql" "github.com/trufnetwork/node/extensions/tn_vacuum/metrics" ) type Extension struct { - mu sync.RWMutex - logger log.Logger - service *common.Service - config Config - mechanism Mechanism - runner *Runner - lastRunHeight int64 - metrics metrics.MetricsRecorder + mu sync.RWMutex + logger log.Logger + service *common.Service + config Config + mechanism Mechanism + runner *Runner + state runState + stateStore stateStore + now func() time.Time + metrics metrics.MetricsRecorder } var ( @@ -32,12 +36,16 @@ func GetExtension() *Extension { extInstance = &Extension{ logger: logger, metrics: metrics.NewMetricsRecorder(logger), + now: time.Now, } }) return extInstance } func SetExtension(e *Extension) { + if e != nil && e.now == nil { + e.now = time.Now + } extInstance = e } @@ -46,6 +54,22 @@ func ResetForTest() { extInstance = nil } +// setStateStore overrides the persistent state backend. Tests use this to +// inject a stub without touching a real database connection. 
+func (e *Extension) setStateStore(store stateStore) { + e.mu.Lock() + defer e.mu.Unlock() + e.stateStore = store +} + +// setNowFunc overrides the clock used for persisted timestamps. Tests provide +// deterministic values through this hook. +func (e *Extension) setNowFunc(now func() time.Time) { + e.mu.Lock() + defer e.mu.Unlock() + e.now = now +} + func (e *Extension) Logger() log.Logger { e.mu.RLock() defer e.mu.RUnlock() @@ -65,6 +89,50 @@ func (e *Extension) setService(s *common.Service) { e.service = s } +// initializeState prepares the persistence backend and loads the last run +// information from disk. It is safe to call multiple times; the underlying +// operations are idempotent. +func (e *Extension) initializeState(ctx context.Context, db sql.DB) { + e.mu.Lock() + if e.stateStore == nil { + if db == nil { + e.mu.Unlock() + return + } + e.stateStore = newPGStateStore(db, e.logger) + } + store := e.stateStore + metricsRecorder := e.metrics + logger := e.logger + e.mu.Unlock() + + if store == nil { + return + } + + if err := store.Ensure(ctx); err != nil { + logger.Warn("failed to prepare tn_vacuum state store", "error", err) + return + } + + state, ok, err := store.Load(ctx) + if err != nil { + logger.Warn("failed to load tn_vacuum state", "error", err) + return + } + if !ok { + return + } + + e.mu.Lock() + e.state = state + e.mu.Unlock() + + if metricsRecorder != nil { + metricsRecorder.RecordLastRunHeight(ctx, state.LastRunHeight) + } +} + func (e *Extension) configure(ctx context.Context, cfg Config) error { e.mu.Lock() defer e.mu.Unlock() @@ -75,7 +143,6 @@ func (e *Extension) configure(ctx context.Context, cfg Config) error { } e.config = cfg - e.lastRunHeight = 0 if !cfg.Enabled { return nil @@ -101,21 +168,27 @@ func (e *Extension) maybeRun(ctx context.Context, blockHeight int64) { cfg := e.config mech := e.mechanism runner := e.runner - last := e.lastRunHeight + state := e.state logger := e.logger svc := e.service metricsRecorder := e.metrics + 
nowFn := e.now e.mu.RUnlock() if !cfg.Enabled || mech == nil || runner == nil { return } - if last != 0 && blockHeight-last < cfg.BlockInterval { - if metricsRecorder != nil { - metricsRecorder.RecordVacuumSkipped(ctx, "block_interval_not_met") + if state.LastRunHeight != 0 { + if blockHeight <= state.LastRunHeight { + return + } + if blockHeight-state.LastRunHeight < cfg.BlockInterval { + if metricsRecorder != nil { + metricsRecorder.RecordVacuumSkipped(ctx, "block_interval_not_met") + } + return } - return } reason := fmt.Sprintf("block_interval:%d", blockHeight) @@ -131,14 +204,27 @@ func (e *Extension) maybeRun(ctx context.Context, blockHeight int64) { return } + newState := runState{LastRunHeight: blockHeight} + if nowFn != nil { + newState.LastRunAt = nowFn().UTC() + } + + var store stateStore e.mu.Lock() - if blockHeight > e.lastRunHeight { - e.lastRunHeight = blockHeight - if metricsRecorder != nil { - metricsRecorder.RecordLastRunHeight(ctx, blockHeight) - } + if blockHeight > e.state.LastRunHeight { + e.state = newState + store = e.stateStore } e.mu.Unlock() + + if store != nil { + if err := store.Save(ctx, newState); err != nil { + logger.Warn("failed to persist tn_vacuum state", "error", err) + } + } + if metricsRecorder != nil { + metricsRecorder.RecordLastRunHeight(ctx, blockHeight) + } } func (e *Extension) Close(ctx context.Context) { diff --git a/extensions/tn_vacuum/state.go b/extensions/tn_vacuum/state.go new file mode 100644 index 000000000..9a6865460 --- /dev/null +++ b/extensions/tn_vacuum/state.go @@ -0,0 +1,118 @@ +package tn_vacuum + +import ( + "context" + "fmt" + "time" + + "github.com/trufnetwork/kwil-db/core/log" + sql "github.com/trufnetwork/kwil-db/node/types/sql" +) + +const ( + // vacuumSchemaName is the private Postgres schema used to persist the + // extension's local bookkeeping. Schemas prefixed with ext_ are ignored by + // consensus hashing, keeping this purely node-local state. 
+ vacuumSchemaName = "ext_tn_vacuum" +) + +// runState is the minimal persisted view of the extension. It tracks the block +// height and timestamp of the last successful vacuum run. +type runState struct { + LastRunHeight int64 + LastRunAt time.Time +} + +// stateStore represents a persistence backend capable of storing and loading +// runState snapshots. +type stateStore interface { + Ensure(ctx context.Context) error + Load(ctx context.Context) (runState, bool, error) + Save(ctx context.Context, state runState) error +} + +// pgStateStore implements stateStore using the node's Postgres connection. +type pgStateStore struct { + db sql.DB + logger log.Logger +} + +// newPGStateStore constructs a Postgres-backed store. +func newPGStateStore(db sql.DB, logger log.Logger) stateStore { + return &pgStateStore{db: db, logger: logger} +} + +// Ensure creates the schema and table necessary for persistence. The +// operations are idempotent so the method can be invoked on every start. +func (s *pgStateStore) Ensure(ctx context.Context) error { + tx, err := s.db.BeginTx(ctx) + if err != nil { + return fmt.Errorf("begin state setup tx: %w", err) + } + defer func() { _ = tx.Rollback(ctx) }() + + if _, err := tx.Execute(ctx, fmt.Sprintf("CREATE SCHEMA IF NOT EXISTS %s", vacuumSchemaName)); err != nil { + return fmt.Errorf("create tn_vacuum schema: %w", err) + } + + if _, err := tx.Execute(ctx, fmt.Sprintf(` + CREATE TABLE IF NOT EXISTS %s.run_state ( + id INT PRIMARY KEY, + last_run_height BIGINT NOT NULL, + last_run_at TIMESTAMPTZ NOT NULL, + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() + )`, vacuumSchemaName)); err != nil { + return fmt.Errorf("create run_state table: %w", err) + } + + if err := tx.Commit(ctx); err != nil { + return fmt.Errorf("commit state setup tx: %w", err) + } + return nil +} + +// Load returns the previously persisted runState. The boolean indicates +// whether a record exists. 
+func (s *pgStateStore) Load(ctx context.Context) (runState, bool, error) { + rs, err := s.db.Execute(ctx, fmt.Sprintf(` + SELECT last_run_height, last_run_at + FROM %s.run_state + WHERE id = 1 + `, vacuumSchemaName)) + if err != nil { + return runState{}, false, fmt.Errorf("load run_state: %w", err) + } + + if len(rs.Rows) == 0 { + return runState{}, false, nil + } + + row := rs.Rows[0] + height, ok := row[0].(int64) + if !ok { + return runState{}, false, fmt.Errorf("unexpected type for last_run_height: %T", row[0]) + } + ts, ok := row[1].(time.Time) + if !ok { + return runState{}, false, fmt.Errorf("unexpected type for last_run_at: %T", row[1]) + } + return runState{LastRunHeight: height, LastRunAt: ts.UTC()}, true, nil +} + +// Save upserts the supplied runState. The extension only stores a single row, +// so the implementation uses a fixed primary key. +func (s *pgStateStore) Save(ctx context.Context, state runState) error { + _, err := s.db.Execute(ctx, fmt.Sprintf(` + INSERT INTO %s.run_state (id, last_run_height, last_run_at, updated_at) + VALUES (1, $1, $2, NOW()) + ON CONFLICT (id) + DO UPDATE SET + last_run_height = EXCLUDED.last_run_height, + last_run_at = EXCLUDED.last_run_at, + updated_at = NOW() + `, vacuumSchemaName), state.LastRunHeight, state.LastRunAt) + if err != nil { + return fmt.Errorf("persist run_state: %w", err) + } + return nil +} diff --git a/extensions/tn_vacuum/tn_vacuum.go b/extensions/tn_vacuum/tn_vacuum.go index ebe6c8bb9..f1f8f5395 100644 --- a/extensions/tn_vacuum/tn_vacuum.go +++ b/extensions/tn_vacuum/tn_vacuum.go @@ -38,6 +38,10 @@ func engineReadyHook(ctx context.Context, app *common.App) error { ext.setService(app.Service) } + if app != nil { + ext.initializeState(ctx, app.DB) + } + svc := (*common.Service)(nil) if app != nil { svc = app.Service diff --git a/extensions/tn_vacuum/vacuum_test.go b/extensions/tn_vacuum/vacuum_test.go index ba5ccacfe..032367104 100644 --- a/extensions/tn_vacuum/vacuum_test.go +++ 
b/extensions/tn_vacuum/vacuum_test.go @@ -73,6 +73,39 @@ func (e *errorRunMechanism) Run(ctx context.Context, req RunRequest) (*RunReport } func (e *errorRunMechanism) Close(ctx context.Context) error { return nil } +type stubStateStore struct { + ensureCount int + loadCount int + saveCount int + loadState runState + loadOK bool + loadErr error + saveErr error + lastSaved runState +} + +func (s *stubStateStore) Ensure(ctx context.Context) error { + s.ensureCount++ + return nil +} + +func (s *stubStateStore) Load(ctx context.Context) (runState, bool, error) { + s.loadCount++ + if s.loadErr != nil { + return runState{}, false, s.loadErr + } + return s.loadState, s.loadOK, nil +} + +func (s *stubStateStore) Save(ctx context.Context, state runState) error { + s.saveCount++ + s.lastSaved = state + if s.saveErr != nil { + return s.saveErr + } + return nil +} + func TestConfigureDisabledSkipsMechanism(t *testing.T) { ctx := context.Background() ResetForTest() @@ -215,6 +248,105 @@ func TestVacuumSkippedMetrics(t *testing.T) { require.Len(t, stub.runs, 2, "should run - interval met") } +func TestEngineReadyLoadsPersistedState(t *testing.T) { + ctx := context.Background() + ResetForTest() + + stub := &stubMechanism{} + setMechanismFactoryForTest(func() Mechanism { return stub }) + defer resetMechanismFactory() + + store := &stubStateStore{ + loadState: runState{LastRunHeight: 12, LastRunAt: time.Unix(50, 0)}, + loadOK: true, + } + + svc := &common.Service{ + Logger: log.New(), + LocalConfig: &config.Config{Extensions: map[string]map[string]string{ + ExtensionName: { + ConfigKeyEnabled: "true", + ConfigKeyBlockInterval: "5", + }, + }}, + } + + app := &common.App{Service: svc} + + ext := GetExtension() + ext.setLogger(log.New()) + ext.setStateStore(store) + + require.NoError(t, engineReadyHook(ctx, app)) + require.Equal(t, 1, store.ensureCount) + require.Equal(t, 1, store.loadCount) + + ext.mu.RLock() + require.Equal(t, int64(12), ext.state.LastRunHeight) + ext.mu.RUnlock() + 
+ metricsStub := &stubMetricsRecorder{} + ext.mu.Lock() + ext.metrics = metricsStub + ext.mu.Unlock() + + require.NoError(t, endBlockHook(ctx, app, &common.BlockContext{Height: 14})) + require.Len(t, stub.runs, 0, "should not run before interval is met") + + require.NoError(t, endBlockHook(ctx, app, &common.BlockContext{Height: 18})) + require.Len(t, stub.runs, 1, "should run once interval is met") + require.Equal(t, 1, store.saveCount) + require.Equal(t, int64(18), metricsStub.lastHeight) +} + +func TestSuccessfulRunPersistsState(t *testing.T) { + ctx := context.Background() + ResetForTest() + + stub := &stubMechanism{} + setMechanismFactoryForTest(func() Mechanism { return stub }) + defer resetMechanismFactory() + + store := &stubStateStore{} + + svc := &common.Service{ + Logger: log.New(), + LocalConfig: &config.Config{Extensions: map[string]map[string]string{ + ExtensionName: { + ConfigKeyEnabled: "true", + ConfigKeyBlockInterval: "1", + }, + }}, + } + + app := &common.App{Service: svc} + + ext := GetExtension() + ext.setLogger(log.New()) + ext.setStateStore(store) + + now := time.Unix(100, 0) + ext.setNowFunc(func() time.Time { return now }) + + require.NoError(t, engineReadyHook(ctx, app)) + require.Equal(t, 1, store.ensureCount) + + metricsStub := &stubMetricsRecorder{} + ext.mu.Lock() + ext.metrics = metricsStub + ext.mu.Unlock() + + require.NoError(t, endBlockHook(ctx, app, &common.BlockContext{Height: 5})) + require.Equal(t, 1, store.saveCount) + require.Equal(t, int64(5), store.lastSaved.LastRunHeight) + require.Equal(t, now.UTC(), store.lastSaved.LastRunAt) + require.Equal(t, 1, metricsStub.completeCount) + require.Equal(t, int64(5), metricsStub.lastHeight) + + require.NoError(t, endBlockHook(ctx, app, &common.BlockContext{Height: 5})) + require.Equal(t, 1, store.saveCount, "duplicate height should not persist again") +} + func TestRunnerHandlesNilReport(t *testing.T) { ctx := context.Background() runner := &Runner{logger: log.New()} From 
09162f71ef080f920b8b9862dfa81c1b2568bc09 Mon Sep 17 00:00:00 2001 From: Raffael Campos Date: Tue, 30 Sep 2025 12:13:26 -0300 Subject: [PATCH 09/13] refactor: update Dockerfile to build pg_repack against PostgreSQL 16 This commit modifies the Dockerfile to enhance the build process by introducing a new stage for building `pg_repack` against PostgreSQL 16 client libraries. Key changes include: - Replaced the busybox base image with a PostgreSQL 16-alpine image for building `pg_repack`. - Added necessary build dependencies and commands to compile and install `pg_repack`. - Updated the final image to copy the `pg_repack` binary from the build stage, ensuring it is available for use. These changes streamline the Docker image setup and ensure compatibility with the latest PostgreSQL version. --- deployments/Dockerfile | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/deployments/Dockerfile b/deployments/Dockerfile index 902819ff2..ac0c5bf88 100644 --- a/deployments/Dockerfile +++ b/deployments/Dockerfile @@ -17,10 +17,20 @@ COPY deployments/tn-entrypoint.sh ./deployments/tn-entrypoint.sh # todo: incorporate task build process, otherwise images will lack information about the build RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o /app/.build/kwild /app/cmd/kwild/main.go -FROM busybox:1.35.0-uclibc as busybox -# busy box will provide us with a shell to run commands in distroless +FROM postgres:16-alpine AS pg_repack_builder -FROM alpine:latest +# build pg_repack against postgres 16 client libraries +RUN apk add --no-cache build-base clang19 gawk llvm19 curl zlib-dev readline-dev openssl-dev lz4-dev zstd-dev && \ + curl -fsSL -o /tmp/pg_repack.tar.gz https://codeload.github.com/reorg/pg_repack/tar.gz/refs/tags/ver_1.5.2 && \ + tar -xzf /tmp/pg_repack.tar.gz && rm /tmp/pg_repack.tar.gz && \ + cd pg_repack-ver_1.5.2 && \ + make USE_PGXS=1 PG_CONFIG=/usr/local/bin/pg_config && \ + make USE_PGXS=1 PG_CONFIG=/usr/local/bin/pg_config 
install && \ + PG_BINDIR="$(/usr/local/bin/pg_config --bindir)" && \ + install -Dm755 "$PG_BINDIR/pg_repack" /opt/pg_repack/bin/pg_repack && \ + cd .. && rm -rf pg_repack-ver_1.5.2 + +FROM alpine:3.21 ENV SETUP_CHAIN_ID=truflation-dev ENV SETUP_DB_OWNER= @@ -29,8 +39,11 @@ ENV CONFIG_PATH=/root/.kwild WORKDIR /app -# add postgres client tools and pg_repack binary used by tn_vacuum -RUN apk add --no-cache postgresql16-client pg_repack +# add postgres client tools and the pg_repack binary used by tn_vacuum +RUN apk add --no-cache postgresql16-client + +# copy pg_repack CLI from build stage +COPY --from=pg_repack_builder /opt/pg_repack/bin/pg_repack /usr/local/bin/pg_repack # move .build content to /app COPY --from=build /app/.build/* /app/ From d67db03be34fd610230d50ce271bbe31864f02b1 Mon Sep 17 00:00:00 2001 From: Raffael Campos Date: Tue, 30 Sep 2025 12:15:01 -0300 Subject: [PATCH 10/13] chore: update kwil-db and core dependencies to latest versions This commit updates the `go.mod` and `go.sum` files to reference the latest versions of the `kwil-db` and `kwil-db/core` packages, ensuring the project benefits from recent improvements and fixes. Additionally, it introduces a new configuration option `pg_repack_jobs` to the `tn_vacuum` extension, allowing users to limit the concurrency of `pg_repack` operations. The README has been updated to reflect this new configuration option, and tests have been added to validate the new functionality. 
--- extensions/tn_vacuum/README.md | 14 +- extensions/tn_vacuum/config.go | 12 + extensions/tn_vacuum/constants.go | 1 + extensions/tn_vacuum/extension.go | 268 ++++++++++++++---- extensions/tn_vacuum/mechanism.go | 6 +- extensions/tn_vacuum/mechanism_repack.go | 43 ++- extensions/tn_vacuum/mechanism_repack_test.go | 39 +++ extensions/tn_vacuum/runner.go | 19 +- extensions/tn_vacuum/state.go | 124 ++++---- extensions/tn_vacuum/tn_vacuum.go | 5 + extensions/tn_vacuum/vacuum_test.go | 67 ++++- go.mod | 4 +- go.sum | 4 + 13 files changed, 467 insertions(+), 139 deletions(-) create mode 100644 extensions/tn_vacuum/mechanism_repack_test.go diff --git a/extensions/tn_vacuum/README.md b/extensions/tn_vacuum/README.md index d53417724..3321f5725 100644 --- a/extensions/tn_vacuum/README.md +++ b/extensions/tn_vacuum/README.md @@ -4,18 +4,22 @@ Automated database maintenance through periodic vacuuming operations. Reclaims d ## Configuration -Add to your node's configuration file: +Add to your node's configuration file (all values are strings due to the +extension config format): ```toml [extensions.tn_vacuum] -enabled = true -block_interval = 50000 +enabled = "true" +block_interval = "50000" +# Optional tuning +# pg_repack_jobs = "1" # limit pg_repack concurrency (default: auto) ``` | Option | Type | Default | Description | |--------|------|---------|-------------| -| `enabled` | boolean | `true` | Enable/disable the vacuum extension (`true` or `false`) | -| `block_interval` | integer | `50000` | Number of blocks between vacuum runs | +| `enabled` | string (`"true"`/`"false"`) | `"true"` | Enable/disable the vacuum extension | +| `block_interval` | string (numeric) | `"50000"` | Number of blocks between vacuum runs | +| `pg_repack_jobs` | string (numeric) | _unset_ | Passes `--jobs=N` to `pg_repack` to throttle concurrency (omit to use the binary default) | **Tuning `block_interval`:** - High-write environments: `25000` - `50000` blocks diff --git 
a/extensions/tn_vacuum/config.go b/extensions/tn_vacuum/config.go index 088771275..267eb1e3a 100644 --- a/extensions/tn_vacuum/config.go +++ b/extensions/tn_vacuum/config.go @@ -11,6 +11,7 @@ import ( type Config struct { Enabled bool BlockInterval int64 + PgRepackJobs int } func LoadConfig(service *common.Service) (Config, error) { @@ -47,6 +48,17 @@ func LoadConfig(service *common.Service) (Config, error) { cfg.BlockInterval = val } + if v, ok := raw[ConfigKeyPgRepackJobs]; ok { + val, err := strconv.Atoi(strings.TrimSpace(v)) + if err != nil { + return cfg, fmt.Errorf("parse pg_repack_jobs: %w", err) + } + if val < 0 { + val = 0 + } + cfg.PgRepackJobs = val + } + return cfg, nil } diff --git a/extensions/tn_vacuum/constants.go b/extensions/tn_vacuum/constants.go index 5537a2d03..5326b291c 100644 --- a/extensions/tn_vacuum/constants.go +++ b/extensions/tn_vacuum/constants.go @@ -14,6 +14,7 @@ const ( const ( ConfigKeyEnabled = "enabled" ConfigKeyBlockInterval = "block_interval" + ConfigKeyPgRepackJobs = "pg_repack_jobs" ) // Database connection defaults diff --git a/extensions/tn_vacuum/extension.go b/extensions/tn_vacuum/extension.go index 5e02712dd..15c2499e6 100644 --- a/extensions/tn_vacuum/extension.go +++ b/extensions/tn_vacuum/extension.go @@ -13,16 +13,21 @@ import ( ) type Extension struct { - mu sync.RWMutex - logger log.Logger - service *common.Service - config Config - mechanism Mechanism - runner *Runner - state runState - stateStore stateStore - now func() time.Time - metrics metrics.MetricsRecorder + mu sync.RWMutex + logger log.Logger + service *common.Service + config Config + mechanism Mechanism + runner *Runner + state runState + stateStore stateStore + now func() time.Time + metrics metrics.MetricsRecorder + runQueue chan runRequest + workerCtx context.Context + workerCancel context.CancelFunc + workerWG sync.WaitGroup + runInProgress bool } var ( @@ -30,6 +35,15 @@ var ( once sync.Once ) +type runRequest struct { + height int64 + reason 
string + dbConfig DBConnConfig + triggeredAt time.Time + PgRepackJobs int + PgRepackNoOrder bool +} + func GetExtension() *Extension { once.Do(func() { logger := log.New(log.WithLevel(log.LevelInfo)) @@ -50,6 +64,9 @@ func SetExtension(e *Extension) { } func ResetForTest() { + if extInstance != nil { + extInstance.Close(context.Background()) + } once = sync.Once{} extInstance = nil } @@ -89,22 +106,81 @@ func (e *Extension) setService(s *common.Service) { e.service = s } +// startWorkerLocked spins up the background worker responsible for consuming +// queued run requests. The caller must hold e.mu. +func (e *Extension) startWorkerLocked(parent context.Context) { + if e.runQueue != nil { + return + } + if parent == nil { + parent = context.Background() + } + ctx, cancel := context.WithCancel(parent) + e.workerCtx = ctx + e.workerCancel = cancel + e.runQueue = make(chan runRequest, 1) + e.workerWG.Add(1) + runQueue := e.runQueue + eworker := e + go func() { + defer eworker.workerWG.Done() + for { + select { + case <-ctx.Done(): + return + case req, ok := <-runQueue: + if !ok { + return + } + eworker.processRun(ctx, req) + } + } + }() +} + // initializeState prepares the persistence backend and loads the last run // information from disk. It is safe to call multiple times; the underlying // operations are idempotent. 
func (e *Extension) initializeState(ctx context.Context, db sql.DB) { - e.mu.Lock() - if e.stateStore == nil { - if db == nil { + e.mu.RLock() + store := e.stateStore + service := e.service + logger := e.logger + metricsRecorder := e.metrics + e.mu.RUnlock() + + if store == nil { + cfg := DBConnConfig{} + if service != nil { + cfg = dbConnFromService(service) + } + if cfg.Database == "" { + logger.Warn("tn_vacuum state persistence disabled: database name missing") + e.mu.Lock() + if e.stateStore == nil { + e.stateStore = noopStateStore{} + } e.mu.Unlock() return } - e.stateStore = newPGStateStore(db, e.logger) + + newStore, err := newPGStateStore(ctx, cfg, logger) + if err != nil { + logger.Warn("failed to initialize tn_vacuum state store", "error", err) + return + } + + e.mu.Lock() + if e.stateStore == nil { + e.stateStore = newStore + store = newStore + metricsRecorder = e.metrics + } else { + store = e.stateStore + newStore.Close() + } + e.mu.Unlock() } - store := e.stateStore - metricsRecorder := e.metrics - logger := e.logger - e.mu.Unlock() if store == nil { return @@ -133,6 +209,94 @@ func (e *Extension) initializeState(ctx context.Context, db sql.DB) { } } +// processRun executes a scheduled vacuum request on the worker goroutine. It +// assumes only a single worker is active at a time so no additional locking is +// required outside of bookkeeping updates. 
+func (e *Extension) processRun(ctx context.Context, req runRequest) { + e.mu.Lock() + mech := e.mechanism + runner := e.runner + logger := e.logger + metricsRecorder := e.metrics + store := e.stateStore + nowFn := e.now + config := e.config + e.runInProgress = true + e.mu.Unlock() + + if !config.Enabled || mech == nil || runner == nil { + e.mu.Lock() + e.runInProgress = false + e.mu.Unlock() + return + } + + runCtx, cancel := context.WithCancel(ctx) + defer cancel() + + err := runner.Execute(runCtx, RunnerArgs{ + Mechanism: mech, + Logger: logger, + Reason: req.reason, + DB: req.dbConfig, + Metrics: metricsRecorder, + PgRepackJobs: req.PgRepackJobs, + PgRepackNoOrder: req.PgRepackNoOrder, + }) + + if err != nil { + logger.Warn("vacuum run failed", "error", err, "height", req.height, "reason", req.reason) + e.mu.Lock() + e.runInProgress = false + e.mu.Unlock() + return + } + + newState := runState{LastRunHeight: req.height} + if nowFn != nil { + newState.LastRunAt = nowFn().UTC() + } + + if store != nil { + if err := store.Save(runCtx, newState); err != nil { + logger.Warn("failed to persist tn_vacuum state", "error", err) + } + } + if metricsRecorder != nil { + metricsRecorder.RecordLastRunHeight(runCtx, req.height) + } + + e.mu.Lock() + if req.height > e.state.LastRunHeight { + e.state = newState + } + e.runInProgress = false + e.mu.Unlock() +} + +// enqueueRun places a run request on the worker queue if no job is already +// pending or in progress. It returns false when the worker is busy so callers +// can record a skip metric. 
+func (e *Extension) enqueueRun(ctx context.Context, req runRequest) bool { + e.mu.Lock() + defer e.mu.Unlock() + if e.runQueue == nil { + e.startWorkerLocked(ctx) + } + if e.runInProgress { + return false + } + if len(e.runQueue) > 0 { + return false + } + select { + case e.runQueue <- req: + return true + default: + return false + } +} + func (e *Extension) configure(ctx context.Context, cfg Config) error { e.mu.Lock() defer e.mu.Unlock() @@ -192,49 +356,53 @@ func (e *Extension) maybeRun(ctx context.Context, blockHeight int64) { } reason := fmt.Sprintf("block_interval:%d", blockHeight) - err := runner.Execute(ctx, RunnerArgs{ - Mechanism: mech, - Logger: logger, - Reason: reason, - DB: dbConnFromService(svc), - Metrics: metricsRecorder, - }) - if err != nil { - logger.Warn("vacuum run failed", "error", err, "height", blockHeight, "reason", reason) - return - } - - newState := runState{LastRunHeight: blockHeight} + triggeredAt := time.Now() if nowFn != nil { - newState.LastRunAt = nowFn().UTC() + triggeredAt = nowFn() } - - var store stateStore - e.mu.Lock() - if blockHeight > e.state.LastRunHeight { - e.state = newState - store = e.stateStore + req := runRequest{ + height: blockHeight, + reason: reason, + dbConfig: dbConnFromService(svc), + triggeredAt: triggeredAt, + PgRepackJobs: cfg.PgRepackJobs, } - e.mu.Unlock() - if store != nil { - if err := store.Save(ctx, newState); err != nil { - logger.Warn("failed to persist tn_vacuum state", "error", err) + if !e.enqueueRun(ctx, req) { + if metricsRecorder != nil { + metricsRecorder.RecordVacuumSkipped(ctx, "worker_busy") } - } - if metricsRecorder != nil { - metricsRecorder.RecordLastRunHeight(ctx, blockHeight) + logger.Debug("vacuum run already queued or in progress", "height", blockHeight, "reason", reason) } } func (e *Extension) Close(ctx context.Context) { e.mu.Lock() - defer e.mu.Unlock() - if e.mechanism != nil { - _ = e.mechanism.Close(ctx) - e.mechanism = nil - } + mech := e.mechanism + e.mechanism = nil 
e.runner = nil + store := e.stateStore + e.stateStore = nil + queue := e.runQueue + e.runQueue = nil + cancel := e.workerCancel + e.workerCancel = nil + e.workerCtx = nil + e.mu.Unlock() + + if mech != nil { + _ = mech.Close(ctx) + } + if store != nil { + store.Close() + } + if cancel != nil { + cancel() + } + if queue != nil { + close(queue) + } + e.workerWG.Wait() } func dbConnFromService(service *common.Service) DBConnConfig { diff --git a/extensions/tn_vacuum/mechanism.go b/extensions/tn_vacuum/mechanism.go index 72d539864..229e50479 100644 --- a/extensions/tn_vacuum/mechanism.go +++ b/extensions/tn_vacuum/mechanism.go @@ -20,8 +20,10 @@ type MechanismDeps struct { } type RunRequest struct { - Reason string - DB DBConnConfig + Reason string + DB DBConnConfig + PgRepackJobs int + PgRepackNoOrder bool } type RunReport struct { diff --git a/extensions/tn_vacuum/mechanism_repack.go b/extensions/tn_vacuum/mechanism_repack.go index 485ce0dde..805b27046 100644 --- a/extensions/tn_vacuum/mechanism_repack.go +++ b/extensions/tn_vacuum/mechanism_repack.go @@ -73,6 +73,12 @@ func (m *pgRepackMechanism) Run(ctx context.Context, req RunRequest) (*RunReport args = append(args, fmt.Sprintf("--username=%s", db.User)) } + if req.PgRepackJobs > 0 { + args = append(args, fmt.Sprintf("--jobs=%d", req.PgRepackJobs)) + } + // Always skip reordering to minimize swap time; logical data remains unchanged. + args = append(args, "--no-order") + cmd := exec.CommandContext(ctx, m.binaryPath, args...) 
env := os.Environ() if db.Password != "" { @@ -94,14 +100,45 @@ func (m *pgRepackMechanism) Run(ctx context.Context, req RunRequest) (*RunReport } report.Duration = time.Since(startTime) - // Parse stdout to count tables if possible (pg_repack outputs "INFO: repacking table...") - tablesProcessed := strings.Count(stdout.String(), "INFO: repacking table") + output := stdout.String() + stderr.String() + if err := detectPgRepackSoftFailure(stderr.String()); err != nil { + report.Status = StatusFailed + report.Error = err.Error() + m.logger.Warn("pg_repack reported incompatibility", "stderr", stderr.String(), "duration", report.Duration) + return report, err + } + tablesProcessed := strings.Count(output, "INFO: repacking table") report.TablesProcessed = tablesProcessed - m.logger.Info("pg_repack completed", "stdout", stdout.String(), "duration", report.Duration, "tables", tablesProcessed) + if tablesProcessed == 0 { + report.Status = StatusFailed + report.Error = "pg_repack completed without processing any tables" + m.logger.Warn("pg_repack completed but processed no tables", "stderr", stderr.String()) + return report, fmt.Errorf("pg_repack processed zero tables") + } + + m.logger.Info("pg_repack completed", "stdout", stdout.String(), "stderr", stderr.String(), "duration", report.Duration, "tables", tablesProcessed) return report, nil } +func detectPgRepackSoftFailure(stderr string) error { + lowered := strings.ToLower(stderr) + switch { + case strings.Contains(lowered, "does not match database library"): + return fmt.Errorf("pg_repack version mismatch: %s", summarizePgRepackError(stderr)) + default: + return nil + } +} + +func summarizePgRepackError(stderr string) string { + lines := strings.Split(strings.TrimSpace(stderr), "\n") + if len(lines) == 0 { + return "" + } + return strings.TrimSpace(lines[len(lines)-1]) +} + func (m *pgRepackMechanism) Close(ctx context.Context) error { if m.logger != nil { m.logger.Info("pg_repack mechanism closed") diff --git 
a/extensions/tn_vacuum/mechanism_repack_test.go b/extensions/tn_vacuum/mechanism_repack_test.go new file mode 100644 index 000000000..cfc535d3b --- /dev/null +++ b/extensions/tn_vacuum/mechanism_repack_test.go @@ -0,0 +1,39 @@ +package tn_vacuum + +import "testing" + +func TestDetectPgRepackSoftFailure(t *testing.T) { + tests := []struct { + name string + stderr string + expects bool + }{ + { + name: "version mismatch", + stderr: "INFO: database \"kwild\" skipped: program 'pg_repack 1.5.0' does not match database library 'pg_repack 1.5.2'", + expects: true, + }, + { + name: "extension missing", + stderr: "INFO: database \"kwild\" skipped: pg_repack 1.5.0 is not installed in the database", + expects: false, + }, + { + name: "no issues", + stderr: "INFO: repacking database \"kwild\"\nINFO: repacking table \"public\".\"foo\"", + expects: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := detectPgRepackSoftFailure(tt.stderr) + if tt.expects && err == nil { + t.Fatalf("expected error, got nil") + } + if !tt.expects && err != nil { + t.Fatalf("expected no error, got %v", err) + } + }) + } +} diff --git a/extensions/tn_vacuum/runner.go b/extensions/tn_vacuum/runner.go index b09370424..ae4746ba0 100644 --- a/extensions/tn_vacuum/runner.go +++ b/extensions/tn_vacuum/runner.go @@ -13,11 +13,13 @@ type Runner struct { } type RunnerArgs struct { - Mechanism Mechanism - Logger log.Logger - Reason string - DB DBConnConfig - Metrics metrics.MetricsRecorder + Mechanism Mechanism + Logger log.Logger + Reason string + DB DBConnConfig + Metrics metrics.MetricsRecorder + PgRepackJobs int + PgRepackNoOrder bool } func (r *Runner) Execute(ctx context.Context, args RunnerArgs) error { @@ -38,7 +40,12 @@ func (r *Runner) Execute(ctx context.Context, args RunnerArgs) error { args.Metrics.RecordVacuumStart(ctx, mechanismName) } - report, err := args.Mechanism.Run(ctx, RunRequest{Reason: args.Reason, DB: args.DB}) + report, err := 
args.Mechanism.Run(ctx, RunRequest{ + Reason: args.Reason, + DB: args.DB, + PgRepackJobs: args.PgRepackJobs, + PgRepackNoOrder: args.PgRepackNoOrder, + }) if err != nil { if logger != nil { logger.Warn("vacuum runner failed", "error", err) diff --git a/extensions/tn_vacuum/state.go b/extensions/tn_vacuum/state.go index 9a6865460..8c8078971 100644 --- a/extensions/tn_vacuum/state.go +++ b/extensions/tn_vacuum/state.go @@ -2,11 +2,13 @@ package tn_vacuum import ( "context" + "errors" "fmt" "time" + "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgxpool" "github.com/trufnetwork/kwil-db/core/log" - sql "github.com/trufnetwork/kwil-db/node/types/sql" ) const ( @@ -29,90 +31,92 @@ type stateStore interface { Ensure(ctx context.Context) error Load(ctx context.Context) (runState, bool, error) Save(ctx context.Context, state runState) error + Close() } -// pgStateStore implements stateStore using the node's Postgres connection. +// pgStateStore implements stateStore using a dedicated pgx connection pool. type pgStateStore struct { - db sql.DB + pool *pgxpool.Pool logger log.Logger } -// newPGStateStore constructs a Postgres-backed store. -func newPGStateStore(db sql.DB, logger log.Logger) stateStore { - return &pgStateStore{db: db, logger: logger} -} - -// Ensure creates the schema and table necessary for persistence. The -// operations are idempotent so the method can be invoked on every start. -func (s *pgStateStore) Ensure(ctx context.Context) error { - tx, err := s.db.BeginTx(ctx) +// newPGStateStore constructs a Postgres-backed store with its own pool. 
+func newPGStateStore(ctx context.Context, cfg DBConnConfig, logger log.Logger) (stateStore, error) { + connStr := buildConnString(cfg) + poolCfg, err := pgxpool.ParseConfig(connStr) if err != nil { - return fmt.Errorf("begin state setup tx: %w", err) + return nil, fmt.Errorf("parse connection config: %w", err) } - defer func() { _ = tx.Rollback(ctx) }() + pool, err := pgxpool.NewWithConfig(ctx, poolCfg) + if err != nil { + return nil, fmt.Errorf("create connection pool: %w", err) + } + return &pgStateStore{pool: pool, logger: logger}, nil +} - if _, err := tx.Execute(ctx, fmt.Sprintf("CREATE SCHEMA IF NOT EXISTS %s", vacuumSchemaName)); err != nil { +func (s *pgStateStore) Ensure(ctx context.Context) error { + if _, err := s.pool.Exec(ctx, fmt.Sprintf("CREATE SCHEMA IF NOT EXISTS %s", vacuumSchemaName)); err != nil { return fmt.Errorf("create tn_vacuum schema: %w", err) } - if _, err := tx.Execute(ctx, fmt.Sprintf(` - CREATE TABLE IF NOT EXISTS %s.run_state ( - id INT PRIMARY KEY, - last_run_height BIGINT NOT NULL, - last_run_at TIMESTAMPTZ NOT NULL, - updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() - )`, vacuumSchemaName)); err != nil { + if _, err := s.pool.Exec(ctx, fmt.Sprintf(` + CREATE TABLE IF NOT EXISTS %s.run_state ( + id INT PRIMARY KEY, + last_run_height BIGINT NOT NULL, + last_run_at TIMESTAMPTZ NOT NULL, + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() + )`, vacuumSchemaName)); err != nil { return fmt.Errorf("create run_state table: %w", err) } - - if err := tx.Commit(ctx); err != nil { - return fmt.Errorf("commit state setup tx: %w", err) - } return nil } -// Load returns the previously persisted runState. The boolean indicates -// whether a record exists. 
func (s *pgStateStore) Load(ctx context.Context) (runState, bool, error) { - rs, err := s.db.Execute(ctx, fmt.Sprintf(` - SELECT last_run_height, last_run_at - FROM %s.run_state - WHERE id = 1 - `, vacuumSchemaName)) - if err != nil { - return runState{}, false, fmt.Errorf("load run_state: %w", err) - } - - if len(rs.Rows) == 0 { - return runState{}, false, nil - } + row := s.pool.QueryRow(ctx, fmt.Sprintf(` + SELECT last_run_height, last_run_at + FROM %s.run_state + WHERE id = 1 + `, vacuumSchemaName)) - row := rs.Rows[0] - height, ok := row[0].(int64) - if !ok { - return runState{}, false, fmt.Errorf("unexpected type for last_run_height: %T", row[0]) - } - ts, ok := row[1].(time.Time) - if !ok { - return runState{}, false, fmt.Errorf("unexpected type for last_run_at: %T", row[1]) + var state runState + if err := row.Scan(&state.LastRunHeight, &state.LastRunAt); err != nil { + if errors.Is(err, pgx.ErrNoRows) { + return runState{}, false, nil + } + return runState{}, false, fmt.Errorf("load run_state: %w", err) } - return runState{LastRunHeight: height, LastRunAt: ts.UTC()}, true, nil + state.LastRunAt = state.LastRunAt.UTC() + return state, true, nil } -// Save upserts the supplied runState. The extension only stores a single row, -// so the implementation uses a fixed primary key. 
func (s *pgStateStore) Save(ctx context.Context, state runState) error { - _, err := s.db.Execute(ctx, fmt.Sprintf(` - INSERT INTO %s.run_state (id, last_run_height, last_run_at, updated_at) - VALUES (1, $1, $2, NOW()) - ON CONFLICT (id) - DO UPDATE SET - last_run_height = EXCLUDED.last_run_height, - last_run_at = EXCLUDED.last_run_at, - updated_at = NOW() - `, vacuumSchemaName), state.LastRunHeight, state.LastRunAt) + _, err := s.pool.Exec(ctx, fmt.Sprintf(` + INSERT INTO %s.run_state (id, last_run_height, last_run_at, updated_at) + VALUES (1, $1, $2, NOW()) + ON CONFLICT (id) + DO UPDATE SET + last_run_height = EXCLUDED.last_run_height, + last_run_at = EXCLUDED.last_run_at, + updated_at = NOW() + `, vacuumSchemaName), state.LastRunHeight, state.LastRunAt) if err != nil { return fmt.Errorf("persist run_state: %w", err) } return nil } + +func (s *pgStateStore) Close() { + if s.pool != nil { + s.pool.Close() + } +} + +// noopStateStore is used when persistence is disabled or misconfigured. 
+type noopStateStore struct{} + +func (noopStateStore) Ensure(ctx context.Context) error { return nil } +func (noopStateStore) Load(ctx context.Context) (runState, bool, error) { + return runState{}, false, nil +} +func (noopStateStore) Save(ctx context.Context, state runState) error { return nil } +func (noopStateStore) Close() {} diff --git a/extensions/tn_vacuum/tn_vacuum.go b/extensions/tn_vacuum/tn_vacuum.go index f1f8f5395..99a7e52ad 100644 --- a/extensions/tn_vacuum/tn_vacuum.go +++ b/extensions/tn_vacuum/tn_vacuum.go @@ -56,6 +56,11 @@ func engineReadyHook(ctx context.Context, app *common.App) error { if err := ext.configure(ctx, cfg); err != nil { ext.Logger().Warn("failed to configure tn_vacuum", "error", err) } + if cfg.Enabled { + ext.mu.Lock() + ext.startWorkerLocked(ctx) + ext.mu.Unlock() + } return nil } diff --git a/extensions/tn_vacuum/vacuum_test.go b/extensions/tn_vacuum/vacuum_test.go index 032367104..b45da404d 100644 --- a/extensions/tn_vacuum/vacuum_test.go +++ b/extensions/tn_vacuum/vacuum_test.go @@ -106,6 +106,8 @@ func (s *stubStateStore) Save(ctx context.Context, state runState) error { return nil } +func (s *stubStateStore) Close() {} + func TestConfigureDisabledSkipsMechanism(t *testing.T) { ctx := context.Background() ResetForTest() @@ -132,8 +134,9 @@ func TestEngineReadyPreparesMechanism(t *testing.T) { Logger: log.New(), LocalConfig: &config.Config{Extensions: map[string]map[string]string{ ExtensionName: { - "enabled": "true", - "block_interval": "3", + "enabled": "true", + "block_interval": "3", + ConfigKeyPgRepackJobs: "2", }, }}, } @@ -144,14 +147,16 @@ func TestEngineReadyPreparesMechanism(t *testing.T) { block := &common.BlockContext{Height: 1} require.NoError(t, endBlockHook(ctx, app, block)) - require.Len(t, stub.runs, 1) + waitForRunCount(t, stub, 1) require.Equal(t, "block_interval:1", stub.runs[0].Reason) + require.Equal(t, 2, stub.runs[0].PgRepackJobs) require.NoError(t, endBlockHook(ctx, app, &common.BlockContext{Height: 
2})) + time.Sleep(50 * time.Millisecond) require.Len(t, stub.runs, 1) require.NoError(t, endBlockHook(ctx, app, &common.BlockContext{Height: 4})) - require.Len(t, stub.runs, 2) + waitForRunCount(t, stub, 2) } func TestConfigureFailureLeavesMechanismNil(t *testing.T) { @@ -195,7 +200,7 @@ func TestRunReportEnhancement(t *testing.T) { block := &common.BlockContext{Height: 1} require.NoError(t, endBlockHook(ctx, app, block)) - require.Len(t, stub.runs, 1) + waitForRunCount(t, stub, 1) // Verify the stub returns enhanced report data ext := GetExtension() @@ -235,17 +240,18 @@ func TestVacuumSkippedMetrics(t *testing.T) { // First run at height 1 block := &common.BlockContext{Height: 1} require.NoError(t, endBlockHook(ctx, app, block)) - require.Len(t, stub.runs, 1) + waitForRunCount(t, stub, 1) // Should be skipped at height 5 (interval not met) block = &common.BlockContext{Height: 5} require.NoError(t, endBlockHook(ctx, app, block)) + time.Sleep(50 * time.Millisecond) require.Len(t, stub.runs, 1, "should not run - interval not met") // Should run at height 11 (interval met) block = &common.BlockContext{Height: 11} require.NoError(t, endBlockHook(ctx, app, block)) - require.Len(t, stub.runs, 2, "should run - interval met") + waitForRunCount(t, stub, 2) } func TestEngineReadyLoadsPersistedState(t *testing.T) { @@ -291,11 +297,12 @@ func TestEngineReadyLoadsPersistedState(t *testing.T) { ext.mu.Unlock() require.NoError(t, endBlockHook(ctx, app, &common.BlockContext{Height: 14})) + time.Sleep(50 * time.Millisecond) require.Len(t, stub.runs, 0, "should not run before interval is met") require.NoError(t, endBlockHook(ctx, app, &common.BlockContext{Height: 18})) - require.Len(t, stub.runs, 1, "should run once interval is met") - require.Equal(t, 1, store.saveCount) + waitForRunCount(t, stub, 1) + waitForCondition(t, time.Second, func() bool { return store.saveCount == 1 }) require.Equal(t, int64(18), metricsStub.lastHeight) } @@ -337,13 +344,15 @@ func 
TestSuccessfulRunPersistsState(t *testing.T) { ext.mu.Unlock() require.NoError(t, endBlockHook(ctx, app, &common.BlockContext{Height: 5})) - require.Equal(t, 1, store.saveCount) + waitForRunCount(t, stub, 1) + waitForCondition(t, time.Second, func() bool { return store.saveCount == 1 }) require.Equal(t, int64(5), store.lastSaved.LastRunHeight) require.Equal(t, now.UTC(), store.lastSaved.LastRunAt) require.Equal(t, 1, metricsStub.completeCount) require.Equal(t, int64(5), metricsStub.lastHeight) require.NoError(t, endBlockHook(ctx, app, &common.BlockContext{Height: 5})) + time.Sleep(50 * time.Millisecond) require.Equal(t, 1, store.saveCount, "duplicate height should not persist again") } @@ -393,11 +402,26 @@ func TestMaybeRunRecordsErrorOnce(t *testing.T) { ext.mu.Unlock() require.NoError(t, endBlockHook(ctx, app, &common.BlockContext{Height: 1})) - require.Equal(t, 1, metricsStub.errorCount) + waitForCondition(t, time.Second, func() bool { return metricsStub.errorCount == 1 }) require.Equal(t, 1, metricsStub.startCount) require.Equal(t, "error_run", metricsStub.lastErrorMechanism) } +func TestEnqueueRunBusy(t *testing.T) { + ctx := context.Background() + ResetForTest() + + ext := GetExtension() + ext.setLogger(log.New()) + ext.mu.Lock() + ext.runQueue = make(chan runRequest, 1) + ext.runInProgress = true + ext.mu.Unlock() + + req := runRequest{height: 1, reason: "test"} + require.False(t, ext.enqueueRun(ctx, req)) +} + type stubMetricsRecorder struct { startCount int completeCount int @@ -438,3 +462,24 @@ func (s *stubMetricsRecorder) RecordVacuumSkipped(ctx context.Context, reason st func (s *stubMetricsRecorder) RecordLastRunHeight(ctx context.Context, height int64) { s.lastHeight = height } + +func waitForCondition(t *testing.T, timeout time.Duration, fn func() bool) { + t.Helper() + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + if fn() { + return + } + time.Sleep(10 * time.Millisecond) + } + if fn() { + return + } + 
t.Fatalf("condition not met within %s", timeout) +} + +func waitForRunCount(t *testing.T, stub *stubMechanism, count int) { + waitForCondition(t, time.Second, func() bool { + return len(stub.runs) >= count + }) +} diff --git a/go.mod b/go.mod index d4bf15f09..90e70c245 100644 --- a/go.mod +++ b/go.mod @@ -19,8 +19,8 @@ require ( github.com/spf13/cobra v1.9.1 github.com/stretchr/testify v1.10.0 github.com/testcontainers/testcontainers-go v0.37.0 - github.com/trufnetwork/kwil-db v0.10.3-0.20250929173952-120a6dd2189e - github.com/trufnetwork/kwil-db/core v0.4.3-0.20250929173952-120a6dd2189e + github.com/trufnetwork/kwil-db v0.10.3-0.20250930151143-372c7bcfcb2c + github.com/trufnetwork/kwil-db/core v0.4.3-0.20250930151143-372c7bcfcb2c github.com/trufnetwork/sdk-go v0.3.2-0.20250630062504-841b40cdb709 go.uber.org/zap v1.27.0 golang.org/x/exp v0.0.0-20250218142911-aa4b98e5adaa diff --git a/go.sum b/go.sum index 5ba806fc4..ff19fad08 100644 --- a/go.sum +++ b/go.sum @@ -1216,10 +1216,14 @@ github.com/trufnetwork/kwil-db v0.10.3-0.20250926181531-88158eb10b64 h1:FGv9XArb github.com/trufnetwork/kwil-db v0.10.3-0.20250926181531-88158eb10b64/go.mod h1:LiBAC48uZl2B0IiLtD2hpOce7RNfpuDdghVAOc3u1Qo= github.com/trufnetwork/kwil-db v0.10.3-0.20250929173952-120a6dd2189e h1:31DihFBWOrV02Y59DsiHUT3fs0yEr5rnNs/zDZX3EVE= github.com/trufnetwork/kwil-db v0.10.3-0.20250929173952-120a6dd2189e/go.mod h1:LiBAC48uZl2B0IiLtD2hpOce7RNfpuDdghVAOc3u1Qo= +github.com/trufnetwork/kwil-db v0.10.3-0.20250930151143-372c7bcfcb2c h1:k1nUWyzL16z3gObbvNqXJevhPsYKKdU59JDquYnoIdY= +github.com/trufnetwork/kwil-db v0.10.3-0.20250930151143-372c7bcfcb2c/go.mod h1:LiBAC48uZl2B0IiLtD2hpOce7RNfpuDdghVAOc3u1Qo= github.com/trufnetwork/kwil-db/core v0.4.3-0.20250926181531-88158eb10b64 h1:+HCpXbJ8sNcoADmBpgzz2ceqFc4JbKvGrVF4G7velsU= github.com/trufnetwork/kwil-db/core v0.4.3-0.20250926181531-88158eb10b64/go.mod h1:HnOsh9+BN13LJCjiH0+XKaJzyjWKf+H9AofFFp90KwQ= github.com/trufnetwork/kwil-db/core 
v0.4.3-0.20250929173952-120a6dd2189e h1:ruVB/uBGMVhX7G31Dp2CyNE9XpmJtzX0+3Csw6XTq6s= github.com/trufnetwork/kwil-db/core v0.4.3-0.20250929173952-120a6dd2189e/go.mod h1:HnOsh9+BN13LJCjiH0+XKaJzyjWKf+H9AofFFp90KwQ= +github.com/trufnetwork/kwil-db/core v0.4.3-0.20250930151143-372c7bcfcb2c h1:n7s2AehSgLYUpTZi7GfhkoCebB0ClkMBr8+p0iYd2vI= +github.com/trufnetwork/kwil-db/core v0.4.3-0.20250930151143-372c7bcfcb2c/go.mod h1:HnOsh9+BN13LJCjiH0+XKaJzyjWKf+H9AofFFp90KwQ= github.com/trufnetwork/openzeppelin-merkle-tree-go v0.0.2 h1:DCq8MzbWH0wZmICNmMVsSzUHUPl+2vqRhluEABjxl88= github.com/trufnetwork/openzeppelin-merkle-tree-go v0.0.2/go.mod h1:Y0MJpPp9QXU5vC6Gpoilql2NkgmGNcbHm9HYC2v2N8s= github.com/trufnetwork/sdk-go v0.3.2-0.20250630062504-841b40cdb709 h1:d9EqPXIjbq/atzEncK5dM3Z9oStx1BxCGuL/sjefeCw= From 2118b3c52f06811ffa1e5e012f19da1180cbc165 Mon Sep 17 00:00:00 2001 From: Raffael Campos Date: Tue, 30 Sep 2025 15:07:22 -0300 Subject: [PATCH 11/13] refactor: improve error handling and logging in tn_vacuum extension This commit enhances the `tn_vacuum` extension by refining error handling and logging mechanisms. Key changes include: - Updated the `initializeState` method to return errors instead of logging warnings, providing better error propagation. - Improved logging messages in the `pgRepackMechanism` to use error level for critical issues, ensuring visibility of failures. - Refactored the `RunnerArgs` and `RunRequest` structures for consistency in field ordering. - Added checks for database configuration in the `configure` method to enforce required settings. These improvements enhance the robustness and maintainability of the extension, ensuring clearer error reporting and better handling of critical conditions. 
--- extensions/tn_vacuum/extension.go | 52 +++++----- extensions/tn_vacuum/mechanism.go | 19 +++- extensions/tn_vacuum/mechanism_repack.go | 2 +- extensions/tn_vacuum/runner.go | 20 ++-- extensions/tn_vacuum/tn_vacuum.go | 4 +- extensions/tn_vacuum/vacuum_test.go | 95 ++++++++++++------- .../erc20/meta_extension_deposit_test.go | 81 ---------------- 7 files changed, 112 insertions(+), 161 deletions(-) delete mode 100644 node/exts/erc20-bridge/erc20/meta_extension_deposit_test.go diff --git a/extensions/tn_vacuum/extension.go b/extensions/tn_vacuum/extension.go index 15c2499e6..7faa4aaf6 100644 --- a/extensions/tn_vacuum/extension.go +++ b/extensions/tn_vacuum/extension.go @@ -36,12 +36,11 @@ var ( ) type runRequest struct { - height int64 - reason string - dbConfig DBConnConfig - triggeredAt time.Time - PgRepackJobs int - PgRepackNoOrder bool + height int64 + reason string + dbConfig DBConnConfig + triggeredAt time.Time + PgRepackJobs int } func GetExtension() *Extension { @@ -141,7 +140,7 @@ func (e *Extension) startWorkerLocked(parent context.Context) { // initializeState prepares the persistence backend and loads the last run // information from disk. It is safe to call multiple times; the underlying // operations are idempotent. 
-func (e *Extension) initializeState(ctx context.Context, db sql.DB) { +func (e *Extension) initializeState(ctx context.Context, db sql.DB) error { e.mu.RLock() store := e.stateStore service := e.service @@ -155,19 +154,12 @@ func (e *Extension) initializeState(ctx context.Context, db sql.DB) { cfg = dbConnFromService(service) } if cfg.Database == "" { - logger.Warn("tn_vacuum state persistence disabled: database name missing") - e.mu.Lock() - if e.stateStore == nil { - e.stateStore = noopStateStore{} - } - e.mu.Unlock() - return + return fmt.Errorf("tn_vacuum state persistence requires database name") } newStore, err := newPGStateStore(ctx, cfg, logger) if err != nil { - logger.Warn("failed to initialize tn_vacuum state store", "error", err) - return + return fmt.Errorf("initialize tn_vacuum state store: %w", err) } e.mu.Lock() @@ -183,21 +175,19 @@ func (e *Extension) initializeState(ctx context.Context, db sql.DB) { } if store == nil { - return + return fmt.Errorf("tn_vacuum state store unavailable") } if err := store.Ensure(ctx); err != nil { - logger.Warn("failed to prepare tn_vacuum state store", "error", err) - return + return fmt.Errorf("prepare tn_vacuum state store: %w", err) } state, ok, err := store.Load(ctx) if err != nil { - logger.Warn("failed to load tn_vacuum state", "error", err) - return + return fmt.Errorf("load tn_vacuum state: %w", err) } if !ok { - return + return nil } e.mu.Lock() @@ -207,6 +197,8 @@ func (e *Extension) initializeState(ctx context.Context, db sql.DB) { if metricsRecorder != nil { metricsRecorder.RecordLastRunHeight(ctx, state.LastRunHeight) } + + return nil } // processRun executes a scheduled vacuum request on the worker goroutine. 
It @@ -235,13 +227,12 @@ func (e *Extension) processRun(ctx context.Context, req runRequest) { defer cancel() err := runner.Execute(runCtx, RunnerArgs{ - Mechanism: mech, - Logger: logger, - Reason: req.reason, - DB: req.dbConfig, - Metrics: metricsRecorder, - PgRepackJobs: req.PgRepackJobs, - PgRepackNoOrder: req.PgRepackNoOrder, + Mechanism: mech, + Logger: logger, + Reason: req.reason, + DB: req.dbConfig, + Metrics: metricsRecorder, + PgRepackJobs: req.PgRepackJobs, }) if err != nil { @@ -314,6 +305,9 @@ func (e *Extension) configure(ctx context.Context, cfg Config) error { mech := newMechanism() deps := MechanismDeps{Logger: e.logger, DB: dbConnFromService(e.service)} + if deps.DB.Database == "" { + return fmt.Errorf("tn_vacuum requires database name in configuration") + } if err := mech.Prepare(ctx, deps); err != nil { return err } diff --git a/extensions/tn_vacuum/mechanism.go b/extensions/tn_vacuum/mechanism.go index 229e50479..5f808f5e3 100644 --- a/extensions/tn_vacuum/mechanism.go +++ b/extensions/tn_vacuum/mechanism.go @@ -2,6 +2,7 @@ package tn_vacuum import ( "context" + "sync" "time" "github.com/trufnetwork/kwil-db/core/log" @@ -20,10 +21,9 @@ type MechanismDeps struct { } type RunRequest struct { - Reason string - DB DBConnConfig - PgRepackJobs int - PgRepackNoOrder bool + Reason string + DB DBConnConfig + PgRepackJobs int } type RunReport struct { @@ -42,16 +42,25 @@ type DBConnConfig struct { Database string } -var mechanismFactory = func() Mechanism { return NewPgRepackMechanism() } +var ( + mechanismFactory = func() Mechanism { return NewPgRepackMechanism() } + mechanismFactoryMu sync.RWMutex +) func newMechanism() Mechanism { + mechanismFactoryMu.RLock() + defer mechanismFactoryMu.RUnlock() return mechanismFactory() } func setMechanismFactoryForTest(f func() Mechanism) { + mechanismFactoryMu.Lock() + defer mechanismFactoryMu.Unlock() mechanismFactory = f } func resetMechanismFactory() { + mechanismFactoryMu.Lock() + defer 
mechanismFactoryMu.Unlock() mechanismFactory = func() Mechanism { return NewPgRepackMechanism() } } diff --git a/extensions/tn_vacuum/mechanism_repack.go b/extensions/tn_vacuum/mechanism_repack.go index 805b27046..92320c4c2 100644 --- a/extensions/tn_vacuum/mechanism_repack.go +++ b/extensions/tn_vacuum/mechanism_repack.go @@ -33,7 +33,7 @@ func (m *pgRepackMechanism) Prepare(ctx context.Context, deps MechanismDeps) err m.db = deps.DB path, err := exec.LookPath("pg_repack") if err != nil { - m.logger.Warn("pg_repack binary not found; vacuum runs will fail until available", "error", err) + m.logger.Error("pg_repack binary not found; extension cannot start", "error", err) return ErrPgRepackUnavailable } m.binaryPath = path diff --git a/extensions/tn_vacuum/runner.go b/extensions/tn_vacuum/runner.go index ae4746ba0..9d9f1f215 100644 --- a/extensions/tn_vacuum/runner.go +++ b/extensions/tn_vacuum/runner.go @@ -13,13 +13,12 @@ type Runner struct { } type RunnerArgs struct { - Mechanism Mechanism - Logger log.Logger - Reason string - DB DBConnConfig - Metrics metrics.MetricsRecorder - PgRepackJobs int - PgRepackNoOrder bool + Mechanism Mechanism + Logger log.Logger + Reason string + DB DBConnConfig + Metrics metrics.MetricsRecorder + PgRepackJobs int } func (r *Runner) Execute(ctx context.Context, args RunnerArgs) error { @@ -41,10 +40,9 @@ func (r *Runner) Execute(ctx context.Context, args RunnerArgs) error { } report, err := args.Mechanism.Run(ctx, RunRequest{ - Reason: args.Reason, - DB: args.DB, - PgRepackJobs: args.PgRepackJobs, - PgRepackNoOrder: args.PgRepackNoOrder, + Reason: args.Reason, + DB: args.DB, + PgRepackJobs: args.PgRepackJobs, }) if err != nil { if logger != nil { diff --git a/extensions/tn_vacuum/tn_vacuum.go b/extensions/tn_vacuum/tn_vacuum.go index 99a7e52ad..9bae64f33 100644 --- a/extensions/tn_vacuum/tn_vacuum.go +++ b/extensions/tn_vacuum/tn_vacuum.go @@ -39,7 +39,9 @@ func engineReadyHook(ctx context.Context, app *common.App) error { } if app != 
nil { - ext.initializeState(ctx, app.DB) + if err := ext.initializeState(ctx, app.DB); err != nil { + ext.Logger().Warn("failed to initialize tn_vacuum state", "error", err) + } } svc := (*common.Service)(nil) diff --git a/extensions/tn_vacuum/vacuum_test.go b/extensions/tn_vacuum/vacuum_test.go index b45da404d..5f0b73fe3 100644 --- a/extensions/tn_vacuum/vacuum_test.go +++ b/extensions/tn_vacuum/vacuum_test.go @@ -132,15 +132,21 @@ func TestEngineReadyPreparesMechanism(t *testing.T) { svc := &common.Service{ Logger: log.New(), - LocalConfig: &config.Config{Extensions: map[string]map[string]string{ - ExtensionName: { - "enabled": "true", - "block_interval": "3", - ConfigKeyPgRepackJobs: "2", + LocalConfig: &config.Config{ + DB: config.DBConfig{DBName: "kwild_test"}, + Extensions: map[string]map[string]string{ + ExtensionName: { + "enabled": "true", + "block_interval": "3", + ConfigKeyPgRepackJobs: "2", + }, }, - }}, + }, } + ext := GetExtension() + ext.setStateStore(&stubStateStore{}) + app := &common.App{Service: svc} require.NoError(t, engineReadyHook(ctx, app)) require.Equal(t, 1, stub.prepared) @@ -164,6 +170,7 @@ func TestConfigureFailureLeavesMechanismNil(t *testing.T) { ResetForTest() ext := GetExtension() ext.setLogger(log.New()) + ext.setService(&common.Service{LocalConfig: &config.Config{DB: config.DBConfig{DBName: "kwild_test"}}}) setMechanismFactoryForTest(func() Mechanism { return &failingMechanism{} }) defer resetMechanismFactory() @@ -186,14 +193,20 @@ func TestRunReportEnhancement(t *testing.T) { svc := &common.Service{ Logger: log.New(), - LocalConfig: &config.Config{Extensions: map[string]map[string]string{ - ExtensionName: { - ConfigKeyEnabled: "true", - ConfigKeyBlockInterval: "1", + LocalConfig: &config.Config{ + DB: config.DBConfig{DBName: "kwild_test"}, + Extensions: map[string]map[string]string{ + ExtensionName: { + ConfigKeyEnabled: "true", + ConfigKeyBlockInterval: "1", + }, }, - }}, + }, } + ext := GetExtension() + 
ext.setStateStore(&stubStateStore{}) + app := &common.App{Service: svc} require.NoError(t, engineReadyHook(ctx, app)) @@ -203,7 +216,6 @@ func TestRunReportEnhancement(t *testing.T) { waitForRunCount(t, stub, 1) // Verify the stub returns enhanced report data - ext := GetExtension() runner := ext.runner require.NotNil(t, runner) @@ -226,14 +238,20 @@ func TestVacuumSkippedMetrics(t *testing.T) { svc := &common.Service{ Logger: log.New(), - LocalConfig: &config.Config{Extensions: map[string]map[string]string{ - ExtensionName: { - ConfigKeyEnabled: "true", - ConfigKeyBlockInterval: "10", + LocalConfig: &config.Config{ + DB: config.DBConfig{DBName: "kwild_test"}, + Extensions: map[string]map[string]string{ + ExtensionName: { + ConfigKeyEnabled: "true", + ConfigKeyBlockInterval: "10", + }, }, - }}, + }, } + ext := GetExtension() + ext.setStateStore(&stubStateStore{}) + app := &common.App{Service: svc} require.NoError(t, engineReadyHook(ctx, app)) @@ -269,12 +287,15 @@ func TestEngineReadyLoadsPersistedState(t *testing.T) { svc := &common.Service{ Logger: log.New(), - LocalConfig: &config.Config{Extensions: map[string]map[string]string{ - ExtensionName: { - ConfigKeyEnabled: "true", - ConfigKeyBlockInterval: "5", + LocalConfig: &config.Config{ + DB: config.DBConfig{DBName: "kwild_test"}, + Extensions: map[string]map[string]string{ + ExtensionName: { + ConfigKeyEnabled: "true", + ConfigKeyBlockInterval: "5", + }, }, - }}, + }, } app := &common.App{Service: svc} @@ -318,12 +339,15 @@ func TestSuccessfulRunPersistsState(t *testing.T) { svc := &common.Service{ Logger: log.New(), - LocalConfig: &config.Config{Extensions: map[string]map[string]string{ - ExtensionName: { - ConfigKeyEnabled: "true", - ConfigKeyBlockInterval: "1", + LocalConfig: &config.Config{ + DB: config.DBConfig{DBName: "kwild_test"}, + Extensions: map[string]map[string]string{ + ExtensionName: { + ConfigKeyEnabled: "true", + ConfigKeyBlockInterval: "1", + }, }, - }}, + }, } app := &common.App{Service: svc} 
@@ -384,19 +408,24 @@ func TestMaybeRunRecordsErrorOnce(t *testing.T) { svc := &common.Service{ Logger: log.New(), - LocalConfig: &config.Config{Extensions: map[string]map[string]string{ - ExtensionName: { - ConfigKeyEnabled: "true", - ConfigKeyBlockInterval: "1", + LocalConfig: &config.Config{ + DB: config.DBConfig{DBName: "kwild_test"}, + Extensions: map[string]map[string]string{ + ExtensionName: { + ConfigKeyEnabled: "true", + ConfigKeyBlockInterval: "1", + }, }, - }}, + }, } + ext := GetExtension() + ext.setStateStore(&stubStateStore{}) + app := &common.App{Service: svc} require.NoError(t, engineReadyHook(ctx, app)) metricsStub := &stubMetricsRecorder{} - ext := GetExtension() ext.mu.Lock() ext.metrics = metricsStub ext.mu.Unlock() diff --git a/node/exts/erc20-bridge/erc20/meta_extension_deposit_test.go b/node/exts/erc20-bridge/erc20/meta_extension_deposit_test.go deleted file mode 100644 index dbc591080..000000000 --- a/node/exts/erc20-bridge/erc20/meta_extension_deposit_test.go +++ /dev/null @@ -1,81 +0,0 @@ -//go:build kwiltest - -package erc20 - -import ( - "context" - "math/big" - "testing" - - ethcommon "github.com/ethereum/go-ethereum/common" - ethtypes "github.com/ethereum/go-ethereum/core/types" - "github.com/stretchr/testify/require" - - "github.com/trufnetwork/kwil-db/core/types" - "github.com/trufnetwork/kwil-db/node/exts/evm-sync/chains" - orderedsync "github.com/trufnetwork/kwil-db/node/exts/ordered-sync" -) - -// TestApplyDepositLog verifies that applyDepositLog credits the deposit recipient. 
-func TestApplyDepositLog(t *testing.T) { - ctx := context.Background() - db, err := newTestDB() - require.NoError(t, err) - defer db.Close() - - tx, err := db.BeginTx(ctx) - require.NoError(t, err) - defer tx.Rollback(ctx) - - app := setup(t, tx) - - id := newUUID() - chainInfo, ok := chains.GetChainInfoByID("11155111") - if !ok { - t.Fatalf("missing chain info for test chain") - } - - upd := &userProvidedData{ - ID: id, - ChainInfo: &chainInfo, - EscrowAddress: ethcommon.HexToAddress("0x00000000000000000000000000000000000000aa"), - DistributionPeriod: 3600, - } - - require.NoError(t, createNewRewardInstance(ctx, app, upd)) - - require.NoError(t, setRewardSynced(ctx, app, id, 1, &syncedRewardData{ - Erc20Address: ethcommon.HexToAddress("0x00000000000000000000000000000000000000bb"), - Erc20Decimals: 18, - })) - - recipient := ethcommon.HexToAddress("0x00000000000000000000000000000000000000cc") - amount := big.NewInt(1_500_000_000_000_000_000) - - var data [64]byte - copy(data[32-len(recipient.Bytes()):32], recipient.Bytes()) - copy(data[64-len(amount.Bytes()):], amount.Bytes()) - - depositLog := ethtypes.Log{ - Address: upd.EscrowAddress, - Topics: []ethcommon.Hash{ - ethcommon.HexToHash("0xe1fffcc4923d04b559f4d29a8bfc6cda04eb5b0d3c460751c2402c5c5cc9109c"), - }, - Data: data[:], - } - - require.NoError(t, applyDepositLog(ctx, app, id, depositLog)) - - balRecipient, err := balanceOf(ctx, app, id, recipient) - require.NoError(t, err) - require.NotNil(t, balRecipient) - require.Equal(t, types.MustParseDecimal(amount.String()), balRecipient) - - other := ethcommon.HexToAddress("0x00000000000000000000000000000000000000dd") - balOther, err := balanceOf(ctx, app, id, other) - require.NoError(t, err) - require.Nil(t, balOther) - - orderedsync.ForTestingReset() -} - From 90d90831c6feedb5ad4e28e73f9537dfe9c9ebbb Mon Sep 17 00:00:00 2001 From: Raffael Campos Date: Tue, 30 Sep 2025 15:23:55 -0300 Subject: [PATCH 12/13] refactor: enhance thread safety in tn_vacuum extension 
This commit improves the thread safety of the `tn_vacuum` extension by introducing `sync.RWMutex` locks in various structures. Key changes include: - Added mutex locks to the `stubMechanism` and `stubStateStore` types to protect shared state during concurrent access. - Refactored methods to use the new locking mechanisms, ensuring safe read and write operations. - Updated test assertions to utilize snapshot methods for verifying state without direct access to the underlying data. These enhancements ensure that the extension operates reliably in concurrent environments, reducing the risk of data races and improving overall stability. --- extensions/tn_vacuum/vacuum_test.go | 179 +++++++++++++++++++++++----- 1 file changed, 147 insertions(+), 32 deletions(-) diff --git a/extensions/tn_vacuum/vacuum_test.go b/extensions/tn_vacuum/vacuum_test.go index 5f0b73fe3..26a02d8b6 100644 --- a/extensions/tn_vacuum/vacuum_test.go +++ b/extensions/tn_vacuum/vacuum_test.go @@ -3,6 +3,7 @@ package tn_vacuum import ( "context" "errors" + "sync" "testing" "time" @@ -13,6 +14,7 @@ import ( ) type stubMechanism struct { + mu sync.RWMutex prepared int runs []RunRequest closeCnt int @@ -21,12 +23,16 @@ type stubMechanism struct { func (s *stubMechanism) Name() string { return "stub" } func (s *stubMechanism) Prepare(ctx context.Context, deps MechanismDeps) error { + s.mu.Lock() s.prepared++ + s.mu.Unlock() return nil } func (s *stubMechanism) Run(ctx context.Context, req RunRequest) (*RunReport, error) { + s.mu.Lock() s.runs = append(s.runs, req) + s.mu.Unlock() return &RunReport{ Mechanism: s.Name(), Status: StatusOK, @@ -36,10 +42,41 @@ func (s *stubMechanism) Run(ctx context.Context, req RunRequest) (*RunReport, er } func (s *stubMechanism) Close(ctx context.Context) error { + s.mu.Lock() s.closeCnt++ + s.mu.Unlock() return nil } +func (s *stubMechanism) preparedCount() int { + s.mu.RLock() + defer s.mu.RUnlock() + return s.prepared +} + +func (s *stubMechanism) runCount() int { + 
s.mu.RLock() + defer s.mu.RUnlock() + return len(s.runs) +} + +func (s *stubMechanism) runAt(i int) (RunRequest, bool) { + s.mu.RLock() + defer s.mu.RUnlock() + if i < 0 || i >= len(s.runs) { + return RunRequest{}, false + } + return s.runs[i], true +} + +func (s *stubMechanism) runsSnapshot() []RunRequest { + s.mu.RLock() + defer s.mu.RUnlock() + copyRuns := make([]RunRequest, len(s.runs)) + copy(copyRuns, s.runs) + return copyRuns +} + type failingMechanism struct{} func (f *failingMechanism) Name() string { return "fail" } @@ -74,6 +111,7 @@ func (e *errorRunMechanism) Run(ctx context.Context, req RunRequest) (*RunReport func (e *errorRunMechanism) Close(ctx context.Context) error { return nil } type stubStateStore struct { + mu sync.RWMutex ensureCount int loadCount int saveCount int @@ -85,29 +123,63 @@ type stubStateStore struct { } func (s *stubStateStore) Ensure(ctx context.Context) error { + s.mu.Lock() s.ensureCount++ + s.mu.Unlock() return nil } func (s *stubStateStore) Load(ctx context.Context) (runState, bool, error) { + s.mu.Lock() s.loadCount++ - if s.loadErr != nil { - return runState{}, false, s.loadErr + err := s.loadErr + state := s.loadState + ok := s.loadOK + s.mu.Unlock() + if err != nil { + return runState{}, false, err } - return s.loadState, s.loadOK, nil + return state, ok, nil } func (s *stubStateStore) Save(ctx context.Context, state runState) error { + s.mu.Lock() s.saveCount++ s.lastSaved = state - if s.saveErr != nil { - return s.saveErr + err := s.saveErr + s.mu.Unlock() + if err != nil { + return err } return nil } func (s *stubStateStore) Close() {} +func (s *stubStateStore) ensureCountValue() int { + s.mu.RLock() + defer s.mu.RUnlock() + return s.ensureCount +} + +func (s *stubStateStore) loadCountValue() int { + s.mu.RLock() + defer s.mu.RUnlock() + return s.loadCount +} + +func (s *stubStateStore) saveCountValue() int { + s.mu.RLock() + defer s.mu.RUnlock() + return s.saveCount +} + +func (s *stubStateStore) lastSavedState() 
runState { + s.mu.RLock() + defer s.mu.RUnlock() + return s.lastSaved +} + func TestConfigureDisabledSkipsMechanism(t *testing.T) { ctx := context.Background() ResetForTest() @@ -119,7 +191,7 @@ func TestConfigureDisabledSkipsMechanism(t *testing.T) { defer resetMechanismFactory() require.NoError(t, ext.configure(ctx, Config{Enabled: false, BlockInterval: 5})) - require.Equal(t, 0, stub.prepared) + require.Equal(t, 0, stub.preparedCount()) } func TestEngineReadyPreparesMechanism(t *testing.T) { @@ -149,17 +221,19 @@ func TestEngineReadyPreparesMechanism(t *testing.T) { app := &common.App{Service: svc} require.NoError(t, engineReadyHook(ctx, app)) - require.Equal(t, 1, stub.prepared) + require.Equal(t, 1, stub.preparedCount()) block := &common.BlockContext{Height: 1} require.NoError(t, endBlockHook(ctx, app, block)) waitForRunCount(t, stub, 1) - require.Equal(t, "block_interval:1", stub.runs[0].Reason) - require.Equal(t, 2, stub.runs[0].PgRepackJobs) + firstRun, ok := stub.runAt(0) + require.True(t, ok) + require.Equal(t, "block_interval:1", firstRun.Reason) + require.Equal(t, 2, firstRun.PgRepackJobs) require.NoError(t, endBlockHook(ctx, app, &common.BlockContext{Height: 2})) time.Sleep(50 * time.Millisecond) - require.Len(t, stub.runs, 1) + require.Len(t, stub.runsSnapshot(), 1) require.NoError(t, endBlockHook(ctx, app, &common.BlockContext{Height: 4})) waitForRunCount(t, stub, 2) @@ -264,7 +338,7 @@ func TestVacuumSkippedMetrics(t *testing.T) { block = &common.BlockContext{Height: 5} require.NoError(t, endBlockHook(ctx, app, block)) time.Sleep(50 * time.Millisecond) - require.Len(t, stub.runs, 1, "should not run - interval not met") + require.Len(t, stub.runsSnapshot(), 1, "should not run - interval not met") // Should run at height 11 (interval met) block = &common.BlockContext{Height: 11} @@ -305,8 +379,8 @@ func TestEngineReadyLoadsPersistedState(t *testing.T) { ext.setStateStore(store) require.NoError(t, engineReadyHook(ctx, app)) - require.Equal(t, 1, 
store.ensureCount) - require.Equal(t, 1, store.loadCount) + require.Equal(t, 1, store.ensureCountValue()) + require.Equal(t, 1, store.loadCountValue()) ext.mu.RLock() require.Equal(t, int64(12), ext.state.LastRunHeight) @@ -319,12 +393,12 @@ func TestEngineReadyLoadsPersistedState(t *testing.T) { require.NoError(t, endBlockHook(ctx, app, &common.BlockContext{Height: 14})) time.Sleep(50 * time.Millisecond) - require.Len(t, stub.runs, 0, "should not run before interval is met") + require.Len(t, stub.runsSnapshot(), 0, "should not run before interval is met") require.NoError(t, endBlockHook(ctx, app, &common.BlockContext{Height: 18})) waitForRunCount(t, stub, 1) - waitForCondition(t, time.Second, func() bool { return store.saveCount == 1 }) - require.Equal(t, int64(18), metricsStub.lastHeight) + waitForCondition(t, time.Second, func() bool { return store.saveCountValue() == 1 }) + require.Equal(t, int64(18), metricsStub.snapshot().lastHeight) } func TestSuccessfulRunPersistsState(t *testing.T) { @@ -360,7 +434,7 @@ func TestSuccessfulRunPersistsState(t *testing.T) { ext.setNowFunc(func() time.Time { return now }) require.NoError(t, engineReadyHook(ctx, app)) - require.Equal(t, 1, store.ensureCount) + require.Equal(t, 1, store.ensureCountValue()) metricsStub := &stubMetricsRecorder{} ext.mu.Lock() @@ -369,15 +443,17 @@ func TestSuccessfulRunPersistsState(t *testing.T) { require.NoError(t, endBlockHook(ctx, app, &common.BlockContext{Height: 5})) waitForRunCount(t, stub, 1) - waitForCondition(t, time.Second, func() bool { return store.saveCount == 1 }) - require.Equal(t, int64(5), store.lastSaved.LastRunHeight) - require.Equal(t, now.UTC(), store.lastSaved.LastRunAt) - require.Equal(t, 1, metricsStub.completeCount) - require.Equal(t, int64(5), metricsStub.lastHeight) + waitForCondition(t, time.Second, func() bool { return store.saveCountValue() == 1 }) + lastState := store.lastSavedState() + require.Equal(t, int64(5), lastState.LastRunHeight) + require.Equal(t, 
now.UTC(), lastState.LastRunAt) + snap := metricsStub.snapshot() + require.Equal(t, 1, snap.completeCount) + require.Equal(t, int64(5), snap.lastHeight) require.NoError(t, endBlockHook(ctx, app, &common.BlockContext{Height: 5})) time.Sleep(50 * time.Millisecond) - require.Equal(t, 1, store.saveCount, "duplicate height should not persist again") + require.Equal(t, 1, store.saveCountValue(), "duplicate height should not persist again") } func TestRunnerHandlesNilReport(t *testing.T) { @@ -392,11 +468,12 @@ func TestRunnerHandlesNilReport(t *testing.T) { Metrics: metricsStub, })) - require.Equal(t, 1, metricsStub.startCount) - require.Equal(t, 1, metricsStub.completeCount) - require.Zero(t, metricsStub.lastDuration) - require.Zero(t, metricsStub.lastTables) - require.Equal(t, "nil_report", metricsStub.lastMechanism) + snapshot := metricsStub.snapshot() + require.Equal(t, 1, snapshot.startCount) + require.Equal(t, 1, snapshot.completeCount) + require.Zero(t, snapshot.lastDuration) + require.Zero(t, snapshot.lastTables) + require.Equal(t, "nil_report", snapshot.lastMechanism) } func TestMaybeRunRecordsErrorOnce(t *testing.T) { @@ -431,9 +508,10 @@ func TestMaybeRunRecordsErrorOnce(t *testing.T) { ext.mu.Unlock() require.NoError(t, endBlockHook(ctx, app, &common.BlockContext{Height: 1})) - waitForCondition(t, time.Second, func() bool { return metricsStub.errorCount == 1 }) - require.Equal(t, 1, metricsStub.startCount) - require.Equal(t, "error_run", metricsStub.lastErrorMechanism) + waitForCondition(t, time.Second, func() bool { return metricsStub.snapshot().errorCount == 1 }) + errorSnapshot := metricsStub.snapshot() + require.Equal(t, 1, errorSnapshot.startCount) + require.Equal(t, "error_run", errorSnapshot.lastErrorMechanism) } func TestEnqueueRunBusy(t *testing.T) { @@ -452,6 +530,7 @@ func TestEnqueueRunBusy(t *testing.T) { } type stubMetricsRecorder struct { + mu sync.RWMutex startCount int completeCount int errorCount int @@ -466,30 +545,66 @@ type 
stubMetricsRecorder struct { } func (s *stubMetricsRecorder) RecordVacuumStart(ctx context.Context, mechanism string) { + s.mu.Lock() s.startCount++ s.lastMechanism = mechanism + s.mu.Unlock() } func (s *stubMetricsRecorder) RecordVacuumComplete(ctx context.Context, mechanism string, duration time.Duration, tablesProcessed int) { + s.mu.Lock() s.completeCount++ s.lastMechanism = mechanism s.lastDuration = duration s.lastTables = tablesProcessed + s.mu.Unlock() } func (s *stubMetricsRecorder) RecordVacuumError(ctx context.Context, mechanism string, errType string) { + s.mu.Lock() s.errorCount++ s.lastErrorMechanism = mechanism s.lastErrorType = errType + s.mu.Unlock() } func (s *stubMetricsRecorder) RecordVacuumSkipped(ctx context.Context, reason string) { + s.mu.Lock() s.skippedCount++ s.lastSkipReason = reason + s.mu.Unlock() } func (s *stubMetricsRecorder) RecordLastRunHeight(ctx context.Context, height int64) { + s.mu.Lock() s.lastHeight = height + s.mu.Unlock() +} + +func (s *stubMetricsRecorder) snapshot() stubMetricsSnapshot { + s.mu.RLock() + defer s.mu.RUnlock() + return stubMetricsSnapshot{ + startCount: s.startCount, + completeCount: s.completeCount, + errorCount: s.errorCount, + lastDuration: s.lastDuration, + lastTables: s.lastTables, + lastMechanism: s.lastMechanism, + lastErrorMechanism: s.lastErrorMechanism, + lastHeight: s.lastHeight, + } +} + +type stubMetricsSnapshot struct { + startCount int + completeCount int + errorCount int + lastDuration time.Duration + lastTables int + lastMechanism string + lastErrorMechanism string + lastHeight int64 } func waitForCondition(t *testing.T, timeout time.Duration, fn func() bool) { @@ -509,6 +624,6 @@ func waitForCondition(t *testing.T, timeout time.Duration, fn func() bool) { func waitForRunCount(t *testing.T, stub *stubMechanism, count int) { waitForCondition(t, time.Second, func() bool { - return len(stub.runs) >= count + return stub.runCount() >= count }) } From 18ac42d654077e41d16655a05d6b5719c1d27815 
Mon Sep 17 00:00:00 2001 From: Raffael Campos Date: Tue, 30 Sep 2025 15:40:26 -0300 Subject: [PATCH 13/13] chore: update PostgreSQL image references to ghcr.io/trufnetwork/kwil-postgres This commit updates all instances of the PostgreSQL image from `kwildb/postgres` to `ghcr.io/trufnetwork/kwil-postgres` across various configuration files and documentation. Key changes include: - Updated `compose.yaml`, `Taskfile.yml`, and multiple `docker-compose` files to use the new image. - Modified documentation to reflect the new image source and its configuration. - Adjusted scripts to ensure compatibility with the updated image. These changes ensure consistency in using the new PostgreSQL image for the TRUF.NETWORK project. --- Taskfile.yml | 2 +- compose.yaml | 2 +- deployments/dev-net/devnet-compose.yaml | 4 ++-- deployments/infra/README.md | 4 ++-- deployments/infra/stacks/docker-compose.template.yml | 4 ++-- docs/container-image-guide.md | 4 ++-- docs/examples/mcp-reverse-proxy/README.md | 6 +++--- .../mcp-reverse-proxy/docker-compose.sse.yaml | 8 ++++---- docs/examples/mcp-reverse-proxy/traefik.yml.example | 4 ++-- docs/mcp-reverse-proxy.md | 4 ++-- docs/node-operator-guide.md | 4 ++-- docs/node-upgrade-guide.md | 2 +- scripts/ci-cleanup.sh | 2 +- scripts/test-ami.sh | 12 ++++++------ tests/extensions/tn_cache_metrics/docker-compose.yml | 2 +- tests/extensions/tn_digest/docker-compose.yml | 2 +- tests/setup/simple_node.go | 2 +- 17 files changed, 34 insertions(+), 34 deletions(-) diff --git a/Taskfile.yml b/Taskfile.yml index c6dfdbafd..3bc3c8f4c 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -96,7 +96,7 @@ tasks: host:postgres:start: desc: Run a postgres container cmds: - - docker start kwil-postgres || docker run -d -p 5432:5432 --name kwil-postgres -e "POSTGRES_HOST_AUTH_METHOD=trust" kwildb/postgres:latest + - docker start kwil-postgres || docker run -d -p 5432:5432 --name kwil-postgres -e "POSTGRES_HOST_AUTH_METHOD=trust" ghcr.io/trufnetwork/kwil-postgres:latest 
host:indexer:start: desc: Run the indexer diff --git a/compose.yaml b/compose.yaml index 89db4aebc..d7de8d3d7 100644 --- a/compose.yaml +++ b/compose.yaml @@ -1,6 +1,6 @@ services: kwil-postgres: - image: "kwildb/postgres:16.8-1" + image: "ghcr.io/trufnetwork/kwil-postgres:16.8-1" hostname: kwil-postgres shm_size: 2G restart: unless-stopped diff --git a/deployments/dev-net/devnet-compose.yaml b/deployments/dev-net/devnet-compose.yaml index 7eaa2892f..e2dcfbd62 100644 --- a/deployments/dev-net/devnet-compose.yaml +++ b/deployments/dev-net/devnet-compose.yaml @@ -33,7 +33,7 @@ x-common-logging: &common_logging services: kwil-postgres-1: - image: "kwildb/postgres:16.8-1" + image: "ghcr.io/trufnetwork/kwil-postgres:16.8-1" ports: - "5432:5432" <<: *postgres_common @@ -42,7 +42,7 @@ services: source: data-kwil-postgres-1 kwil-postgres-2: - image: "kwildb/postgres:16.8-1" + image: "ghcr.io/trufnetwork/kwil-postgres:16.8-1" ports: - "5433:5432" <<: *postgres_common diff --git a/deployments/infra/README.md b/deployments/infra/README.md index a551bdf8c..2821a43d5 100644 --- a/deployments/infra/README.md +++ b/deployments/infra/README.md @@ -331,7 +331,7 @@ The AMI includes: - **Base OS**: Ubuntu 24.04 LTS - **Docker**: Latest Docker CE with docker-compose - **TRUF.NETWORK Stack**: - - PostgreSQL (kwildb/postgres:16.8-1) + - PostgreSQL (ghcr.io/trufnetwork/kwil-postgres:16.8-1) - TRUF.NETWORK Node - ⚠️ To be added when ghcr image is published - PostgreSQL MCP Server (crystaldba/postgres-mcp:latest) - Will need to be adjusted later on - **Configuration Scripts**: @@ -382,4 +382,4 @@ aws logs describe-log-groups --log-group-name-prefix /aws/imagebuilder ## Important -Always use these commands responsibly, especially in non-production environments. Remember to delete the stack after testing to avoid unnecessary AWS charges. \ No newline at end of file +Always use these commands responsibly, especially in non-production environments. 
Remember to delete the stack after testing to avoid unnecessary AWS charges. diff --git a/deployments/infra/stacks/docker-compose.template.yml b/deployments/infra/stacks/docker-compose.template.yml index 1ee704e08..a597bb92b 100644 --- a/deployments/infra/stacks/docker-compose.template.yml +++ b/deployments/infra/stacks/docker-compose.template.yml @@ -1,6 +1,6 @@ services: tn-postgres: - image: kwildb/postgres:latest + image: ghcr.io/trufnetwork/kwil-postgres:latest container_name: tn-postgres environment: POSTGRES_HOST_AUTH_METHOD: trust @@ -102,4 +102,4 @@ services: networks: tn-network: - driver: bridge \ No newline at end of file + driver: bridge diff --git a/docs/container-image-guide.md b/docs/container-image-guide.md index 15ed740f4..675fe30fa 100644 --- a/docs/container-image-guide.md +++ b/docs/container-image-guide.md @@ -7,7 +7,7 @@ Run the TRUF.NETWORK node container with Docker Compose while keeping the standa ## Prerequisites - Docker Engine 24+ with the `docker compose` plugin. -- Pull access to `ghcr.io/trufnetwork/node` and `kwildb/postgres`. +- Pull access to `ghcr.io/trufnetwork/node` and `ghcr.io/trufnetwork/kwil-postgres`. - Optional: the [`kwild` CLI](https://github.com/trufnetwork/node/releases) if you want to pre-populate configuration files instead of using the container’s auto-initialization path. ## 1. Prepare the workspace @@ -26,7 +26,7 @@ Create `docker-compose.yml` in `~/truf-node` using the minimal stack below. ```yaml services: postgres: - image: kwildb/postgres:16.8-1 + image: ghcr.io/trufnetwork/kwil-postgres:latest restart: unless-stopped environment: POSTGRES_HOST_AUTH_METHOD: trust diff --git a/docs/examples/mcp-reverse-proxy/README.md b/docs/examples/mcp-reverse-proxy/README.md index 43a20a389..d2f5dadc4 100644 --- a/docs/examples/mcp-reverse-proxy/README.md +++ b/docs/examples/mcp-reverse-proxy/README.md @@ -28,7 +28,7 @@ This directory contains ready-to-use configuration examples for deploying the TR 2. 
**Create environment file:** ```bash cat > .env << EOF - # TRUF.NETWORK uses kwildb/postgres image which auto-creates kwild user/database + # TRUF.NETWORK uses ghcr.io/trufnetwork/kwil-postgres image which auto-creates kwild user/database # Note: No password needed - uses POSTGRES_HOST_AUTH_METHOD=trust DOMAIN=mcp.your-domain.com ACME_EMAIL=admin@your-domain.com @@ -276,7 +276,7 @@ Have a working configuration for another reverse proxy? Please contribute: git clone https://github.com/trufnetwork/node.git cd node/docs/examples/mcp-reverse-proxy -# Configure environment (kwildb/postgres auto-creates kwild user/database) +# Configure environment (ghcr.io/trufnetwork/kwil-postgres auto-creates kwild user/database) cat > .env << EOF DOMAIN=mcp.your-domain.com ACME_EMAIL=admin@your-domain.com @@ -291,4 +291,4 @@ docker compose logs -f mcp-server # Test curl -H "Accept: text/event-stream" http://your-domain.com/sse -``` \ No newline at end of file +``` diff --git a/docs/examples/mcp-reverse-proxy/docker-compose.sse.yaml b/docs/examples/mcp-reverse-proxy/docker-compose.sse.yaml index 441c9d807..7c5640806 100644 --- a/docs/examples/mcp-reverse-proxy/docker-compose.sse.yaml +++ b/docs/examples/mcp-reverse-proxy/docker-compose.sse.yaml @@ -13,7 +13,7 @@ version: '3.8' services: # PostgreSQL Database (Kwil-configured image for TRUF.NETWORK) postgres: - image: kwildb/postgres:latest + image: ghcr.io/trufnetwork/kwil-postgres:latest container_name: tn-postgres restart: unless-stopped environment: @@ -200,8 +200,8 @@ volumes: # Example environment file (.env) # Copy to .env and customize: # -# # Database settings (TRUF.NETWORK uses kwildb/postgres image with trust auth) -# # The kwildb/postgres image automatically creates kwild user and database +# # Database settings (TRUF.NETWORK uses ghcr.io/trufnetwork/kwil-postgres image with trust auth) +# # The ghcr.io/trufnetwork/kwil-postgres image automatically creates kwild user and database # # Note: No password needed due to 
POSTGRES_HOST_AUTH_METHOD=trust # # # MCP Server settings @@ -251,4 +251,4 @@ volumes: # docker compose -f docker-compose.sse.yaml down # # 7. Full cleanup: -# docker compose -f docker-compose.sse.yaml down -v --remove-orphans \ No newline at end of file +# docker compose -f docker-compose.sse.yaml down -v --remove-orphans diff --git a/docs/examples/mcp-reverse-proxy/traefik.yml.example b/docs/examples/mcp-reverse-proxy/traefik.yml.example index 7daad3632..eef77fa68 100644 --- a/docs/examples/mcp-reverse-proxy/traefik.yml.example +++ b/docs/examples/mcp-reverse-proxy/traefik.yml.example @@ -267,7 +267,7 @@ services: - "traefik.http.middlewares.dashboard-auth.basicauth.users=admin:$$apr1$$YOUR_HASHED_PASSWORD_HERE" # Generate with: htpasswd -nb admin your-secure-password postgres: - image: kwildb/postgres:latest + image: ghcr.io/trufnetwork/kwil-postgres:latest restart: unless-stopped environment: - POSTGRES_HOST_AUTH_METHOD=trust @@ -310,4 +310,4 @@ networks: volumes: postgres-data: - traefik-logs: \ No newline at end of file + traefik-logs: diff --git a/docs/mcp-reverse-proxy.md b/docs/mcp-reverse-proxy.md index 5a9d0accc..ebc1d855e 100644 --- a/docs/mcp-reverse-proxy.md +++ b/docs/mcp-reverse-proxy.md @@ -252,7 +252,7 @@ LoadModule headers_module modules/mod_headers.so ## MCP Server SSE Configuration -**Important**: TRUF.NETWORK uses the Kwil PostgreSQL Docker image (`kwildb/postgres`) which is configured with `POSTGRES_HOST_AUTH_METHOD=trust` and automatically creates a `kwild` database with a `kwild` user. This matches the standard node setup described in the [Node Operator Guide](./node-operator-guide.md). +**Important**: TRUF.NETWORK uses the Kwil PostgreSQL Docker image (`ghcr.io/trufnetwork/kwil-postgres`) which is configured with `POSTGRES_HOST_AUTH_METHOD=trust` and automatically creates a `kwild` database with a `kwild` user. This matches the standard node setup described in the [Node Operator Guide](./node-operator-guide.md). 
### Starting the MCP Server with SSE @@ -650,4 +650,4 @@ After implementing reverse proxy configuration: 4. **Plan Scaling**: Consider load balancing for high-traffic scenarios 5. **Security Audits**: Regular security reviews and updates -For additional support with MCP server deployment, consult the [Node Operator Guide](./node-operator-guide.md) and [MCP Server Documentation](./mcp-server.md). \ No newline at end of file +For additional support with MCP server deployment, consult the [Node Operator Guide](./node-operator-guide.md) and [MCP Server Documentation](./mcp-server.md). diff --git a/docs/node-operator-guide.md b/docs/node-operator-guide.md index cf4638fe1..bf3f3a5a2 100644 --- a/docs/node-operator-guide.md +++ b/docs/node-operator-guide.md @@ -224,7 +224,7 @@ docker run -d -p 127.0.0.1:5432:5432 --name tn-postgres \ -e "POSTGRES_HOST_AUTH_METHOD=trust" \ -v tn-pgdata:/var/lib/postgresql/data \ --shm-size=1gb \ - kwildb/postgres:latest + ghcr.io/trufnetwork/kwil-postgres:latest ``` > **Warning**: Critical Security Requirements @@ -906,7 +906,7 @@ Sometimes you may need to reset your node to sync from a specific point or recov -e "POSTGRES_HOST_AUTH_METHOD=trust" \ -v tn-pgdata:/var/lib/postgresql/data \ --shm-size=1gb \ - kwildb/postgres:latest + ghcr.io/trufnetwork/kwil-postgres:latest ``` 8. **Re-enable and start services**: diff --git a/docs/node-upgrade-guide.md b/docs/node-upgrade-guide.md index f2444b568..ea5e3e029 100644 --- a/docs/node-upgrade-guide.md +++ b/docs/node-upgrade-guide.md @@ -203,7 +203,7 @@ kwild blacklist list ## What About PostgreSQL & Other Components? *Minor* Kwil releases do **not** require a database upgrade. -If the release notes specify a new official Postgres image (e.g. `kwildb/postgres:x.y-z`) you can recreate the container at your convenience – data volumes are preserved. +If the release notes specify a new official Postgres image (e.g. 
`ghcr.io/trufnetwork/kwil-postgres:x.y-z`) you can recreate the container at your convenience – data volumes are preserved. --- diff --git a/scripts/ci-cleanup.sh b/scripts/ci-cleanup.sh index ff5b19187..096a904dc 100644 --- a/scripts/ci-cleanup.sh +++ b/scripts/ci-cleanup.sh @@ -11,7 +11,7 @@ fi # Common container names/images names=("tn-db" "kwil-postgres" "kwild" "postgres") -images=("kwildb/postgres" "ghcr.io/trufnetwork/node:local" "kwildb/postgres:latest" "kwildb/postgres:16.8-1") +images=("ghcr.io/trufnetwork/kwil-postgres" "ghcr.io/trufnetwork/node:local" "ghcr.io/trufnetwork/kwil-postgres:latest" "ghcr.io/trufnetwork/kwil-postgres:16.8-1") echo "[ci-cleanup] Stopping/removing lingering containers by name..." for n in "${names[@]}"; do diff --git a/scripts/test-ami.sh b/scripts/test-ami.sh index 25f531aff..d31794147 100755 --- a/scripts/test-ami.sh +++ b/scripts/test-ami.sh @@ -365,11 +365,11 @@ echo "Checking if required Docker images can be pulled..." DOCKER_IMAGES_AVAILABLE=true -echo "Checking kwildb/postgres:16.8-1..." -if docker manifest inspect kwildb/postgres:16.8-1 >/dev/null 2>&1; then - echo -e "${GREEN}✓ kwildb/postgres:16.8-1 available${NC}" +echo "Checking ghcr.io/trufnetwork/kwil-postgres:16.8-1..." +if docker manifest inspect ghcr.io/trufnetwork/kwil-postgres:16.8-1 >/dev/null 2>&1; then + echo -e "${GREEN}✓ ghcr.io/trufnetwork/kwil-postgres:16.8-1 available${NC}" else - echo -e "${RED}❌ kwildb/postgres:16.8-1 not available${NC}" + echo -e "${RED}❌ ghcr.io/trufnetwork/kwil-postgres:16.8-1 not available${NC}" DOCKER_IMAGES_AVAILABLE=false fi @@ -454,7 +454,7 @@ fi echo "✓ Using $COMPOSE" echo "✓ Simulated pulling ghcr.io/trufnetwork/node:latest" -echo "✓ Simulated pulling kwildb/postgres:16.8-1" +echo "✓ Simulated pulling ghcr.io/trufnetwork/kwil-postgres:16.8-1" echo "✓ Simulated pulling ghcr.io/trufnetwork/postgres-mcp:latest" echo "🔄 Restarting services..." 
@@ -506,4 +506,4 @@ else echo "📊 Test Results: ${TESTS_PASSED}/${TOTAL_TESTS} tests passed, ${TESTS_FAILED} failed" echo -e "${RED}Please fix the issues before deployment.${NC}" exit 1 -fi \ No newline at end of file +fi diff --git a/tests/extensions/tn_cache_metrics/docker-compose.yml b/tests/extensions/tn_cache_metrics/docker-compose.yml index 39f6cdcf1..8199ebad0 100644 --- a/tests/extensions/tn_cache_metrics/docker-compose.yml +++ b/tests/extensions/tn_cache_metrics/docker-compose.yml @@ -2,7 +2,7 @@ version: '3.8' services: postgres: - image: kwildb/postgres:16.8-1 + image: ghcr.io/trufnetwork/kwil-postgres:16.8-1 environment: - POSTGRES_HOST_AUTH_METHOD=trust ports: diff --git a/tests/extensions/tn_digest/docker-compose.yml b/tests/extensions/tn_digest/docker-compose.yml index 3a9f17820..85cd07c91 100644 --- a/tests/extensions/tn_digest/docker-compose.yml +++ b/tests/extensions/tn_digest/docker-compose.yml @@ -2,7 +2,7 @@ version: '3.8' services: postgres: - image: kwildb/postgres:16.8-1 + image: ghcr.io/trufnetwork/kwil-postgres:16.8-1 environment: - POSTGRES_HOST_AUTH_METHOD=trust ports: diff --git a/tests/setup/simple_node.go b/tests/setup/simple_node.go index bc3a4f0dc..a92e5f147 100644 --- a/tests/setup/simple_node.go +++ b/tests/setup/simple_node.go @@ -116,7 +116,7 @@ func (f *SimpleNodeFixture) Setup(ctx context.Context, image string, config *Kwi // startPostgres starts a PostgreSQL container func (f *SimpleNodeFixture) startPostgres(ctx context.Context, networkName string) (testcontainers.Container, error) { req := testcontainers.ContainerRequest{ - Image: "kwildb/postgres:16.8-1", + Image: "ghcr.io/trufnetwork/kwil-postgres:16.8-1", ExposedPorts: []string{"5432/tcp"}, Name: "postgres", Networks: []string{networkName},