From 6f757eb25471956505556ff30bc3b8ed5684a878 Mon Sep 17 00:00:00 2001 From: hardy Date: Tue, 26 May 2026 09:58:20 +0800 Subject: [PATCH 1/4] feat: improve data migration flow Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- app.go | 31 +- app_test.go | 20 + core/config/fs_watcher.go | 73 +- core/config/fs_watcher_test.go | 97 +++ core/elastic/actions.go | 61 +- core/elastic/actions_test.go | 90 +++ core/elastic/common_command.go | 1 + core/elastic/domain_actions.go | 8 +- core/elastic/domain_actions_test.go | 35 + core/elastic/index.go | 106 +++ core/elastic/index_test.go | 47 ++ core/elastic/partition.go | 752 +++++++++++++++++- core/elastic/partition_test.go | 298 +++++++ core/env/env.go | 33 +- core/env/env_test.go | 51 ++ core/orm/registry.go | 20 + core/orm/registry_test.go | 39 + core/pipeline/context.go | 68 +- core/pipeline/context_result_test.go | 59 ++ core/queue/consumer_config.go | 2 +- core/queue/queue_config.go | 2 +- core/task/task.go | 24 +- modules/elastic/adapter/elasticsearch/v0.go | 30 +- modules/elastic/adapter/ver.go | 8 + modules/elastic/adapter/ver_test.go | 30 + modules/elastic/common/config.go | 20 +- modules/elastic/common/config_test.go | 27 + modules/elastic/metadata.go | 16 +- modules/elastic/module.go | 70 +- modules/elastic/module_test.go | 78 +- modules/elastic/schema.go | 96 +++ modules/elastic/schema_test.go | 16 + modules/pipeline/model.go | 21 +- modules/pipeline/module.go | 5 + modules/pipeline/pipeline_test.go | 50 ++ modules/pipeline/tasks.go | 17 +- modules/queue/disk_queue/cleanup.go | 2 +- modules/queue/disk_queue/compress.go | 2 +- modules/queue/disk_queue/consumer.go | 69 +- modules/queue/disk_queue/diskqueue.go | 31 +- modules/queue/disk_queue/diskqueue_test.go | 182 +++++ modules/queue/disk_queue/module.go | 35 +- modules/queue/disk_queue/module_test.go | 49 ++ .../elastic/bulk_indexing/bulk_indexing.go | 250 ++++-- .../bulk_indexing/bulk_indexing_test.go | 91 +++ 45 files changed, 2830 
insertions(+), 282 deletions(-) create mode 100644 app_test.go create mode 100644 core/config/fs_watcher_test.go create mode 100644 core/elastic/actions_test.go create mode 100644 core/elastic/domain_actions_test.go create mode 100644 core/elastic/partition_test.go create mode 100644 core/orm/registry_test.go create mode 100644 core/pipeline/context_result_test.go create mode 100644 modules/elastic/adapter/ver_test.go create mode 100644 modules/elastic/common/config_test.go create mode 100644 modules/pipeline/pipeline_test.go create mode 100644 modules/queue/disk_queue/diskqueue_test.go create mode 100644 modules/queue/disk_queue/module_test.go diff --git a/app.go b/app.go index 30e73c761..af1ef24e5 100755 --- a/app.go +++ b/app.go @@ -31,20 +31,21 @@ import ( "context" "flag" "fmt" + "github.com/fsnotify/fsnotify" + "github.com/shirou/gopsutil/v3/process" + "infini.sh/framework/core/task" + "infini.sh/framework/core/wrapper/taskset" + "infini.sh/framework/modules/configs/client" "os" "os/signal" + "path/filepath" "runtime" "runtime/debug" "sync" "syscall" "time" - "github.com/fsnotify/fsnotify" - "github.com/shirou/gopsutil/v4/process" - "infini.sh/framework/core/task" - "infini.sh/framework/core/wrapper/taskset" - "infini.sh/framework/modules/configs/client" - + log "github.com/cihub/seelog" "github.com/kardianos/service" "infini.sh/framework/core/config" "infini.sh/framework/core/daemon" @@ -52,7 +53,6 @@ import ( "infini.sh/framework/core/errors" "infini.sh/framework/core/global" "infini.sh/framework/core/keystore" - "infini.sh/framework/core/log" _ "infini.sh/framework/core/logging" "infini.sh/framework/core/logging/logger" "infini.sh/framework/core/module" @@ -85,6 +85,18 @@ type App struct { svcFlag string } +func getServiceWorkingDirectory() string { + executablePath, err := os.Executable() + if err == nil { + return filepath.Dir(executablePath) + } + workdir, err := os.Getwd() + if err != nil { + panic(err) + } + return workdir +} + const ( 
env_SILENT_GREETINGS = "SILENT_GREETINGS" env_SERVICE_NAME = "SERVICE_NAME" @@ -575,10 +587,7 @@ func (app *App) Run() { svcOptions["SuccessExitStatus"] = "1 2 8 SIGKILL" svcOptions["LimitNOFILE"] = 1024000 - workdir, err := os.Getwd() - if err != nil { - panic(err) - } + workdir := getServiceWorkingDirectory() serviceName := app.environment.GetAppLowercaseName() if v, ok := os.LookupEnv(env_SERVICE_NAME); ok { diff --git a/app_test.go b/app_test.go new file mode 100644 index 000000000..702a85f48 --- /dev/null +++ b/app_test.go @@ -0,0 +1,20 @@ +package framework + +import ( + "os" + "path/filepath" + "testing" +) + +func TestGetServiceWorkingDirectoryUsesExecutableDir(t *testing.T) { + executablePath, err := os.Executable() + if err != nil { + t.Fatalf("failed to get executable path: %v", err) + } + + got := getServiceWorkingDirectory() + want := filepath.Dir(executablePath) + if got != want { + t.Fatalf("expected service working directory %q, got %q", want, got) + } +} diff --git a/core/config/fs_watcher.go b/core/config/fs_watcher.go index 45706c402..873dd0700 100644 --- a/core/config/fs_watcher.go +++ b/core/config/fs_watcher.go @@ -62,6 +62,36 @@ func loadConfigFile(file string) *Config { return nil } +func dispatchConfigChangeEvent(ev fsnotify.Event, watcherCallbacks []CallbackFunc) { + for _, v := range watcherCallbacks { + v(ev.Name, ev.Op) + } + + cfg := loadConfigFile(ev.Name) + if cfg != nil { + for _, k := range sectionCallbackOrder { + callbacks, ok := sectionCallbacks[k] + if !ok || !cfg.HasField(k) { + continue + } + currentCfg, err := cfg.Child(k, -1) + if err != nil { + log.Error(err) + continue + } + previousCfg, _ := latestConfig[k] + for _, f := range callbacks { + f(previousCfg, currentCfg) + } + latestConfig[k] = currentCfg + } + } + + for _, v := range configCallbacks { + v(ev) + } +} + var validExtensions = []string{".yml", ".yaml", ".tpl"} func SetValidExtension(v []string) { @@ -153,40 +183,7 @@ func AddPathToWatch(path string, callback 
CallbackFunc) { time.Sleep(2 * time.Second) log.Trace("2 seconds out, on:", ev.String()) - // AddPathToWatch - - for _, v := range watcher.callbacks { - v(ev.Name, ev.Op) - } - - // NotifyOnConfigChange - - for _, v := range configCallbacks { - v(ev) - } - - // NotifyOnConfigSectionChange - - cfg := loadConfigFile(ev.Name) - if cfg == nil { - continue - } - - for k, v := range sectionCallbacks { - if cfg.HasField(k) { - currentCfg, err := cfg.Child(k, -1) - if err != nil { - log.Error(err) - continue - } - // diff config - previousCfg, _ := latestConfig[k] - for _, f := range v { - f(previousCfg, currentCfg) - } - latestConfig[k] = currentCfg - } - } + dispatchConfigChangeEvent(ev, watcher.callbacks) } }() }) @@ -255,11 +252,13 @@ func StopWatchers() { } var sectionCallbacks = map[string][]func(pCfg, cCfg *Config){} +var sectionCallbackOrder = []string{} var configCallbacks = []func(fsnotify.Event){} var cfgLocker = sync.RWMutex{} // NotifyOnConfigSectionChange will trigger callback when any configuration file change detected and -// configKey present in the changed file +// configKey present in the changed file. Section callbacks run before generic NotifyOnConfigChange +// callbacks so section-scoped state can be refreshed before dependent consumers reload. func NotifyOnConfigSectionChange(configKey string, f func(pCfg, cCfg *Config)) { cfgLocker.Lock() defer cfgLocker.Unlock() @@ -268,12 +267,14 @@ func NotifyOnConfigSectionChange(configKey string, f func(pCfg, cCfg *Config)) { if !ok { v = []func(pCfg, cCfg *Config){} sectionCallbacks[configKey] = v + sectionCallbackOrder = append(sectionCallbackOrder, configKey) } v = append(v, f) sectionCallbacks[configKey] = v } -// NotifyOnConfigChange will trigger callback when any configuration file change detected +// NotifyOnConfigChange will trigger callback when any configuration file change detected, after any +// matching NotifyOnConfigSectionChange callbacks for the same event have run. 
func NotifyOnConfigChange(f func(fsnotify.Event)) { cfgLocker.Lock() defer cfgLocker.Unlock() diff --git a/core/config/fs_watcher_test.go b/core/config/fs_watcher_test.go new file mode 100644 index 000000000..c1ddf1f75 --- /dev/null +++ b/core/config/fs_watcher_test.go @@ -0,0 +1,97 @@ +package config + +import ( + "os" + "path/filepath" + "testing" + + "github.com/fsnotify/fsnotify" +) + +func TestDispatchConfigChangeEventRunsSectionCallbacksBeforeGenericCallbacks(t *testing.T) { + dir := t.TempDir() + file := filepath.Join(dir, "generated_metrics_tasks.yml") + content := []byte("elasticsearch:\n - id: \"cluster-1\"\n name: \"cluster-1\"\n enabled: true\n endpoint: \"http://127.0.0.1:9200\"\n") + if err := os.WriteFile(file, content, 0o644); err != nil { + t.Fatalf("write config file: %v", err) + } + + previousSections := sectionCallbacks + previousOrder := sectionCallbackOrder + previousConfigs := configCallbacks + previousLatest := latestConfig + sectionCallbacks = map[string][]func(pCfg, cCfg *Config){} + sectionCallbackOrder = nil + configCallbacks = nil + latestConfig = map[string]*Config{} + t.Cleanup(func() { + sectionCallbacks = previousSections + sectionCallbackOrder = previousOrder + configCallbacks = previousConfigs + latestConfig = previousLatest + }) + + var order []string + NotifyOnConfigSectionChange("elasticsearch", func(pCfg, cCfg *Config) { + order = append(order, "section") + }) + NotifyOnConfigChange(func(ev fsnotify.Event) { + order = append(order, "generic") + }) + + dispatchConfigChangeEvent(fsnotify.Event{Name: file, Op: fsnotify.Write}, nil) + + if len(order) != 2 { + t.Fatalf("expected 2 callbacks, got %d (%v)", len(order), order) + } + if order[0] != "section" || order[1] != "generic" { + t.Fatalf("expected section callback before generic callback, got %v", order) + } +} + +func TestDispatchConfigChangeEventRunsSectionCallbacksInRegistrationOrder(t *testing.T) { + dir := t.TempDir() + file := filepath.Join(dir, "gateway.yml") + content 
:= []byte("flow:\n - name: flow-1\nrouter:\n - name: router-1\nentry:\n - name: entry-1\n") + if err := os.WriteFile(file, content, 0o644); err != nil { + t.Fatalf("write config file: %v", err) + } + + previousSections := sectionCallbacks + previousOrder := sectionCallbackOrder + previousConfigs := configCallbacks + previousLatest := latestConfig + sectionCallbacks = map[string][]func(pCfg, cCfg *Config){} + sectionCallbackOrder = nil + configCallbacks = nil + latestConfig = map[string]*Config{} + t.Cleanup(func() { + sectionCallbacks = previousSections + sectionCallbackOrder = previousOrder + configCallbacks = previousConfigs + latestConfig = previousLatest + }) + + var order []string + NotifyOnConfigSectionChange("flow", func(pCfg, cCfg *Config) { + order = append(order, "flow") + }) + NotifyOnConfigSectionChange("router", func(pCfg, cCfg *Config) { + order = append(order, "router") + }) + NotifyOnConfigSectionChange("entry", func(pCfg, cCfg *Config) { + order = append(order, "entry") + }) + + dispatchConfigChangeEvent(fsnotify.Event{Name: file, Op: fsnotify.Write}, nil) + + expected := []string{"flow", "router", "entry"} + if len(order) != len(expected) { + t.Fatalf("expected %d callbacks, got %d (%v)", len(expected), len(order), order) + } + for i, want := range expected { + if order[i] != want { + t.Fatalf("expected callback order %v, got %v", expected, order) + } + } +} diff --git a/core/elastic/actions.go b/core/elastic/actions.go index dcc11b03e..5ff110ff2 100644 --- a/core/elastic/actions.go +++ b/core/elastic/actions.go @@ -118,10 +118,33 @@ func (node *NodeAvailable) IsDead() bool { } func (meta *ElasticsearchMetadata) IsAvailable() bool { - if meta.Config == nil || !meta.Config.Enabled { + if meta.Config == nil { + if rate.GetRateLimiter("cluster_available_check", "nil_config", 1, 1, 30*time.Second).Allow() { + log.Debug("elasticsearch metadata is unavailable: config is nil") + } return false } - return meta.clusterAvailable + if !meta.Config.Enabled { 
+ clusterID := meta.Config.ID + if clusterID == "" { + clusterID = meta.Config.Name + } + if rate.GetRateLimiter("cluster_available_check", clusterID, 1, 1, 30*time.Second).Allow() { + log.Debugf("elasticsearch [%v] is unavailable: config disabled", meta.Config.Name) + } + return false + } + if !meta.clusterAvailable { + clusterID := meta.Config.ID + if clusterID == "" { + clusterID = meta.Config.Name + } + if rate.GetRateLimiter("cluster_available_check", clusterID, 1, 1, 30*time.Second).Allow() { + log.Debugf("elasticsearch [%v] is unavailable: clusterAvailable=false", meta.Config.Name) + } + return false + } + return true } func (meta *ElasticsearchMetadata) Init(health bool) { @@ -186,13 +209,8 @@ func (meta *ElasticsearchMetadata) GetActiveEndpoint() string { } func (meta *ElasticsearchMetadata) GetActivePreferredSeedHost() string { - hosts := meta.GetSeedHosts() - if len(hosts) > 0 { - for _, v := range hosts { - if v != "" && IsHostAvailable(v) { - return v - } - } + if host, _ := meta.getAvailableSeedHost(); host != "" { + return host } return meta.Config.Host } @@ -263,6 +281,12 @@ func (meta *ElasticsearchMetadata) GetActiveHosts() int { } func (meta *ElasticsearchMetadata) GetActiveHost() string { + if host, info := meta.getAvailableSeedHost(); host != "" { + if info != nil { + meta.activeHost = info + } + return host + } if meta.activeHost != nil { if meta.activeHost.IsAvailable() { @@ -320,6 +344,25 @@ func (meta *ElasticsearchMetadata) GetActiveHost() string { return hosts[0] } +func (meta *ElasticsearchMetadata) getAvailableSeedHost() (string, *NodeAvailable) { + hosts := meta.GetSeedHosts() + if hosts == nil || len(hosts) == 0 { + return "", nil + } + + for _, host := range hosts { + if host == "" || !IsHostAvailable(host) { + continue + } + if info, ok := GetHostAvailableInfo(host); ok && info != nil && info.IsAvailable() { + return host, info + } + return host, nil + } + + return "", nil +} + func (meta *ElasticsearchMetadata) IsTLS() bool { 
return meta.GetSchema() == "https" } diff --git a/core/elastic/actions_test.go b/core/elastic/actions_test.go new file mode 100644 index 000000000..f7d2ef830 --- /dev/null +++ b/core/elastic/actions_test.go @@ -0,0 +1,90 @@ +package elastic + +import ( + "testing" + "time" + + "infini.sh/framework/core/orm" +) + +func TestGetActiveHostPrefersAvailableSeedHostOverCachedDiscoveredHost(t *testing.T) { + const ( + clusterID = "docker-mapped-port-cluster" + seedHost = "192.168.3.185:9211" + discoveredHost = "172.18.1.18:9200" + ) + + cfg := &ElasticsearchConfig{ + ORMObjectBase: orm.ORMObjectBase{ID: clusterID}, + Name: clusterID, + Host: seedHost, + Hosts: []string{seedHost}, + Enabled: true, + } + cfg.Discovery.Enabled = true + + meta := &ElasticsearchMetadata{ + Config: cfg, + Nodes: &map[string]NodesInfo{ + "node-1": { + Http: struct { + BoundAddress []string `json:"bound_address"` + PublishAddress string `json:"publish_address,omitempty"` + MaxContentLengthInBytes int64 `json:"max_content_length_in_bytes,omitempty"` + }{ + PublishAddress: discoveredHost, + }, + }, + }, + activeHost: &NodeAvailable{Host: discoveredHost, available: true, lastCheck: time.Now()}, + } + + hosts.Store(seedHost, &NodeAvailable{Host: seedHost, ClusterID: clusterID, available: true, lastCheck: time.Now()}) + hosts.Store(discoveredHost, &NodeAvailable{Host: discoveredHost, ClusterID: clusterID, available: true, lastCheck: time.Now()}) + t.Cleanup(func() { + hosts.Delete(seedHost) + hosts.Delete(discoveredHost) + }) + + got := meta.GetActiveHost() + if got != seedHost { + t.Fatalf("expected seed host %q to be preferred over discovered host %q, got %q", seedHost, discoveredHost, got) + } + if meta.activeHost == nil || meta.activeHost.Host != seedHost { + t.Fatalf("expected activeHost to be updated to seed host %q, got %#v", seedHost, meta.activeHost) + } +} + +func TestGetActiveHostFallsBackToCachedDiscoveredHostWhenSeedUnavailable(t *testing.T) { + const ( + clusterID = 
"docker-discovery-fallback-cluster" + seedHost = "192.168.3.185:9211" + discoveredHost = "172.18.1.18:9200" + ) + + cfg := &ElasticsearchConfig{ + ORMObjectBase: orm.ORMObjectBase{ID: clusterID}, + Name: clusterID, + Host: seedHost, + Hosts: []string{seedHost}, + Enabled: true, + } + cfg.Discovery.Enabled = true + + meta := &ElasticsearchMetadata{ + Config: cfg, + activeHost: &NodeAvailable{Host: discoveredHost, available: true, lastCheck: time.Now()}, + } + + hosts.Store(seedHost, &NodeAvailable{Host: seedHost, ClusterID: clusterID, available: false, lastCheck: time.Now()}) + hosts.Store(discoveredHost, &NodeAvailable{Host: discoveredHost, ClusterID: clusterID, available: true, lastCheck: time.Now()}) + t.Cleanup(func() { + hosts.Delete(seedHost) + hosts.Delete(discoveredHost) + }) + + got := meta.GetActiveHost() + if got != discoveredHost { + t.Fatalf("expected discovered host %q when seed host is unavailable, got %q", discoveredHost, got) + } +} diff --git a/core/elastic/common_command.go b/core/elastic/common_command.go index bc2cbdccf..0a4e9cd91 100644 --- a/core/elastic/common_command.go +++ b/core/elastic/common_command.go @@ -35,6 +35,7 @@ type CommonCommand struct { ID string `json:"-" index:"id"` Title string `json:"title" elastic_mapping:"title:{type:text,fields:{keyword:{type:keyword}}}"` Tag []string `json:"tag" elastic_mapping:"tag:{type:keyword}"` + Creator string `json:"creator,omitempty" elastic_mapping:"creator:{type:keyword}"` Requests []CommandRequest `json:"requests" elastic_mapping:"requests:{type:object}"` Created time.Time `json:"created,omitempty" elastic_mapping:"created:{type:date}"` } diff --git a/core/elastic/domain_actions.go b/core/elastic/domain_actions.go index d40bceadf..90e84f388 100644 --- a/core/elastic/domain_actions.go +++ b/core/elastic/domain_actions.go @@ -99,8 +99,14 @@ func RegisterInstance(cfg ElasticsearchConfig, handler API) { UpdateClient(cfg, handler) UpdateConfig(cfg) + meta := GetMetadata(cfg.ID) + if meta == nil { 
+ InitMetadata(&cfg, false) + return + } + if exists && oldCfg != nil { - InitMetadata(&cfg, true) + InitMetadata(&cfg, meta.IsAvailable()) } } diff --git a/core/elastic/domain_actions_test.go b/core/elastic/domain_actions_test.go new file mode 100644 index 000000000..1a2c05a56 --- /dev/null +++ b/core/elastic/domain_actions_test.go @@ -0,0 +1,35 @@ +package elastic + +import ( + "testing" + + "infini.sh/framework/core/orm" +) + +func TestRegisterInstanceInitializesMetadataOnFirstRegistration(t *testing.T) { + cfg := ElasticsearchConfig{ + ORMObjectBase: orm.ORMObjectBase{ID: "test-first-sync"}, + Name: "test-first-sync", + Enabled: true, + ClusterUUID: "cluster-uuid-1", + } + + t.Cleanup(func() { + cfgs.Delete(cfg.ID) + apis.Delete(cfg.ID) + metas.Delete(cfg.ID) + }) + + RegisterInstance(cfg, nil) + + meta := GetMetadata(cfg.ID) + if meta == nil { + t.Fatalf("expected metadata to be initialized for %s", cfg.ID) + } + if meta.Config == nil { + t.Fatalf("expected metadata config to be initialized for %s", cfg.ID) + } + if meta.Config.ClusterUUID != cfg.ClusterUUID { + t.Fatalf("expected cluster uuid %q, got %q", cfg.ClusterUUID, meta.Config.ClusterUUID) + } +} diff --git a/core/elastic/index.go b/core/elastic/index.go index 6ca476b22..27768169e 100755 --- a/core/elastic/index.go +++ b/core/elastic/index.go @@ -24,10 +24,13 @@ package elastic import ( + "bytes" "errors" "github.com/buger/jsonparser" "github.com/segmentio/encoding/json" "infini.sh/framework/core/util" + "sort" + "strconv" "strings" "time" ) @@ -217,6 +220,67 @@ type AggregationResponse struct { Value interface{} `json:"value,omitempty"` } +func (a *AggregationResponse) UnmarshalJSON(data []byte) error { + type alias struct { + Buckets json.RawMessage `json:"buckets,omitempty"` + Value interface{} `json:"value,omitempty"` + } + + var aux alias + if err := json.Unmarshal(data, &aux); err != nil { + return err + } + a.Value = aux.Value + + buckets := bytes.TrimSpace(aux.Buckets) + if len(buckets) == 0 || 
bytes.Equal(buckets, []byte("null")) { + a.Buckets = nil + return nil + } + + switch buckets[0] { + case '[': + return json.Unmarshal(buckets, &a.Buckets) + case '{': + keyedBuckets := map[string]BucketBase{} + if err := json.Unmarshal(buckets, &keyedBuckets); err != nil { + return err + } + + keys := make([]string, 0, len(keyedBuckets)) + for key := range keyedBuckets { + keys = append(keys, key) + } + sort.Slice(keys, func(i, j int) bool { + return compareBucketKeys(keys[i], keys[j]) + }) + + a.Buckets = make([]BucketBase, 0, len(keys)) + for _, key := range keys { + bucket := keyedBuckets[key] + if bucket == nil { + bucket = BucketBase{} + } + if _, ok := bucket["key"]; !ok { + bucket["key"] = key + } + a.Buckets = append(a.Buckets, bucket) + } + return nil + default: + return nil + } +} + +func compareBucketKeys(left, right string) bool { + leftInt, leftErr := strconv.ParseInt(left, 10, 64) + rightInt, rightErr := strconv.ParseInt(right, 10, 64) + if leftErr == nil && rightErr == nil { + return leftInt < rightInt + } + return left < right +} + type ResponseBase struct { RawResult *util.Result `json:"-"` StatusCode int `json:"-"` @@ -235,6 +299,48 @@ type ErrorDetail struct { Reason string `json:"reason,omitempty"` } +func (d *ErrorDetail) UnmarshalJSON(data []byte) error { + data = bytes.TrimSpace(data) + if len(data) == 0 || bytes.Equal(data, []byte("null")) { + return nil + } + + if len(data) > 0 && data[0] == '"' { + return json.Unmarshal(data, &d.Reason) + } + + type alias ErrorDetail + var aux alias + if err := json.Unmarshal(data, &aux); err != nil { + return err + } + *d = ErrorDetail(aux) + return nil +} + +func (d *ErrorDetail) Message() string { + if d == nil { + return "" + } + if d.Reason != "" { + return d.Reason + } + if len(d.RootCause) > 0 { + var reasons []string + for _, cause := range d.RootCause { + if cause.Reason != "" { + reasons = append(reasons, cause.Reason) + } else if cause.Type != "" { + reasons = append(reasons, cause.Type) + } + } 
+ if len(reasons) > 0 { + return strings.Join(reasons, "; ") + } + } + return d.Type +} + type RootCause struct { Type string `json:"type,omitempty"` Reason string `json:"reason,omitempty"` diff --git a/core/elastic/index_test.go b/core/elastic/index_test.go index d36ae4d1c..399a9d6bc 100644 --- a/core/elastic/index_test.go +++ b/core/elastic/index_test.go @@ -25,8 +25,27 @@ package elastic import ( "testing" + + "github.com/segmentio/encoding/json" ) +func TestAggregationResponseUnmarshalKeyedBuckets(t *testing.T) { + var agg AggregationResponse + err := json.Unmarshal([]byte(`{"buckets":{"0":{"doc_count":1740269},"1":{"doc_count":42}}}`), &agg) + if err != nil { + t.Fatalf("unexpected unmarshal error: %v", err) + } + if len(agg.Buckets) != 2 { + t.Fatalf("unexpected bucket count: %d", len(agg.Buckets)) + } + if agg.Buckets[0]["key"] != "0" || agg.Buckets[0]["doc_count"] != float64(1740269) { + t.Fatalf("unexpected first bucket: %#v", agg.Buckets[0]) + } + if agg.Buckets[1]["key"] != "1" || agg.Buckets[1]["doc_count"] != float64(42) { + t.Fatalf("unexpected second bucket: %#v", agg.Buckets[1]) + } +} + func TestIndexDocument_GetStringFieldFromSource(t *testing.T) { tests := []struct { name string @@ -220,3 +239,31 @@ func TestIndexDocument_TryGetStringFieldFromSource(t *testing.T) { }) } } + +func TestErrorDetailUnmarshalJSONString(t *testing.T) { + var detail ErrorDetail + if err := json.Unmarshal([]byte(`"initializing"`), &detail); err != nil { + t.Fatalf("unexpected unmarshal error: %v", err) + } + + if detail.Reason != "initializing" { + t.Fatalf("unexpected reason: %q", detail.Reason) + } + if detail.Message() != "initializing" { + t.Fatalf("unexpected message: %q", detail.Message()) + } +} + +func TestErrorDetailUnmarshalJSONObject(t *testing.T) { + var detail ErrorDetail + if err := json.Unmarshal([]byte(`{"type":"search_phase_execution_exception","reason":"all shards failed"}`), &detail); err != nil { + t.Fatalf("unexpected unmarshal error: %v", err) + } + 
+ if detail.Type != "search_phase_execution_exception" { + t.Fatalf("unexpected type: %q", detail.Type) + } + if detail.Message() != "all shards failed" { + t.Fatalf("unexpected message: %q", detail.Message()) + } +} diff --git a/core/elastic/partition.go b/core/elastic/partition.go index 63d474fb1..d18855642 100644 --- a/core/elastic/partition.go +++ b/core/elastic/partition.go @@ -32,6 +32,7 @@ import ( "fmt" "math" "net/http" + "sort" "strconv" "strings" @@ -40,12 +41,14 @@ import ( ) type PartitionQuery struct { - IndexName string `json:"index_name"` - FieldType string `json:"field_type"` - FieldName string `json:"field_name"` - Step interface{} `json:"step"` - Filter interface{} `json:"filter"` - DocType string `json:"doc_type"` + IndexName string `json:"index_name"` + FieldType string `json:"field_type"` + FieldName string `json:"field_name"` + Strategy string `json:"strategy,omitempty"` + Step interface{} `json:"step,omitempty"` + PartitionCount int `json:"partition_count,omitempty"` + Filter interface{} `json:"filter"` + DocType string `json:"doc_type"` } type PartitionInfo struct { @@ -54,6 +57,8 @@ type PartitionInfo struct { End float64 `json:"end"` Filter map[string]interface{} `json:"filter"` Docs int64 `json:"docs"` + Label string `json:"label,omitempty"` + Values []string `json:"values,omitempty"` Other bool } @@ -68,6 +73,11 @@ const ( PartitionByDate = "date" PartitionByKeyword = "keyword" PartitionByNumber = "number" + + PartitionStrategyStep = "step" + PartitionStrategyQuantile = "quantile" + PartitionStrategyTerms = "terms" + PartitionStrategyHash = "hash" ) func GetPartitions(q *PartitionQuery, client API) ([]PartitionInfo, error) { @@ -100,32 +110,6 @@ func GetPartitions(q *PartitionQuery, client API) ([]PartitionInfo, error) { switch q.FieldType { case PartitionByDate, PartitionByNumber: - var step float64 - if q.FieldType == PartitionByDate { - if stepV, ok := q.Step.(string); !ok { - return nil, fmt.Errorf("expect step value of string type 
since filedtype is %s", PartitionByDate) - } else { - du, err := util.ParseDuration(stepV) - if err != nil { - return nil, fmt.Errorf("parse step duration error: %w", err) - } - step = float64(du.Milliseconds()) - } - } else { - switch q.Step.(type) { - case float64: - step = q.Step.(float64) - case string: - v, err := strconv.Atoi(q.Step.(string)) - if err != nil { - return nil, fmt.Errorf("convert step error: %w", err) - } - step = float64(v) - default: - return nil, fmt.Errorf("invalid parameter step: %v", q.Step) - } - } - result, err := getBoundValues(client, q.IndexName, q.FieldName, vFilter) if err != nil { return nil, err @@ -138,23 +122,110 @@ func GetPartitions(q *PartitionQuery, client API) ([]PartitionInfo, error) { var ( partitions []PartitionInfo ) - partitions, err = getPartitionsByAgg(client, q.IndexName, q.FieldName, q.FieldType, step, vFilter) - if err != nil { - return nil, err + + switch normalizePartitionStrategy(q.Strategy) { + case PartitionStrategyStep: + step, err := parsePartitionStep(q.FieldType, q.Step) + if err != nil { + return nil, err + } + partitions, err = getPartitionsByAgg(client, q.IndexName, q.FieldName, q.FieldType, step, vFilter) + if err != nil { + return nil, err + } + case PartitionStrategyQuantile: + partitions, err = getPartitionsByQuantile(client, q.IndexName, q.FieldName, q.FieldType, q.PartitionCount, result.Min, result.Max, vFilter) + if err != nil { + return nil, err + } + default: + return nil, fmt.Errorf("unsupported partition strategy: %s", q.Strategy) } + if result.Null > 0 { partitions = append(partitions, PartitionInfo{ Filter: result.NotExistsFilter, Other: true, + Label: "Missing values", Docs: result.Null, }) } return partitions, nil + case PartitionByKeyword: + var ( + partitions []PartitionInfo + err error + ) + switch normalizePartitionStrategy(q.Strategy) { + case PartitionStrategyTerms: + partitions, err = getPartitionsByTerms(client, q.IndexName, q.FieldName, q.PartitionCount, vFilter) + if err != nil 
{ + return nil, err + } + case PartitionStrategyHash: + partitions, err = getPartitionsByHash(client, q.IndexName, q.FieldName, q.PartitionCount, vFilter) + if err != nil { + return nil, err + } + default: + return nil, fmt.Errorf("unsupported partition strategy: %s", q.Strategy) + } + + missingPartition, err := getMissingPartition(client, q.IndexName, q.FieldName, vFilter) + if err != nil { + return nil, err + } + if missingPartition != nil { + partitions = append(partitions, *missingPartition) + } + return partitions, nil default: return nil, fmt.Errorf("unsupported field type: %s", q.FieldType) } } +func normalizePartitionStrategy(strategy string) string { + switch strings.ToLower(strings.TrimSpace(strategy)) { + case "", PartitionStrategyStep: + return PartitionStrategyStep + case PartitionStrategyQuantile: + return PartitionStrategyQuantile + case PartitionStrategyTerms: + return PartitionStrategyTerms + case PartitionStrategyHash: + return PartitionStrategyHash + default: + return strings.ToLower(strings.TrimSpace(strategy)) + } +} + +func parsePartitionStep(fieldType string, stepValue interface{}) (float64, error) { + if fieldType == PartitionByDate { + stepV, ok := stepValue.(string) + if !ok { + return 0, fmt.Errorf("expect step value of string type since filedtype is %s", PartitionByDate) + } + du, err := util.ParseDuration(stepV) + if err != nil { + return 0, fmt.Errorf("parse step duration error: %w", err) + } + return float64(du.Milliseconds()), nil + } + + switch stepValue.(type) { + case float64: + return stepValue.(float64), nil + case string: + v, err := strconv.Atoi(stepValue.(string)) + if err != nil { + return 0, fmt.Errorf("convert step error: %w", err) + } + return float64(v), nil + default: + return 0, fmt.Errorf("invalid parameter step: %v", stepValue) + } +} + func getPartitionsByAgg(client API, indexName string, fieldName, fieldType string, step float64, filter interface{}) ([]PartitionInfo, error) { queryDsl := util.MapStr{ "size": 0, @@ 
-182,7 +253,7 @@ func getPartitionsByAgg(client API, indexName string, fieldName, fieldType strin if filter != nil { queryDsl["query"] = filter } - res, err := client.SearchWithRawQueryDSL(indexName, util.MustToJSONBytes(queryDsl)) + res, err := searchPartitionWithRawQueryDSL(client, indexName, queryDsl) if err != nil { return nil, err } @@ -217,13 +288,402 @@ func getPartitionsByAgg(client API, indexName string, fieldName, fieldType strin Docs: int64(docCount), Other: false, } - partition.Filter = buildPartitionFilter(min, max, fieldName, fieldType, filter) + partition.Filter = buildBoundedPartitionFilter(min, max, fieldName, fieldType, filter) partitions = append(partitions, partition) } } return partitions, nil } +func getPartitionsByQuantile(client API, indexName string, fieldName, fieldType string, partitionCount int, min, max float64, filter interface{}) ([]PartitionInfo, error) { + if partitionCount <= 0 { + return nil, fmt.Errorf("invalid parameter partition_count: %d", partitionCount) + } + + boundaries, err := getQuantileBoundaries(client, indexName, fieldName, partitionCount, min, max, filter) + if err != nil { + return nil, err + } + partitions := buildQuantilePartitions(boundaries, fieldName, fieldType, filter) + if len(partitions) == 0 { + return nil, nil + } + + counts, err := getPartitionDocCounts(client, indexName, partitions) + if err != nil { + return nil, err + } + + filtered := make([]PartitionInfo, 0, len(partitions)) + for i := range partitions { + partitions[i].Docs = counts[i] + if partitions[i].Docs <= 0 { + continue + } + filtered = append(filtered, partitions[i]) + } + return filtered, nil +} + +func getPartitionsByTerms(client API, indexName, fieldName string, partitionCount int, filter interface{}) ([]PartitionInfo, error) { + if partitionCount <= 0 { + return nil, fmt.Errorf("invalid parameter partition_count: %d", partitionCount) + } + + queryDsl := util.MapStr{ + "size": 0, + "aggs": util.MapStr{ + "partitions": util.MapStr{ + 
"terms": util.MapStr{ + "field": fieldName, + "size": partitionCount, + }, + }, + }, + } + if filter != nil { + queryDsl["query"] = filter + } + + res, err := searchPartitionWithRawQueryDSL(client, indexName, queryDsl) + if err != nil { + return nil, err + } + + var ( + partitions []PartitionInfo + values []string + ) + if partitionsAgg, ok := res.Aggregations["partitions"]; ok { + for idx, bucket := range partitionsAgg.Buckets { + value := fmt.Sprintf("%v", bucket["key"]) + docCount := util.GetInt64Value(bucket["doc_count"]) + if docCount <= 0 { + continue + } + values = append(values, value) + partitions = append(partitions, PartitionInfo{ + Key: float64(idx), + Docs: docCount, + Label: value, + Values: []string{value}, + Filter: buildExactTermPartitionFilter(value, fieldName, filter), + }) + } + } + + sumOtherDocCount, _ := jsonparser.GetInt(res.RawResult.Body, "aggregations", "partitions", "sum_other_doc_count") + if sumOtherDocCount > 0 { + partitions = append(partitions, PartitionInfo{ + Key: float64(len(partitions)), + Docs: sumOtherDocCount, + Label: "Other terms", + Values: append([]string(nil), values...), + Filter: buildOtherTermsPartitionFilter(values, fieldName, filter), + Other: true, + }) + } + + return partitions, nil +} + +func getPartitionsByHash(client API, indexName, fieldName string, partitionCount int, filter interface{}) ([]PartitionInfo, error) { + if partitionCount <= 0 { + return nil, fmt.Errorf("invalid parameter partition_count: %d", partitionCount) + } + + partitions := make([]PartitionInfo, 0, partitionCount) + for idx := 0; idx < partitionCount; idx++ { + partitions = append(partitions, PartitionInfo{ + Key: float64(idx), + Label: fmt.Sprintf("Hash %d/%d", idx+1, partitionCount), + Filter: buildHashPartitionFilter(idx, partitionCount, fieldName, filter), + }) + } + + counts, err := getHashPartitionDocCounts(client, indexName, fieldName, partitionCount, filter) + if err != nil { + return nil, err + } + + filtered := 
make([]PartitionInfo, 0, len(partitions)) + for idx := range partitions { + partitions[idx].Docs = counts[idx] + if partitions[idx].Docs <= 0 { + continue + } + filtered = append(filtered, partitions[idx]) + } + return filtered, nil +} + +func getHashPartitionDocCounts(client API, indexName, fieldName string, partitionCount int, filter interface{}) ([]int64, error) { + queryDsl := buildHashPartitionAggQuery(fieldName, partitionCount, filter) + res, err := searchPartitionWithRawQueryDSL(client, indexName, queryDsl) + if err != nil { + return nil, err + } + return extractHashPartitionDocCounts(res, partitionCount), nil +} + +func buildHashPartitionAggQuery(fieldName string, partitionCount int, filter interface{}) util.MapStr { + fieldLiteral := buildPainlessStringLiteral(fieldName) + queryDsl := util.MapStr{ + "size": 0, + "aggs": util.MapStr{ + "partitions": util.MapStr{ + "terms": util.MapStr{ + "size": partitionCount, + "value_type": "long", + "script": util.MapStr{ + "lang": "painless", + "source": fmt.Sprintf("if (doc[%s].size()==0 || doc[%s].value == '') return null; return (((doc[%s].value.hashCode() %% params.partition_count) + params.partition_count) %% params.partition_count);", fieldLiteral, fieldLiteral, fieldLiteral), + "params": util.MapStr{ + "partition_count": partitionCount, + }, + }, + }, + }, + }, + } + if filter != nil { + queryDsl["query"] = filter + } + return queryDsl +} + +func extractHashPartitionDocCounts(res *SearchResponse, partitionCount int) []int64 { + counts := make([]int64, partitionCount) + if res == nil { + return counts + } + partitionsAgg, ok := res.Aggregations["partitions"] + if !ok { + return counts + } + for _, bucket := range partitionsAgg.Buckets { + bucketKey, ok := extractHashPartitionBucketKey(bucket["key"]) + if !ok || bucketKey < 0 || bucketKey >= partitionCount { + continue + } + counts[bucketKey] = util.GetInt64Value(bucket["doc_count"]) + } + return counts +} + +func extractHashPartitionBucketKey(key interface{}) 
(int, bool) { + switch v := key.(type) { + case int: + return v, true + case int64: + return int(v), true + case int32: + return int(v), true + case uint: + return int(v), true + case uint64: + return int(v), true + case float64: + return int(v), true + case float32: + return int(v), true + case string: + parsed, err := strconv.Atoi(v) + if err != nil { + return 0, false + } + return parsed, true + default: + return 0, false + } +} + +func getQuantileBoundaries(client API, indexName, fieldName string, partitionCount int, min, max float64, filter interface{}) ([]float64, error) { + percents := buildQuantilePercents(partitionCount) + if len(percents) == 0 { + return []float64{min, max}, nil + } + + queryDsl := util.MapStr{ + "size": 0, + "aggs": util.MapStr{ + "partition_percentiles": util.MapStr{ + "percentiles": util.MapStr{ + "field": fieldName, + "percents": percents, + "keyed": false, + }, + }, + }, + } + if filter != nil { + queryDsl["query"] = filter + } + + res, err := searchPartitionWithRawQueryDSL(client, indexName, queryDsl) + if err != nil { + return nil, err + } + + boundaries := make([]float64, 0, len(percents)+2) + boundaries = append(boundaries, min) + _, err = jsonparser.ArrayEach(res.RawResult.Body, func(value []byte, _ jsonparser.ValueType, _ int, err error) { + if err != nil { + return + } + boundary, parseErr := jsonparser.GetFloat(value, "value") + if parseErr != nil || math.IsNaN(boundary) || math.IsInf(boundary, 0) { + return + } + boundaries = append(boundaries, boundary) + }, "aggregations", "partition_percentiles", "values") + if err != nil { + return nil, err + } + boundaries = append(boundaries, max) + boundaries = dedupeSortedBoundaries(boundaries) + if len(boundaries) == 1 { + return []float64{boundaries[0], boundaries[0]}, nil + } + return boundaries, nil +} + +func buildQuantilePercents(partitionCount int) []float64 { + if partitionCount <= 1 { + return nil + } + percents := make([]float64, 0, partitionCount-1) + for i := 1; i < 
partitionCount; i++ { + percents = append(percents, float64(i)*100/float64(partitionCount)) + } + return percents +} + +func dedupeSortedBoundaries(boundaries []float64) []float64 { + if len(boundaries) == 0 { + return nil + } + sort.Float64s(boundaries) + result := make([]float64, 0, len(boundaries)) + for _, boundary := range boundaries { + if len(result) == 0 || !sameBoundary(result[len(result)-1], boundary) { + result = append(result, boundary) + } + } + return result +} + +func sameBoundary(left, right float64) bool { + return math.Abs(left-right) <= 1e-9 +} + +func buildQuantilePartitions(boundaries []float64, fieldName, fieldType string, filter interface{}) []PartitionInfo { + if len(boundaries) < 2 { + return nil + } + + partitions := make([]PartitionInfo, 0, len(boundaries)-1) + if len(boundaries) == 2 { + partitions = append(partitions, PartitionInfo{ + Key: boundaries[1], + Start: boundaries[0], + End: boundaries[1], + Filter: buildOpenPartitionFilter(nil, nil, fieldName, fieldType, filter), + }) + return partitions + } + + for i := 1; i < len(boundaries); i++ { + lower, upper := boundaries[i-1], boundaries[i] + if sameBoundary(lower, upper) { + continue + } + + var lowerRef, upperRef *float64 + if i > 1 { + lowerRef = &lower + } + if i < len(boundaries)-1 { + upperRef = &upper + } + + partitions = append(partitions, PartitionInfo{ + Key: upper, + Start: lower, + End: upper, + Filter: buildOpenPartitionFilter(lowerRef, upperRef, fieldName, fieldType, filter), + }) + } + return partitions +} + +func getPartitionDocCounts(client API, indexName string, partitions []PartitionInfo) ([]int64, error) { + queryDsl := util.MapStr{ + "size": 0, + "aggs": util.MapStr{ + "partitions": util.MapStr{ + "filters": util.MapStr{ + "filters": buildPartitionFiltersMap(partitions), + }, + }, + }, + } + + res, err := searchPartitionWithRawQueryDSL(client, indexName, queryDsl) + if err != nil { + return nil, err + } + + counts := make([]int64, 0, len(partitions)) + for i := 
range partitions { + docCount, parseErr := jsonparser.GetInt(res.RawResult.Body, "aggregations", "partitions", "buckets", strconv.Itoa(i), "doc_count") + if parseErr != nil { + return nil, parseErr + } + counts = append(counts, docCount) + } + return counts, nil +} + +func buildPartitionFiltersMap(partitions []PartitionInfo) util.MapStr { + filters := util.MapStr{} + for i, partition := range partitions { + filters[strconv.Itoa(i)] = partition.Filter + } + return filters +} + +func getMissingPartition(client API, indexName, fieldName string, filter interface{}) (*PartitionInfo, error) { + queryDsl := util.MapStr{ + "size": 0, + "aggs": util.MapStr{ + "missing_field": util.MapStr{ + "filter": buildMissingFieldCondition(fieldName), + }, + }, + } + if filter != nil { + queryDsl["query"] = filter + } + + res, err := searchPartitionWithRawQueryDSL(client, indexName, queryDsl) + if err != nil { + return nil, err + } + + docCount, err := jsonparser.GetInt(res.RawResult.Body, "aggregations", "missing_field", "doc_count") + if err != nil || docCount <= 0 { + return nil, err + } + + return &PartitionInfo{ + Docs: docCount, + Label: "Missing values", + Filter: buildMissingFieldFilter(fieldName, filter), + Other: true, + }, nil +} + // NOTE: we assume GetPartitions returned sorted buckets from ES, if not, we need to manually sort source & target partitions by keys // sourcePartitions & targetPartitions must've been generated with same bucket step & offset func MergePartitions(sourcePartitions []PartitionInfo, targetPartitions []PartitionInfo, fieldName, fieldType string, filter interface{}) []PartitionInfo { @@ -253,7 +713,7 @@ func MergePartitions(sourcePartitions []PartitionInfo, targetPartitions []Partit Docs: util.MaxInt64(source.Docs, target.Docs), Other: false, } - partition.Filter = buildPartitionFilter(partition.Start, partition.End, fieldName, fieldType, filter) + partition.Filter = buildBoundedPartitionFilter(partition.Start, partition.End, fieldName, fieldType, 
filter) ret = append(ret, partition) sourceIdx += 1 targetIdx += 1 @@ -267,12 +727,14 @@ func MergePartitions(sourcePartitions []PartitionInfo, targetPartitions []Partit return ret } -func buildPartitionFilter(min, max float64, fieldName, fieldType string, filter interface{}) util.MapStr { +func buildBoundedPartitionFilter(min, max float64, fieldName, fieldType string, filter interface{}) util.MapStr { rv := util.MapStr{ "gte": min, "lte": max, } if fieldType == PartitionByDate { + rv["gte"] = normalizeDateRangeBoundary(min, true, true) + rv["lte"] = normalizeDateRangeBoundary(max, false, true) rv["format"] = "epoch_millis" } must := []interface{}{ @@ -290,7 +752,217 @@ func buildPartitionFilter(min, max float64, fieldName, fieldType string, filter "must": must, }, } +} + +func buildOpenPartitionFilter(lower, upper *float64, fieldName, fieldType string, filter interface{}) util.MapStr { + rv := util.MapStr{} + if lower != nil { + rv["gt"] = *lower + } + if upper != nil { + rv["lte"] = *upper + } + if fieldType == PartitionByDate { + if lower != nil { + rv["gt"] = normalizeDateRangeBoundary(*lower, true, false) + } + if upper != nil { + rv["lte"] = normalizeDateRangeBoundary(*upper, false, true) + } + rv["format"] = "epoch_millis" + } + var condition interface{} + if len(rv) == 0 || (len(rv) == 1 && rv["format"] != nil) { + condition = util.MapStr{ + "exists": util.MapStr{ + "field": fieldName, + }, + } + } else { + condition = util.MapStr{ + "range": util.MapStr{ + fieldName: rv, + }, + } + } + must := []interface{}{condition} + if filter != nil { + must = append(must, filter) + } + return util.MapStr{ + "bool": util.MapStr{ + "must": must, + }, + } + +} +func normalizeDateRangeBoundary(value float64, lower, inclusive bool) int64 { + switch { + case lower && inclusive: + return int64(math.Ceil(value)) + case lower && !inclusive: + return int64(math.Floor(value)) + case !lower && inclusive: + return int64(math.Floor(value)) + default: + return 
int64(math.Ceil(value)) + } +} + +func buildExactTermPartitionFilter(value, fieldName string, filter interface{}) util.MapStr { + return buildMustPartitionFilter([]interface{}{ + util.MapStr{ + "term": util.MapStr{ + fieldName: util.MapStr{ + "value": value, + }, + }, + }, + }, filter) +} + +func buildOtherTermsPartitionFilter(values []string, fieldName string, filter interface{}) util.MapStr { + boolFilter := util.MapStr{ + "must": []interface{}{ + util.MapStr{ + "exists": util.MapStr{ + "field": fieldName, + }, + }, + }, + } + if filter != nil { + boolFilter["must"] = append(boolFilter["must"].([]interface{}), filter) + } + if len(values) > 0 { + boolFilter["must_not"] = []interface{}{ + util.MapStr{ + "terms": util.MapStr{ + fieldName: values, + }, + }, + } + } + return util.MapStr{ + "bool": boolFilter, + } +} + +func buildHashPartitionFilter(partitionID, partitionCount int, fieldName string, filter interface{}) util.MapStr { + fieldLiteral := buildPainlessStringLiteral(fieldName) + return buildMustPartitionFilter([]interface{}{ + util.MapStr{ + "script": util.MapStr{ + "script": util.MapStr{ + "lang": "painless", + "source": fmt.Sprintf("doc[%s].size()!=0 && doc[%s].value != '' && (((doc[%s].value.hashCode() %% params.partition_count) + params.partition_count) %% params.partition_count) == params.partition_id", fieldLiteral, fieldLiteral, fieldLiteral), + "params": util.MapStr{ + "partition_count": partitionCount, + "partition_id": partitionID, + }, + }, + }, + }, + }, filter) +} + +func buildPainlessStringLiteral(value string) string { + replacer := strings.NewReplacer(`\`, `\\`, `'`, `\'`) + return "'" + replacer.Replace(value) + "'" +} + +func searchPartitionWithRawQueryDSL(client API, indexName string, queryDsl util.MapStr) (*SearchResponse, error) { + res, err := client.SearchWithRawQueryDSL(indexName, util.MustToJSONBytes(queryDsl)) + if err != nil { + return nil, err + } + if err := ensurePartitionSearchResponseOK(res); err != nil { + return nil, err + 
} + return res, nil +} + +func ensurePartitionSearchResponseOK(res *SearchResponse) error { + if res == nil { + return errors.New("empty search response") + } + if res.StatusCode == 0 || res.StatusCode == http.StatusOK { + return nil + } + if res.RawResult != nil && len(res.RawResult.Body) > 0 { + for _, path := range [][]string{ + {"error", "failed_shards", "[0]", "reason", "caused_by", "reason"}, + {"error", "failed_shards", "[0]", "reason", "reason"}, + {"error", "root_cause", "[0]", "reason"}, + {"error", "reason"}, + } { + if msg, ok := getJSONPathString(res.RawResult.Body, path...); ok && msg != "" { + return errors.New(msg) + } + } + } + if msg := res.Error.Message(); msg != "" { + return errors.New(msg) + } + if res.RawResult != nil && len(res.RawResult.Body) > 0 { + return errors.New(string(res.RawResult.Body)) + } + return fmt.Errorf("unexpected search status: %d", res.StatusCode) +} + +func getJSONPathString(data []byte, path ...string) (string, bool) { + v, err := jsonparser.GetString(data, path...) + if err != nil { + return "", false + } + return v, true +} + +func buildMissingFieldCondition(fieldName string) util.MapStr { + return util.MapStr{ + "bool": util.MapStr{ + "should": []interface{}{ + util.MapStr{ + "bool": util.MapStr{ + "must_not": []interface{}{ + util.MapStr{ + "exists": util.MapStr{ + "field": fieldName, + }, + }, + }, + }, + }, + util.MapStr{ + "term": util.MapStr{ + fieldName: util.MapStr{ + "value": "", + }, + }, + }, + }, + "minimum_should_match": 1, + }, + } +} + +func buildMissingFieldFilter(fieldName string, filter interface{}) util.MapStr { + return buildMustPartitionFilter([]interface{}{ + buildMissingFieldCondition(fieldName), + }, filter) +} + +func buildMustPartitionFilter(mustClauses []interface{}, filter interface{}) util.MapStr { + must := append([]interface{}{}, mustClauses...) 
+ if filter != nil { + must = append(must, filter) + } + return util.MapStr{ + "bool": util.MapStr{ + "must": must, + }, + } } func getBoundValues(client API, indexName string, fieldName string, filter interface{}) (*BoundValuesResult, error) { @@ -326,7 +998,7 @@ func getBoundValues(client API, indexName string, fieldName string, filter inter if filter != nil { queryDsl["query"] = filter } - res, err := client.SearchWithRawQueryDSL(indexName, util.MustToJSONBytes(queryDsl)) + res, err := searchPartitionWithRawQueryDSL(client, indexName, queryDsl) if err != nil { return nil, err } diff --git a/core/elastic/partition_test.go b/core/elastic/partition_test.go new file mode 100644 index 000000000..1a7795a6d --- /dev/null +++ b/core/elastic/partition_test.go @@ -0,0 +1,298 @@ +package elastic + +import ( + "net/http" + "reflect" + "strings" + "testing" + + "infini.sh/framework/core/util" +) + +func TestBuildQuantilePercents(t *testing.T) { + got := buildQuantilePercents(4) + want := []float64{25, 50, 75} + if !reflect.DeepEqual(got, want) { + t.Fatalf("unexpected percents: got %v want %v", got, want) + } +} + +func TestBuildQuantilePartitionsCreatesOpenEdgeRanges(t *testing.T) { + partitions := buildQuantilePartitions([]float64{10, 20, 30}, "value", PartitionByNumber, nil) + if len(partitions) != 2 { + t.Fatalf("unexpected partition count: %d", len(partitions)) + } + + firstRange := getMustClause(t, partitions[0].Filter)["range"].(util.MapStr)["value"].(util.MapStr) + if _, ok := firstRange["gt"]; ok { + t.Fatalf("expected first partition to have no lower bound, got %v", firstRange) + } + if got := firstRange["lte"]; got != float64(20) { + t.Fatalf("unexpected first upper bound: %v", got) + } + + secondRange := getMustClause(t, partitions[1].Filter)["range"].(util.MapStr)["value"].(util.MapStr) + if got := secondRange["gt"]; got != float64(20) { + t.Fatalf("unexpected second lower bound: %v", got) + } + if _, ok := secondRange["lte"]; ok { + t.Fatalf("expected last 
partition to have no upper bound, got %v", secondRange) + } +} + +func TestBuildQuantilePartitionsSinglePartitionUsesExistsFilter(t *testing.T) { + partitions := buildQuantilePartitions([]float64{5, 5}, "value", PartitionByNumber, nil) + if len(partitions) != 1 { + t.Fatalf("unexpected partition count: %d", len(partitions)) + } + + clause := getMustClause(t, partitions[0].Filter) + exists, ok := clause["exists"].(util.MapStr) + if !ok { + t.Fatalf("expected exists clause, got %v", clause) + } + if exists["field"] != "value" { + t.Fatalf("unexpected exists field: %v", exists["field"]) + } +} + +func TestBuildOpenPartitionFilterPreservesDateFormat(t *testing.T) { + upper := 1000.0 + filter := buildOpenPartitionFilter(nil, &upper, "ts", PartitionByDate, nil) + rangeFilter := getMustClause(t, filter)["range"].(util.MapStr)["ts"].(util.MapStr) + if got := rangeFilter["format"]; got != "epoch_millis" { + t.Fatalf("unexpected date format: %v", got) + } + if got := rangeFilter["lte"]; got != int64(1000) { + t.Fatalf("unexpected upper bound: %v", got) + } +} + +func TestBuildOpenPartitionFilterRoundsDatePercentileBoundaries(t *testing.T) { + lower := 1779109187904.8455 + upper := 1779109187999.999 + filter := buildOpenPartitionFilter(&lower, &upper, "created_at", PartitionByDate, nil) + rangeFilter := getMustClause(t, filter)["range"].(util.MapStr)["created_at"].(util.MapStr) + if got := rangeFilter["gt"]; got != int64(1779109187904) { + t.Fatalf("unexpected lower bound: %v", got) + } + if got := rangeFilter["lte"]; got != int64(1779109187999) { + t.Fatalf("unexpected upper bound: %v", got) + } +} + +func TestBuildBoundedPartitionFilterRoundsDateBoundaries(t *testing.T) { + filter := buildBoundedPartitionFilter(1779109187904.1, 1779109187999.9, "created_at", PartitionByDate, nil) + rangeFilter := getMustClause(t, filter)["range"].(util.MapStr)["created_at"].(util.MapStr) + if got := rangeFilter["gte"]; got != int64(1779109187905) { + t.Fatalf("unexpected lower bound: %v", 
got) + } + if got := rangeFilter["lte"]; got != int64(1779109187999) { + t.Fatalf("unexpected upper bound: %v", got) + } +} + +func TestBuildExactTermPartitionFilter(t *testing.T) { + filter := buildExactTermPartitionFilter("pmid-1", "pmid.keyword", nil) + termFilter := getMustClause(t, filter)["term"].(util.MapStr)["pmid.keyword"].(util.MapStr) + if got := termFilter["value"]; got != "pmid-1" { + t.Fatalf("unexpected term value: %v", got) + } +} + +func TestBuildOtherTermsPartitionFilter(t *testing.T) { + filter := buildOtherTermsPartitionFilter([]string{"a", "b"}, "pmid.keyword", nil) + boolFilter := filter["bool"].(util.MapStr) + mustNot := boolFilter["must_not"].([]interface{}) + termsFilter := mustNot[0].(util.MapStr)["terms"].(util.MapStr) + values := termsFilter["pmid.keyword"].([]string) + if !reflect.DeepEqual(values, []string{"a", "b"}) { + t.Fatalf("unexpected excluded values: %v", values) + } +} + +func TestBuildHashPartitionFilter(t *testing.T) { + filter := buildHashPartitionFilter(1, 8, "pmid.keyword", nil) + scriptFilter := getMustClause(t, filter)["script"].(util.MapStr)["script"].(util.MapStr) + if scriptFilter["lang"] != "painless" { + t.Fatalf("unexpected script language: %v", scriptFilter["lang"]) + } + source, ok := scriptFilter["source"].(string) + if !ok { + t.Fatalf("unexpected script source: %T", scriptFilter["source"]) + } + if !strings.Contains(source, "doc['pmid.keyword']") { + t.Fatalf("unexpected script source: %s", source) + } + if !strings.Contains(source, "value != ''") { + t.Fatalf("expected empty strings to be excluded from hash partition, got %s", source) + } + if strings.Contains(source, "Math.floorMod") { + t.Fatalf("unexpected script source: %s", source) + } + params := scriptFilter["params"].(util.MapStr) + if params["partition_count"] != 8 || params["partition_id"] != 1 { + t.Fatalf("unexpected script params: %v", params) + } + if _, ok := params["field"]; ok { + t.Fatalf("field should not be passed as a script param: %v", 
params) + } +} + +func TestBuildHashPartitionAggQueryAppliesOuterFilter(t *testing.T) { + query := buildHashPartitionAggQuery("pmid.keyword", 8, util.MapStr{ + "term": util.MapStr{ + "env": util.MapStr{"value": "prod"}, + }, + }) + + if !reflect.DeepEqual(query["query"], util.MapStr{ + "term": util.MapStr{ + "env": util.MapStr{"value": "prod"}, + }, + }) { + t.Fatalf("expected outer filter to be applied at top-level query, got %v", query["query"]) + } + + termsAgg := query["aggs"].(util.MapStr)["partitions"].(util.MapStr)["terms"].(util.MapStr) + if got := termsAgg["size"]; got != 8 { + t.Fatalf("unexpected partition size: %v", got) + } + if got := termsAgg["value_type"]; got != "long" { + t.Fatalf("unexpected value_type: %v", got) + } + script := termsAgg["script"].(util.MapStr) + source, ok := script["source"].(string) + if !ok { + t.Fatalf("unexpected script source type: %T", script["source"]) + } + if !strings.Contains(source, "return null") { + t.Fatalf("expected missing values to be skipped in hash aggregation, got %s", source) + } + if !strings.Contains(source, "value == ''") { + t.Fatalf("expected empty strings to be excluded in hash aggregation, got %s", source) + } + params := script["params"].(util.MapStr) + if got := params["partition_count"]; got != 8 { + t.Fatalf("unexpected partition_count: %v", got) + } +} + +func TestExtractHashPartitionDocCountsMapsByBucketKey(t *testing.T) { + counts := extractHashPartitionDocCounts(&SearchResponse{ + Aggregations: map[string]AggregationResponse{ + "partitions": { + Buckets: []BucketBase{ + {"key": float64(5), "doc_count": float64(12)}, + {"key": "1", "doc_count": float64(7)}, + {"key": float64(99), "doc_count": float64(3)}, + }, + }, + }, + }, 8) + + expected := []int64{0, 7, 0, 0, 0, 12, 0, 0} + if !reflect.DeepEqual(counts, expected) { + t.Fatalf("unexpected hash counts: got %v want %v", counts, expected) + } +} + +func TestBuildMissingFieldConditionIncludesEmptyString(t *testing.T) { + filter := 
buildMissingFieldCondition("pmid.keyword") + boolFilter, ok := filter["bool"].(util.MapStr) + if !ok { + t.Fatalf("expected bool filter, got %v", filter) + } + if got := boolFilter["minimum_should_match"]; got != 1 { + t.Fatalf("unexpected minimum_should_match: %v", got) + } + should, ok := boolFilter["should"].([]interface{}) + if !ok || len(should) != 2 { + t.Fatalf("expected two should clauses, got %v", boolFilter["should"]) + } + termFilter := should[1].(util.MapStr)["term"].(util.MapStr)["pmid.keyword"].(util.MapStr) + if got := termFilter["value"]; got != "" { + t.Fatalf("unexpected empty-string term filter: %v", termFilter) + } +} + +func TestBuildMissingFieldFilterPreservesOuterFilter(t *testing.T) { + filter := buildMissingFieldFilter("pmid.keyword", util.MapStr{ + "term": util.MapStr{ + "env": util.MapStr{"value": "prod"}, + }, + }) + boolFilter, ok := filter["bool"].(util.MapStr) + if !ok { + t.Fatalf("expected bool filter, got %v", filter) + } + must, ok := boolFilter["must"].([]interface{}) + if !ok || len(must) != 2 { + t.Fatalf("expected two must clauses, got %v", boolFilter["must"]) + } + innerBool, ok := must[0].(util.MapStr)["bool"].(util.MapStr) + if !ok { + t.Fatalf("expected wrapped missing bool filter, got %v", must[0]) + } + if got := innerBool["minimum_should_match"]; got != 1 { + t.Fatalf("unexpected minimum_should_match: %v", got) + } +} + +func TestBuildPainlessStringLiteralEscapesSingleQuote(t *testing.T) { + got := buildPainlessStringLiteral("foo'bar") + if got != `'foo\'bar'` { + t.Fatalf("unexpected painless string literal: %s", got) + } +} + +func TestEnsurePartitionSearchResponseOKReturnsBackendReason(t *testing.T) { + err := ensurePartitionSearchResponseOK(&SearchResponse{ + ResponseBase: ResponseBase{ + StatusCode: http.StatusInternalServerError, + RawResult: &util.Result{ + Body: []byte(`{"error":{"reason":"runtime script failure"},"status":500}`), + }, + InternalError: InternalError{ + Error: &ErrorDetail{ + Reason: "runtime 
script failure", + }, + Status: http.StatusInternalServerError, + }, + }, + }) + if err == nil || err.Error() != "runtime script failure" { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestEnsurePartitionSearchResponseOKReturnsCausedByReason(t *testing.T) { + err := ensurePartitionSearchResponseOK(&SearchResponse{ + ResponseBase: ResponseBase{ + StatusCode: http.StatusBadRequest, + RawResult: &util.Result{ + Body: []byte(`{"error":{"root_cause":[{"reason":"compile error"}],"failed_shards":[{"reason":{"reason":"compile error","caused_by":{"reason":"static method [java.lang.Math, floorMod/2] not found"}}}],"reason":"all shards failed"},"status":400}`), + }, + }, + }) + if err == nil || err.Error() != "static method [java.lang.Math, floorMod/2] not found" { + t.Fatalf("unexpected error: %v", err) + } +} + +func getMustClause(t *testing.T, filter util.MapStr) util.MapStr { + t.Helper() + boolFilter, ok := filter["bool"].(util.MapStr) + if !ok { + t.Fatalf("expected bool filter, got %v", filter) + } + must, ok := boolFilter["must"].([]interface{}) + if !ok || len(must) == 0 { + t.Fatalf("expected must clauses, got %v", boolFilter["must"]) + } + clause, ok := must[0].(util.MapStr) + if !ok { + t.Fatalf("expected util.MapStr clause, got %T", must[0]) + } + return clause +} diff --git a/core/env/env.go b/core/env/env.go index 45b61ff8e..1744e37f3 100755 --- a/core/env/env.go +++ b/core/env/env.go @@ -255,7 +255,11 @@ func (env *Env) InitPaths(cfgPath string) error { if cfgObj, err = config.LoadFile(cfgPath); err != nil { return fmt.Errorf("error loading confiuration file: %v, %w", cfgPath, err) } - return cfgObj.Unpack(&env.SystemConfig) + if err := cfgObj.Unpack(&env.SystemConfig); err != nil { + return err + } + env.normalizeRelativePaths() + return nil } else { if !env.IgnoreOnConfigMissing { return errors.Errorf("config file %v not found", cfgPath) @@ -418,6 +422,7 @@ func (env *Env) loadEnvFromConfigFile(filename string) error { } env.SystemConfig = &tempCfg 
+ env.normalizeRelativePaths() //initialize node config env.findWorkingDir() @@ -481,6 +486,30 @@ func (env *Env) loadEnvFromConfigFile(filename string) error { return nil } +func resolvePathRelativeToExecutable(p string) string { + p = strings.TrimSpace(p) + if p == "" || filepath.IsAbs(p) { + return p + } + + executablePath, err := os.Executable() + if err != nil { + return p + } + return filepath.Join(filepath.Dir(executablePath), p) +} + +func (env *Env) normalizeRelativePaths() { + if env.SystemConfig == nil { + return + } + + env.SystemConfig.PathConfig.Config = resolvePathRelativeToExecutable(env.SystemConfig.PathConfig.Config) + env.SystemConfig.PathConfig.Data = resolvePathRelativeToExecutable(env.SystemConfig.PathConfig.Data) + env.SystemConfig.PathConfig.Log = resolvePathRelativeToExecutable(env.SystemConfig.PathConfig.Log) + env.SystemConfig.PathConfig.Plugin = resolvePathRelativeToExecutable(env.SystemConfig.PathConfig.Plugin) +} + func (env *Env) GetConfigFile() string { return env.configFile } @@ -550,7 +579,7 @@ func ParseConfigSection(cfg *config.Config, configKey string, configInstance int // go-ucfg raises an error if the key does not exist, in which case // we should return and report that the configKey does not exist. 
if ucfgErr, ok := err.(ucfg.Error); ok && ucfgErr.Reason() == ucfg.ErrMissing { - log.Debugf("config key: %s not found", configKey) + log.Tracef("config key: %s not found", configKey) return false, nil } diff --git a/core/env/env_test.go b/core/env/env_test.go index 23e00d752..b87292741 100644 --- a/core/env/env_test.go +++ b/core/env/env_test.go @@ -24,6 +24,8 @@ package env import ( + "os" + "path/filepath" "testing" "github.com/stretchr/testify/assert" @@ -103,6 +105,55 @@ func TestParseConfigSection_ExistingKey_UnpackFails(t *testing.T) { require.Error(t, err) } +func TestResolvePathRelativeToExecutableUsesExecutableDir(t *testing.T) { + executablePath, err := os.Executable() + require.NoError(t, err) + + got := resolvePathRelativeToExecutable("data") + + assert.Equal(t, filepath.Join(filepath.Dir(executablePath), "data"), got) +} + +func TestNormalizeRelativePathsUsesExecutableDir(t *testing.T) { + executablePath, err := os.Executable() + require.NoError(t, err) + + env := EmptyEnv() + env.SystemConfig.PathConfig.Config = "config" + env.SystemConfig.PathConfig.Data = "data" + env.SystemConfig.PathConfig.Log = "log" + env.SystemConfig.PathConfig.Plugin = "plugin" + + env.normalizeRelativePaths() + + executableDir := filepath.Dir(executablePath) + assert.Equal(t, filepath.Join(executableDir, "config"), env.SystemConfig.PathConfig.Config) + assert.Equal(t, filepath.Join(executableDir, "data"), env.SystemConfig.PathConfig.Data) + assert.Equal(t, filepath.Join(executableDir, "log"), env.SystemConfig.PathConfig.Log) + assert.Equal(t, filepath.Join(executableDir, "plugin"), env.SystemConfig.PathConfig.Plugin) +} + +func TestInitPathsNormalizesRelativePathsFromConfig(t *testing.T) { + executablePath, err := os.Executable() + require.NoError(t, err) + + cfgFile, err := os.CreateTemp("", "env-paths-*.yml") + require.NoError(t, err) + defer os.Remove(cfgFile.Name()) + + _, err = cfgFile.WriteString("path.data: data\npath.log: log\npath.configs: config\n") + 
require.NoError(t, err) + require.NoError(t, cfgFile.Close()) + + env := EmptyEnv() + require.NoError(t, env.InitPaths(cfgFile.Name())) + + executableDir := filepath.Dir(executablePath) + assert.Equal(t, filepath.Join(executableDir, "data"), env.SystemConfig.PathConfig.Data) + assert.Equal(t, filepath.Join(executableDir, "log"), env.SystemConfig.PathConfig.Log) + assert.Equal(t, filepath.Join(executableDir, "config"), env.SystemConfig.PathConfig.Config) +} + func TestParseConfigSection_KeyExistsButPrimitive_ReturnsError(t *testing.T) { // Key exists but value is primitive (string), not an object. Child returns type error. cfg, err := config.NewConfigFrom(map[string]interface{}{ diff --git a/core/orm/registry.go b/core/orm/registry.go index 292b42e36..ab3b4513b 100644 --- a/core/orm/registry.go +++ b/core/orm/registry.go @@ -10,6 +10,11 @@ import ( var registeredSchemas = []util.KeyValue{} +func schemaRegistrationKey(t interface{}) string { + pkg, typeName := util.GetTypeAndPackageName(t, true) + return pkg + "-" + typeName +} + func MustRegisterSchemaWithIndexName(t interface{}, index string) { err := RegisterSchemaWithIndexName(t, index) if err != nil { @@ -18,6 +23,17 @@ func MustRegisterSchemaWithIndexName(t interface{}, index string) { } func RegisterSchemaWithIndexName(t interface{}, index string) error { + newKey := schemaRegistrationKey(t) + for _, registered := range registeredSchemas { + if registered.Key != index { + continue + } + existingKey := schemaRegistrationKey(registered.Payload) + if existingKey == newKey { + return nil + } + return errors.Errorf("schema index [%s] already registered by [%s]", index, existingKey) + } registeredSchemas = append(registeredSchemas, util.KeyValue{Key: index, Payload: t}) return nil } @@ -35,6 +51,10 @@ func InitSchema() error { var handler ORM +func HasHandler() bool { + return handler != nil +} + func getHandler() ORM { if handler == nil { panic(errors.New("ORM handler is not registered")) diff --git 
a/core/orm/registry_test.go b/core/orm/registry_test.go new file mode 100644 index 000000000..b74dc34dd --- /dev/null +++ b/core/orm/registry_test.go @@ -0,0 +1,39 @@ +package orm + +import "testing" + +type testSchemaAlpha struct{} +type testSchemaBeta struct{} + +func TestRegisterSchemaWithIndexNameDeduplicatesSameSchema(t *testing.T) { + original := registeredSchemas + registeredSchemas = nil + t.Cleanup(func() { + registeredSchemas = original + }) + + if err := RegisterSchemaWithIndexName(testSchemaAlpha{}, "test-index"); err != nil { + t.Fatalf("expected first registration to succeed, got %v", err) + } + if err := RegisterSchemaWithIndexName(&testSchemaAlpha{}, "test-index"); err != nil { + t.Fatalf("expected duplicate registration to be ignored, got %v", err) + } + if len(registeredSchemas) != 1 { + t.Fatalf("expected exactly one registered schema, got %d", len(registeredSchemas)) + } +} + +func TestRegisterSchemaWithIndexNameRejectsDifferentSchemaForSameIndex(t *testing.T) { + original := registeredSchemas + registeredSchemas = nil + t.Cleanup(func() { + registeredSchemas = original + }) + + if err := RegisterSchemaWithIndexName(testSchemaAlpha{}, "test-index"); err != nil { + t.Fatalf("expected first registration to succeed, got %v", err) + } + if err := RegisterSchemaWithIndexName(testSchemaBeta{}, "test-index"); err == nil { + t.Fatal("expected conflicting registration to fail") + } +} diff --git a/core/pipeline/context.go b/core/pipeline/context.go index 97fe18af8..8936789e4 100755 --- a/core/pipeline/context.go +++ b/core/pipeline/context.go @@ -93,21 +93,12 @@ type Context struct { id string steps int64 - // cancelFunc closes the Done channel of the embedded context.Context, - // signaling processors to stop early. - // - // This is a cooperative mechanism: it only takes effect if the processor's process() - // implementation explicitly checks IsCanceled() and returns when it is true. 
- cancelFunc context.CancelFunc - // True means the goroutine has been paused/suspended. - isPaused bool - pause sync.WaitGroup - // Set this to true if you want to stop the pipeline, and then, pause (suspend) the goroutine. - isQuit bool - stateLock sync.Mutex - // Set this to true if you want to let the goroutine exit, i.e., the kill signal. - released bool - // True means the goroutine already exited. + cancelFunc context.CancelFunc + isPaused bool + pause sync.WaitGroup + isQuit bool + stateLock sync.Mutex + released bool loopReleased bool } @@ -116,7 +107,6 @@ func AcquireContext(config PipelineConfigV2) *Context { ctx.ResetContext() ctx.id = util.GetUUID() ctx.createTime = time.Now() - // Placeholder state; the pipeline task execution loop will overwrite this. ctx.runningState = FINISHED ctx.Config = config return &ctx @@ -299,7 +289,21 @@ func (ctx *Context) Errors() []error { return ctx.processErrs } -// Pause suspends the goroutine that is running this pipeline. +func (ctx *Context) GetResultState() RunningState { + ctx.stateLock.Lock() + defer ctx.stateLock.Unlock() + + return ctx.getResultStateLocked() +} + +func (ctx *Context) GetResultError() string { + ctx.stateLock.Lock() + defer ctx.stateLock.Unlock() + + return formatPipelineResultError(ctx.exitErr, ctx.processErrs) +} + +// Pause will pause the pipeline running loop until Resume called func (ctx *Context) Pause() { ctx.stateLock.Lock() if ctx.isPaused { @@ -313,7 +317,7 @@ func (ctx *Context) Pause() { ctx.pause.Wait() } -// Resume wakes up the goroutine that was suspended by Pause. 
+// Resume recovers pipeline from Pause func (ctx *Context) Resume() { ctx.stateLock.Lock() if !ctx.isPaused { @@ -378,6 +382,30 @@ func (ctx *Context) setRunningState(newState RunningState) { } } +func (ctx *Context) getResultStateLocked() RunningState { + switch ctx.runningState { + case FINISHED, FAILED: + return ctx.runningState + case STOPPED: + if ctx.endTime == nil { + return STOPPED + } + if ctx.exitErr != nil || len(ctx.processErrs) > 0 { + return FAILED + } + return FINISHED + default: + return "" + } +} + +func formatPipelineResultError(exitErr error, processErrs []error) string { + if exitErr == nil && len(processErrs) == 0 { + return "" + } + return fmt.Sprintf("exit: %v, process: %v", exitErr, processErrs) +} + func (ctx *Context) pushPipelineLog() { if global.Env().IsDebug { log.Info("received pipeline state change, id: ", ctx.Config.Name, ", state: ", ctx.runningState) @@ -407,8 +435,8 @@ func (ctx *Context) pushPipelineLog() { result := util.MapStr{ "success": ctx.exitErr == nil, } - if ctx.exitErr != nil || len(ctx.processErrs) > 0 { - result["error"] = fmt.Sprintf("exit: %v, process: %v", ctx.exitErr, ctx.processErrs) + if errMsg := formatPipelineResultError(ctx.exitErr, ctx.processErrs); errMsg != "" { + result["error"] = errMsg } payload["result"] = result } diff --git a/core/pipeline/context_result_test.go b/core/pipeline/context_result_test.go new file mode 100644 index 000000000..addb8d4b5 --- /dev/null +++ b/core/pipeline/context_result_test.go @@ -0,0 +1,59 @@ +package pipeline + +import ( + "errors" + "testing" +) + +func TestGetResultStateReturnsFinishedAfterStoppedCompletedRun(t *testing.T) { + ctx := AcquireContext(PipelineConfigV2{}) + ctx.Started() + ctx.Finished() + ctx.Stopped() + + if got := ctx.GetResultState(); got != FINISHED { + t.Fatalf("expected FINISHED result state, got %q", got) + } + if got := ctx.GetResultError(); got != "" { + t.Fatalf("expected empty result error, got %q", got) + } +} + +func 
TestGetResultStateReturnsFailedAfterStoppedFailedRun(t *testing.T) { + ctx := AcquireContext(PipelineConfigV2{}) + ctx.Started() + ctx.Failed(errors.New("boom")) + ctx.Stopped() + + if got := ctx.GetResultState(); got != FAILED { + t.Fatalf("expected FAILED result state, got %q", got) + } + if got := ctx.GetResultError(); got == "" { + t.Fatal("expected result error for failed run") + } +} + +func TestGetResultStateReturnsStoppedForManualStop(t *testing.T) { + ctx := AcquireContext(PipelineConfigV2{}) + ctx.Started() + ctx.Stopping() + ctx.Stopped() + + if got := ctx.GetResultState(); got != STOPPED { + t.Fatalf("expected STOPPED result state, got %q", got) + } + if got := ctx.GetResultError(); got != "" { + t.Fatalf("expected empty result error, got %q", got) + } +} + +func TestGetResultErrorIncludesProcessErrors(t *testing.T) { + ctx := AcquireContext(PipelineConfigV2{}) + ctx.Started() + ctx.RecordError(errors.New("slice failed")) + ctx.Finished() + + if got := ctx.GetResultError(); got == "" { + t.Fatal("expected process error to be surfaced") + } +} diff --git a/core/queue/consumer_config.go b/core/queue/consumer_config.go index 8572bcf47..2f0b3f328 100644 --- a/core/queue/consumer_config.go +++ b/core/queue/consumer_config.go @@ -172,7 +172,7 @@ func RemoveAllConsumers(qConfig *QueueConfig) (bool, error) { log.Error(err) return false, err } - log.Debugf("success delete all consumers for queue:%v", qConfig.ID) + log.Tracef("success delete all consumers for queue:%v", qConfig.ID) return true, nil } diff --git a/core/queue/queue_config.go b/core/queue/queue_config.go index 8d2671869..d03b2f64c 100644 --- a/core/queue/queue_config.go +++ b/core/queue/queue_config.go @@ -118,7 +118,7 @@ func RegisterConfig(cfg *QueueConfig) (preExists bool, err error) { cfg.Created = time.Now().String() - log.Debug("init new queue config:", cfg.ID, ",", cfg.Name) + log.Trace("init new queue config:", cfg.ID, ",", cfg.Name) addCfgToCache(cfg) diff --git a/core/task/task.go 
b/core/task/task.go index 8ae5d6d0e..1b5eab23d 100644 --- a/core/task/task.go +++ b/core/task/task.go @@ -28,9 +28,11 @@ import ( log "github.com/cihub/seelog" "infini.sh/framework/core/errors" "infini.sh/framework/core/global" + "infini.sh/framework/core/orm" "infini.sh/framework/core/task/chrono" "infini.sh/framework/core/util" "runtime" + "strings" "sync" "sync/atomic" "time" @@ -38,6 +40,22 @@ import ( var Tasks = sync.Map{} +func shouldSilenceStartupTaskError(msg string) bool { + return !orm.HasHandler() && strings.Contains(msg, "ORM handler is not registered") +} + +func logTaskRuntimeIssue(msg string, raw interface{}) { + if shouldSilenceStartupTaskError(msg) { + log.Debug(msg) + return + } + if raw != nil { + log.Error(raw, msg) + return + } + log.Error(msg) +} + type State string const ( @@ -103,7 +121,7 @@ func RegisterTransientTask(group, tag string, f func(ctx context.Context) error, case string: v = r.(string) } - log.Error(r, v) + logTaskRuntimeIssue(v, r) } } task.State = Finished @@ -118,7 +136,7 @@ func RegisterTransientTask(group, tag string, f func(ctx context.Context) error, task.State = Running err := inner(innerCtx) if err != nil { - log.Error(err) + logTaskRuntimeIssue(err.Error(), err) } t = time.Now() task.EndTime = &t @@ -194,7 +212,7 @@ func RegisterScheduleTask(task ScheduleTask) (taskID string) { case string: v = r.(string) } - log.Error(v) + logTaskRuntimeIssue(v, nil) } } task.isTaskRunning.Store(false) diff --git a/modules/elastic/adapter/elasticsearch/v0.go b/modules/elastic/adapter/elasticsearch/v0.go index 0a13b8bd8..075731ec5 100755 --- a/modules/elastic/adapter/elasticsearch/v0.go +++ b/modules/elastic/adapter/elasticsearch/v0.go @@ -491,6 +491,20 @@ func (c *ESAPIV0) Get(indexName, docType, id string) (*elastic.GetResponse, erro return esResp, err } + if resp.StatusCode >= 400 { + if esResp.Error != nil { + errType := esResp.Error.Type + errReason := esResp.Error.Message() + if errType != "" && errReason != "" { + return 
esResp, errors.Errorf("status:%d, type:%s, reason:%s", resp.StatusCode, errType, errReason) + } + if errReason != "" { + return esResp, errors.Errorf("status:%d, reason:%s", resp.StatusCode, errReason) + } + } + return esResp, errors.Errorf("status:%d", resp.StatusCode) + } + return esResp, nil } @@ -1263,7 +1277,7 @@ func (s *ESAPIV0) UpdateMapping(indexName string, docType string, mappings []byt panic(err) } if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("%s", resp.Body) + return nil, fmt.Errorf("%s", string(resp.Body)) } return resp.Body, nil @@ -1416,7 +1430,7 @@ func (c *ESAPIV0) GetTemplate(templateName string) (map[string]interface{}, erro } if resp.StatusCode != 200 { - return nil, fmt.Errorf("%s", resp.Body) + return nil, fmt.Errorf("%s", string(resp.Body)) } data := map[string]interface{}{} @@ -1725,7 +1739,7 @@ func (c *ESAPIV0) Alias(body []byte) error { return err } if res.StatusCode != http.StatusOK { - return fmt.Errorf("%s", res.Body) + return fmt.Errorf("%s", string(res.Body)) } return nil } @@ -1880,7 +1894,7 @@ func (c *ESAPIV0) UpdateClusterSettings(body []byte) error { } if resp.StatusCode != http.StatusOK { - return fmt.Errorf("%s", resp.Body) + return fmt.Errorf("%s", string(resp.Body)) } return nil @@ -1893,7 +1907,7 @@ func (c *ESAPIV0) GetRemoteInfo() ([]byte, error) { return nil, err } if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("%s", resp.Body) + return nil, fmt.Errorf("%s", string(resp.Body)) } return resp.Body, nil @@ -2003,7 +2017,7 @@ func (c *ESAPIV0) Flush(indexName string) ([]byte, error) { return nil, err } if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("%s", resp.Body) + return nil, fmt.Errorf("%s", string(resp.Body)) } return resp.Body, nil } @@ -2034,7 +2048,7 @@ func (c *ESAPIV0) ClusterAllocationExplain(ctx context.Context, body []byte, par return nil, err } if resp.StatusCode != 200 { - return nil, fmt.Errorf("%s", resp.Body) + return nil, fmt.Errorf("%s", 
string(resp.Body)) } return resp.Body, nil } @@ -2046,7 +2060,7 @@ func (c *ESAPIV0) CatAllocation(ctx context.Context) ([]elastic.CatAllocationRes return nil, err } if resp.StatusCode != 200 { - return nil, fmt.Errorf("%s", resp.Body) + return nil, fmt.Errorf("%s", string(resp.Body)) } data := []elastic.CatAllocationResponse{} err = json.Unmarshal(resp.Body, &data) diff --git a/modules/elastic/adapter/ver.go b/modules/elastic/adapter/ver.go index 1e3120cb5..a02119217 100755 --- a/modules/elastic/adapter/ver.go +++ b/modules/elastic/adapter/ver.go @@ -169,6 +169,14 @@ func RequestTimeout(ctx *elastic.APIContext, method, url string, body []byte, me func GetClusterUUID(clusterID string) (string, error) { meta := elastic.GetMetadata(clusterID) + if meta == nil { + if cfg := elastic.GetConfigNoPanic(clusterID); cfg != nil { + if cfg.ClusterUUID != "" { + return cfg.ClusterUUID, nil + } + meta = elastic.GetOrInitMetadata(cfg) + } + } if meta == nil { return "", fmt.Errorf("metadata can not be mepty") } diff --git a/modules/elastic/adapter/ver_test.go b/modules/elastic/adapter/ver_test.go new file mode 100644 index 000000000..db5808e55 --- /dev/null +++ b/modules/elastic/adapter/ver_test.go @@ -0,0 +1,30 @@ +package adapter + +import ( + "testing" + + "infini.sh/framework/core/elastic" + "infini.sh/framework/core/orm" +) + +func TestGetClusterUUIDFallsBackToConfigWhenMetadataMissing(t *testing.T) { + cfg := elastic.ElasticsearchConfig{ + ORMObjectBase: orm.ORMObjectBase{ID: "test-cluster-uuid-fallback"}, + Name: "test-cluster-uuid-fallback", + ClusterUUID: "cluster-uuid-fallback", + } + + t.Cleanup(func() { + elastic.RemoveInstance(cfg.ID) + }) + + elastic.UpdateConfig(cfg) + + clusterUUID, err := GetClusterUUID(cfg.ID) + if err != nil { + t.Fatalf("expected cluster uuid from config fallback, got error: %v", err) + } + if clusterUUID != cfg.ClusterUUID { + t.Fatalf("expected cluster uuid %q, got %q", cfg.ClusterUUID, clusterUUID) + } +} diff --git 
a/modules/elastic/common/config.go b/modules/elastic/common/config.go index eeadb3dd8..4c27a29d4 100644 --- a/modules/elastic/common/config.go +++ b/modules/elastic/common/config.go @@ -87,7 +87,9 @@ func InitClientWithConfig(esConfig elastic.ElasticsearchConfig) (client elastic. ver string ) if esConfig.Version == "" || esConfig.Version == "auto" { - verInfo, err := adapter.ClusterVersion(elastic.GetOrInitMetadata(&esConfig)) + probeMeta := &elastic.ElasticsearchMetadata{Config: &esConfig} + probeMeta.Init(true) + verInfo, err := adapter.ClusterVersion(probeMeta) if err != nil { return nil, err } @@ -219,6 +221,9 @@ func InitElasticInstance(esConfig elastic.ElasticsearchConfig) (elastic.API, err log.Warn("elasticsearch ", esConfig.Name, " is not enabled") return nil, nil } + originMeta := elastic.GetMetadata(esConfig.ID) + initHealth := getInitialMetadataHealth(originMeta) + client, err := InitClientWithConfig(esConfig) if err != nil { log.Error("elasticsearch ", esConfig.Name, err) @@ -226,12 +231,6 @@ func InitElasticInstance(esConfig elastic.ElasticsearchConfig) (elastic.API, err } elastic.RegisterInstance(esConfig, client) - originMeta := elastic.GetMetadata(esConfig.ID) - initHealth := true - if originMeta != nil { - initHealth = originMeta.IsAvailable() - } - v := elastic.InitMetadata(&esConfig, initHealth) if v.Health == nil && originMeta != nil { v.Health = originMeta.Health @@ -240,6 +239,13 @@ func InitElasticInstance(esConfig elastic.ElasticsearchConfig) (elastic.API, err return client, err } +func getInitialMetadataHealth(originMeta *elastic.ElasticsearchMetadata) bool { + if originMeta == nil { + return true + } + return originMeta.IsAvailable() +} + func GetBasicAuth(esConfig *elastic.ElasticsearchConfig) (basicAuth *model.BasicAuth, err error) { if esConfig.BasicAuth != nil && esConfig.BasicAuth.Username != "" { basicAuth = esConfig.BasicAuth diff --git a/modules/elastic/common/config_test.go b/modules/elastic/common/config_test.go new file mode 
100644 index 000000000..461d5286d --- /dev/null +++ b/modules/elastic/common/config_test.go @@ -0,0 +1,27 @@ +package common + +import ( + "testing" + + "infini.sh/framework/core/elastic" +) + +func TestGetInitialMetadataHealthDefaultsToAvailableForNewCluster(t *testing.T) { + if !getInitialMetadataHealth(nil) { + t.Fatal("expected new cluster metadata to start as available before first health check") + } +} + +func TestGetInitialMetadataHealthKeepsExistingAvailability(t *testing.T) { + meta := &elastic.ElasticsearchMetadata{Config: &elastic.ElasticsearchConfig{Enabled: true}} + meta.Init(false) + + if getInitialMetadataHealth(meta) { + t.Fatal("expected existing unavailable metadata to remain unavailable") + } + + meta.Init(true) + if !getInitialMetadataHealth(meta) { + t.Fatal("expected existing available metadata to remain available") + } +} diff --git a/modules/elastic/metadata.go b/modules/elastic/metadata.go index 60c07bdd9..1c6cff6ce 100644 --- a/modules/elastic/metadata.go +++ b/modules/elastic/metadata.go @@ -697,10 +697,13 @@ func (module *ElasticModule) updateNodeInfo(meta *elastic.ElasticsearchMetadata, log.Trace("update node info") if !force && !meta.IsAvailable() { + stateChanged := false if !force { - setNodeUnknown(meta.Config.ID) + stateChanged = setNodeUnknown(meta.Config.ID) + } + if stateChanged || rate.GetRateLimiter("metadata_node_info_skip", meta.Config.ID, 1, 1, 10*time.Minute).Allow() { + log.Debugf("elasticsearch [%v] is not available, skip update node info", meta.Config.Name) } - log.Debugf("elasticsearch [%v] is not available, skip update node info", meta.Config.Name) return } @@ -808,17 +811,17 @@ func (module *ElasticModule) updateNodeInfo(meta *elastic.ElasticsearchMetadata, var saveNodeMetadataMutex = sync.Mutex{} var nodeAlreadyUnknown = map[string]bool{} -func setNodeUnknown(clusterID string) { +func setNodeUnknown(clusterID string) bool { kv.DeleteKey(elastic.KVElasticNodeMetadata, []byte(clusterID)) meta := 
elastic.GetMetadata(clusterID) if meta == nil { - return + return false } if meta.Config.Source != elastic.ElasticsearchConfigSourceElasticsearch { - return + return false } if v, ok := nodeAlreadyUnknown[clusterID]; ok && v { - return + return false } queueConfig := queue.GetOrInitConfig(elastic.QueueElasticIndexState) if queueConfig.Labels == nil { @@ -846,6 +849,7 @@ func setNodeUnknown(clusterID string) { } nodeAlreadyUnknown[clusterID] = true + return true } func saveNodeMetadata(nodes map[string]elastic.NodesInfo, clusterID string) error { esConfig := elastic.GetConfig(clusterID) diff --git a/modules/elastic/module.go b/modules/elastic/module.go index 52264b0cc..56ad1ca42 100755 --- a/modules/elastic/module.go +++ b/modules/elastic/module.go @@ -112,10 +112,23 @@ func loadFileBasedElasticConfig() []elastic.ElasticsearchConfig { return configs } +func lookupSystemElasticsearchID() (string, bool) { + value := global.Lookup(elastic.GlobalSystemElasticsearchID) + systemID, ok := value.(string) + if !ok || systemID == "" { + return "", false + } + return systemID, true +} + func loadESBasedElasticConfig() []elastic.ElasticsearchConfig { configs := []elastic.ElasticsearchConfig{} + systemID, ok := lookupSystemElasticsearchID() + if !ok { + return configs + } query := elastic.SearchRequest{From: 0, Size: 1000} //TODO handle clusters beyond 1000 - esClient := elastic.GetClient(global.MustLookupString(elastic.GlobalSystemElasticsearchID)) + esClient := elastic.GetClient(systemID) result, err := esClient.Search(orm.GetIndexName(elastic.ElasticsearchConfig{}), &query) if err != nil { log.Error(err) @@ -394,20 +407,29 @@ func InitSchema() { var ormInited bool func (module *ElasticModule) Start() error { + systemID, hasSystemCluster := lookupSystemElasticsearchID() if moduleConfig.ORMConfig.Enabled { - client := elastic.GetClient(global.MustLookupString(elastic.GlobalSystemElasticsearchID)) - handler := ElasticORM{Client: client, Config: moduleConfig.ORMConfig} - 
orm.Register("elastic", &handler) + if !hasSystemCluster { + log.Warn("skip elastic ORM initialization, system cluster is not available") + } else { + client := elastic.GetClient(systemID) + handler := ElasticORM{Client: client, Config: moduleConfig.ORMConfig} + orm.Register("elastic", &handler) + } } if moduleConfig.StoreConfig.Enabled { - client := elastic.GetClient(global.MustLookupString(elastic.GlobalSystemElasticsearchID)) - module.storeHandler = &ElasticStore{Client: client, Config: moduleConfig.StoreConfig} - kv.Register("elastic", module.storeHandler) + if !hasSystemCluster { + log.Warn("skip elastic store initialization, system cluster is not available") + } else { + client := elastic.GetClient(systemID) + module.storeHandler = &ElasticStore{Client: client, Config: moduleConfig.StoreConfig} + kv.Register("elastic", module.storeHandler) + } } - if moduleConfig.ORMConfig.Enabled { + if moduleConfig.ORMConfig.Enabled && hasSystemCluster { if !ormInited { //init template InitTemplate(false) @@ -418,8 +440,12 @@ func (module *ElasticModule) Start() error { } if moduleConfig.RemoteConfigEnabled { - m := loadESBasedElasticConfig() - initElasticInstances(m, elastic.ElasticsearchConfigSourceElasticsearch) + if !hasSystemCluster { + log.Warn("skip remote elastic config loading, system cluster is not available") + } else { + m := loadESBasedElasticConfig() + initElasticInstances(m, elastic.ElasticsearchConfigSourceElasticsearch) + } } if module.storeHandler != nil { @@ -693,7 +719,18 @@ func (module *ElasticModule) refreshAllClusterMetadata() { log.Trace("update elasticsearch's metadata:", v, ok) if ok { - module.updateNodeInfo(v, false, v.Config.Discovery.Enabled) + cfg := elastic.GetConfigNoPanic(v.Config.ID) + if cfg == nil { + log.Debugf("elasticsearch metadata [%v] has no active config, removing stale metadata", v.Config.ID) + elastic.RemoveInstance(v.Config.ID) + elastic.RemoveHostsByClusterID(v.Config.ID) + return true + } + v.Config = cfg + if !cfg.Enabled 
|| (cfg.MetadataConfigs != nil && !cfg.MetadataConfigs.MetadataRefresh.Enabled) { + return true + } + module.updateNodeInfo(v, false, cfg.Discovery.Enabled) } return true }) @@ -706,6 +743,17 @@ func (module *ElasticModule) refreshAllClusterAlias(force bool) { } v, ok := value.(*elastic.ElasticsearchMetadata) if ok { + cfg := elastic.GetConfigNoPanic(v.Config.ID) + if cfg == nil { + log.Debugf("elasticsearch metadata [%v] has no active config, removing stale metadata", v.Config.ID) + elastic.RemoveInstance(v.Config.ID) + elastic.RemoveHostsByClusterID(v.Config.ID) + return true + } + v.Config = cfg + if !cfg.Enabled || (cfg.MetadataConfigs != nil && !cfg.MetadataConfigs.MetadataRefresh.Enabled) { + return true + } updateAliases(v, force) } return true diff --git a/modules/elastic/module_test.go b/modules/elastic/module_test.go index 3accbf667..e88f7e39c 100644 --- a/modules/elastic/module_test.go +++ b/modules/elastic/module_test.go @@ -1,48 +1,48 @@ -// Copyright (C) INFINI Labs & INFINI LIMITED. -// -// The INFINI Framework is offered under the GNU Affero General Public License v3.0 -// and as commercial software. -// -// For commercial licensing, contact us at: -// - Website: infinilabs.com -// - Email: hello@infini.ltd -// -// Open Source licensed under AGPL V3: -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . 
- package elastic import ( - "fmt" - "github.com/buger/jsonparser" - "infini.sh/framework/core/util" "testing" + + coreElastic "infini.sh/framework/core/elastic" + "infini.sh/framework/core/global" ) -func TestV7GetClusterStates(t *testing.T) { - str := "{ \"_nodes\": { \"total\": 1, \"successful\": 1, \"failed\": 0 }, \"cluster_name\": \"es-v700\", \"cluster_uuid\": \"7NtDffC3RzGChhoOmgySig\", \"timestamp\": 1629611578327, \"status\": \"green\", \"indices\": { \"count\": 0, \"shards\": {}, \"docs\": { \"count\": 0, \"deleted\": 0 }, \"store\": { \"size_in_bytes\": 0 }, \"fielddata\": { \"memory_size_in_bytes\": 0, \"evictions\": 0 }, \"query_cache\": { \"memory_size_in_bytes\": 0, \"total_count\": 0, \"hit_count\": 0, \"miss_count\": 0, \"cache_size\": 0, \"cache_count\": 0, \"evictions\": 0 }, \"completion\": { \"size_in_bytes\": 0 }, \"segments\": { \"count\": 0, \"memory_in_bytes\": 0, \"terms_memory_in_bytes\": 0, \"stored_fields_memory_in_bytes\": 0, \"term_vectors_memory_in_bytes\": 0, \"norms_memory_in_bytes\": 0, \"points_memory_in_bytes\": 0, \"doc_values_memory_in_bytes\": 0, \"index_writer_memory_in_bytes\": 0, \"version_map_memory_in_bytes\": 0, \"fixed_bit_set_memory_in_bytes\": 0, \"max_unsafe_auto_id_timestamp\": -9223372036854776000, \"file_sizes\": {} } }, \"nodes\": { \"count\": { \"total\": 1, \"data\": 1, \"coordinating_only\": 0, \"master\": 1, \"ingest\": 1 }, \"versions\": [ \"7.0.0\" ], \"os\": { \"available_processors\": 24, \"allocated_processors\": 24, \"names\": [ { \"name\": \"Windows 10\", \"count\": 1 } ], \"pretty_names\": [ { \"pretty_name\": \"Windows 10\", \"count\": 1 } ], \"mem\": { \"total_in_bytes\": 137121308672, \"free_in_bytes\": 114813546496, \"used_in_bytes\": 22307762176, \"free_percent\": 84, \"used_percent\": 16 } }, \"process\": { \"cpu\": { \"percent\": 0 }, \"open_file_descriptors\": { \"min\": -1, \"max\": -1, \"avg\": 0 } }, \"jvm\": { \"max_uptime_in_millis\": 2021226, \"versions\": [ { \"version\": \"9.0.1.3\", 
\"vm_name\": \"OpenJDK 64-Bit Server VM\", \"vm_version\": \"9.0.1.3+11\", \"vm_vendor\": \"Azul Systems, Inc.\", \"bundled_jdk\": false, \"using_bundled_jdk\": null, \"count\": 1 } ], \"mem\": { \"heap_used_in_bytes\": 277003800, \"heap_max_in_bytes\": 1037959168 }, \"threads\": 66 }, \"fs\": { \"total_in_bytes\": 6000527532032, \"free_in_bytes\": 3111816585216, \"available_in_bytes\": 3111816585216 }, \"plugins\": [], \"network_types\": { \"transport_types\": { \"netty4\": 1 }, \"http_types\": { \"netty4\": 1 } }, \"discovery_types\": { \"zen\": 1 } } }" +func TestLoadESBasedElasticConfigSkipsWhenSystemClusterUnavailable(t *testing.T) { + previous := global.Lookup(coreElastic.GlobalSystemElasticsearchID) + defer global.Register(coreElastic.GlobalSystemElasticsearchID, previous) + + global.Register(coreElastic.GlobalSystemElasticsearchID, "") + + configs := loadESBasedElasticConfig() + if len(configs) != 0 { + t.Fatalf("expected no remote configs when system cluster id is unavailable, got %d", len(configs)) + } +} + +func TestElasticModuleStartSkipsSystemClusterDependentInitBeforeSetup(t *testing.T) { + previousSystemID := global.Lookup(coreElastic.GlobalSystemElasticsearchID) + defer global.Register(coreElastic.GlobalSystemElasticsearchID, previousSystemID) + + previousModuleConfig := moduleConfig + defer func() { + moduleConfig = previousModuleConfig + }() + + previousOrmInited := ormInited + defer func() { + ormInited = previousOrmInited + }() + + global.Register(coreElastic.GlobalSystemElasticsearchID, "") + + moduleConfig = getDefaultConfig() + moduleConfig.ORMConfig.Enabled = true + moduleConfig.StoreConfig.Enabled = true + moduleConfig.RemoteConfigEnabled = true + ormInited = false - d1, err := jsonparser.GetInt(util.UnsafeStringToBytes(str), "indices", "segments", "max_unsafe_auto_id_timestamp") - fmt.Println("xv:", d1, err) - if err != nil { - d, err := jsonparser.Set(util.UnsafeStringToBytes(str), []byte("-1"), "indices", "segments", 
"max_unsafe_auto_id_timestamp") - if err == nil { - str = util.UnsafeBytesToString(d) - } + module := &ElasticModule{} + if err := module.Start(); err != nil { + t.Fatalf("expected elastic module start to succeed before setup, got %v", err) } - d1, err = jsonparser.GetInt(util.UnsafeStringToBytes(str), "indices", "segments", "max_unsafe_auto_id_timestamp") - fmt.Println("xv:", d1, err) - //xv,err:=jsonparser.GetInt([]byte(str),"indices.segments.max_unsafe_auto_id_timestamp") - //fmt.Println("xv:",xv,err) } diff --git a/modules/elastic/schema.go b/modules/elastic/schema.go index a437af49f..80119c1c1 100755 --- a/modules/elastic/schema.go +++ b/modules/elastic/schema.go @@ -35,6 +35,7 @@ import ( "sync" "unicode" + "infini.sh/framework/core/elastic" "infini.sh/framework/core/global" "github.com/buger/jsonparser" @@ -124,6 +125,62 @@ func parseAnnotation(mapping []util.Annotation) string { return json } +func ensureDefaultStringDynamicTemplates(mappingData map[string]interface{}) { + if mappingData == nil { + return + } + if _, ok := mappingData["dynamic_templates"]; ok { + return + } + mappingData["dynamic_templates"] = []interface{}{ + util.MapStr{ + "strings": util.MapStr{ + "match_mapping_type": "string", + "mapping": util.MapStr{ + "type": "keyword", + "ignore_above": 256, + }, + }, + }, + } +} + +func containsKeyDeep(value interface{}, targetKey string) bool { + switch v := value.(type) { + case map[string]interface{}: + for key, nested := range v { + if key == targetKey { + return true + } + if containsKeyDeep(nested, targetKey) { + return true + } + } + case []interface{}: + for _, nested := range v { + if containsKeyDeep(nested, targetKey) { + return true + } + } + } + return false +} + +func shouldRefreshExistingTemplate(client elastic.API, templateName string, mappingData map[string]interface{}) bool { + if mappingData == nil { + return false + } + if _, ok := mappingData["dynamic_templates"]; !ok { + return false + } + template, err := 
client.GetTemplate(templateName) + if err != nil { + log.Warnf("failed to inspect existing template [%s]: %v", templateName, err) + return false + } + return !containsKeyDeep(template, "dynamic_templates") +} + func initIndexName(t interface{}, indexName string) string { pkg, ojbType := util.GetTypeAndPackageName(t, true) key := fmt.Sprintf("%s-%s", pkg, ojbType) @@ -181,6 +238,7 @@ func (handler *ElasticORM) RegisterSchemaWithName(t interface{}, indexName strin } return err } + ensureDefaultStringDynamicTemplates(mappingData) template, err := handler.Client.BuildTemplate(indexName+"*", nil, mappingData) if err != nil { if handler.Config.PanicOnInitSchemaError { @@ -228,6 +286,44 @@ func (handler *ElasticORM) RegisterSchemaWithName(t interface{}, indexName strin //init index _ = handler.tryCreateInitIndex(t, indexName) + } else if handler.Config.BuildTemplateForObject { + jsonFormat := `{ %s }` + mapping := getIndexMapping(t) + js := parseAnnotation(mapping) + json := fmt.Sprintf(jsonFormat, quoteJson(js)) + + var mappingData map[string]interface{} + err = util.FromJSONBytes([]byte(json), &mappingData) + if err != nil { + if handler.Config.PanicOnInitSchemaError { + panic(err) + } + return err + } + ensureDefaultStringDynamicTemplates(mappingData) + if shouldRefreshExistingTemplate(handler.Client, indexTemplate, mappingData) { + template, err := handler.Client.BuildTemplate(indexName+"*", nil, mappingData) + if err != nil { + if handler.Config.PanicOnInitSchemaError { + panic(err) + } + return err + } + data, err := handler.Client.PutTemplate(indexTemplate, template) + if err != nil { + if handler.Config.PanicOnInitSchemaError { + panic(err) + } + return err + } + x, _, _, _ := jsonparser.Get(data, "error") + if x != nil { + log.Errorf("error on update template: %v, %v", indexName, string(x)) + if handler.Config.PanicOnInitSchemaError { + panic(string(data)) + } + } + } } return err } diff --git a/modules/elastic/schema_test.go b/modules/elastic/schema_test.go 
index 518da84a1..ae2c39330 100644 --- a/modules/elastic/schema_test.go +++ b/modules/elastic/schema_test.go @@ -91,3 +91,19 @@ func TestQuoteWithUnderscore(t *testing.T) { json := quoteJson(js) assert.Equal(t, json, `{ "properties":{ "id": { "type": "keyword" },"created": { "type": "date" },"updated": { "type": "date" },"_system": { "type": "object" },"name": { "type": "keyword" } } }`) } + +func TestEnsureDefaultStringDynamicTemplates(t *testing.T) { + mapping := map[string]interface{}{ + "properties": map[string]interface{}{ + "timestamp": map[string]interface{}{ + "type": "date", + }, + }, + } + + ensureDefaultStringDynamicTemplates(mapping) + + templates, ok := mapping["dynamic_templates"].([]interface{}) + assert.Equal(t, ok, true) + assert.Equal(t, len(templates), 1) +} diff --git a/modules/pipeline/model.go b/modules/pipeline/model.go index 10c4b52f5..cd386f58b 100644 --- a/modules/pipeline/model.go +++ b/modules/pipeline/model.go @@ -31,11 +31,18 @@ import ( ) type PipelineTaskStatus struct { - State pipeline.RunningState `json:"state"` - CreateTime time.Time `json:"create_time"` - StartTime *time.Time `json:"start_time"` - EndTime *time.Time `json:"end_time"` - Context util.MapStr `json:"context"` - Config *pipeline.PipelineConfigV2 `json:"config"` - Processors []map[string]interface{} `json:"processor"` + State pipeline.RunningState `json:"state"` + LastRunState pipeline.RunningState `json:"last_run_state,omitempty"` + CreateTime time.Time `json:"create_time"` + StartTime *time.Time `json:"start_time"` + EndTime *time.Time `json:"end_time"` + Context util.MapStr `json:"context"` + Result *PipelineResult `json:"result,omitempty"` + Config *pipeline.PipelineConfigV2 `json:"config"` + Processors []map[string]interface{} `json:"processor"` +} + +type PipelineResult struct { + Success bool `json:"success"` + Error string `json:"error,omitempty"` } diff --git a/modules/pipeline/module.go b/modules/pipeline/module.go index 575a0da85..03e8f7b91 100755 --- 
a/modules/pipeline/module.go +++ b/modules/pipeline/module.go @@ -168,6 +168,11 @@ func (module *PipeModule) stopTask(taskID string) (exists bool) { // deleteTask will clean all in-memory states and release the pipeline context func (module *PipeModule) deleteTask(taskID string) { + if ctx, ok := module.contexts.Load(taskID); ok { + if v1, ok := ctx.(*pipeline.Context); ok && !v1.IsLoopReleased() { + module.stopAndWaitForRelease([]string{taskID}, time.Minute) + } + } module.pipelines.Delete(taskID) module.configs.Delete(taskID) module.releaseContext(taskID) diff --git a/modules/pipeline/pipeline_test.go b/modules/pipeline/pipeline_test.go new file mode 100644 index 000000000..a34b123ea --- /dev/null +++ b/modules/pipeline/pipeline_test.go @@ -0,0 +1,50 @@ +package pipeline + +import ( + "testing" + "time" + + corepipeline "infini.sh/framework/core/pipeline" +) + +func TestDeleteTaskWaitsForLoopRelease(t *testing.T) { + module := &PipeModule{} + ctx := corepipeline.AcquireContext(corepipeline.PipelineConfigV2{}) + + module.contexts.Store("task-1", ctx) + module.configs.Store("task-1", corepipeline.PipelineConfigV2{Name: "task-1"}) + module.pipelines.Store("task-1", struct{}{}) + + released := make(chan struct{}) + go func() { + for !ctx.IsCanceled() { + time.Sleep(time.Millisecond) + } + time.Sleep(50 * time.Millisecond) + ctx.SetLoopReleased() + close(released) + }() + + start := time.Now() + module.deleteTask("task-1") + elapsed := time.Since(start) + + select { + case <-released: + default: + t.Fatal("expected deleteTask to wait for loop release") + } + + if elapsed < 50*time.Millisecond { + t.Fatalf("expected deleteTask to wait for loop release, returned after %v", elapsed) + } + if _, ok := module.contexts.Load("task-1"); ok { + t.Fatal("expected context to be deleted") + } + if _, ok := module.configs.Load("task-1"); ok { + t.Fatal("expected config to be deleted") + } + if _, ok := module.pipelines.Load("task-1"); ok { + t.Fatal("expected pipeline to be 
deleted") + } +} diff --git a/modules/pipeline/tasks.go b/modules/pipeline/tasks.go index eb7bc6662..5b598185e 100644 --- a/modules/pipeline/tasks.go +++ b/modules/pipeline/tasks.go @@ -80,11 +80,18 @@ func (module *PipeModule) getPipelineTaskStatus(id string, config string, proces return nil } ret := &PipelineTaskStatus{ - State: c1.GetRunningState(), - CreateTime: c1.GetCreateTime(), - StartTime: c1.GetStartTime(), - EndTime: c1.GetEndTime(), - Context: c1.CloneData(), + State: c1.GetRunningState(), + LastRunState: c1.GetResultState(), + CreateTime: c1.GetCreateTime(), + StartTime: c1.GetStartTime(), + EndTime: c1.GetEndTime(), + Context: c1.CloneData(), + } + if ret.LastRunState == pipeline.FINISHED || ret.LastRunState == pipeline.FAILED { + ret.Result = &PipelineResult{ + Success: c1.GetResultError() == "", + Error: c1.GetResultError(), + } } if config != "false" { v1, ok := module.configs.Load(id) diff --git a/modules/queue/disk_queue/cleanup.go b/modules/queue/disk_queue/cleanup.go index 1fed12942..3456d881f 100644 --- a/modules/queue/disk_queue/cleanup.go +++ b/modules/queue/disk_queue/cleanup.go @@ -92,7 +92,7 @@ func (module *DiskQueue) deleteUnusedFiles(queueID string, fileNum int64) { fileStartToDelete := fileNum - module.cfg.Retention.MaxNumOfLocalFiles if fileStartToDelete <= 0 || consumers <= 0 || eSegmentNum < 0 { - log.Debugf("queue: %v, no consumers or consumer/s3 already ahead of this file, %v, %v, %v", queueID, fileStartToDelete, consumers, eSegmentNum) + log.Tracef("queue: %v, no consumers or consumer/s3 already ahead of this file, %v, %v, %v", queueID, fileStartToDelete, consumers, eSegmentNum) return } diff --git a/modules/queue/disk_queue/compress.go b/modules/queue/disk_queue/compress.go index b03fdccfb..63404a21e 100644 --- a/modules/queue/disk_queue/compress.go +++ b/modules/queue/disk_queue/compress.go @@ -104,7 +104,7 @@ func (module *DiskQueue) compressFiles(queueID string, fileNum int64) { //skip compress file if fileStartToCompress <= 
0 || (module.cfg.SkipZeroConsumers && consumers <= 0) || fileStartToCompress <= lastCompressedFileNum { - log.Debugf("skip compress %v", queueID) + log.Tracef("skip compress %v", queueID) return } diff --git a/modules/queue/disk_queue/consumer.go b/modules/queue/disk_queue/consumer.go index c18bbd83e..4f4529ae3 100644 --- a/modules/queue/disk_queue/consumer.go +++ b/modules/queue/disk_queue/consumer.go @@ -68,6 +68,29 @@ type Consumer struct { fileLoadCompleted bool } +func (d *Consumer) parkOnEmptyTail(fileName string) error { + if d.readFile != nil { + if err := d.readFile.Close(); err != nil && !util.ContainStr(err.Error(), "already") { + return err + } + } + d.readFile = nil + d.reader = nil + d.fileName = fileName + d.lastFileSize = 0 + d.maxBytesPerFileRead = 0 + d.fileLoadCompleted = false + return nil +} + +func (d *Consumer) waitingForTailFile() bool { + return d.diskQueue != nil && + d.readFile == nil && + d.reader == nil && + d.segment == d.diskQueue.writeSegmentNum && + d.readPos == 0 +} + func (c *Consumer) getFileSize() int64 { var err error readFile, err := os.OpenFile(c.fileName, os.O_RDONLY, 0600) @@ -144,6 +167,22 @@ READ_MSG: // check reader if d.reader == nil { + if d.waitingForTailFile() { + if d.diskQueue.writePos > 0 || util.FileExists(d.fileName) { + err = d.ResetOffset(d.segment, d.readPos) + if err != nil { + if strings.Contains(err.Error(), "not found") { + return messages, false, nil + } + return messages, false, err + } + goto READ_MSG + } + if len(messages) == 0 && d.cCfg.EOFRetryDelayInMs > 0 { + time.Sleep(time.Duration(d.cCfg.EOFRetryDelayInMs) * time.Millisecond) + } + return messages, false, nil + } return messages, false, errors.New("reader is nil") } //read message size @@ -236,7 +275,7 @@ READ_MSG: } return messages, false, err } - log.Debugf("queue:%v, offset:%v,%v, msgSize:%v", d.queue, d.segment, d.readPos, msgSize) + log.Tracef("queue:%v, offset:%v,%v, msgSize:%v", d.queue, d.segment, d.readPos, msgSize) if int32(msgSize) < 
d.mCfg.MinMsgSize || int32(msgSize) > d.mCfg.MaxMsgSize { //current have changes, reload file with new position newFileSize := d.getFileSize() @@ -274,8 +313,19 @@ READ_MSG: //can't read ahead before current write file if nextSegment >= d.diskQueue.writeSegmentNum { log.Debugf("need to skip to next file, but next file not exists, current write segment:%v, current read segment:%v", d.diskQueue.writeSegmentNum, d.segment) - d.diskQueue.skipToNextRWFile(false) + err = d.diskQueue.skipToNextRWFile(false) + if err != nil { + return messages, false, err + } d.diskQueue.needSync = true + err = d.ResetOffset(d.diskQueue.writeSegmentNum, 0) + if err != nil { + if strings.Contains(err.Error(), "not found") { + return messages, false, nil + } + return messages, false, err + } + ctx.UpdateNextOffset(d.segment, d.readPos) } else { //let's continue move to next file nextSegment++ @@ -332,9 +382,9 @@ READ_MSG: //still working on the same file if d.diskQueue.writeSegmentNum == d.segment { time.Sleep(100 * time.Millisecond) // Prevent catching up too quickly. - log.Debugf("invalid message size detected. this might be due to a dirty read as the file was being written while open. reloading segment: %d", d.segment) + log.Tracef("invalid message size detected. this might be due to a dirty read as the file was being written while open. reloading segment: %d", d.segment) } else { - log.Debugf("invalid message size detected. this might be due to a partial file load. reloading segment: %d", d.segment) + log.Tracef("invalid message size detected. this might be due to a partial file load. 
reloading segment: %d", d.segment) } d.readPos = previousPos @@ -509,6 +559,9 @@ func (d *Consumer) ResetOffset(segment, readPos int64) error { if !exists { //double check, but next file exists if !util.FileExists(fileName) { + if segment == d.diskQueue.writeSegmentNum && readPos == 0 && d.diskQueue.writePos == 0 { + return d.parkOnEmptyTail(fileName) + } if d.mCfg.AutoSkipCorruptFile { nextSegment := d.segment + 1 if nextSegment > d.diskQueue.writeSegmentNum { @@ -518,7 +571,7 @@ func (d *Consumer) ResetOffset(segment, readPos int64) error { d.qCfg.Name, d.queue, d.cCfg.Key(), d.segment, d.readPos, fileName) RETRY_NEXT_FILE: // there are segments in the middle - if nextSegment < d.diskQueue.writeSegmentNum { + if nextSegment <= d.diskQueue.writeSegmentNum { fileName, exists, next_file_exists = SmartGetFileName(d.mCfg, d.queue, nextSegment) if exists || util.FileExists(fileName) { log.Debugf("retry skip to next file: %v, exists", fileName) @@ -532,6 +585,12 @@ func (d *Consumer) ResetOffset(segment, readPos int64) error { goto RETRY_NEXT_FILE } } else { + if d.diskQueue.writePos == 0 { + d.segment = d.diskQueue.writeSegmentNum + d.readPos = 0 + d.diskQueue.UpdateSegmentConsumerInReading(d.ID, d.segment) + return d.parkOnEmptyTail(GetFileName(d.queue, d.segment)) + } return errors.New(fileName + " not found, next segment greater than current write segment") } } else { diff --git a/modules/queue/disk_queue/diskqueue.go b/modules/queue/disk_queue/diskqueue.go index d29a8f1da..e3f8c9278 100644 --- a/modules/queue/disk_queue/diskqueue.go +++ b/modules/queue/disk_queue/diskqueue.go @@ -69,6 +69,8 @@ import ( "infini.sh/framework/core/util/zstd" ) +const bytesPerMiB = 1024 * 1024 + // providing a filesystem backed FIFO queue type DiskBasedQueue struct { sync.RWMutex @@ -118,6 +120,8 @@ type DiskBasedQueue struct { // NewDiskQueue instantiates a new instance of DiskBasedQueue, retrieving metadata // from the filesystem and starting the read ahead goroutine func 
NewDiskQueueByConfig(name, dataPath string, cfg *DiskQueueConfig) *DiskBasedQueue { + normalizeDiskQueueConfig(cfg) + d := DiskBasedQueue{ name: name, dataPath: dataPath, @@ -177,7 +181,8 @@ func (d *DiskBasedQueue) ReadChan() <-chan []byte { // Put writes a []byte to the queue func (d *DiskBasedQueue) Put(data []byte) WriteResponse { - ctx, cancel := context.WithTimeout(context.Background(), time.Duration(d.cfg.WriteTimeoutInMS)*time.Millisecond) + writeTimeout := d.getWriteTimeout(len(data)) + ctx, cancel := context.WithTimeout(context.Background(), writeTimeout) defer cancel() size := int64(len(data)) @@ -232,7 +237,7 @@ func (d *DiskBasedQueue) Put(data []byte) WriteResponse { switch res.Error { case context.DeadlineExceeded: // Handle timeout error specifically - res.Error = fmt.Errorf("operation timed out: %w", res.Error) + res.Error = fmt.Errorf("operation timed out after %s waiting for disk queue writer availability: %w", writeTimeout, res.Error) case context.Canceled: // Handle cancellation error specifically res.Error = fmt.Errorf("operation was canceled: %w", res.Error) @@ -244,6 +249,28 @@ func (d *DiskBasedQueue) Put(data []byte) WriteResponse { } } +func (d *DiskBasedQueue) getWriteTimeout(payloadSize int) time.Duration { + timeoutInMS := defaultWriteTimeoutInMS + if d != nil && d.cfg != nil && d.cfg.WriteTimeoutInMS > 0 { + timeoutInMS = d.cfg.WriteTimeoutInMS + } + + if payloadSize > 0 { + payloadMiB := int64((payloadSize + bytesPerMiB - 1) / bytesPerMiB) + timeoutInMS += payloadMiB * adaptiveWriteTimeoutPerPayloadMiBInMS + } + + if d != nil && len(d.writeChan) > 0 { + timeoutInMS += int64(len(d.writeChan)) * adaptiveWriteTimeoutPerQueuedWriteInMS + } + + if timeoutInMS > maxAdaptiveWriteTimeoutInMS { + timeoutInMS = maxAdaptiveWriteTimeoutInMS + } + + return time.Duration(timeoutInMS) * time.Millisecond +} + // Close cleans up the queue and persists metadata func (d *DiskBasedQueue) Close() error { err := d.exit(false) diff --git 
a/modules/queue/disk_queue/diskqueue_test.go b/modules/queue/disk_queue/diskqueue_test.go new file mode 100644 index 000000000..b4c522da5 --- /dev/null +++ b/modules/queue/disk_queue/diskqueue_test.go @@ -0,0 +1,182 @@ +package queue + +import ( + "encoding/binary" + "os" + "path/filepath" + "testing" + "time" + + . "infini.sh/framework/core/env" + "infini.sh/framework/core/global" + corequeue "infini.sh/framework/core/queue" +) + +func TestGetWriteTimeoutIncludesPayloadAndBacklog(t *testing.T) { + dq := &DiskBasedQueue{ + cfg: &DiskQueueConfig{WriteTimeoutInMS: defaultWriteTimeoutInMS}, + writeChan: make(chan []byte, defaultWriteChanBuffer), + } + + dq.writeChan <- []byte("a") + dq.writeChan <- []byte("b") + + timeout := dq.getWriteTimeout(3 * bytesPerMiB) + + expected := time.Duration(defaultWriteTimeoutInMS+3*adaptiveWriteTimeoutPerPayloadMiBInMS+2*adaptiveWriteTimeoutPerQueuedWriteInMS) * time.Millisecond + if timeout != expected { + t.Fatalf("unexpected write timeout: got %s want %s", timeout, expected) + } +} + +func TestGetWriteTimeoutCapsAtMaximum(t *testing.T) { + dq := &DiskBasedQueue{ + cfg: &DiskQueueConfig{WriteTimeoutInMS: defaultWriteTimeoutInMS}, + writeChan: make(chan []byte, defaultWriteChanBuffer), + } + + for i := 0; i < cap(dq.writeChan); i++ { + dq.writeChan <- []byte("x") + } + + timeout := dq.getWriteTimeout(64 * bytesPerMiB) + expected := time.Duration(maxAdaptiveWriteTimeoutInMS) * time.Millisecond + if timeout != expected { + t.Fatalf("unexpected capped timeout: got %s want %s", timeout, expected) + } +} + +func TestResetOffsetSkipsMissingSegmentsUpToCurrentWriteSegment(t *testing.T) { + env1 := EmptyEnv() + env1.SystemConfig.PathConfig.Data = t.TempDir() + global.RegisterEnv(env1) + + queueName := "reset-offset-skip" + data := []byte("ok") + fileName := GetFileName(queueName, 2) + if err := os.MkdirAll(filepath.Dir(fileName), 0o755); err != nil { + t.Fatalf("failed to create queue dir: %v", err) + } + file, err := os.Create(fileName) + 
if err != nil { + t.Fatalf("failed to create segment file: %v", err) + } + if err := binary.Write(file, binary.BigEndian, int32(len(data))); err != nil { + t.Fatalf("failed to write message size: %v", err) + } + if _, err := file.Write(data); err != nil { + t.Fatalf("failed to write message body: %v", err) + } + if err := file.Close(); err != nil { + t.Fatalf("failed to close segment file: %v", err) + } + + dq := &DiskBasedQueue{ + name: queueName, + cfg: &DiskQueueConfig{AutoSkipCorruptFile: true, MinMsgSize: 1, MaxMsgSize: 1024}, + writeSegmentNum: 2, + writePos: int64(4 + len(data)), + } + consumer := &Consumer{ + ID: "consumer-reset", + diskQueue: dq, + mCfg: dq.cfg, + qCfg: &corequeue.QueueConfig{Name: queueName}, + cCfg: &corequeue.ConsumerConfig{}, + queue: queueName, + } + + if err := consumer.ResetOffset(1, 0); err != nil { + t.Fatalf("expected reset offset to skip to current write segment, got %v", err) + } + if consumer.segment != 2 { + t.Fatalf("expected consumer to move to segment 2, got %d", consumer.segment) + } + if consumer.reader == nil { + t.Fatalf("expected consumer reader to be initialized for segment 2") + } +} + +func TestFetchMessagesRecoversToEmptyTailWithoutRescanningCorruptFile(t *testing.T) { + env1 := EmptyEnv() + env1.SystemConfig.PathConfig.Data = t.TempDir() + global.RegisterEnv(env1) + + queueName := "fetch-empty-tail" + corruptFile := GetFileName(queueName, 1) + if err := os.MkdirAll(filepath.Dir(corruptFile), 0o755); err != nil { + t.Fatalf("failed to create queue dir: %v", err) + } + if err := os.WriteFile(corruptFile, []byte{0x7f, 0xff, 0xff, 0xff}, 0o644); err != nil { + t.Fatalf("failed to write corrupt segment: %v", err) + } + + dq := &DiskBasedQueue{ + name: queueName, + cfg: &DiskQueueConfig{AutoSkipCorruptFile: true, MinMsgSize: 1, MaxMsgSize: 1024}, + writeSegmentNum: 3, + writePos: 0, + } + consumer := &Consumer{ + ID: "consumer-fetch", + diskQueue: dq, + mCfg: dq.cfg, + qCfg: &corequeue.QueueConfig{Name: queueName}, + 
cCfg: &corequeue.ConsumerConfig{}, + queue: queueName, + } + + if err := consumer.ResetOffset(1, 0); err != nil { + t.Fatalf("failed to initialize consumer: %v", err) + } + + ctx := &corequeue.Context{} + messages, timeout, err := consumer.FetchMessages(ctx, 1) + if err != nil { + t.Fatalf("expected corruption recovery without error, got %v", err) + } + if timeout { + t.Fatalf("did not expect timeout during corruption recovery") + } + if len(messages) != 0 { + t.Fatalf("expected no messages during recovery, got %d", len(messages)) + } + if consumer.segment != dq.writeSegmentNum { + t.Fatalf("expected consumer to park on new tail segment %d, got %d", dq.writeSegmentNum, consumer.segment) + } + if ctx.NextOffset.Segment != dq.writeSegmentNum || ctx.NextOffset.Position != 0 { + t.Fatalf("expected next offset to advance to new tail, got %v", ctx.NextOffset) + } + + payload := []byte("hello") + tailFile := GetFileName(queueName, dq.writeSegmentNum) + file, err := os.Create(tailFile) + if err != nil { + t.Fatalf("failed to create new tail segment: %v", err) + } + if err := binary.Write(file, binary.BigEndian, int32(len(payload))); err != nil { + t.Fatalf("failed to write tail message size: %v", err) + } + if _, err := file.Write(payload); err != nil { + t.Fatalf("failed to write tail message body: %v", err) + } + if err := file.Close(); err != nil { + t.Fatalf("failed to close tail segment: %v", err) + } + dq.writePos = int64(4 + len(payload)) + + ctx = &corequeue.Context{} + messages, timeout, err = consumer.FetchMessages(ctx, 1) + if err != nil { + t.Fatalf("expected consumer to resume reading on new tail, got %v", err) + } + if timeout { + t.Fatalf("did not expect timeout when new tail data exists") + } + if len(messages) != 1 { + t.Fatalf("expected exactly one message, got %d", len(messages)) + } + if string(messages[0].Data) != "hello" { + t.Fatalf("expected payload %q, got %q", "hello", string(messages[0].Data)) + } +} diff --git 
a/modules/queue/disk_queue/module.go b/modules/queue/disk_queue/module.go index d7b1d8ee9..2383a68b4 100644 --- a/modules/queue/disk_queue/module.go +++ b/modules/queue/disk_queue/module.go @@ -124,8 +124,33 @@ type CompressConfig struct { Level int `config:"level"` } +const ( + defaultWriteTimeoutInMS int64 = 60 * 1000 + defaultWriteChanBuffer = 16 + minRecommendedWriteTimeoutInMS int64 = 15 * 1000 + maxAdaptiveWriteTimeoutInMS int64 = 5 * 60 * 1000 + adaptiveWriteTimeoutPerQueuedWriteInMS int64 = 3 * 1000 + adaptiveWriteTimeoutPerPayloadMiBInMS int64 = 5 * 1000 +) + var preventRead bool +func normalizeDiskQueueConfig(cfg *DiskQueueConfig) { + if cfg == nil { + return + } + + if cfg.WriteTimeoutInMS <= 0 { + cfg.WriteTimeoutInMS = defaultWriteTimeoutInMS + } else if cfg.WriteTimeoutInMS < minRecommendedWriteTimeoutInMS { + log.Warnf("disk_queue write timeout may be too small on slow disks: %dms", cfg.WriteTimeoutInMS) + } + + if cfg.WriteChanBuffer <= 0 { + cfg.WriteChanBuffer = defaultWriteChanBuffer + } +} + func checkCapacity(cfg *DiskQueueConfig) error { if cfg.CheckDiskCapacityRetryDelayInMs <= 0 { @@ -233,20 +258,20 @@ func (module *DiskQueue) Setup() { MinMsgSize: 1, MaxMsgSize: 104857600, //100MB MaxBytesPerFile: 100 * 1024 * 1024, //100MB - WriteTimeoutInMS: 1000, //1s - CheckDiskCapacityRetryDelayInMs: 10 * 000, //10s + WriteTimeoutInMS: defaultWriteTimeoutInMS, + CheckDiskCapacityRetryDelayInMs: 10 * 1000, //10s EOFRetryDelayInMs: 500, SyncEveryRecords: 1000, SyncTimeoutInMS: 1000, NotifyChanBuffer: 100, ReadChanBuffer: 0, - WriteChanBuffer: 0, + WriteChanBuffer: defaultWriteChanBuffer, WarningFreeBytes: 10 * 1024 * 1024 * 1024, ReservedFreeBytes: 5 * 1024 * 1024 * 1024, PrepareFilesToRead: true, Compress: DiskCompress{ IdleThreshold: 3, - DeleteAfterCompress: false, + DeleteAfterCompress: true, NumOfFilesDecompressAhead: 3, Message: CompressConfig{ Enabled: false, @@ -262,6 +287,8 @@ func (module *DiskQueue) Setup() { panic(err) } +
normalizeDiskQueueConfig(module.cfg) + if !module.cfg.Enabled { return } diff --git a/modules/queue/disk_queue/module_test.go b/modules/queue/disk_queue/module_test.go new file mode 100644 index 000000000..fc643621f --- /dev/null +++ b/modules/queue/disk_queue/module_test.go @@ -0,0 +1,49 @@ +package queue + +import ( + "testing" + + . "infini.sh/framework/core/env" + "infini.sh/framework/core/global" +) + +func TestNormalizeDiskQueueConfigAppliesRobustWriteDefaults(t *testing.T) { + cfg := &DiskQueueConfig{} + + normalizeDiskQueueConfig(cfg) + + if cfg.WriteTimeoutInMS != defaultWriteTimeoutInMS { + t.Fatalf("unexpected write timeout: %d", cfg.WriteTimeoutInMS) + } + if cfg.WriteChanBuffer != defaultWriteChanBuffer { + t.Fatalf("unexpected write chan buffer: %d", cfg.WriteChanBuffer) + } +} + +func TestNormalizeDiskQueueConfigKeepsExplicitWriteSettings(t *testing.T) { + cfg := &DiskQueueConfig{ + WriteTimeoutInMS: 45 * 1000, + WriteChanBuffer: 64, + } + + normalizeDiskQueueConfig(cfg) + + if cfg.WriteTimeoutInMS != 45*1000 { + t.Fatalf("write timeout should be preserved, got %d", cfg.WriteTimeoutInMS) + } + if cfg.WriteChanBuffer != 64 { + t.Fatalf("write chan buffer should be preserved, got %d", cfg.WriteChanBuffer) + } +} + +func TestSetupDefaultsDeleteAfterCompress(t *testing.T) { + env1 := EmptyEnv() + global.RegisterEnv(env1) + + module := DiskQueue{} + module.Setup() + + if !module.cfg.Compress.DeleteAfterCompress { + t.Fatalf("delete_after_compress should default to true") + } +} diff --git a/plugins/elastic/bulk_indexing/bulk_indexing.go b/plugins/elastic/bulk_indexing/bulk_indexing.go index f7c1f118a..02544a5fe 100755 --- a/plugins/elastic/bulk_indexing/bulk_indexing.go +++ b/plugins/elastic/bulk_indexing/bulk_indexing.go @@ -78,6 +78,8 @@ type BulkIndexingProcessor struct { bulkBufferPool *elastic.BulkBufferPool } +var queueOwners sync.Map + type Config struct { NumOfSlices int `config:"num_of_slices"` Slices []int `config:"slices"` @@ -233,7 +235,17 @@ 
func (processor *BulkIndexingProcessor) Process(c *pipeline.Context) error { log.Error("error in bulk indexing processor,", v) } } - log.Debug("exit bulk indexing processor") + if processor.bulkStats != nil { + log.Debugf( + "exit bulk indexing processor, success=%d, invalid=%d, failure=%d, error_msgs=%d", + processor.bulkStats.Summary.Success.Count, + processor.bulkStats.Summary.Invalid.Count, + processor.bulkStats.Summary.Failure.Count, + len(processor.bulkStats.ErrorMsgs), + ) + } else { + log.Debug("exit bulk indexing processor") + } }() //handle updates @@ -268,6 +280,7 @@ func (processor *BulkIndexingProcessor) Process(c *pipeline.Context) error { processor.wg.Done() }() + lastDispatch := time.Now() for { if global.ShuttingDown() { @@ -299,11 +312,12 @@ func (processor *BulkIndexingProcessor) Process(c *pipeline.Context) error { } //if have depth and not in in flight if !processor.config.SkipEmptyQueue || queue.HasLag(v) { - _, ok := processor.inFlightQueueConfigs.Load(v.ID) + ok := processor.hasInFlightQueue(v.ID) if !ok { if global.Env().IsDebug { log.Tracef("detecting new queue: %v", v.Name) } + lastDispatch = time.Now() processor.HandleQueueConfig(v, c) } } else { @@ -315,12 +329,31 @@ func (processor *BulkIndexingProcessor) Process(c *pipeline.Context) error { if processor.config.DetectIntervalInMs > 0 { time.Sleep(time.Millisecond * time.Duration(processor.config.DetectIntervalInMs)) } + if shouldQuitActiveQueueDetection( + lastDispatch, + time.Duration(processor.config.IdleTimeoutInSecond)*time.Second, + time.Duration(processor.config.DetectIntervalInMs)*time.Millisecond, + util.MapLength(&processor.inFlightQueueConfigs), + ) { + if processor.bulkStats != nil { + log.Debugf( + "active queue detector idle exit, success=%d, invalid=%d, failure=%d, inflight=%d", + processor.bulkStats.Summary.Success.Count, + processor.bulkStats.Summary.Invalid.Count, + processor.bulkStats.Summary.Failure.Count, + util.MapLength(&processor.inFlightQueueConfigs), + ) + } + 
return + } } }(c) } } else { cfgs := queue.GetConfigBySelector(&processor.config.Selector) - log.Debugf("filter queue by:%v, num of queues:%v", processor.config.Selector.ToString(), len(cfgs)) + if global.Env().IsDebug { + log.Tracef("filter queue by:%v, num of queues:%v", processor.config.Selector.ToString(), len(cfgs)) + } for _, v := range cfgs { if global.Env().IsDebug { log.Tracef("checking queue: %v", v) @@ -334,14 +367,33 @@ func (processor *BulkIndexingProcessor) Process(c *pipeline.Context) error { return nil } +func shouldQuitActiveQueueDetection(lastDispatch time.Time, idleDuration time.Duration, detectInterval time.Duration, inflight int) bool { + if idleDuration <= 0 { + return false + } + if detectInterval < 0 { + detectInterval = 0 + } + return inflight == 0 && time.Since(lastDispatch) >= idleDuration+detectInterval +} + const queueHandleSingleton = "queue_handler_singleton" func (processor *BulkIndexingProcessor) HandleQueueConfig(v *queue.QueueConfig, parentContext *pipeline.Context) { + if !processor.acquireQueueOwner(v.ID) { + if rate.GetRateLimiter("bulk_queue_owner", v.ID, 1, 1, 30*time.Second).Allow() { + log.Debugf("skip queue:[%v], already owned by another local bulk processor", v.ID) + } + return + } + defer processor.releaseQueueOwnerIfIdle(v.ID) //TODO, add config to enable/disable singleton, may have performance issue ok, _ := locker.Hold(queueHandleSingleton, v.ID, global.Env().SystemConfig.NodeConfig.ID, 60*time.Second, true) if !ok { - log.Debugf("failed to hold lock for queue:[%v], already hold by somewhere", v.ID) + if rate.GetRateLimiter("bulk_queue_lock", v.ID, 1, 1, 30*time.Second).Allow() { + log.Debugf("failed to hold lock for queue:[%v], already hold by somewhere", v.ID) + } return } @@ -433,6 +485,10 @@ func (processor *BulkIndexingProcessor) HandleQueueConfig(v *queue.QueueConfig, } func (processor *BulkIndexingProcessor) NewBulkWorker(parentContext *pipeline.Context, qConfig *queue.QueueConfig, preferedHost string) { + if 
global.Env().IsDebug { + // current time for monitoring and log + log.Debugf("starting bulk worker for queue: %v, host: %v at time: %v", qConfig.Name, preferedHost, time.Now().Format(time.RFC3339)) + } bulkSizeInByte := processor.config.BulkConfig.GetBulkSizeInBytes() //check slice for sliceID := 0; sliceID < processor.config.NumOfSlices; sliceID++ { @@ -460,50 +516,110 @@ func (processor *BulkIndexingProcessor) NewBulkWorker(parentContext *pipeline.Co return } - processor.Lock() - v2, exists := processor.inFlightQueueConfigs.Load(key) - if exists { + var workerID = util.GetUUID() + v2, reserved := processor.reserveInFlightQueue(key, workerID) + if !reserved { if global.Env().IsDebug { log.Tracef("[%v], queue [%v], slice_id:%v has more then one consumer, key:%v,v:%v", preferedHost, qConfig.ID, sliceID, key, v2) } - processor.Unlock() continue - } else { - var workerID = util.GetUUID() - log.Debugf("starting worker:[%v], queue:[%v], slice_id:%v, host:[%v]", workerID, qConfig.Name, sliceID, preferedHost) - - ctx1 := &pipeline.Context{} - ctx1.Set("key", key) - ctx1.Set("workerID", workerID) - ctx1.Set("sliceID", sliceID) - ctx1.Set("numOfSlices", processor.config.NumOfSlices) - ctx1.Set("tag", preferedHost) - ctx1.Set("qConfig", qConfig) - ctx1.Set("host", preferedHost) - ctx1.Set("bulkSizeInByte", bulkSizeInByte) - err := processor.pool.Submit(&pipeline.Task{ - Handler: func(ctx *pipeline.Context, v ...interface{}) { - key := ctx.MustGetString("key") - workerID := ctx.MustGetString("workerID") - host := ctx.MustGetString("host") - sliceID := ctx.MustGetInt("sliceID") - tag := ctx.MustGetString("tag") - numOfSlices := ctx.MustGetInt("numOfSlices") - bulkSizeInByte := ctx.MustGetInt("bulkSizeInByte") - qConfig := ctx.MustGet("qConfig").(*queue.QueueConfig) - pCtx := v[0].(*pipeline.Context) - processor.NewSlicedBulkWorker(pCtx, key, workerID, sliceID, numOfSlices, tag, bulkSizeInByte, qConfig, host) - }, - Context: ctx1, - Params: []interface{}{parentContext}, // 
也可以在创建任务时设置参数 - }) - processor.Unlock() - if err != nil { - panic(err) - } - processor.wg.Add(1) } + + log.Tracef("starting worker:[%v], queue:[%v], slice_id:%v, host:[%v]", workerID, qConfig.Name, sliceID, preferedHost) + + ctx1 := &pipeline.Context{} + ctx1.Set("key", key) + ctx1.Set("workerID", workerID) + ctx1.Set("sliceID", sliceID) + ctx1.Set("numOfSlices", processor.config.NumOfSlices) + ctx1.Set("tag", preferedHost) + ctx1.Set("qConfig", qConfig) + ctx1.Set("host", preferedHost) + ctx1.Set("bulkSizeInByte", bulkSizeInByte) + err := processor.pool.Submit(&pipeline.Task{ + Handler: func(ctx *pipeline.Context, v ...interface{}) { + key := ctx.MustGetString("key") + workerID := ctx.MustGetString("workerID") + host := ctx.MustGetString("host") + sliceID := ctx.MustGetInt("sliceID") + tag := ctx.MustGetString("tag") + numOfSlices := ctx.MustGetInt("numOfSlices") + bulkSizeInByte := ctx.MustGetInt("bulkSizeInByte") + qConfig := ctx.MustGet("qConfig").(*queue.QueueConfig) + pCtx := v[0].(*pipeline.Context) + processor.NewSlicedBulkWorker(pCtx, key, workerID, sliceID, numOfSlices, tag, bulkSizeInByte, qConfig, host) + }, + Context: ctx1, + Params: []interface{}{parentContext}, // 也可以在创建任务时设置参数 + }) + if err != nil { + processor.inFlightQueueConfigs.Delete(key) + processor.wg.Done() + panic(err) + } + } +} + +func (processor *BulkIndexingProcessor) reserveInFlightQueue(key, workerID string) (interface{}, bool) { + processor.Lock() + defer processor.Unlock() + + v, exists := processor.inFlightQueueConfigs.Load(key) + if exists { + return v, false + } + + processor.inFlightQueueConfigs.Store(key, workerID) + processor.wg.Add(1) + + return workerID, true +} + +func (processor *BulkIndexingProcessor) hasInFlightQueue(queueID string) bool { + if _, ok := processor.inFlightQueueConfigs.Load(queueID); ok { + return true + } + + queuePrefix := fmt.Sprintf("%v-", queueID) + hasInFlight := false + processor.inFlightQueueConfigs.Range(func(key, value interface{}) bool { + 
keyStr, ok := key.(string) + if ok && strings.HasPrefix(keyStr, queuePrefix) { + hasInFlight = true + return false + } + return true + }) + + return hasInFlight +} + +func (processor *BulkIndexingProcessor) acquireQueueOwner(queueID string) bool { + owner, loaded := queueOwners.LoadOrStore(queueID, processor.id) + if !loaded { + return true + } + + return owner == processor.id +} + +func (processor *BulkIndexingProcessor) releaseQueueOwnerIfIdle(queueID string) { + if processor.hasInFlightQueue(queueID) { + return + } + + owner, ok := queueOwners.Load(queueID) + if ok && owner == processor.id { + queueOwners.Delete(queueID) + } +} + +func isIgnorableAcquireConsumerError(err error) bool { + if err == nil { + return false } + + return util.ContainStr(err.Error(), "already owning this topic") } var xxHashPool = sync.Pool{ @@ -549,8 +665,6 @@ func (processor *BulkIndexingProcessor) getConsumerConfig(queueID, consumerName } func (processor *BulkIndexingProcessor) NewSlicedBulkWorker(ctx *pipeline.Context, key, workerID string, sliceID, maxSlices int, tag string, bulkSizeInByte int, qConfig *queue.QueueConfig, host string) { - processor.inFlightQueueConfigs.Store(key, workerID) - defer func() { if !global.Env().IsDebug { if r := recover(); r != nil { @@ -571,6 +685,7 @@ func (processor *BulkIndexingProcessor) NewSlicedBulkWorker(ctx *pipeline.Contex } } processor.inFlightQueueConfigs.Delete(key) + processor.releaseQueueOwnerIfIdle(qConfig.ID) processor.wg.Done() if global.Env().IsDebug { log.Tracef("exit slice worker, worker:[%v], queue:%v, slice_id:%v, key:%v", workerID, qConfig.ID, sliceID, key) @@ -600,12 +715,15 @@ func (processor *BulkIndexingProcessor) NewSlicedBulkWorker(ctx *pipeline.Contex var consumerInstance queue.ConsumerAPI consumerInstance, err = queue.AcquireConsumer(qConfig, consumerConfig, workerID) if err != nil || consumerInstance == nil { - if util.ContainStr(err.Error(), "already owning this topic") { + if isIgnorableAcquireConsumerError(err) { if 
global.Env().IsDebug { - log.Warnf("other consumer already owning this topic, queue:%v-%v, slice_id:%v", qConfig.Name, qConfig.ID, sliceID) + log.Warnf("skip duplicate consumer acquisition, queue:%v-%v, slice_id:%v, err:%v", qConfig.Name, qConfig.ID, sliceID, err) } return } + if err == nil { + err = errors.New("failed to acquire queue consumer") + } panic(err) } @@ -681,7 +799,9 @@ func (processor *BulkIndexingProcessor) NewSlicedBulkWorker(ctx *pipeline.Contex log.Errorf("should not submit this bulk request, worker[%v], queue:[%v], slice:[%v], offset:[%v]->[%v],%v, msg:%v", workerID, qConfig.ID, sliceID, committedOffset, offset, err, mainBuf.GetMessageCount()) } } - log.Debugf("exit worker[%v], message count[%d], queue:[%v], slice_id:%v", workerID, mainBuf.GetMessageCount(), qConfig.ID, sliceID) + if global.Env().IsDebug { + log.Tracef("exit worker[%v], message count[%d], queue:[%v], slice_id:%v", workerID, mainBuf.GetMessageCount(), qConfig.ID, sliceID) + } }() if global.Env().IsDebug { @@ -798,7 +918,13 @@ READ_DOCS: consumerConfig.KeepActive() messages, timeout, err := consumerInstance.FetchMessages(ctx1, consumerConfig.FetchMaxMessages) stats.IncrementBy("queue", qConfig.ID+".msg_fetched_from_queue", int64(len(messages))) - log.Debugf("slice worker, worker:[%v], [%v][%v][%v][%v] fetched message:%v,ctx:%v,timeout:%v,err:%v", workerID, qConfig.Name, consumerConfig.Group, consumerConfig.Name, sliceID, len(messages), ctx1.String(), timeout, err) + if err != nil || len(messages) > 0 { + if qConfig.Name == "bulk_requests" { + log.Tracef("slice worker, worker:[%v], [%v][%v][%v][%v] fetched message:%v,ctx:%v,timeout:%v,err:%v", workerID, qConfig.Name, consumerConfig.Group, consumerConfig.Name, sliceID, len(messages), ctx1.String(), timeout, err) + } else { + log.Debugf("slice worker, worker:[%v], [%v][%v][%v][%v] fetched message:%v,ctx:%v,timeout:%v,err:%v", workerID, qConfig.Name, consumerConfig.Group, consumerConfig.Name, sliceID, len(messages), ctx1.String(), 
timeout, err) + } + } if err != nil { if strings.Contains(err.Error(), "dirty_read") || err.Error() == "EOF" || err.Error() == "unexpected EOF" { ctx.CancelTask() @@ -905,6 +1031,10 @@ READ_DOCS: mainBuf.WriteByteBuffer(pop.Data) } + // Keep the in-memory offset aligned with the data already buffered. + // If the current message triggers an immediate flush, its NextOffset must be committed too. + offset = advanceBufferedOffset(pop.NextOffset) + if global.Env().IsDebug { log.Tracef("slice worker, worker:[%v], message count: %v, size: %v", workerID, mainBuf.GetMessageCount(), util.ByteSize(uint64(mainBuf.GetMessageSize()))) } @@ -949,7 +1079,7 @@ READ_DOCS: if offset != nil && committedOffset != nil && !offset.Equals(*committedOffset) { err := consumerInstance.CommitOffset(*offset) if err != nil { - log.Errorf("🔧 offset commit failed, worker:[%v], queue:[%v], slice:[%v], offset:[%v], err:%v", workerID, qConfig.Name, sliceID, *offset, err) + log.Errorf("offset commit failed, worker:[%v], queue:[%v], slice:[%v], offset:[%v], err:%v", workerID, qConfig.Name, sliceID, *offset, err) panic(err) } @@ -958,27 +1088,18 @@ READ_DOCS: } // fix: update committedOffset immediately after successful commit, to ensure state consistency committedOffset = offset - log.Debugf("🔧 offset committed successfully, worker:[%v], queue:[%v], slice:[%v], offset:[%v]", workerID, qConfig.Name, sliceID, *offset) - } else { if global.Env().IsDebug { - log.Debugf("🔧 offset not changed, skip commit, worker:[%v], queue:[%v], slice:[%v], offset:[%v], committed:[%v]", workerID, qConfig.Name, sliceID, offset, committedOffset) + log.Tracef("offset committed, worker:[%v], queue:[%v], slice:[%v], offset:[%v]", workerID, qConfig.Name, sliceID, *offset) } + } else { + // skip unchanged offset silently to avoid noisy debug logs } - // fix: this code is moved to loop outside (line 970) to avoid updating offset in the middle of bulk submission - // offset = &pop.NextOffset } } else { log.Errorf("should not 
submit this bulk request, worker[%v], queue:[%v], slice:[%v], offset:[%v]->[%v],%v, msg:%v", workerID, qConfig.ID, sliceID, committedOffset, offset, err, msgCount) } } - - // fix: update offset after each message is processed, to ensure progress sync with actual processing - // so even if it crashes before submission, it will not repeat processing messages written to the buffer after restart - offset = &pop.NextOffset } - - // fix: remove this code to avoid overwriting the updated offset in the loop - // offset = &ctx1.NextOffset } if time.Since(lastCommit) > idleDuration && mainBuf.GetMessageSize() > 0 { @@ -1002,7 +1123,7 @@ CLEAN_BUFFER: } if global.Env().IsDebug { - log.Debugf("cleanup buffer, queue:[%v], slice_id:%v, offset [%v]-[%v], bulk failed (host: %v, err: %v)", qConfig.ID, sliceID, committedOffset, offset, host, err) + log.Tracef("cleanup buffer, queue:[%v], slice_id:%v, offset [%v]-[%v], bulk failed (host: %v, err: %v)", qConfig.ID, sliceID, committedOffset, offset, host, err) } lastCommit = time.Now() // check bulk result, if ok, then commit offset, or retry non-200 requests, or save failure offset @@ -1167,6 +1288,11 @@ func appendStrArr(arr []string, size int, elems []string) []string { return append(arr, elems...) 
} +func advanceBufferedOffset(nextOffset queue.Offset) *queue.Offset { + next := nextOffset + return &next +} + func (processor *BulkIndexingProcessor) getElasticsearchMetadata(qConfig *queue.QueueConfig) (string, *elastic.ElasticsearchMetadata) { elasticsearch, ok := qConfig.Labels["elasticsearch"] diff --git a/plugins/elastic/bulk_indexing/bulk_indexing_test.go b/plugins/elastic/bulk_indexing/bulk_indexing_test.go index e37b0ffe0..f462d8f34 100644 --- a/plugins/elastic/bulk_indexing/bulk_indexing_test.go +++ b/plugins/elastic/bulk_indexing/bulk_indexing_test.go @@ -28,9 +28,13 @@ package bulk_indexing import ( + stdErrors "errors" "github.com/OneOfOne/xxhash" "github.com/stretchr/testify/assert" + "infini.sh/framework/core/queue" + "sync" "testing" + "time" ) func TestXXHash(t *testing.T) { @@ -84,3 +88,90 @@ func TestXXHash(t *testing.T) { } } + +func TestReserveInFlightQueue(t *testing.T) { + processor := &BulkIndexingProcessor{} + + current, reserved := processor.reserveInFlightQueue("queue-0", "worker-1") + assert.True(t, reserved) + assert.Equal(t, "worker-1", current) + + stored, exists := processor.inFlightQueueConfigs.Load("queue-0") + assert.True(t, exists) + assert.Equal(t, "worker-1", stored) + + current, reserved = processor.reserveInFlightQueue("queue-0", "worker-2") + assert.False(t, reserved) + assert.Equal(t, "worker-1", current) + + processor.inFlightQueueConfigs.Delete("queue-0") + processor.wg.Done() +} + +func TestHasInFlightQueue(t *testing.T) { + processor := &BulkIndexingProcessor{} + + assert.False(t, processor.hasInFlightQueue("queue-0")) + + processor.inFlightQueueConfigs.Store("queue-0-0", "worker-1") + assert.True(t, processor.hasInFlightQueue("queue-0")) + + processor.inFlightQueueConfigs.Delete("queue-0-0") + assert.False(t, processor.hasInFlightQueue("queue-0")) +} + +func TestAcquireQueueOwner(t *testing.T) { + queueOwners = sync.Map{} + + processor1 := &BulkIndexingProcessor{id: "processor-1"} + processor2 := 
&BulkIndexingProcessor{id: "processor-2"} + + assert.True(t, processor1.acquireQueueOwner("queue-0")) + assert.True(t, processor1.acquireQueueOwner("queue-0")) + assert.False(t, processor2.acquireQueueOwner("queue-0")) + + queueOwners = sync.Map{} +} + +func TestReleaseQueueOwnerIfIdle(t *testing.T) { + queueOwners = sync.Map{} + + processor := &BulkIndexingProcessor{id: "processor-1"} + assert.True(t, processor.acquireQueueOwner("queue-0")) + + processor.inFlightQueueConfigs.Store("queue-0-0", "worker-1") + processor.releaseQueueOwnerIfIdle("queue-0") + _, exists := queueOwners.Load("queue-0") + assert.True(t, exists) + + processor.inFlightQueueConfigs.Delete("queue-0-0") + processor.releaseQueueOwnerIfIdle("queue-0") + _, exists = queueOwners.Load("queue-0") + assert.False(t, exists) +} + +func TestIsIgnorableAcquireConsumerError(t *testing.T) { + assert.True(t, isIgnorableAcquireConsumerError(stdErrors.New("already owning this topic"))) + assert.False(t, isIgnorableAcquireConsumerError(stdErrors.New("the consumer is in fighting list"))) + assert.False(t, isIgnorableAcquireConsumerError(stdErrors.New("some other error"))) + assert.False(t, isIgnorableAcquireConsumerError(nil)) +} + +func TestShouldQuitActiveQueueDetection(t *testing.T) { + assert.False(t, shouldQuitActiveQueueDetection(time.Now(), 5*time.Second, 5*time.Second, 0)) + assert.False(t, shouldQuitActiveQueueDetection(time.Now().Add(-10*time.Second), 5*time.Second, 5*time.Second, 1)) + assert.False(t, shouldQuitActiveQueueDetection(time.Now().Add(-9*time.Second), 5*time.Second, 5*time.Second, 0)) + assert.True(t, shouldQuitActiveQueueDetection(time.Now().Add(-10*time.Second), 5*time.Second, 5*time.Second, 0)) + assert.True(t, shouldQuitActiveQueueDetection(time.Now().Add(-5*time.Second), 5*time.Second, 0, 0)) +} + +func TestAdvanceBufferedOffsetUsesCurrentMessageNextOffset(t *testing.T) { + previousCommitted := queue.NewOffsetWithVersion(0, 100, 1) + currentNext := queue.NewOffsetWithVersion(0, 200, 1) 
+ + offset := advanceBufferedOffset(currentNext) + + assert.NotNil(t, offset) + assert.True(t, offset.Equals(currentNext)) + assert.False(t, offset.Equals(previousCommitted)) +} From a6c50f8ee891b9a80d6aac875629e18d802b7bd6 Mon Sep 17 00:00:00 2001 From: hardy Date: Tue, 26 May 2026 10:12:55 +0800 Subject: [PATCH 2/4] docs: clarify migration flow changes Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- core/elastic/partition.go | 6 +++++- core/pipeline/context.go | 3 +++ docs/content.en/docs/release-notes/_index.md | 1 + modules/pipeline/module.go | 2 ++ modules/pipeline/tasks.go | 4 +++- plugins/elastic/bulk_indexing/bulk_indexing.go | 6 ++++++ 6 files changed, 20 insertions(+), 2 deletions(-) diff --git a/core/elastic/partition.go b/core/elastic/partition.go index d18855642..4169ca9e9 100644 --- a/core/elastic/partition.go +++ b/core/elastic/partition.go @@ -436,7 +436,9 @@ func buildHashPartitionAggQuery(fieldName string, partitionCount int, filter int "size": partitionCount, "value_type": "long", "script": util.MapStr{ - "lang": "painless", + "lang": "painless", + // Keep the aggregation-side hash logic identical to the partition filter so each + // bucket count matches the documents selected when a migration resumes that bucket. "source": fmt.Sprintf("if (doc[%s].size()==0 || doc[%s].value == '') return null; return (((doc[%s].value.hashCode() %% params.partition_count) + params.partition_count) %% params.partition_count);", fieldLiteral, fieldLiteral, fieldLiteral), "params": util.MapStr{ "partition_count": partitionCount, @@ -566,6 +568,8 @@ func dedupeSortedBoundaries(boundaries []float64) []float64 { sort.Float64s(boundaries) result := make([]float64, 0, len(boundaries)) for _, boundary := range boundaries { + // Percentile aggregations on skewed datasets can return the same boundary more than once. + // Drop duplicates here so later range filters do not create zero-width partitions. 
if len(result) == 0 || !sameBoundary(result[len(result)-1], boundary) { result = append(result, boundary) } diff --git a/core/pipeline/context.go b/core/pipeline/context.go index 8936789e4..efcf16ff9 100755 --- a/core/pipeline/context.go +++ b/core/pipeline/context.go @@ -387,6 +387,9 @@ func (ctx *Context) getResultStateLocked() RunningState { case FINISHED, FAILED: return ctx.runningState case STOPPED: + // STOPPED is also used during normal shutdown. Once the run has an end time, derive the + // last completed result from the recorded errors so the API can distinguish manual stop + // from a finished or failed migration run. if ctx.endTime == nil { return STOPPED } diff --git a/docs/content.en/docs/release-notes/_index.md b/docs/content.en/docs/release-notes/_index.md index fefcf930f..9e9e779c4 100644 --- a/docs/content.en/docs/release-notes/_index.md +++ b/docs/content.en/docs/release-notes/_index.md @@ -30,6 +30,7 @@ Information about release notes of INFINI Framework is provided here. ### 🐛 Bug fix ### ✈️ Improvements +- feat(data migration): improve partitioning, bulk queue recovery, pipeline result visibility, and migration path handling #368 - chore: API Handler Registration Improvements #283 - refactor: use PathUnescape to decode query param filter #249 - chore: move entity provider to non-managed mode #250 diff --git a/modules/pipeline/module.go b/modules/pipeline/module.go index 03e8f7b91..854aa9899 100755 --- a/modules/pipeline/module.go +++ b/modules/pipeline/module.go @@ -169,6 +169,8 @@ func (module *PipeModule) stopTask(taskID string) (exists bool) { // deleteTask will clean all in-memory states and release the pipeline context func (module *PipeModule) deleteTask(taskID string) { if ctx, ok := module.contexts.Load(taskID); ok { + // Wait for the worker loop to observe cancellation before dropping the context so a + // re-created migration task does not race with the previous loop's final cleanup. 
if v1, ok := ctx.(*pipeline.Context); ok && !v1.IsLoopReleased() { module.stopAndWaitForRelease([]string{taskID}, time.Minute) } diff --git a/modules/pipeline/tasks.go b/modules/pipeline/tasks.go index 5b598185e..88306eade 100644 --- a/modules/pipeline/tasks.go +++ b/modules/pipeline/tasks.go @@ -80,7 +80,9 @@ func (module *PipeModule) getPipelineTaskStatus(id string, config string, proces return nil } ret := &PipelineTaskStatus{ - State: c1.GetRunningState(), + State: c1.GetRunningState(), + // Keep the current runtime state and the last completed result separate so migration + // callers can tell a stopped task from a run that already finished or failed. LastRunState: c1.GetResultState(), CreateTime: c1.GetCreateTime(), StartTime: c1.GetStartTime(), diff --git a/plugins/elastic/bulk_indexing/bulk_indexing.go b/plugins/elastic/bulk_indexing/bulk_indexing.go index 02544a5fe..d6054e5f5 100755 --- a/plugins/elastic/bulk_indexing/bulk_indexing.go +++ b/plugins/elastic/bulk_indexing/bulk_indexing.go @@ -329,6 +329,8 @@ func (processor *BulkIndexingProcessor) Process(c *pipeline.Context) error { if processor.config.DetectIntervalInMs > 0 { time.Sleep(time.Millisecond * time.Duration(processor.config.DetectIntervalInMs)) } + // Let migration-style pipelines exit once queue discovery stays idle for a full + // interval instead of polling forever after all active queues drain. if shouldQuitActiveQueueDetection( lastDispatch, time.Duration(processor.config.IdleTimeoutInSecond)*time.Second, @@ -380,6 +382,8 @@ func shouldQuitActiveQueueDetection(lastDispatch time.Time, idleDuration time.Du const queueHandleSingleton = "queue_handler_singleton" func (processor *BulkIndexingProcessor) HandleQueueConfig(v *queue.QueueConfig, parentContext *pipeline.Context) { + // Prevent duplicate local workers for the same queue before competing for the + // distributed lease; this keeps one process from starting overlapping consumers. 
if !processor.acquireQueueOwner(v.ID) { if rate.GetRateLimiter("bulk_queue_owner", v.ID, 1, 1, 30*time.Second).Allow() { log.Debugf("skip queue:[%v], already owned by another local bulk processor", v.ID) @@ -569,6 +573,8 @@ func (processor *BulkIndexingProcessor) reserveInFlightQueue(key, workerID strin return v, false } + // Track workers by queue+slice so retries or queue re-discovery never start a second + // consumer for the same slice while the first one is still draining. processor.inFlightQueueConfigs.Store(key, workerID) processor.wg.Add(1) From c48a27d883389c8024b53b702f77852c080b681a Mon Sep 17 00:00:00 2001 From: hardy Date: Tue, 26 May 2026 11:02:45 +0800 Subject: [PATCH 3/4] test: expand migration coverage Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- app.go | 4 +++- core/env/env.go | 2 ++ core/task/task.go | 2 ++ core/task/task_test.go | 15 +++++++++++++++ modules/elastic/module.go | 9 +++++++++ modules/elastic/module_test.go | 12 ++++++++++++ modules/queue/disk_queue/consumer.go | 7 +++++++ 7 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 core/task/task_test.go diff --git a/app.go b/app.go index af1ef24e5..5cf8ec5e6 100755 --- a/app.go +++ b/app.go @@ -32,7 +32,7 @@ import ( "flag" "fmt" "github.com/fsnotify/fsnotify" - "github.com/shirou/gopsutil/v3/process" + "github.com/shirou/gopsutil/v4/process" "infini.sh/framework/core/task" "infini.sh/framework/core/wrapper/taskset" "infini.sh/framework/modules/configs/client" @@ -88,6 +88,8 @@ type App struct { func getServiceWorkingDirectory() string { executablePath, err := os.Executable() if err == nil { + // Services are often launched from a manager-controlled cwd. Use the executable directory so + // relative data/log/config paths resolve the same way for service installs and manual runs. 
return filepath.Dir(executablePath) } workdir, err := os.Getwd() diff --git a/core/env/env.go b/core/env/env.go index 1744e37f3..d8603bf0c 100755 --- a/core/env/env.go +++ b/core/env/env.go @@ -496,6 +496,8 @@ func resolvePathRelativeToExecutable(p string) string { if err != nil { return p } + // Keep relative runtime paths anchored to the installed binary rather than the caller's cwd so + // restarts, service managers, and migration workers all read/write the same directories. return filepath.Join(filepath.Dir(executablePath), p) } diff --git a/core/task/task.go b/core/task/task.go index 1b5eab23d..dc2df50e3 100644 --- a/core/task/task.go +++ b/core/task/task.go @@ -41,6 +41,8 @@ import ( var Tasks = sync.Map{} func shouldSilenceStartupTaskError(msg string) bool { + // During startup some tasks can race slightly ahead of ORM registration. Treat that specific + // error as bootstrap noise so real migration/task failures remain visible in error logs. return !orm.HasHandler() && strings.Contains(msg, "ORM handler is not registered") } diff --git a/core/task/task_test.go b/core/task/task_test.go new file mode 100644 index 000000000..073e5c1d3 --- /dev/null +++ b/core/task/task_test.go @@ -0,0 +1,15 @@ +package task + +import "testing" + +func TestShouldSilenceStartupTaskErrorForMissingORMHandler(t *testing.T) { + if !shouldSilenceStartupTaskError("ORM handler is not registered") { + t.Fatal("expected missing ORM handler startup error to be silenced") + } +} + +func TestShouldSilenceStartupTaskErrorIgnoresOtherErrors(t *testing.T) { + if shouldSilenceStartupTaskError("queue consumer is not registered") { + t.Fatal("expected unrelated startup errors to remain visible") + } +} diff --git a/modules/elastic/module.go b/modules/elastic/module.go index 56ad1ca42..702763f16 100755 --- a/modules/elastic/module.go +++ b/modules/elastic/module.go @@ -125,6 +125,9 @@ func loadESBasedElasticConfig() []elastic.ElasticsearchConfig { configs := []elastic.ElasticsearchConfig{} 
systemID, ok := lookupSystemElasticsearchID() if !ok { + // Console-managed elasticsearch configs live in the system cluster. During startup or + // migration bootstrap that cluster may not exist yet, so treat it as "no remote configs" + // instead of failing module initialization. return configs } query := elastic.SearchRequest{From: 0, Size: 1000} //TODO handle clusters beyond 1000 @@ -411,6 +414,8 @@ func (module *ElasticModule) Start() error { if moduleConfig.ORMConfig.Enabled { if !hasSystemCluster { + // Allow the module to start before the system cluster is registered so bootstrap and + // migration flows can finish wiring the cluster first, then re-enable ORM-backed features. log.Warn("skip elastic ORM initialization, system cluster is not available") } else { client := elastic.GetClient(systemID) @@ -721,6 +726,8 @@ func (module *ElasticModule) refreshAllClusterMetadata() { if ok { cfg := elastic.GetConfigNoPanic(v.Config.ID) if cfg == nil { + // Metadata can outlive the config during remote-config reloads or migration cleanup. + // Drop it here so later workers do not keep probing a cluster that was already removed. log.Debugf("elasticsearch metadata [%v] has no active config, removing stale metadata", v.Config.ID) elastic.RemoveInstance(v.Config.ID) elastic.RemoveHostsByClusterID(v.Config.ID) @@ -745,6 +752,8 @@ func (module *ElasticModule) refreshAllClusterAlias(force bool) { if ok { cfg := elastic.GetConfigNoPanic(v.Config.ID) if cfg == nil { + // Keep alias refresh in sync with metadata refresh: once the config is gone, clear any + // cached hosts/metadata so the next initialization starts from the active config set only. 
log.Debugf("elasticsearch metadata [%v] has no active config, removing stale metadata", v.Config.ID) elastic.RemoveInstance(v.Config.ID) elastic.RemoveHostsByClusterID(v.Config.ID) diff --git a/modules/elastic/module_test.go b/modules/elastic/module_test.go index e88f7e39c..4594315b7 100644 --- a/modules/elastic/module_test.go +++ b/modules/elastic/module_test.go @@ -19,6 +19,18 @@ func TestLoadESBasedElasticConfigSkipsWhenSystemClusterUnavailable(t *testing.T) } } +func TestLookupSystemElasticsearchIDRejectsInvalidValues(t *testing.T) { + previous := global.Lookup(coreElastic.GlobalSystemElasticsearchID) + defer global.Register(coreElastic.GlobalSystemElasticsearchID, previous) + + global.Register(coreElastic.GlobalSystemElasticsearchID, 123) + + systemID, ok := lookupSystemElasticsearchID() + if ok { + t.Fatalf("expected invalid system cluster value to be rejected, got %q", systemID) + } +} + func TestElasticModuleStartSkipsSystemClusterDependentInitBeforeSetup(t *testing.T) { previousSystemID := global.Lookup(coreElastic.GlobalSystemElasticsearchID) defer global.Register(coreElastic.GlobalSystemElasticsearchID, previousSystemID) diff --git a/modules/queue/disk_queue/consumer.go b/modules/queue/disk_queue/consumer.go index 4f4529ae3..c27746770 100644 --- a/modules/queue/disk_queue/consumer.go +++ b/modules/queue/disk_queue/consumer.go @@ -84,6 +84,9 @@ func (d *Consumer) parkOnEmptyTail(fileName string) error { } func (d *Consumer) waitingForTailFile() bool { + // A parked consumer has already advanced to the current write segment, but there is no tail + // file content yet. Treat this as a normal catch-up state so the next write can resume from + // the live tail instead of rescanning an older corrupt or already-consumed segment. 
return d.diskQueue != nil && d.readFile == nil && d.reader == nil && @@ -560,6 +563,8 @@ func (d *Consumer) ResetOffset(segment, readPos int64) error { //double check, but next file exists if !util.FileExists(fileName) { if segment == d.diskQueue.writeSegmentNum && readPos == 0 && d.diskQueue.writePos == 0 { + // The consumer has caught up to an empty tail segment. Park there and let FetchMessages + // wait for the producer to materialize the next file instead of treating the tail as corrupt. return d.parkOnEmptyTail(fileName) } if d.mCfg.AutoSkipCorruptFile { @@ -589,6 +594,8 @@ func (d *Consumer) ResetOffset(segment, readPos int64) error { d.segment = d.diskQueue.writeSegmentNum d.readPos = 0 d.diskQueue.UpdateSegmentConsumerInReading(d.ID, d.segment) + // After skipping every missing intermediate segment, the safest recovery point is + // the current writer tail. Park on it so the consumer resumes from fresh data only. return d.parkOnEmptyTail(GetFileName(d.queue, d.segment)) } return errors.New(fileName + " not found, next segment greater than current write segment") From 6d23ca2da395e3beb7407107aff59858e7856027 Mon Sep 17 00:00:00 2001 From: hardy Date: Tue, 26 May 2026 11:38:54 +0800 Subject: [PATCH 4/4] chore: add file headers for migration tests Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- app_test.go | 23 ++++++++++++++++++++++ core/config/fs_watcher_test.go | 23 ++++++++++++++++++++++ core/elastic/actions_test.go | 23 ++++++++++++++++++++++ core/elastic/domain_actions_test.go | 23 ++++++++++++++++++++++ core/elastic/partition_test.go | 23 ++++++++++++++++++++++ core/orm/registry_test.go | 23 ++++++++++++++++++++++ core/pipeline/context_result_test.go | 23 ++++++++++++++++++++++ core/task/task_test.go | 23 ++++++++++++++++++++++ modules/elastic/adapter/ver_test.go | 23 ++++++++++++++++++++++ modules/elastic/common/config_test.go | 23 ++++++++++++++++++++++ modules/pipeline/pipeline_test.go | 23 ++++++++++++++++++++++ 
modules/queue/disk_queue/diskqueue_test.go | 23 ++++++++++++++++++++++ modules/queue/disk_queue/module_test.go | 23 ++++++++++++++++++++++ 13 files changed, 299 insertions(+) diff --git a/app_test.go b/app_test.go index 702a85f48..9c65bde87 100644 --- a/app_test.go +++ b/app_test.go @@ -1,3 +1,26 @@ +// Copyright (C) INFINI Labs & INFINI LIMITED. +// +// The INFINI Framework is offered under the GNU Affero General Public License v3.0 +// and as commercial software. +// +// For commercial licensing, contact us at: +// - Website: infinilabs.com +// - Email: hello@infini.ltd +// +// Open Source licensed under AGPL V3: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + package framework import ( diff --git a/core/config/fs_watcher_test.go b/core/config/fs_watcher_test.go index c1ddf1f75..5575f7812 100644 --- a/core/config/fs_watcher_test.go +++ b/core/config/fs_watcher_test.go @@ -1,3 +1,26 @@ +// Copyright (C) INFINI Labs & INFINI LIMITED. +// +// The INFINI Framework is offered under the GNU Affero General Public License v3.0 +// and as commercial software.
+// +// For commercial licensing, contact us at: +// - Website: infinilabs.com +// - Email: hello@infini.ltd +// +// Open Source licensed under AGPL V3: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + package config import ( diff --git a/core/elastic/actions_test.go b/core/elastic/actions_test.go index f7d2ef830..db096d30f 100644 --- a/core/elastic/actions_test.go +++ b/core/elastic/actions_test.go @@ -1,3 +1,26 @@ +// Copyright (C) INFINI Labs & INFINI LIMITED. +// +// The INFINI Framework is offered under the GNU Affero General Public License v3.0 +// and as commercial software. +// +// For commercial licensing, contact us at: +// - Website: infinilabs.com +// - Email: hello@infini.ltd +// +// Open Source licensed under AGPL V3: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>.
+ package elastic import ( diff --git a/core/elastic/domain_actions_test.go b/core/elastic/domain_actions_test.go index 1a2c05a56..3132a15c5 100644 --- a/core/elastic/domain_actions_test.go +++ b/core/elastic/domain_actions_test.go @@ -1,3 +1,26 @@ +// Copyright (C) INFINI Labs & INFINI LIMITED. +// +// The INFINI Framework is offered under the GNU Affero General Public License v3.0 +// and as commercial software. +// +// For commercial licensing, contact us at: +// - Website: infinilabs.com +// - Email: hello@infini.ltd +// +// Open Source licensed under AGPL V3: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + package elastic import ( diff --git a/core/elastic/partition_test.go b/core/elastic/partition_test.go index 1a7795a6d..86c0d00ad 100644 --- a/core/elastic/partition_test.go +++ b/core/elastic/partition_test.go @@ -1,3 +1,26 @@ +// Copyright (C) INFINI Labs & INFINI LIMITED. +// +// The INFINI Framework is offered under the GNU Affero General Public License v3.0 +// and as commercial software.
+// +// For commercial licensing, contact us at: +// - Website: infinilabs.com +// - Email: hello@infini.ltd +// +// Open Source licensed under AGPL V3: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + package elastic import ( diff --git a/core/orm/registry_test.go b/core/orm/registry_test.go index b74dc34dd..d8327959a 100644 --- a/core/orm/registry_test.go +++ b/core/orm/registry_test.go @@ -1,3 +1,26 @@ +// Copyright (C) INFINI Labs & INFINI LIMITED. +// +// The INFINI Framework is offered under the GNU Affero General Public License v3.0 +// and as commercial software. +// +// For commercial licensing, contact us at: +// - Website: infinilabs.com +// - Email: hello@infini.ltd +// +// Open Source licensed under AGPL V3: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>.
+ package orm import "testing" diff --git a/core/pipeline/context_result_test.go b/core/pipeline/context_result_test.go index addb8d4b5..626259d6c 100644 --- a/core/pipeline/context_result_test.go +++ b/core/pipeline/context_result_test.go @@ -1,3 +1,26 @@ +// Copyright (C) INFINI Labs & INFINI LIMITED. +// +// The INFINI Framework is offered under the GNU Affero General Public License v3.0 +// and as commercial software. +// +// For commercial licensing, contact us at: +// - Website: infinilabs.com +// - Email: hello@infini.ltd +// +// Open Source licensed under AGPL V3: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + package pipeline import ( diff --git a/core/task/task_test.go b/core/task/task_test.go index 073e5c1d3..6a18dc66d 100644 --- a/core/task/task_test.go +++ b/core/task/task_test.go @@ -1,3 +1,26 @@ +// Copyright (C) INFINI Labs & INFINI LIMITED. +// +// The INFINI Framework is offered under the GNU Affero General Public License v3.0 +// and as commercial software.
+// +// For commercial licensing, contact us at: +// - Website: infinilabs.com +// - Email: hello@infini.ltd +// +// Open Source licensed under AGPL V3: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + package task import "testing" diff --git a/modules/elastic/adapter/ver_test.go b/modules/elastic/adapter/ver_test.go index db5808e55..4775efdae 100644 --- a/modules/elastic/adapter/ver_test.go +++ b/modules/elastic/adapter/ver_test.go @@ -1,3 +1,26 @@ +// Copyright (C) INFINI Labs & INFINI LIMITED. +// +// The INFINI Framework is offered under the GNU Affero General Public License v3.0 +// and as commercial software. +// +// For commercial licensing, contact us at: +// - Website: infinilabs.com +// - Email: hello@infini.ltd +// +// Open Source licensed under AGPL V3: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details.
+// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + package adapter import ( diff --git a/modules/elastic/common/config_test.go b/modules/elastic/common/config_test.go index 461d5286d..78ff78347 100644 --- a/modules/elastic/common/config_test.go +++ b/modules/elastic/common/config_test.go @@ -1,3 +1,26 @@ +// Copyright (C) INFINI Labs & INFINI LIMITED. +// +// The INFINI Framework is offered under the GNU Affero General Public License v3.0 +// and as commercial software. +// +// For commercial licensing, contact us at: +// - Website: infinilabs.com +// - Email: hello@infini.ltd +// +// Open Source licensed under AGPL V3: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + package common import ( diff --git a/modules/pipeline/pipeline_test.go b/modules/pipeline/pipeline_test.go index a34b123ea..2ee932a0b 100644 --- a/modules/pipeline/pipeline_test.go +++ b/modules/pipeline/pipeline_test.go @@ -1,3 +1,26 @@ +// Copyright (C) INFINI Labs & INFINI LIMITED. +// +// The INFINI Framework is offered under the GNU Affero General Public License v3.0 +// and as commercial software.
+// +// For commercial licensing, contact us at: +// - Website: infinilabs.com +// - Email: hello@infini.ltd +// +// Open Source licensed under AGPL V3: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>. + package pipeline import ( diff --git a/modules/queue/disk_queue/diskqueue_test.go b/modules/queue/disk_queue/diskqueue_test.go index b4c522da5..7a0798fa1 100644 --- a/modules/queue/disk_queue/diskqueue_test.go +++ b/modules/queue/disk_queue/diskqueue_test.go @@ -1,3 +1,26 @@ +// Copyright (C) INFINI Labs & INFINI LIMITED. +// +// The INFINI Framework is offered under the GNU Affero General Public License v3.0 +// and as commercial software. +// +// For commercial licensing, contact us at: +// - Website: infinilabs.com +// - Email: hello@infini.ltd +// +// Open Source licensed under AGPL V3: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details.
+// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>. + package queue import ( diff --git a/modules/queue/disk_queue/module_test.go b/modules/queue/disk_queue/module_test.go index fc643621f..4b51e7524 100644 --- a/modules/queue/disk_queue/module_test.go +++ b/modules/queue/disk_queue/module_test.go @@ -1,3 +1,26 @@ +// Copyright (C) INFINI Labs & INFINI LIMITED. +// +// The INFINI Framework is offered under the GNU Affero General Public License v3.0 +// and as commercial software. +// +// For commercial licensing, contact us at: +// - Website: infinilabs.com +// - Email: hello@infini.ltd +// +// Open Source licensed under AGPL V3: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>. + package queue import (