Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,6 @@
firehose-data*
/.fleet/settings.json
/firecore
/generator
/firehose.yaml
.claude
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,15 @@ If you were at `firehose-core` version `1.0.0` and are bumping to `1.1.0`, you s

## Unreleased

### Added

- `tools wkp descriptors [output-file]`: new command that exports all well-known blockchain protobuf descriptors as a self-contained, serialized `google.protobuf.FileDescriptorSet` (binary wire format). The set includes every transitive import (google/protobuf/* well-known types included) so consumers can build a descriptor registry with no external resolution. Output is deterministic (stable topological + alphabetical ordering) enabling "is it up to date?" CI checks via a regenerate-and-diff workflow. Use `-` as `output-file` to write to stdout; the default output name is `well-known-descriptors.binpb`.
- `proto/generator`: switched from the BSR Reflection v1beta1 API to the BSR HTTP descriptor endpoint (`/descriptor/<ref>?source_info=true`). Regenerated WKP files will now embed `source_code_info` (proto field/message comments), enabling documentation renderers and tooling that reads comment annotations. Authentication via `BUFBUILD_AUTH_TOKEN` is now optional for public modules (a warning is emitted when the token is absent).

### Fixed

- Removed vulnerable `github.com/docker/docker` dependency (GHSA-x744-4wpc-v9h2, GHSA-x86f-5xw2-fm2r, GHSA-rg2x-37c3-w2rh). Upgraded `testcontainers-go` to v0.42.0 (which uses `github.com/moby/moby/api` instead) and updated the single import in `relayer/relayer_e2e_test.go` from `github.com/docker/docker/api/types/container` to `github.com/moby/moby/api/types/container`.

### Changed

- Bumped `dstore`: S3 store now suppresses the SDK's checksum validation warnings (sets `DisableLogOutputChecksumValidationSkipped` to `true`) and updates the AWS S3 SDK to a newer version.
Expand Down
2 changes: 2 additions & 0 deletions cmd/tools/tools.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
print2 "github.com/streamingfast/firehose-core/cmd/tools/print"
"github.com/streamingfast/firehose-core/cmd/tools/relayer"
"github.com/streamingfast/firehose-core/cmd/tools/substreams"
toolswkp "github.com/streamingfast/firehose-core/cmd/tools/wkp"
"github.com/streamingfast/logging"
"go.uber.org/zap"
)
Expand Down Expand Up @@ -84,6 +85,7 @@ func ConfigureToolsCmd[B firecore.Block](
ToolsCmd.AddCommand(relayer.NewToolsRelayerGroup(chain, logger))
ToolsCmd.AddCommand(substreams.NewToolsSubstreamsCmd(chain, logger))
ToolsCmd.AddCommand(NewToolsNetworksCmd(chain, logger))
ToolsCmd.AddCommand(toolswkp.NewToolsWKPCmd(chain, logger))

if chain.Tools.MergedBlockUpgrader != nil {
ToolsCmd.AddCommand(mergeblock.NewToolsUpgradeMergedBlocksCmd(chain, logger))
Expand Down
7 changes: 7 additions & 0 deletions cmd/tools/wkp/log_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package wkp

import "github.com/streamingfast/logging"

// init calls logging.InstantiateLoggers when this package's test binary starts
// (this file is a _test.go file, so it is only compiled for tests).
// NOTE(review): presumably this wires up the package loggers registered with
// streamingfast/logging so that tests get log output — confirm against that library.
func init() {
logging.InstantiateLoggers()
}
38 changes: 38 additions & 0 deletions cmd/tools/wkp/tools_wkp.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package wkp

import (
. "github.com/streamingfast/cli"
"github.com/spf13/cobra"
firecore "github.com/streamingfast/firehose-core"
"go.uber.org/zap"
)

// NewToolsWKPCmd assembles the `wkp` command group and returns it as a cobra
// command. The group currently holds a single sub-command,
// `descriptors [output-file]`, whose execution is delegated to
// toolsWKPDescriptorsRunner built from the given chain and logger.
func NewToolsWKPCmd[B firecore.Block](chain *firecore.Chain[B], logger *zap.Logger) *cobra.Command {
	// Sub-command definition: runner, usage line, short help, long help.
	descriptorsCmd := Command(
		toolsWKPDescriptorsRunner(chain, logger),
		"descriptors [output-file]",
		"Export all well-known blockchain protobuf descriptors as a self-contained FileDescriptorSet",
		Description(`
Exports every well-known blockchain protobuf descriptor registered in firehose-core
as a serialized google.protobuf.FileDescriptorSet written to <output-file>
(defaults to "well-known-descriptors.binpb"). Use "-" to write to stdout.

The exported set is self-contained: all transitive imports, including google/protobuf/*
well-known types, are included. No external resolution is needed to build a descriptor
registry from this file.

The output is deterministic: given the same embedded descriptors, re-running the
command produces byte-identical output, enabling "is it up to date?" CI checks via a
regenerate-and-diff workflow.

source_code_info (proto field/message comments) is included when the embedded
descriptors were generated with the BSR HTTP descriptor endpoint using ?source_info=true
(see proto/generator/generator.go). Re-run the generator to embed updated descriptors.
`),
	)

	// Parent group that hosts every `wkp` sub-command.
	wkpGroup := Group(
		"wkp",
		"Well-known protocols related tools",

		descriptorsCmd,
	)

	return ToCobraCmd(wkpGroup)
}
103 changes: 103 additions & 0 deletions cmd/tools/wkp/tools_wkp_descriptors.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
package wkp

import (
"fmt"
"io"
"maps"
"os"
"slices"

"github.com/spf13/cobra"
firecore "github.com/streamingfast/firehose-core"
"github.com/streamingfast/firehose-core/proto/wkp"
"go.uber.org/zap"
"google.golang.org/protobuf/proto"
"google.golang.org/protobuf/types/descriptorpb"
)

// toolsWKPDescriptorsRunner returns the executor behind `tools wkp descriptors [output-file]`.
//
// The executor collects the well-known descriptors via collectSortedDescriptors,
// wraps them in a FileDescriptorSet, serializes it to the binary wire format and
// writes the bytes to the destination:
//
//   - args[0] when provided, otherwise "well-known-descriptors.binpb";
//   - os.Stdout when the destination is "-" (no log line is emitted in that case,
//     only the raw bytes are written).
//
// It returns an error when marshaling fails, when the output file cannot be
// created, written, or closed. The chain parameter is accepted for signature
// parity with the other tool runners and is not read here.
func toolsWKPDescriptorsRunner[B firecore.Block](chain *firecore.Chain[B], logger *zap.Logger) firecore.CommandExecutor {
	return func(cmd *cobra.Command, args []string) error {
		outputPath := "well-known-descriptors.binpb"
		if len(args) > 0 {
			outputPath = args[0]
		}

		descriptors := collectSortedDescriptors()
		fds := &descriptorpb.FileDescriptorSet{File: descriptors}

		// Deterministic marshaling is requested explicitly because the command's
		// purpose is a regenerate-and-diff workflow; the option makes the same
		// message serialize to the same bytes within a given binary.
		b, err := proto.MarshalOptions{Deterministic: true}.Marshal(fds)
		if err != nil {
			return fmt.Errorf("marshaling FileDescriptorSet: %w", err)
		}

		if outputPath == "-" {
			return writeDescriptorBytes(os.Stdout, b)
		}

		if err := writeDescriptorFile(outputPath, b); err != nil {
			return err
		}

		// Only log once the file has been written AND closed successfully.
		logger.Info("exported well-known descriptors",
			zap.String("path", outputPath),
			zap.Int("file_count", len(descriptors)),
			zap.Int("bytes", len(b)),
		)

		return nil
	}
}

// writeDescriptorBytes writes b to w, wrapping any write failure with context.
func writeDescriptorBytes(w io.Writer, b []byte) error {
	if _, err := w.Write(b); err != nil {
		return fmt.Errorf("writing output: %w", err)
	}
	return nil
}

// writeDescriptorFile creates (or truncates) the file at path, writes b to it
// and closes it. A Close failure is reported when no earlier error occurred,
// since a failed close on a freshly written file can mean the data never
// reached disk.
func writeDescriptorFile(path string, b []byte) (err error) {
	f, err := os.Create(path)
	if err != nil {
		return fmt.Errorf("creating output file %q: %w", path, err)
	}
	defer func() {
		if closeErr := f.Close(); closeErr != nil && err == nil {
			err = fmt.Errorf("closing output file %q: %w", path, closeErr)
		}
	}()

	return writeDescriptorBytes(f, b)
}

// collectSortedDescriptors gathers every descriptor returned by
// wkp.WellKnownProtos and orders them so that a file is always emitted after
// the imports it declares (post-order depth-first traversal).
//
// Determinism comes from two sorts: root files are walked in ascending name
// order, and each file's imports are walked in ascending name order. Imports
// that are not part of the well-known set are ignored. When several
// descriptors share a name, the last one returned by wkp.WellKnownProtos wins.
func collectSortedDescriptors() []*descriptorpb.FileDescriptorProto {
	known := wkp.WellKnownProtos()

	byName := make(map[string]*descriptorpb.FileDescriptorProto, len(known))
	for _, fd := range known {
		byName[fd.GetName()] = fd
	}

	// Sorted roots make the traversal order independent of map iteration order.
	names := slices.Sorted(maps.Keys(byName))

	emitted := make(map[string]struct{}, len(names))
	ordered := make([]*descriptorpb.FileDescriptorProto, 0, len(names))

	var walk func(file string)
	walk = func(file string) {
		if _, done := emitted[file]; done {
			return
		}
		// Mark before descending so an import cycle cannot recurse forever.
		emitted[file] = struct{}{}

		fd := byName[file]
		// Sort a copy of the import list; the descriptor itself is left untouched.
		for _, imp := range slices.Sorted(slices.Values(fd.Dependency)) {
			if _, inSet := byName[imp]; !inSet {
				continue
			}
			walk(imp)
		}

		// Post-order append: all reachable imports are already in the result.
		ordered = append(ordered, fd)
	}

	for _, file := range names {
		walk(file)
	}

	return ordered
}
158 changes: 158 additions & 0 deletions cmd/tools/wkp/tools_wkp_descriptors_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
package wkp

import (
"os"
"path/filepath"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"google.golang.org/protobuf/proto"
"google.golang.org/protobuf/reflect/protodesc"
"google.golang.org/protobuf/reflect/protoreflect"
"google.golang.org/protobuf/types/descriptorpb"
)

// TestDescriptorsExport checks that the collected descriptors are non-empty and
// survive a marshal/unmarshal round trip as a FileDescriptorSet with the same
// number of files.
func TestDescriptorsExport(t *testing.T) {
	files := collectSortedDescriptors()
	require.NotEmpty(t, files)

	// Encode the set to the binary wire format...
	encoded, err := proto.Marshal(&descriptorpb.FileDescriptorSet{File: files})
	require.NoError(t, err)
	require.NotEmpty(t, encoded)

	// ...and decode it back into a fresh message.
	decoded := new(descriptorpb.FileDescriptorSet)
	require.NoError(t, proto.Unmarshal(encoded, decoded))
	assert.Equal(t, len(files), len(decoded.File))
}

// TestDescriptorsDeterminism verifies that repeated calls to collectSortedDescriptors
// produce byte-identical output.
func TestDescriptorsDeterminism(t *testing.T) {
marshal := func() []byte {
fds := &descriptorpb.FileDescriptorSet{File: collectSortedDescriptors()}
b, err := proto.Marshal(fds)
require.NoError(t, err)
return b
}

first := marshal()
for range 5 {
assert.Equal(t, first, marshal(), "output must be byte-identical across runs")
}
}

// TestDescriptorsSelfContained verifies that the exported set forms a valid registry
// with no unresolved imports — every referenced file is present in the set.
func TestDescriptorsSelfContained(t *testing.T) {
descriptors := collectSortedDescriptors()

fds := &descriptorpb.FileDescriptorSet{File: descriptors}

// protodesc.NewFiles builds a registry and validates all imports are present.
reg, err := protodesc.NewFiles(fds)
require.NoError(t, err, "all imports must be resolvable within the exported set")

assert.Greater(t, reg.NumFiles(), 0)
}

// TestDescriptorsEthereumBlockType verifies that the Ethereum block type is fully
// resolvable with all its field definitions present.
func TestDescriptorsEthereumBlockType(t *testing.T) {
fds := &descriptorpb.FileDescriptorSet{File: collectSortedDescriptors()}

reg, err := protodesc.NewFiles(fds)
require.NoError(t, err)

desc, err := reg.FindDescriptorByName("sf.ethereum.type.v2.Block")
require.NoError(t, err, "sf.ethereum.type.v2.Block must be present in the exported set")

md, ok := desc.(protoreflect.MessageDescriptor)
require.True(t, ok)
assert.Greater(t, md.Fields().Len(), 0, "Block message must have fields")
}

// TestDescriptorsNoDuplicateFiles counts how often each file name occurs in the
// exported set and requires every count to be exactly one.
func TestDescriptorsNoDuplicateFiles(t *testing.T) {
	files := collectSortedDescriptors()

	occurrences := make(map[string]int, len(files))
	for _, fd := range files {
		occurrences[fd.GetName()] += 1
	}

	for file, n := range occurrences {
		assert.Equal(t, 1, n, "file %q appears %d times, expected exactly once", file, n)
	}
}

// TestDescriptorsTopologicalOrder requires that each file in the exported set is
// positioned after every one of its declared imports that is also in the set.
func TestDescriptorsTopologicalOrder(t *testing.T) {
	files := collectSortedDescriptors()

	// Index of each file name within the exported order.
	indexOf := make(map[string]int, len(files))
	for idx, fd := range files {
		indexOf[fd.GetName()] = idx
	}

	for _, fd := range files {
		for _, imp := range fd.Dependency {
			// Imports that are not part of the set (e.g. external ones) are not checked.
			if impIdx, inSet := indexOf[imp]; inSet {
				assert.Less(t, impIdx, indexOf[fd.GetName()],
					"dependency %q must appear before %q", imp, fd.GetName())
			}
		}
	}
}

// TestDescriptorsWriteToFile exercises the full path on disk: the descriptor set
// is marshaled, written to a temporary file, read back, unmarshaled, compared by
// file count, and finally turned into a registry to prove it is still valid.
func TestDescriptorsWriteToFile(t *testing.T) {
	target := filepath.Join(t.TempDir(), "descriptors.binpb")

	files := collectSortedDescriptors()
	encoded, err := proto.Marshal(&descriptorpb.FileDescriptorSet{File: files})
	require.NoError(t, err)

	err = os.WriteFile(target, encoded, 0o644)
	require.NoError(t, err)

	// Load the bytes back from disk and decode them.
	onDisk, err := os.ReadFile(target)
	require.NoError(t, err)

	decoded := new(descriptorpb.FileDescriptorSet)
	require.NoError(t, proto.Unmarshal(onDisk, decoded))
	assert.Equal(t, len(files), len(decoded.File))

	// The decoded set must still resolve into a registry.
	_, err = protodesc.NewFiles(decoded)
	require.NoError(t, err)
}

// TestDescriptorsSourceCodeInfoStatus reports how many of the embedded descriptor
// files carry source_code_info. It is informational only: the count is written to
// the test log and nothing is asserted, so this test cannot fail.
//
// The generator (proto/generator/generator.go) uses the BSR HTTP descriptor
// endpoint with ?source_info=true, so regenerated WKP files are expected to carry
// source_code_info. Once the embedded files have been regenerated
// (`go generate ./proto/...`), replace the log line with an assertion that
// withInfo equals len(descriptors) so that a regression is caught.
func TestDescriptorsSourceCodeInfoStatus(t *testing.T) {
	descriptors := collectSortedDescriptors()

	withInfo := 0
	for _, d := range descriptors {
		if d.SourceCodeInfo != nil {
			withInfo++
		}
	}

	// Deliberately a log, not an assertion: the expected count depends on whether
	// the embedded descriptors have been regenerated with source info yet.
	t.Logf("source_code_info present in %d/%d embedded descriptor files", withInfo, len(descriptors))
}
Loading
Loading