diff --git a/internal/archive/archive.go b/internal/archive/archive.go index e24a9c24..812d13d8 100644 --- a/internal/archive/archive.go +++ b/internal/archive/archive.go @@ -2,10 +2,12 @@ package archive import ( "compress/gzip" + "errors" "fmt" "io" "net/http" "net/url" + "path" "slices" "strings" "time" @@ -90,14 +92,15 @@ type ubuntuArchive struct { } type ubuntuIndex struct { - label string - version string - arch string - suite string - component string - release control.Section - packages control.File - archive *ubuntuArchive + label string + version string + arch string + suite string + component string + release control.Section + packages control.File + archive *ubuntuArchive + acquireByHash bool } func (a *ubuntuArchive) Options() *Options { @@ -322,6 +325,7 @@ func (index *ubuntuIndex) fetchRelease() error { logf("Release date: %s", section.Get("Date")) index.release = section + index.acquireByHash = section.Get("Acquire-By-Hash") == "yes" return nil } @@ -333,8 +337,14 @@ func (index *ubuntuIndex) fetchIndex() error { return fmt.Errorf("%s is missing from %s %s component digests", packagesPath, index.suite, index.component) } + // Look up the digest for the .gz file to use in by-hash URL construction. + // The by-hash URL must reference the file as it exists on the server + // (gzipped), not the decompressed content. 
+ gzPackagesPath := packagesPath + ".gz" + byHashDigest, _, _ := control.ParsePathInfo(digests, gzPackagesPath) + logf("Fetching index for %s %s %s %s component...", index.displayName(), index.version, index.suite, index.component) - reader, err := index.fetch(index.distPath(packagesPath+".gz"), digest, fetchBulk) + reader, err := index.fetch(index.distPath(gzPackagesPath), digest, fetchBulk, byHashDigest) if err != nil { return err } @@ -373,7 +383,15 @@ func (index *ubuntuIndex) distPath(suffix string) string { return "dists/" + index.suite + "/" + suffix } -func (index *ubuntuIndex) fetch(path, digest string, flags fetchFlags) (io.ReadSeekCloser, error) { +// byHashPath transforms a dists/ path into its by-hash equivalent. +// For example: dists/jammy/main/binary-amd64/Packages.gz +// becomes: dists/jammy/main/binary-amd64/by-hash/SHA256/{digest}, where {digest} is the digest argument. +func (index *ubuntuIndex) byHashPath(p, digest string) string { + dir, _ := path.Split(p) + return dir + "by-hash/SHA256/" + digest +} + +func (index *ubuntuIndex) fetch(path, digest string, flags fetchFlags, byHashDigest ...string) (io.ReadSeekCloser, error) { reader, err := index.archive.cache.Open(digest) if err == nil { return reader, nil @@ -381,11 +399,52 @@ func (index *ubuntuIndex) fetch(path, digest string, flags fetchFlags) (io.ReadS return nil, err } + // Build the list of candidate URLs to try. When Acquire-By-Hash is + // supported and the path is an index file (under dists/ with a known + // by-hash digest), try the by-hash URL first. This avoids digest + // mismatches during archive publication. Fall back to the canonical + // path if the by-hash URL returns 404. 
+ var hashDigest string + if len(byHashDigest) > 0 { + hashDigest = byHashDigest[0] + } + var candidates []string + if index.acquireByHash && hashDigest != "" && strings.HasPrefix(path, "dists/") { + hashPath := index.byHashPath(path, hashDigest) + cleanURL, err := url.JoinPath(index.archive.baseURL, hashPath) + if err != nil { + return nil, fmt.Errorf("internal error: cannot construct by-hash URL: %v", err) + } + logf("Fetching by-hash: %s", hashPath) + candidates = append(candidates, cleanURL) + } cleanURL, err := url.JoinPath(index.archive.baseURL, path) if err != nil { return nil, fmt.Errorf("internal error: cannot construct URL: %v", err) } - req, err := http.NewRequest("GET", cleanURL, nil) + candidates = append(candidates, cleanURL) + + var lastErr error + for i, candidateURL := range candidates { + reader, lastErr = index.fetchURL(candidateURL, path, digest, flags) + if lastErr == nil { + return reader, nil + } + if !errors.Is(lastErr, errArchiveNotFound) { + return nil, lastErr + } + // 404 for this candidate; announce the fallback only if another candidate remains. 
+ if i < len(candidates)-1 { + logf("By-hash URL not found, falling back to canonical path for %s", path) + } + } + return nil, lastErr +} + +var errArchiveNotFound = errors.New("cannot find archive data") + +func (index *ubuntuIndex) fetchURL(rawURL, path, digest string, flags fetchFlags) (io.ReadSeekCloser, error) { + req, err := http.NewRequest("GET", rawURL, nil) if err != nil { return nil, fmt.Errorf("cannot create HTTP request: %v", err) } @@ -410,7 +469,7 @@ func (index *ubuntuIndex) fetch(path, digest string, flags fetchFlags) (io.ReadS case 401: return nil, fmt.Errorf("cannot fetch from %q: unauthorized", index.label) case 404: - return nil, fmt.Errorf("cannot find archive data") + return nil, errArchiveNotFound default: return nil, fmt.Errorf("error from archive: %v", resp.Status) } diff --git a/internal/archive/archive_test.go b/internal/archive/archive_test.go index 8ca840e5..c98abe37 100644 --- a/internal/archive/archive_test.go +++ b/internal/archive/archive_test.go @@ -617,6 +617,112 @@ func (s *httpSuite) TestPackageInfo(c *C) { } } +func (s *httpSuite) TestAcquireByHash(c *C) { + // When Acquire-By-Hash is enabled, the by-hash URL should be used + // for fetching index files. + s.prepareArchiveAdjustRelease("jammy", "22.04", "amd64", []string{"main"}, func(r *testarchive.Release) { + r.AcquireByHash = true + }) + + options := archive.Options{ + Label: "ubuntu", + Version: "22.04", + Arch: "amd64", + Suites: []string{"jammy"}, + Components: []string{"main"}, + CacheDir: c.MkDir(), + PubKeys: []*packet.PublicKey{s.pubKey}, + } + + _, err := archive.Open(&options) + c.Assert(err, IsNil) + + // Verify that a by-hash URL was requested. 
+ foundByHash := false + for _, req := range s.requests { + if strings.Contains(req.URL.Path, "by-hash/SHA256/") { + foundByHash = true + break + } + } + c.Assert(foundByHash, Equals, true) +} + +func (s *httpSuite) TestAcquireByHashFallback(c *C) { + // When Acquire-By-Hash is enabled but the by-hash URL returns 404, + // Chisel should fall back to the canonical path. + s.prepareArchiveAdjustRelease("jammy", "22.04", "amd64", []string{"main"}, func(r *testarchive.Release) { + r.AcquireByHash = true + }) + + // Override Do to return 404 for by-hash URLs, normal responses otherwise. + var allRequests []*http.Request + restoreDo := archive.FakeDo(func(req *http.Request) (*http.Response, error) { + allRequests = append(allRequests, req) + c.Logf("Request: %s", req.URL.String()) + if strings.Contains(req.URL.Path, "by-hash/") { + return &http.Response{ + Body: io.NopCloser(strings.NewReader("")), + StatusCode: 404, + }, nil + } + return s.Do(req) + }) + defer restoreDo() + + options := archive.Options{ + Label: "ubuntu", + Version: "22.04", + Arch: "amd64", + Suites: []string{"jammy"}, + Components: []string{"main"}, + CacheDir: c.MkDir(), + PubKeys: []*packet.PublicKey{s.pubKey}, + } + + _, err := archive.Open(&options) + c.Assert(err, IsNil) + + // Verify that both by-hash and canonical URLs were requested. + foundByHash := false + foundCanonical := false + for _, req := range allRequests { + if strings.Contains(req.URL.Path, "by-hash/SHA256/") { + foundByHash = true + } + if strings.Contains(req.URL.Path, "binary-amd64/Packages.gz") && + !strings.Contains(req.URL.Path, "by-hash/") { + foundCanonical = true + } + } + c.Assert(foundByHash, Equals, true) + c.Assert(foundCanonical, Equals, true) +} + +func (s *httpSuite) TestNoAcquireByHash(c *C) { + // When Acquire-By-Hash is not enabled, the canonical path should + // be used directly (no by-hash URLs). 
+ s.prepareArchive("jammy", "22.04", "amd64", []string{"main"}) + + options := archive.Options{ + Label: "ubuntu", + Version: "22.04", + Arch: "amd64", + Suites: []string{"jammy"}, + Components: []string{"main"}, + CacheDir: c.MkDir(), + PubKeys: []*packet.PublicKey{s.pubKey}, + } + + _, err := archive.Open(&options) + c.Assert(err, IsNil) + + // Verify that no by-hash URL was requested. + for _, req := range s.requests { + c.Assert(strings.Contains(req.URL.Path, "by-hash/"), Equals, false) + } +} + func read(r io.Reader) string { data, err := io.ReadAll(r) if err != nil { diff --git a/internal/archive/testarchive/testarchive.go b/internal/archive/testarchive/testarchive.go index ea5b8e08..d778f7a1 100644 --- a/internal/archive/testarchive/testarchive.go +++ b/internal/archive/testarchive/testarchive.go @@ -102,11 +102,12 @@ func (p *Package) Content() []byte { } type Release struct { - Suite string - Version string - Label string - Items []Item - PrivKey *packet.PrivateKey + Suite string + Version string + Label string + Items []Item + PrivKey *packet.PrivateKey + AcquireByHash bool } func (r *Release) Walk(f func(Item) error) error { @@ -127,6 +128,10 @@ func (r *Release) Content() []byte { content := item.Content() fmt.Fprintf(&digests, " %s %d %s\n", makeSha256(content), len(content), item.Path()) } + acquireByHash := "" + if r.AcquireByHash { + acquireByHash = "Acquire-By-Hash: yes\n" + } content := fmt.Sprintf(string(testutil.Reindent(` Origin: Ubuntu Label: %s @@ -137,9 +142,9 @@ func (r *Release) Content() []byte { Architectures: amd64 arm64 armhf i386 ppc64el riscv64 s390x Components: main restricted universe multiverse Description: Ubuntu %s - SHA256: + %sSHA256: %s - `)), r.Label, r.Suite, r.Version, r.Version, digests.String()) + `)), r.Label, r.Suite, r.Version, r.Version, acquireByHash, digests.String()) var buf bytes.Buffer writer, err := clearsign.Encode(&buf, r.PrivKey, nil) @@ -166,6 +171,13 @@ func (r *Release) Render(prefix string, content 
map[string][]byte) error { itemPath = path.Join(prefix, "dists", r.Suite, itemPath) } content[itemPath] = item.Content() + // When Acquire-By-Hash is enabled, also store the item under its + // by-hash path (skipped for items whose path starts with pool/). + if r.AcquireByHash && !strings.HasPrefix(item.Path(), "pool/") { + dir, _ := path.Split(itemPath) + hashPath := dir + "by-hash/SHA256/" + makeSha256(item.Content()) + content[hashPath] = item.Content() + } return nil }) }