Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 71 additions & 12 deletions internal/archive/archive.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@ package archive

import (
"compress/gzip"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"path"
"slices"
"strings"
"time"
Expand Down Expand Up @@ -90,14 +92,15 @@ type ubuntuArchive struct {
}

// ubuntuIndex holds the per-suite, per-component state of an Ubuntu
// archive: the identifying coordinates, the parsed Release section, the
// parsed Packages index, and a back-reference to the owning archive.
type ubuntuIndex struct {
	label     string
	version   string
	arch      string
	suite     string
	component string
	release   control.Section
	packages  control.File
	archive   *ubuntuArchive

	// acquireByHash is set by fetchRelease when the Release section has
	// "Acquire-By-Hash: yes". fetch consults it to decide whether a
	// by-hash URL should be tried before the canonical path.
	acquireByHash bool
}

func (a *ubuntuArchive) Options() *Options {
Expand Down Expand Up @@ -322,6 +325,7 @@ func (index *ubuntuIndex) fetchRelease() error {
logf("Release date: %s", section.Get("Date"))

index.release = section
index.acquireByHash = section.Get("Acquire-By-Hash") == "yes"
return nil
}

Expand All @@ -333,8 +337,14 @@ func (index *ubuntuIndex) fetchIndex() error {
return fmt.Errorf("%s is missing from %s %s component digests", packagesPath, index.suite, index.component)
}

// Look up the digest for the .gz file to use in by-hash URL construction.
// The by-hash URL must reference the file as it exists on the server
// (gzipped), not the decompressed content.
gzPackagesPath := packagesPath + ".gz"
byHashDigest, _, _ := control.ParsePathInfo(digests, gzPackagesPath)

logf("Fetching index for %s %s %s %s component...", index.displayName(), index.version, index.suite, index.component)
reader, err := index.fetch(index.distPath(packagesPath+".gz"), digest, fetchBulk)
reader, err := index.fetch(index.distPath(gzPackagesPath), digest, fetchBulk, byHashDigest)
if err != nil {
return err
}
Expand Down Expand Up @@ -373,19 +383,68 @@ func (index *ubuntuIndex) distPath(suffix string) string {
return "dists/" + index.suite + "/" + suffix
}

func (index *ubuntuIndex) fetch(path, digest string, flags fetchFlags) (io.ReadSeekCloser, error) {
// byHashPath maps an index path to the by-hash location that sits next to
// it in the same directory. The file name is dropped and replaced with
// by-hash/SHA256/<digest>, so
//
//	dists/jammy/main/binary-amd64/Packages.gz
//
// turns into
//
//	dists/jammy/main/binary-amd64/by-hash/SHA256/<digest>
//
// A path without any slash yields just by-hash/SHA256/<digest>.
func (index *ubuntuIndex) byHashPath(p, digest string) string {
	const byHashDir = "by-hash/SHA256/"
	// path.Split keeps the trailing slash on the directory part, so plain
	// concatenation is enough.
	parent, _ := path.Split(p)
	return parent + byHashDir + digest
}

// fetch returns a reader for the archive file at path whose content is
// identified by digest. The cache is consulted first; on a cache miss the
// file is downloaded through fetchURL.
//
// byHashDigest is optional. Only its first element is used, and it is
// expected to be the SHA256 digest of the file as stored on the server.
// When the Release file advertised Acquire-By-Hash, that digest is
// non-empty and path lives under dists/, the by-hash URL is tried first.
// This avoids digest mismatches during archive publication. If the by-hash
// URL answers with 404 (errArchiveNotFound) the canonical path is tried
// next; any other error is returned immediately.
func (index *ubuntuIndex) fetch(path, digest string, flags fetchFlags, byHashDigest ...string) (io.ReadSeekCloser, error) {
	reader, err := index.archive.cache.Open(digest)
	if err == nil {
		return reader, nil
	}
	if err != cache.MissErr {
		return nil, err
	}

	var hashDigest string
	if len(byHashDigest) > 0 {
		hashDigest = byHashDigest[0]
	}

	// Build the ordered list of URLs to try: by-hash first (when
	// applicable), canonical path last.
	var candidates []string
	if index.acquireByHash && hashDigest != "" && strings.HasPrefix(path, "dists/") {
		hashPath := index.byHashPath(path, hashDigest)
		hashURL, err := url.JoinPath(index.archive.baseURL, hashPath)
		if err != nil {
			return nil, fmt.Errorf("internal error: cannot construct by-hash URL: %v", err)
		}
		logf("Fetching by-hash: %s", hashPath)
		candidates = append(candidates, hashURL)
	}
	canonicalURL, err := url.JoinPath(index.archive.baseURL, path)
	if err != nil {
		return nil, fmt.Errorf("internal error: cannot construct URL: %v", err)
	}
	candidates = append(candidates, canonicalURL)

	var lastErr error
	for i, candidateURL := range candidates {
		reader, lastErr = index.fetchURL(candidateURL, path, digest, flags)
		if lastErr == nil {
			return reader, nil
		}
		if !errors.Is(lastErr, errArchiveNotFound) {
			return nil, lastErr
		}
		// 404 for this candidate. Only announce a fallback when there is
		// actually another candidate left to try; otherwise the message
		// would be misleading after the canonical path itself was missing.
		if i < len(candidates)-1 {
			logf("By-hash URL not found, falling back to canonical path for %s", path)
		}
	}
	return nil, lastErr
}

// errArchiveNotFound is the sentinel returned when the archive answers a
// request with HTTP 404. fetch matches it with errors.Is to decide whether
// the next candidate URL should be tried.
var errArchiveNotFound = errors.New("cannot find archive data")

func (index *ubuntuIndex) fetchURL(rawURL, path, digest string, flags fetchFlags) (io.ReadSeekCloser, error) {
req, err := http.NewRequest("GET", rawURL, nil)
if err != nil {
return nil, fmt.Errorf("cannot create HTTP request: %v", err)
}
Expand All @@ -410,7 +469,7 @@ func (index *ubuntuIndex) fetch(path, digest string, flags fetchFlags) (io.ReadS
case 401:
return nil, fmt.Errorf("cannot fetch from %q: unauthorized", index.label)
case 404:
return nil, fmt.Errorf("cannot find archive data")
return nil, errArchiveNotFound
default:
return nil, fmt.Errorf("error from archive: %v", resp.Status)
}
Expand Down
106 changes: 106 additions & 0 deletions internal/archive/archive_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -617,6 +617,112 @@ func (s *httpSuite) TestPackageInfo(c *C) {
}
}

// TestAcquireByHash checks that opening an archive whose Release file
// advertises Acquire-By-Hash results in at least one by-hash request.
func (s *httpSuite) TestAcquireByHash(c *C) {
	enableByHash := func(r *testarchive.Release) {
		r.AcquireByHash = true
	}
	s.prepareArchiveAdjustRelease("jammy", "22.04", "amd64", []string{"main"}, enableByHash)

	options := archive.Options{
		Label:      "ubuntu",
		Version:    "22.04",
		Arch:       "amd64",
		Suites:     []string{"jammy"},
		Components: []string{"main"},
		CacheDir:   c.MkDir(),
		PubKeys:    []*packet.PublicKey{s.pubKey},
	}

	_, err := archive.Open(&options)
	c.Assert(err, IsNil)

	// Scan the recorded requests and stop at the first by-hash hit.
	sawByHash := false
	for _, req := range s.requests {
		sawByHash = strings.Contains(req.URL.Path, "by-hash/SHA256/")
		if sawByHash {
			break
		}
	}
	c.Assert(sawByHash, Equals, true)
}

// TestAcquireByHashFallback checks that, with Acquire-By-Hash advertised
// but every by-hash URL answering 404, opening the archive still succeeds
// and the canonical Packages.gz path is requested as well.
func (s *httpSuite) TestAcquireByHashFallback(c *C) {
	s.prepareArchiveAdjustRelease("jammy", "22.04", "amd64", []string{"main"}, func(r *testarchive.Release) {
		r.AcquireByHash = true
	})

	// Intercept HTTP traffic: record every request, answer by-hash URLs
	// with an empty 404, and delegate everything else to the suite.
	var seen []*http.Request
	restore := archive.FakeDo(func(req *http.Request) (*http.Response, error) {
		seen = append(seen, req)
		c.Logf("Request: %s", req.URL.String())
		if !strings.Contains(req.URL.Path, "by-hash/") {
			return s.Do(req)
		}
		notFound := &http.Response{
			Body:       io.NopCloser(strings.NewReader("")),
			StatusCode: 404,
		}
		return notFound, nil
	})
	defer restore()

	options := archive.Options{
		Label:      "ubuntu",
		Version:    "22.04",
		Arch:       "amd64",
		Suites:     []string{"jammy"},
		Components: []string{"main"},
		CacheDir:   c.MkDir(),
		PubKeys:    []*packet.PublicKey{s.pubKey},
	}

	_, err := archive.Open(&options)
	c.Assert(err, IsNil)

	// Both kinds of URL must show up among the intercepted requests.
	sawByHash, sawCanonical := false, false
	for _, req := range seen {
		urlPath := req.URL.Path
		if strings.Contains(urlPath, "by-hash/SHA256/") {
			sawByHash = true
		}
		underByHash := strings.Contains(urlPath, "by-hash/")
		if !underByHash && strings.Contains(urlPath, "binary-amd64/Packages.gz") {
			sawCanonical = true
		}
	}
	c.Assert(sawByHash, Equals, true)
	c.Assert(sawCanonical, Equals, true)
}

// TestNoAcquireByHash checks that an archive prepared without the
// Acquire-By-Hash adjustment is opened using canonical paths only: none of
// the recorded requests may touch a by-hash URL.
func (s *httpSuite) TestNoAcquireByHash(c *C) {
	s.prepareArchive("jammy", "22.04", "amd64", []string{"main"})

	options := archive.Options{
		Label:      "ubuntu",
		Version:    "22.04",
		Arch:       "amd64",
		Suites:     []string{"jammy"},
		Components: []string{"main"},
		CacheDir:   c.MkDir(),
		PubKeys:    []*packet.PublicKey{s.pubKey},
	}

	_, err := archive.Open(&options)
	c.Assert(err, IsNil)

	for _, req := range s.requests {
		usedByHash := strings.Contains(req.URL.Path, "by-hash/")
		c.Assert(usedByHash, Equals, false)
	}
}

func read(r io.Reader) string {
data, err := io.ReadAll(r)
if err != nil {
Expand Down
26 changes: 19 additions & 7 deletions internal/archive/testarchive/testarchive.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,12 @@ func (p *Package) Content() []byte {
}

// Release describes a Release file of the test archive: its identifying
// fields, the items listed in its digest section, and the private key
// used to clearsign the rendered content.
type Release struct {
	Suite   string
	Version string
	Label   string
	Items   []Item
	PrivKey *packet.PrivateKey

	// AcquireByHash, when true, makes Content emit an
	// "Acquire-By-Hash: yes" field and makes Render additionally publish
	// every non-pool item under by-hash/SHA256/<digest> next to it.
	AcquireByHash bool
}

func (r *Release) Walk(f func(Item) error) error {
Expand All @@ -127,6 +128,10 @@ func (r *Release) Content() []byte {
content := item.Content()
fmt.Fprintf(&digests, " %s %d %s\n", makeSha256(content), len(content), item.Path())
}
acquireByHash := ""
if r.AcquireByHash {
acquireByHash = "Acquire-By-Hash: yes\n"
}
content := fmt.Sprintf(string(testutil.Reindent(`
Origin: Ubuntu
Label: %s
Expand All @@ -137,9 +142,9 @@ func (r *Release) Content() []byte {
Architectures: amd64 arm64 armhf i386 ppc64el riscv64 s390x
Components: main restricted universe multiverse
Description: Ubuntu %s
SHA256:
%sSHA256:
%s
`)), r.Label, r.Suite, r.Version, r.Version, digests.String())
`)), r.Label, r.Suite, r.Version, r.Version, acquireByHash, digests.String())

var buf bytes.Buffer
writer, err := clearsign.Encode(&buf, r.PrivKey, nil)
Expand All @@ -166,6 +171,13 @@ func (r *Release) Render(prefix string, content map[string][]byte) error {
itemPath = path.Join(prefix, "dists", r.Suite, itemPath)
}
content[itemPath] = item.Content()
// When Acquire-By-Hash is enabled, also render the by-hash URL
// for index files (those under dists/).
if r.AcquireByHash && !strings.HasPrefix(item.Path(), "pool/") {
dir, _ := path.Split(itemPath)
hashPath := dir + "by-hash/SHA256/" + makeSha256(item.Content())
content[hashPath] = item.Content()
}
return nil
})
}
Expand Down
Loading