From 20ee6ac185aad1e0867fde26b916e9cf84d42a6c Mon Sep 17 00:00:00 2001 From: Adam Singer Date: Tue, 2 Dec 2025 17:35:12 +0000 Subject: [PATCH 1/2] [metrics] Add build_tar.py metrics --- BUILD_TAR_METRICS.md | 162 ++++++++++++ minidock/container_data.bzl | 24 +- minidock/container_data_tools/build_tar.py | 292 +++++++++++++++------ test_metrics.sh | 96 +++++++ 4 files changed, 485 insertions(+), 89 deletions(-) create mode 100644 BUILD_TAR_METRICS.md create mode 100755 test_metrics.sh diff --git a/BUILD_TAR_METRICS.md b/BUILD_TAR_METRICS.md new file mode 100644 index 0000000..2d6c76f --- /dev/null +++ b/BUILD_TAR_METRICS.md @@ -0,0 +1,162 @@ +# Build Tar Performance Metrics + +## Overview + +Performance monitoring has been added to `build_tar.py` to help identify bottlenecks and understand where time is being spent during tar file creation. + +## Usage + +To enable metrics collection, set the `BUILD_TAR_METRICS` environment variable to `1` before running the script: + +```bash +export BUILD_TAR_METRICS=1 +python minidock/container_data_tools/build_tar.py [your arguments...] +``` + +Or inline: + +```bash +BUILD_TAR_METRICS=1 python minidock/container_data_tools/build_tar.py [your arguments...] +``` + +### Output to File + +By default, metrics are written to **stderr**. To write metrics to a file instead, use the `--metrics_output` parameter: + +```bash +BUILD_TAR_METRICS=1 python minidock/container_data_tools/build_tar.py \ + --output=out.tar.gz \ + --file=myfile.txt=/dest/path.txt \ + --metrics_output=metrics.log +``` + +This is useful when: +- You want to separate metrics from regular stderr output +- You need to parse metrics programmatically +- You're running in a Bazel build and want to preserve metrics logs + +### Using with Bazel (`container_data` rule) + +When using the `container_data` rule, you can enable metrics collection by setting the `build_tar_metrics` attribute: + +```python +container_data( + name = "my_layer", + files = ["file.txt"], + build_tar_metrics = True, # Enable performance metrics +) +``` + +When enabled, the rule will automatically: +- Set the `BUILD_TAR_METRICS=1` environment variable +- Create a `{name}-layer.metrics` output file +- Include the metrics file as a build artifact + +To view the metrics after building: + +```bash +bazel build //your:my_layer +cat bazel-bin/your/my_layer-layer.metrics +``` + +**Note:** `build_tar_metrics` defaults to `False`, so metrics are only collected when explicitly enabled for specific targets. + +## What Gets Measured + +The metrics system tracks timing for: + +### Method-level Metrics +- `TarFileWriter.__init__` - Initialization of tar writer +- `TarFileWriter.add_file` - Adding individual files +- `TarFileWriter.add_dir` - Adding directories recursively +- `TarFileWriter.add_tar` - Merging tar files +- `TarFileWriter._addfile` - Internal file addition +- `TarFileWriter.close` - Closing and finalizing tar file +- `TarFile.__init__` - Initialization of TarFile wrapper +- `TarFile.add_file` - Adding files through TarFile wrapper +- `TarFile.add_tar` - Merging tars through TarFile wrapper +- `TarFile.add_empty_file` - Creating empty files +- `TarFile.add_deb` - Adding debian packages +- `TarFile.add_pkg_metadata` - Adding package metadata + +### Operation-level Metrics +- `zstd_compression` - Time spent in zstd compression +- `xz_compression` - Time spent in xz compression +- `process_manifest` - Time spent processing manifest files +- `process_files` - Time spent processing individual files +- `process_tars` - Time spent processing tar files + +## Output Format + +Metrics are written to **stderr** in real-time as operations complete: + +``` +[BUILD_TAR_METRIC] TarFile.__init__: 0.0023s +[BUILD_TAR_METRIC] TarFile.add_file: 0.0145s +[BUILD_TAR_METRIC] TarFileWriter.add_file: 0.0142s +... +``` + +At the end of execution, a summary table is printed: + +``` +[BUILD_TAR_METRIC] ================================================================================ +[BUILD_TAR_METRIC] PERFORMANCE SUMMARY +[BUILD_TAR_METRIC] ================================================================================ +[BUILD_TAR_METRIC] Operation Count Total(s) Avg(s) Min(s) Max(s) +[BUILD_TAR_METRIC] -------------------------------------------------------------------------------- +[BUILD_TAR_METRIC] zstd_compression 1 5.2341 5.2341 5.2341 5.2341 +[BUILD_TAR_METRIC] TarFileWriter.close 1 5.2456 5.2456 5.2456 5.2456 +[BUILD_TAR_METRIC] TarFileWriter.add_tar 3 1.2345 0.4115 0.2100 0.6200 +[BUILD_TAR_METRIC] TarFile.add_file 42 0.5234 0.0125 0.0010 0.0450 +[BUILD_TAR_METRIC] Total execution time: 7.1234s +[BUILD_TAR_METRIC] ================================================================================ +``` + +## Interpreting Results + +- **Count**: Number of times the operation was called +- **Total(s)**: Total cumulative time spent in that operation +- **Avg(s)**: Average time per call +- **Min(s)**: Fastest call +- **Max(s)**: Slowest call + +The summary is sorted by total time (descending), so the most expensive operations appear first. + +## Example + +```bash +# Run with metrics enabled (output to stderr) +BUILD_TAR_METRICS=1 bazel build //your:target 2>&1 | grep BUILD_TAR_METRIC + +# Or if running the script directly with stderr output +BUILD_TAR_METRICS=1 python minidock/container_data_tools/build_tar.py \ + --output=out.tar.gz \ + --file=myfile.txt=/dest/path.txt \ + --compression=gz \ + 2>&1 | tee metrics.log + +# Run with metrics output to a dedicated file +BUILD_TAR_METRICS=1 python minidock/container_data_tools/build_tar.py \ + --output=out.tar.gz \ + --file=myfile.txt=/dest/path.txt \ + --compression=gz \ + --metrics_output=build_metrics.log + +# Then view the metrics +cat build_metrics.log +``` + +## Tips for Performance Analysis + +1. **Look at Total Time First**: The operations with the highest total time are your main bottlenecks +2. **Check Call Counts**: High call counts with low individual times can add up +3. **Compression**: If compression times dominate, consider: + - Using lower compression levels (`--zstd_compression_level` or `--gzip_compression_level`) + - Using faster compression (gz is faster than zstd, which is faster than xz) +4. **File Operations**: If `add_file` operations dominate, you may have many small files that could be batched differently + +## Performance Overhead + +The timing decorators add minimal overhead (microseconds per call). The metrics are only collected when `BUILD_TAR_METRICS=1` is set, so there's no performance impact in production use. + diff --git a/minidock/container_data.bzl b/minidock/container_data.bzl index 24408ff..09d78b6 100644 --- a/minidock/container_data.bzl +++ b/minidock/container_data.bzl @@ -116,12 +116,26 @@ def __container_data_impl( args.add(compression, format = "--compression=%s") args.add(ctx.attr.mtime, format = "--mtime=%s") + # Conditionally declare metrics output file + metrics_file = None + action_env = {} + outputs = [layer] + output_files = [layer] + + if ctx.attr.build_tar_metrics: + metrics_file = ctx.actions.declare_file(ctx.attr.name + "-layer.metrics") + args.add(metrics_file, format = "--metrics_output=%s") + action_env["BUILD_TAR_METRICS"] = "1" + outputs.append(metrics_file) + output_files.append(metrics_file) + ctx.actions.run( executable = ctx.executable._build_tar, arguments = [args], inputs = ctx.files.tars + [manifest_file] + files, - outputs = [layer], - use_default_shell_env = True, + outputs = outputs, + env = action_env, + use_default_shell_env = False, mnemonic = "ContainerData", ) return [ @@ -133,7 +147,7 @@ def __container_data_impl( config = None, ), DefaultInfo( - files = depset([layer]), + files = depset(output_files), ), ] @@ -208,6 +222,10 @@ container_data = rule( A list of tar files whose content should be in the Docker image.""", ), + "build_tar_metrics": attr.bool( + default = False, + doc = "Enable performance metrics collection. When enabled, creates a {name}-layer.metrics file with detailed timing information.", + ), }, implementation = __container_data_impl, ) diff --git a/minidock/container_data_tools/build_tar.py b/minidock/container_data_tools/build_tar.py index 38ce19b..acba55b 100644 --- a/minidock/container_data_tools/build_tar.py +++ b/minidock/container_data_tools/build_tar.py @@ -30,8 +30,89 @@ import re import tarfile import tempfile +import time +# Performance monitoring utilities +# Enable via BUILD_TAR_METRICS=1 environment variable +_METRICS_ENABLED = os.environ.get('BUILD_TAR_METRICS', '0') == '1' +_metrics_data = {} +_metrics_file = None # File handle for metrics output (if specified) + + +def _log_metric(message): + """Log metric to file (if specified) or stderr.""" + if _METRICS_ENABLED: + output = _metrics_file if _metrics_file else sys.stderr + output.write(f"[BUILD_TAR_METRIC] {message}\n") + output.flush() + + +def timing_decorator(func): + """Decorator to measure and log execution time of methods.""" + @functools.wraps(func) + def wrapper(*args, **kwargs): + if not _METRICS_ENABLED: + return func(*args, **kwargs) + + func_name = f"{args[0].__class__.__name__}.{func.__name__}" if args and hasattr(args[0], '__class__') else func.__name__ + start_time = time.perf_counter() + try: + result = func(*args, **kwargs) + return result + finally: + elapsed = time.perf_counter() - start_time + if func_name not in _metrics_data: + _metrics_data[func_name] = {'count': 0, 'total_time': 0.0, 'min': float('inf'), 'max': 0.0} + _metrics_data[func_name]['count'] += 1 + _metrics_data[func_name]['total_time'] += elapsed + _metrics_data[func_name]['min'] = min(_metrics_data[func_name]['min'], elapsed) + _metrics_data[func_name]['max'] = max(_metrics_data[func_name]['max'], elapsed) + _log_metric(f"{func_name}: {elapsed:.4f}s") + return wrapper + + +@contextmanager +def timing_context(name): + """Context manager for timing specific code blocks.""" + if not _METRICS_ENABLED: + yield + return + + start_time = time.perf_counter() + try: + yield + finally: + elapsed = time.perf_counter() - start_time + if name not in _metrics_data: + _metrics_data[name] = {'count': 0, 'total_time': 0.0, 'min': float('inf'), 'max': 0.0} + _metrics_data[name]['count'] += 1 + _metrics_data[name]['total_time'] += elapsed + _metrics_data[name]['min'] = min(_metrics_data[name]['min'], elapsed) + _metrics_data[name]['max'] = max(_metrics_data[name]['max'], elapsed) + _log_metric(f"{name}: {elapsed:.4f}s") + + +def print_metrics_summary(): + """Print summary of all collected metrics.""" + if not _METRICS_ENABLED or not _metrics_data: + return + + _log_metric("=" * 80) + _log_metric("PERFORMANCE SUMMARY") + _log_metric("=" * 80) + + # Sort by total time + sorted_metrics = sorted(_metrics_data.items(), key=lambda x: x[1]['total_time'], reverse=True) + + _log_metric(f"{'Operation':<50} {'Count':>8} {'Total(s)':>10} {'Avg(s)':>10} {'Min(s)':>10} {'Max(s)':>10}") + _log_metric("-" * 80) + + for name, data in sorted_metrics: + avg_time = data['total_time'] / data['count'] if data['count'] > 0 else 0 + _log_metric(f"{name:<50} {data['count']:>8} {data['total_time']:>10.4f} {avg_time:>10.4f} {data['min']:>10.4f} {data['max']:>10.4f}") + + _log_metric("=" * 80) @@ -42,6 +123,7 @@ class TarFileWriter(object): class Error(Exception): pass + @timing_decorator def __init__(self, name, compression='', @@ -104,6 +186,7 @@ def __enter__(self): def __exit__(self, t, v, traceback): self.close() + @timing_decorator def add_dir(self, name, path, @@ -172,6 +255,7 @@ def add_dir(self, mtime=mtime, mode=mode) + @timing_decorator def _addfile(self, info, fileobj=None): """Add a file in the tar file if there is no conflict.""" if not info.name.endswith('/') and info.type == tarfile.DIRTYPE: @@ -184,6 +268,7 @@ def _addfile(self, info, fileobj=None): print('Duplicate file in archive: %s, ' 'picking first occurrence' % info.name) + @timing_decorator def add_file(self, name, kind=tarfile.REGTYPE, @@ -262,6 +347,7 @@ def add_file(self, self.directories.add(name) self._addfile(tarinfo) + @timing_decorator def add_tar(self, tar, rootuid=None, @@ -391,6 +477,7 @@ def add_tar(self, self._addfile(tarinfo) intar.close() + @timing_decorator def close(self): """Close the output tar file. This class should not be used anymore after calling that method. @@ -408,21 +495,23 @@ def close(self): if subprocess.call('which zstd', shell=True, stdout=subprocess.PIPE): raise self.Error('Cannot handle .zstd compression: ' 'zstd command not found.') - subprocess.call( - 'mv {0} {0}.d && zstd -z -{1} {0}.d && mv {0}.d.zst {0}'.format( - self.name, self.zstd_compression_level), - shell=True, - stdout=subprocess.PIPE) + with timing_context('zstd_compression'): + subprocess.call( + 'mv {0} {0}.d && zstd --threads=0 -z -{1} {0}.d && mv {0}.d.zst {0}'.format( + self.name, self.zstd_compression_level), + shell=True, + stdout=subprocess.PIPE) if self.xz: # Support xz compression through xz... until we can use Py3 if subprocess.call('which xz', shell=True, stdout=subprocess.PIPE): raise self.Error('Cannot handle .xz and .lzma compression: ' 'xz not found.') - subprocess.call( - 'mv {0} {0}.d && xz -z {0}.d && mv {0}.d.xz {0}'.format(self.name), - shell=True, - stdout=subprocess.PIPE) + with timing_context('xz_compression'): + subprocess.call( + 'mv {0} {0}.d && xz -z {0}.d && mv {0}.d.xz {0}'.format(self.name), + shell=True, + stdout=subprocess.PIPE) @@ -444,6 +533,7 @@ def parse_pkg_name(metadata, filename): else: return os.path.basename(os.path.splitext(filename)[0]) + @timing_decorator def __init__(self, output, directory, root_directory, default_mtime, enable_mtime_preservation, force_posixpath, gzip_compression_level, zstd_compression_level=3, compression='gz'): @@ -472,6 +562,7 @@ def __enter__(self): def __exit__(self, t, v, traceback): self.tarfile.close() + @timing_decorator def add_file(self, f, destfile, mode=None, ids=None, names=None): """Add a file to the tar file. @@ -507,6 +598,7 @@ def add_file(self, f, destfile, mode=None, ids=None, names=None): uname=names[0], gname=names[1]) + @timing_decorator def add_empty_file(self, destfile, mode=None, ids=None, names=None, kind=tarfile.REGTYPE): """Add a file to the tar file. @@ -573,6 +665,7 @@ def add_empty_root_dir(self, destpath, mode=None, ids=None, names=None): destpath, mode=mode, ids=ids, names=names) self.tarfile.root_directory = original_root_directory + @timing_decorator def add_tar(self, tar): """Merge a tar file into the destination tar file. @@ -617,6 +710,7 @@ def write_temp_file(self, data, suffix='tar', mode='wb'): finally: os.remove(tmpfile) + @timing_decorator def add_pkg_metadata(self, metadata_tar, deb): try: with tarfile.open(metadata_tar) as tar: @@ -636,6 +730,7 @@ def add_pkg_metadata(self, metadata_tar, deb): raise self.DebError('Unknown Exception {0}. Please report an issue at' ' github.com/bazelbuild/rules_docker.'.format(e)) + @timing_decorator def add_deb(self, deb): """Extract a debian package in the output tar. @@ -675,86 +770,108 @@ def add_deb(self, deb): raise self.DebError(deb + ' does not contains a control file!') def main(FLAGS): - # Parse modes arguments - default_mode = None - if FLAGS.mode: - # Convert from octal - default_mode = int(FLAGS.mode, 8) - - mode_map = {} - if FLAGS.modes: - for filemode in FLAGS.modes: - (f, mode) = filemode.split('=', 1) - if f[0] == '/': - f = f[1:] - mode_map[f] = int(mode, 8) - - default_ownername = ('', '') - if FLAGS.owner_name: - default_ownername = FLAGS.owner_name.split('.', 1) - names_map = {} - if FLAGS.owner_names: - for file_owner in FLAGS.owner_names: - (f, owner) = file_owner.split('=', 1) - (user, group) = owner.split('.', 1) - if f[0] == '/': - f = f[1:] - names_map[f] = (user, group) - - default_ids = FLAGS.owner.split('.', 1) - default_ids = (int(default_ids[0]), int(default_ids[1])) - ids_map = {} - if FLAGS.owners: - for file_owner in FLAGS.owners: - (f, owner) = file_owner.split('=', 1) - (user, group) = owner.split('.', 1) - if f[0] == '/': - f = f[1:] - ids_map[f] = (int(user), int(group)) - - # Add objects to the tar file - with TarFile(FLAGS.output, FLAGS.directory, - FLAGS.root_directory, FLAGS.mtime, - FLAGS.enable_mtime_preservation, - FLAGS.force_posixpath, FLAGS.gzip_compression_level, - FLAGS.zstd_compression_level, FLAGS.compression) as output: - def file_attributes(filename): - if filename.startswith('/'): - filename = filename[1:] - return { - 'mode': mode_map.get(filename, default_mode), - 'ids': ids_map.get(filename, default_ids), - 'names': names_map.get(filename, default_ownername), - } - - if FLAGS.manifest: - with open(FLAGS.manifest, 'r') as f: - manifest = json.load(f) - for f in manifest.get('files', []): - output.add_file(f['src'], f['dst'], **file_attributes(f['dst'])) - for f in manifest.get('empty_files', []): + global _metrics_file + start_time = time.perf_counter() + + # Open metrics output file if specified + if FLAGS.metrics_output: + _metrics_file = open(FLAGS.metrics_output, 'w') + + try: + # Parse modes arguments + default_mode = None + if FLAGS.mode: + # Convert from octal + default_mode = int(FLAGS.mode, 8) + + mode_map = {} + if FLAGS.modes: + for filemode in FLAGS.modes: + (f, mode) = filemode.split('=', 1) + if f[0] == '/': + f = f[1:] + mode_map[f] = int(mode, 8) + + default_ownername = ('', '') + if FLAGS.owner_name: + default_ownername = FLAGS.owner_name.split('.', 1) + names_map = {} + if FLAGS.owner_names: + for file_owner in FLAGS.owner_names: + (f, owner) = file_owner.split('=', 1) + (user, group) = owner.split('.', 1) + if f[0] == '/': + f = f[1:] + names_map[f] = (user, group) + + default_ids = FLAGS.owner.split('.', 1) + default_ids = (int(default_ids[0]), int(default_ids[1])) + ids_map = {} + if FLAGS.owners: + for file_owner in FLAGS.owners: + (f, owner) = file_owner.split('=', 1) + (user, group) = owner.split('.', 1) + if f[0] == '/': + f = f[1:] + ids_map[f] = (int(user), int(group)) + + # Add objects to the tar file + with TarFile(FLAGS.output, FLAGS.directory, + FLAGS.root_directory, FLAGS.mtime, + FLAGS.enable_mtime_preservation, + FLAGS.force_posixpath, FLAGS.gzip_compression_level, + FLAGS.zstd_compression_level, FLAGS.compression) as output: + def file_attributes(filename): + if filename.startswith('/'): + filename = filename[1:] + return { + 'mode': mode_map.get(filename, default_mode), + 'ids': ids_map.get(filename, default_ids), + 'names': names_map.get(filename, default_ownername), + } + + if FLAGS.manifest: + with timing_context('process_manifest'): + with open(FLAGS.manifest, 'r') as f: + manifest = json.load(f) + for f in manifest.get('files', []): + output.add_file(f['src'], f['dst'], **file_attributes(f['dst'])) + for f in manifest.get('empty_files', []): + output.add_empty_file(f, **file_attributes(f)) + for d in manifest.get('empty_dirs', []): + output.add_empty_dir(d, **file_attributes(d)) + for d in manifest.get('empty_root_dirs', []): + output.add_empty_root_dir(d, **file_attributes(d)) + for f in manifest.get('symlinks', []): + output.add_link(f['linkname'], f['target']) + for tar in manifest.get('tars', []): + output.add_tar(tar) + + with timing_context('process_files'): + for f in FLAGS.file: + (inf, tof) = f.split('=', 1) + output.add_file(inf, tof, **file_attributes(tof)) + for f in FLAGS.empty_file: output.add_empty_file(f, **file_attributes(f)) - for d in manifest.get('empty_dirs', []): - output.add_empty_dir(d, **file_attributes(d)) - for d in manifest.get('empty_root_dirs', []): - output.add_empty_root_dir(d, **file_attributes(d)) - for f in manifest.get('symlinks', []): - output.add_link(f['linkname'], f['target']) - for tar in manifest.get('tars', []): + for f in FLAGS.empty_dir: + output.add_empty_dir(f, **file_attributes(f)) + + with timing_context('process_tars'): + for tar in FLAGS.tar: output.add_tar(tar) + for link in FLAGS.link: + l = link.split(':', 1) + output.add_link(l[0], l[1]) - for f in FLAGS.file: - (inf, tof) = f.split('=', 1) - output.add_file(inf, tof, **file_attributes(tof)) - for f in FLAGS.empty_file: - output.add_empty_file(f, **file_attributes(f)) - for f in FLAGS.empty_dir: - output.add_empty_dir(f, **file_attributes(f)) - for tar in FLAGS.tar: - output.add_tar(tar) - for link in FLAGS.link: - l = link.split(':', 1) - output.add_link(l[0], l[1]) + # Print metrics summary if enabled + total_time = time.perf_counter() - start_time + if _METRICS_ENABLED: + _log_metric(f"Total execution time: {total_time:.4f}s") + print_metrics_summary() + finally: + # Close metrics file if it was opened + if _metrics_file: + _metrics_file.close() if __name__ == '__main__': @@ -834,6 +951,9 @@ def validate_link(l): parser.add_argument('--compression', type=str, default='gz', help='Set the compression type: gz, bz2, xz, lzma, zstd, or empty string for no compression.') + parser.add_argument('--metrics_output', type=str, default=None, + help='Optional file path to write performance metrics to (instead of stderr).') + main(parser.parse_args()) diff --git a/test_metrics.sh b/test_metrics.sh new file mode 100755 index 0000000..fcbcb50 --- /dev/null +++ b/test_metrics.sh @@ -0,0 +1,96 @@ +#!/bin/bash +# Simple script to demonstrate BUILD_TAR_METRICS functionality + +set -e + +echo "======================================" +echo "Build Tar Metrics Demo" +echo "======================================" +echo + +# Create a temp directory for test files +TMPDIR=$(mktemp -d) +echo "Creating test files in: $TMPDIR" + +# Create some test files +echo "Test content 1" > "$TMPDIR/test1.txt" +echo "Test content 2" > "$TMPDIR/test2.txt" +mkdir -p "$TMPDIR/subdir" +echo "Test content 3" > "$TMPDIR/subdir/test3.txt" + +OUTPUT_TAR="$TMPDIR/output.tar.gz" + +echo +echo "Running build_tar.py WITHOUT metrics..." +echo "--------------------------------------" +python minidock/container_data_tools/build_tar.py \ + --output="$OUTPUT_TAR" \ + --file="$TMPDIR/test1.txt=/app/test1.txt" \ + --file="$TMPDIR/test2.txt=/app/test2.txt" \ + --compression=gz \ + 2>&1 | head -5 + +echo +echo "Build completed. Output size:" +ls -lh "$OUTPUT_TAR" +rm "$OUTPUT_TAR" + +echo +echo "======================================" +echo "Now running WITH metrics to stderr..." +echo "======================================" +echo + +BUILD_TAR_METRICS=1 python minidock/container_data_tools/build_tar.py \ + --output="$OUTPUT_TAR" \ + --file="$TMPDIR/test1.txt=/app/test1.txt" \ + --file="$TMPDIR/test2.txt=/app/test2.txt" \ + --file="$TMPDIR/subdir/test3.txt=/app/test3.txt" \ + --compression=gz \ + 2>&1 + +echo +echo "Build completed with metrics. Output size:" +ls -lh "$OUTPUT_TAR" +rm "$OUTPUT_TAR" + +echo +echo "======================================" +echo "Now running WITH metrics to file..." +echo "======================================" +echo + +METRICS_FILE="$TMPDIR/metrics.log" +BUILD_TAR_METRICS=1 python minidock/container_data_tools/build_tar.py \ + --output="$OUTPUT_TAR" \ + --file="$TMPDIR/test1.txt=/app/test1.txt" \ + --file="$TMPDIR/test2.txt=/app/test2.txt" \ + --file="$TMPDIR/subdir/test3.txt=/app/test3.txt" \ + --compression=gz \ + --metrics_output="$METRICS_FILE" + +echo +echo "Build completed. Metrics written to file:" +echo "--------------------------------------" +cat "$METRICS_FILE" +echo "--------------------------------------" +echo +echo "Output tar size:" +ls -lh "$OUTPUT_TAR" + +# Cleanup +echo +echo "Cleaning up..." +rm -rf "$TMPDIR" + +echo +echo "======================================" +echo "Demo complete!" +echo "======================================" +echo +echo "To enable metrics in your builds:" +echo " export BUILD_TAR_METRICS=1" +echo +echo "Or inline:" +echo " BUILD_TAR_METRICS=1 bazel build //your:target" + From 0986a329a58e4b0948ee617adba0144245a9a7ef Mon Sep 17 00:00:00 2001 From: Adam Singer Date: Tue, 2 Dec 2025 17:55:43 +0000 Subject: [PATCH 2/2] update doc --- BUILD_TAR_METRICS.md | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/BUILD_TAR_METRICS.md b/BUILD_TAR_METRICS.md index 2d6c76f..cb030ab 100644 --- a/BUILD_TAR_METRICS.md +++ b/BUILD_TAR_METRICS.md @@ -19,9 +19,9 @@ Or inline: BUILD_TAR_METRICS=1 python minidock/container_data_tools/build_tar.py [your arguments...] ``` -### Output to File +### Output Location -By default, metrics are written to **stderr**. To write metrics to a file instead, use the `--metrics_output` parameter: +By default (when `--metrics_output` is not specified), metrics are written to **stderr**. To write metrics to a dedicated file instead, use the `--metrics_output` parameter: ```bash BUILD_TAR_METRICS=1 python minidock/container_data_tools/build_tar.py \ @@ -30,10 +30,10 @@ BUILD_TAR_METRICS=1 python minidock/container_data_tools/build_tar.py \ --metrics_output=metrics.log ``` -This is useful when: +When `--metrics_output` is specified, all metrics are written exclusively to the specified file (not stderr). This is useful when: - You want to separate metrics from regular stderr output - You need to parse metrics programmatically -- You're running in a Bazel build and want to preserve metrics logs +- You're running in a Bazel build and want to preserve metrics logs as build artifacts ### Using with Bazel (`container_data` rule) @@ -88,7 +88,11 @@ The metrics system tracks timing for: ## Output Format -Metrics are written to **stderr** in real-time as operations complete: +When `BUILD_TAR_METRICS=1` is set, metrics are logged in real-time as operations complete. The output destination depends on whether `--metrics_output` is specified: +- **Without `--metrics_output`**: Metrics are written to stderr +- **With `--metrics_output`**: Metrics are written to the specified file + +Example output format: ``` [BUILD_TAR_METRIC] TarFile.__init__: 0.0023s @@ -126,7 +130,7 @@ The summary is sorted by total time (descending), so the most expensive operatio ## Example ```bash -# Run with metrics enabled (output to stderr) +# Run with metrics enabled (output to stderr by default) BUILD_TAR_METRICS=1 bazel build //your:target 2>&1 | grep BUILD_TAR_METRIC # Or if running the script directly with stderr output @@ -136,14 +140,14 @@ BUILD_TAR_METRICS=1 python minidock/container_data_tools/build_tar.py \ --compression=gz \ 2>&1 | tee metrics.log -# Run with metrics output to a dedicated file +# Run with metrics output to a dedicated file (not stderr) BUILD_TAR_METRICS=1 python minidock/container_data_tools/build_tar.py \ --output=out.tar.gz \ --file=myfile.txt=/dest/path.txt \ --compression=gz \ --metrics_output=build_metrics.log -# Then view the metrics +# Then view the metrics file cat build_metrics.log ```