Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 19 additions & 7 deletions .github/workflows/examples.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,26 +19,38 @@ jobs:
run: |
uv venv && source .venv/bin/activate
uv pip install ".[examples]"
- name: Detect GPU availability.
id: gpu_check
run: |
source .venv/bin/activate
if python -c "import torch; raise SystemExit(0 if torch.cuda.is_available() else 1)"; then
echo "has_gpu=true" >> "$GITHUB_OUTPUT"
else
echo "has_gpu=false" >> "$GITHUB_OUTPUT"
echo "::warning::No usable GPU detected on this runner; GPU-only steps will be skipped."
fi
- name: Run single GPU example with Adam to serve as a baseline.
run: |
source .venv/bin/activate
CUDA_VISIBLE_DEVICES="" python -m distributed_shampoo.examples.cifar10_example optimizer=adam batch_size=1024
- name: Run single GPU examples with Distributed Shampoo and different graftings on CPU.
run: |
source .venv/bin/activate
CUDA_VISIBLE_DEVICES="" python -m distributed_shampoo.examples.cifar10_example optimizer=shampoo optimizer.precondition_frequency=30 'optimizer.grafting_config={_target_:distributed_shampoo.AdaGradPreconditionerConfig,epsilon:1e-8}' epochs=1 batch_size=1024
CUDA_VISIBLE_DEVICES="" python -m distributed_shampoo.examples.cifar10_example optimizer=shampoo optimizer.precondition_frequency=30 'optimizer.grafting_config={_target_:distributed_shampoo.AdamPreconditionerConfig,beta2:0.999,epsilon:1e-8}' epochs=1 batch_size=1024
CUDA_VISIBLE_DEVICES="" python -m distributed_shampoo.examples.cifar10_example optimizer=shampoo optimizer.precondition_frequency=30 'optimizer.grafting_config={_target_:distributed_shampoo.RMSpropPreconditionerConfig,beta2:0.999,epsilon:1e-8}' epochs=1 batch_size=1024
CUDA_VISIBLE_DEVICES="" python -m distributed_shampoo.examples.cifar10_example optimizer=shampoo optimizer.precondition_frequency=30 'optimizer.grafting_config={_target_:distributed_shampoo.SGDPreconditionerConfig}' epochs=1 batch_size=1024
CUDA_VISIBLE_DEVICES="" python -m distributed_shampoo.examples.cifar10_example optimizer=shampoo optimizer.precondition_frequency=30 '~optimizer.grafting_config' '+optimizer.grafting_config={_target_:distributed_shampoo.AdaGradPreconditionerConfig,epsilon:1e-8}' epochs=1 batch_size=1024
CUDA_VISIBLE_DEVICES="" python -m distributed_shampoo.examples.cifar10_example optimizer=shampoo optimizer.precondition_frequency=30 '~optimizer.grafting_config' '+optimizer.grafting_config={_target_:distributed_shampoo.AdamPreconditionerConfig,beta2:0.999,epsilon:1e-8}' epochs=1 batch_size=1024
CUDA_VISIBLE_DEVICES="" python -m distributed_shampoo.examples.cifar10_example optimizer=shampoo optimizer.precondition_frequency=30 '~optimizer.grafting_config' '+optimizer.grafting_config={_target_:distributed_shampoo.RMSpropPreconditionerConfig,beta2:0.999,epsilon:1e-8}' epochs=1 batch_size=1024
CUDA_VISIBLE_DEVICES="" python -m distributed_shampoo.examples.cifar10_example optimizer=shampoo optimizer.precondition_frequency=30 '~optimizer.grafting_config' '+optimizer.grafting_config={_target_:distributed_shampoo.SGDPreconditionerConfig}' epochs=1 batch_size=1024
- name: Run single GPU example on GPU.
if: steps.gpu_check.outputs.has_gpu == 'true'
run: |
source .venv/bin/activate
python -m distributed_shampoo.examples.cifar10_example optimizer=shampoo optimizer.precondition_frequency=30 'optimizer.grafting_config={_target_:distributed_shampoo.AdamPreconditionerConfig,beta2:0.999,epsilon:1e-8}' epochs=1 batch_size=1024
python -m distributed_shampoo.examples.cifar10_example optimizer=shampoo optimizer.precondition_frequency=30 '~optimizer.grafting_config' '+optimizer.grafting_config={_target_:distributed_shampoo.AdamPreconditionerConfig,beta2:0.999,epsilon:1e-8}' epochs=1 batch_size=1024
- name: Run DDP example on CPU.
run: |
source .venv/bin/activate
CUDA_VISIBLE_DEVICES="" torchrun --standalone --nnodes=1 --nproc_per_node=2 -m distributed_shampoo.examples.cifar10_example parallelism=ddp optimizer=shampoo optimizer.precondition_frequency=15 'optimizer.grafting_config={_target_:distributed_shampoo.AdamPreconditionerConfig,beta2:0.999,epsilon:1e-8}' epochs=1 local_batch_size=1024 backend=gloo
CUDA_VISIBLE_DEVICES="" torchrun --standalone --nnodes=1 --nproc_per_node=2 -m distributed_shampoo.examples.cifar10_example parallelism=ddp optimizer=shampoo optimizer.precondition_frequency=15 '~optimizer.grafting_config' '+optimizer.grafting_config={_target_:distributed_shampoo.AdamPreconditionerConfig,beta2:0.999,epsilon:1e-8}' epochs=1 local_batch_size=1024 backend=gloo
- name: Run DDP example on GPU.
if: steps.gpu_check.outputs.has_gpu == 'true'
run: |
source .venv/bin/activate
torchrun --standalone --nnodes=1 --nproc_per_node=1 -m distributed_shampoo.examples.cifar10_example parallelism=ddp optimizer=shampoo optimizer.precondition_frequency=30 'optimizer.grafting_config={_target_:distributed_shampoo.AdamPreconditionerConfig,beta2:0.999,epsilon:1e-8}' epochs=1 local_batch_size=1024
torchrun --standalone --nnodes=1 --nproc_per_node=1 -m distributed_shampoo.examples.cifar10_example parallelism=ddp optimizer=shampoo optimizer.precondition_frequency=30 '~optimizer.grafting_config' '+optimizer.grafting_config={_target_:distributed_shampoo.AdamPreconditionerConfig,beta2:0.999,epsilon:1e-8}' epochs=1 local_batch_size=1024
14 changes: 7 additions & 7 deletions distributed_shampoo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@
from distributed_shampoo.shampoo_types import (
AdaGradPreconditionerConfig,
AdamPreconditionerConfig,
AmortizedPreconditionerConfig,
BaseShampooPreconditionerConfig,
ClassicShampooPreconditionerConfig,
DDPDistributedConfig,
DefaultEigenvalueCorrectedShampooConfig,
DefaultShampooConfig,
Expand All @@ -59,7 +60,6 @@
RootInvShampooPreconditionerConfig,
ScheduleFreeConfig,
SGDPreconditionerConfig,
ShampooPreconditionerConfig,
ShampooPT2CompileConfig,
SignDescentPreconditionerConfig,
SingleDeviceDistributedConfig,
Expand All @@ -85,14 +85,14 @@
# `precision_config`.
# `preconditioner_config` options.
"PreconditionerConfig", # Abstract base class.
"AmortizedPreconditionerConfig", # Abstract base class (based on `PreconditionerConfig`).
"ShampooPreconditionerConfig", # Abstract base class (based on `AmortizedPreconditionerConfig`).
"RootInvShampooPreconditionerConfig", # Based on `ShampooPreconditionerConfig`.
"BaseShampooPreconditionerConfig", # Abstract base class (based on `PreconditionerConfig`).
"ClassicShampooPreconditionerConfig", # Abstract base class (based on `BaseShampooPreconditionerConfig`).
"RootInvShampooPreconditionerConfig", # Based on `ClassicShampooPreconditionerConfig`.
"DefaultShampooConfig", # Default `RootInvShampooPreconditionerConfig` using `EigenConfig`.
"RootInvKLShampooPreconditionerConfig", # Based on `RootInvShampooPreconditionerConfig`.
"EigendecomposedShampooPreconditionerConfig", # Based on `ShampooPreconditionerConfig`.
"EigendecomposedShampooPreconditionerConfig", # Based on `ClassicShampooPreconditionerConfig`.
"EigendecomposedKLShampooPreconditionerConfig", # Based on `EigendecomposedShampooPreconditionerConfig`.
"EigenvalueCorrectedShampooPreconditionerConfig", # Based on `AmortizedPreconditionerConfig`.
"EigenvalueCorrectedShampooPreconditionerConfig", # Based on `BaseShampooPreconditionerConfig`.
"DefaultEigenvalueCorrectedShampooConfig", # Default `EigenvalueCorrectedShampooPreconditionerConfig` using `EighEigendecompositionConfig`.
"DefaultSOAPConfig", # Default `EigenvalueCorrectedShampooPreconditionerConfig` using `QREigendecompositionConfig`.
"SpectralDescentPreconditionerConfig", # Based on `PreconditionerConfig`.
Expand Down
Loading