Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions configs/dataset/droidLerobot.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
defaults:
- base_dataset # Inherit from base
name: rpadLerobot

# The LeRobot dataset repo containing post-processed goals.
# Overriding data_dir means you must point it directly at LEROBOT_HOME / repo_id yourself.
data_dir: null # null keeps the automatically computed path; set it only if you really want to override that
repo_id: sriramsk/droid_lerobot
use_subgoals: False

val_episode_ratio: 0.05 # fraction of episodes in val set (0.05 = 5%)

# Multi-camera configuration (first camera is primary)
cameras:
- name: cam_1
color_key: "observation.images.cam_1.color"
depth_key: "observation.images.cam_1.depth"
- name: cam_2
color_key: "observation.images.cam_2.color"
depth_key: "observation.images.cam_2.depth"

gripper_pcd_key: "observation.points.gripper_pcds"

rgb_feat: False # If true, compute DINOv2 features, else just return RGB
7 changes: 7 additions & 0 deletions configs/model/dino_3dgp.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,13 @@ is_gmm: True # Train a GMM and minimize negative log likelihood instead
fixed_variance: [0.01, 0.05, 0.1, 0.25, 0.5] # for gmm
uniform_weights_coeff: 0.1 # coefficient for nll loss term when we use uniform mixing weights instead of pred

# Optimal Transport loss settings (for domain adaptation)
use_ot_loss: True # Enable optimal transport loss for aligning human/robot latent distributions
ot_alpha: 0.05 # Weight for combining OT loss with main loss
ot_lambda: 0.1 # Discount factor for matching latents (lower = stronger discount)
ot_epsilon: 0.1 # Regularization parameter for Sinkhorn algorithm
ot_percentile: 0.1 # Percentile threshold for determining best matches based on goal similarity

# Model-specific training augmentations
image_token_dropout: True # Enable image token dropout during training
gripper_noise_prob: 0.4 # Probability of applying gripper noise augmentation
Expand Down
18 changes: 18 additions & 0 deletions configs/training/droidLerobot_dino_3dgp.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
defaults:
- base_train

# Override augment_train to image_color_only for dino 3dgp (safe for multiview)
augment_train: "image_color_only"

epochs: 100
batch_size: 128
val_batch_size: 128

# ModelCheckpoint configurations
checkpoints:
rmse:
monitor: val/rmse
mode: min
rmse_and_std_combi:
monitor: val/rmse_and_std_combi
mode: min
33 changes: 32 additions & 1 deletion pixi.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ dependencies = [
"peft>=0.17,<0.18",
"decord>=0.6.0,<0.7",
"mink>=0.0.11,<0.0.12",
"pot>=0.9.6.post1,<0.10",
]

[project.optional-dependencies]
Expand Down
160 changes: 160 additions & 0 deletions scripts/analyze_latents.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
#!/usr/bin/env python3
"""
Analyze and visualize latent representations from two datasets using t-SNE.
Also computes Wasserstein-2 distance between distributions.
"""

import argparse
from pathlib import Path
from typing import List, Tuple

import matplotlib.pyplot as plt
import numpy as np
import ot
import torch
from sklearn.manifold import TSNE


def load_latents_from_path(dataset_path: Path) -> Tuple[np.ndarray, List[str]]:
    """
    Load latent tensors from episode*.pt files in the given path.

    Every matching file is read with ``torch.load`` and must contain a dict
    with a "latents" entry. The tensors are converted to NumPy and
    concatenated along axis 0 in sorted-filename order.

    Args:
        dataset_path: Path to the dataset directory

    Returns:
        Tuple of (concatenated latents array of shape [total_frames, latent_dim], list of episode names)

    Raises:
        ValueError: If no episode*.pt file is found in ``dataset_path``.
    """
    episode_files = sorted(dataset_path.glob("episode*.pt"))
    if not episode_files:
        # Without this guard np.concatenate([]) raises an opaque
        # "need at least one array to concatenate" ValueError.
        raise ValueError(f"No episode*.pt files found in {dataset_path}")

    all_latents, episode_names = [], []
    for episode_file in episode_files:
        # map_location="cpu" lets files saved from a GPU load on CPU-only hosts.
        latent_tensor = torch.load(episode_file, map_location="cpu")["latents"]
        # detach() so tensors that still track gradients can become arrays.
        all_latents.append(latent_tensor.detach().cpu().numpy())
        episode_names.append(episode_file.name)

    return np.concatenate(all_latents, axis=0), episode_names


def compute_wasserstein2_distance(X: np.ndarray, Y: np.ndarray) -> float:
    """
    Return the Wasserstein-2 distance between two point clouds.

    Both clouds are treated as empirical distributions with uniform mass on
    every sample. The transport cost is the squared Euclidean distance, and
    the result is the square root of the value returned by ``ot.emd2``.

    Args:
        X: First point cloud of shape [n_samples, n_features]
        Y: Second point cloud of shape [m_samples, n_features]

    Returns:
        Wasserstein-2 distance
    """
    n_x, n_y = len(X), len(Y)

    # Pairwise squared-Euclidean ground cost between the two clouds.
    cost_matrix = ot.dist(X, Y, metric="sqeuclidean")

    # Each sample carries the same share of its distribution's mass.
    mass_x = np.ones(n_x) / n_x
    mass_y = np.ones(n_y) / n_y

    # emd2 yields the squared W2 value for this cost; take the root.
    squared_distance = ot.emd2(mass_x, mass_y, cost_matrix)
    return np.sqrt(squared_distance)


def main():
    """
    Command-line entry point: compare the latents of two datasets.

    Loads the latents of both datasets, prints their Wasserstein-2 distance,
    embeds the combined latents in 2-D with t-SNE, and saves a scatter plot
    (dataset 1 in blue, dataset 2 in red) to the ``--output`` path.
    """
    # Local import: only needed to pick the right TSNE keyword below.
    import inspect

    parser = argparse.ArgumentParser(
        description="Analyze latents using t-SNE visualization"
    )
    parser.add_argument(
        "--dset1_path", type=str, required=True, help="Path to dataset 1"
    )
    parser.add_argument(
        "--dset2_path", type=str, required=True, help="Path to dataset 2"
    )
    parser.add_argument(
        "--perplexity",
        type=float,
        default=30.0,
        help="t-SNE perplexity parameter (default: 30)",
    )
    parser.add_argument(
        "--n_iter",
        type=int,
        default=1000,
        help="Number of t-SNE iterations (default: 1000)",
    )
    parser.add_argument(
        "--output",
        type=str,
        default="latent_tsne.png",
        help="Output figure path (default: latent_tsne.png)",
    )

    args = parser.parse_args()

    dset1_path = Path(args.dset1_path)
    dset2_path = Path(args.dset2_path)

    dset1_latents, _ = load_latents_from_path(dset1_path)
    dset2_latents, _ = load_latents_from_path(dset2_path)

    # Compute Wasserstein-2 distance
    w2_dist = compute_wasserstein2_distance(dset1_latents, dset2_latents)
    print(f"\nWasserstein-2 distance: {w2_dist:.6f}\n")

    # Combine all latents, remembering which rows belong to which dataset
    all_latents = np.concatenate([dset1_latents, dset2_latents], axis=0)
    dset1_labels = np.zeros(len(dset1_latents))
    dset2_labels = np.ones(len(dset2_latents))
    all_labels = np.concatenate([dset1_labels, dset2_labels])

    # scikit-learn 1.5 renamed TSNE's `n_iter` to `max_iter` and later removed
    # the old name; choose whichever keyword the installed version accepts.
    tsne_params = inspect.signature(TSNE).parameters
    iter_kwarg = "max_iter" if "max_iter" in tsne_params else "n_iter"
    tsne = TSNE(
        n_components=2,
        perplexity=args.perplexity,
        random_state=42,
        verbose=1,
        **{iter_kwarg: args.n_iter},
    )
    embeddings = tsne.fit_transform(all_latents)

    dset1_embeddings = embeddings[all_labels == 0]
    dset2_embeddings = embeddings[all_labels == 1]

    plt.figure(figsize=(12, 8))
    plt.scatter(
        dset1_embeddings[:, 0],
        dset1_embeddings[:, 1],
        c="blue",
        alpha=0.6,
        s=10,
        label=f"{args.dset1_path} (n={len(dset1_latents)})",
    )
    plt.scatter(
        dset2_embeddings[:, 0],
        dset2_embeddings[:, 1],
        c="red",
        alpha=0.6,
        s=10,
        label=f"{args.dset2_path} (n={len(dset2_latents)})",
    )
    plt.xlabel("t-SNE Dimension 1", fontsize=12)
    plt.ylabel("t-SNE Dimension 2", fontsize=12)
    plt.title(
        f"t-SNE Visualization of Latents (W2: {w2_dist:.4f})",
        fontsize=14,
        fontweight="bold",
    )
    plt.legend(fontsize=11)
    plt.grid(True, alpha=0.3)
    plt.tight_layout()

    # Save figure; create the target directory so a nested --output path
    # does not fail after the (slow) t-SNE fit has already finished.
    Path(args.output).parent.mkdir(parents=True, exist_ok=True)
    plt.savefig(args.output, dpi=300, bbox_inches="tight")
    plt.close()
    print(f"Figure saved to: {args.output}")
    print("\nDone!")


if __name__ == "__main__":
main()
55 changes: 48 additions & 7 deletions scripts/eval_lerobot_episode.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import csv
import json
import random
from pathlib import Path

import cv2
import hydra
Expand Down Expand Up @@ -166,16 +168,21 @@ def main(cfg):
preds = trainer.predict(model, datamodule=eval_datamodule)
preds_dict = {tag: {} for tag in eval_datamodule.eval_tags}

exp_dir = Path(cfg.log_dir) / f"{cfg.checkpoint.run_id}_{cfg.dataset.repo_id}"
exp_dir.mkdir(parents=True)
all_episode_metrics = []

loader = eval_datamodule.predict_dataloader()
for i, episode_id in enumerate(episode_idx):
heatmaps = []
raw_heatmaps = []
heatmaps, raw_heatmaps, episode_latents = [], [], []
metrics = {"pix_dist": [], "rmse": []}

if len(episode_idx) == 1:
preds = [preds]

for pred, batch in tqdm(zip(preds[i], loader[i]), total=len(loader[i])):
for pred, batch in tqdm(
zip(preds[i], loader[i]), total=len(loader[i]), desc=f"Episode {episode_id}"
):
rgb = batch["rgbs"][:, 0].cpu().numpy() # B, H, W, 3
batch_size = rgb.shape[0]

Expand All @@ -197,6 +204,7 @@ def main(cfg):
pred_coord = pred["pred_coord"].cpu().numpy().astype(int) # B, 2
metrics["pix_dist"].append(pred["pix_dist"])
metrics["rmse"].append(pred["rmse"])
episode_latents.append(pred["z"].cpu())

for j in range(batch_size):
heatmap_ = generate_heatmap_from_points(
Expand All @@ -215,19 +223,52 @@ def main(cfg):

if len(raw_heatmaps) > 0:
save_video(
f"{cfg.log_dir}/episode_{episode_id}_raw_heatmaps_{cfg.model.name}.mp4",
str(exp_dir / f"episode_{episode_id}_raw_heatmaps.mp4"),
frames=raw_heatmaps,
)
save_video(
f"{cfg.log_dir}/episode_{episode_id}_heatmap_{cfg.model.name}.mp4",
str(exp_dir / f"episode_{episode_id}_heatmap.mp4"),
frames=heatmaps,
)

if len(episode_latents) > 0:
episode_latents_tensor = torch.cat(episode_latents, dim=0) # (N_frames, D)
latent_file = exp_dir / f"episode_{episode_id}.pt"
torch.save(
{
"latents": episode_latents_tensor,
"n_frames": episode_latents_tensor.shape[0],
"latent_dim": episode_latents_tensor.shape[1],
},
latent_file,
)

# Compute and store metrics for this episode
episode_metric_dict = {"episode_id": episode_id}
for key in metrics:
if len(metrics[key]) != 0:
metric_val = torch.cat(metrics[key])
print(f"Mean {key}:", metric_val.mean().item())
print(f"Std. {key}:", metric_val.std().item())
mean_val = metric_val.mean().item()
std_val = metric_val.std().item()
episode_metric_dict[f"{key}_mean"] = mean_val
episode_metric_dict[f"{key}_std"] = std_val
print(
f"Episode {episode_id} - {key}: mean={mean_val:.4f}, std={std_val:.4f}"
)

all_episode_metrics.append(episode_metric_dict)

# Save metrics to CSV sorted by episode number
if all_episode_metrics:
all_episode_metrics.sort(key=lambda x: x["episode_id"])
csv_file = exp_dir / "metrics.csv"
fieldnames = list(all_episode_metrics[0].keys())
with open(csv_file, "w", newline="") as f:
writer = csv.DictWriter(f, fieldnames=fieldnames)
writer.writeheader()
writer.writerows(all_episode_metrics)
print(f"\nSaved metrics to {csv_file}")
print(f"Experiment results saved to: {exp_dir}")


if __name__ == "__main__":
Expand Down
1 change: 1 addition & 0 deletions src/lfd3d/datasets/lerobot/lerobot_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ def __init__(
self.GRIPPER_IDX = {
"aloha": np.array([6, 197, 174]),
"human": np.array([343, 763, 60]),
"droid": np.array([356, 232, 16]),
"libero_franka": np.array(
[0, 1, 2]
), # gripper pcd in dataset: [left right top grasp-center] in agentview; (right gripper, left gripper, top, grasp-center)
Expand Down
Loading
Loading