Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/gr00t_n1_5/conf/serve.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ experiment:
exp_dir: outputs/${experiment.exp_name}
task:
type: serve
entrypoint: flagscale/serve/run_serve_gr00t_n1_5.py
entrypoint: flagscale/serve/run_serve_qwen_gr00t.py
runner:
hostfile: null
deploy:
Expand Down
2 changes: 1 addition & 1 deletion examples/gr00t_n1_5/conf/serve/gr00t_n1_5.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
host: 0.0.0.0
port: 5000
model_variant: Gr00tN15
model: /workspace/models/gr00t_n1_5_train/checkpoints/last
model: ./outputs/gr00t_n1_5_train/checkpoints/last/pretrained_model
device: "cuda"
2 changes: 1 addition & 1 deletion examples/gr00t_n1_5/conf/train.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
defaults:
- _self_
- train: gr00t_n1_5
- train: libero_spatial_demo

experiment:
exp_name: gr00t_n1_5_train
Expand Down
123 changes: 123 additions & 0 deletions examples/gr00t_n1_5/conf/train/libero_spatial_demo.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# Demo fine-tuning config: GR00T N1.5 on the LIBERO-Spatial LeRobot dataset.
# NOTE(review): indentation/nesting below was reconstructed from a flattened
# copy of this file — confirm the section boundaries (system / checkpoint /
# model / optimizer / data) against the original before relying on them.

system:
  batch_size: 32
  train_steps: 1000
  log_freq: 10
  grad_clip_norm: 10.0
  use_amp: true
  shuffle: true
  num_workers: 8

checkpoint:
  output_directory: ${experiment.exp_dir}
  save_checkpoint: true
  save_freq: 500
  # Path to a checkpoint directory to resume training from (e.g. outputs/gr00t_n1_5_train/checkpoints/001000)
  # resume_from:

model:
  model_name: gr00t_n1_5
  # Path or HuggingFace model ID for the pretrained GR00T N1.5 model
  checkpoint_dir: /workspace/models/nvidia/GR00T-N1.5-3B

  # Fine-tuning control
  tune_llm: true
  tune_visual: true
  tune_projector: true
  tune_diffusion_model: true
  compute_dtype: bfloat16

  # Embodiment tag to use for training (e.g. 'new_embodiment', 'gr1')
  embodiment_tag: new_embodiment

  # Number of future action steps predicted per forward pass.
  # Determines action_delta_indices = [0, 1, ..., chunk_size - 1].
  chunk_size: 16

  # Padding dimensions — shorter state/action sequences are zero-padded to these sizes
  max_state_dim: 64
  max_action_dim: 32

  normalization_mapping:
    VISUAL: IDENTITY
    STATE: MIN_MAX
    ACTION: MIN_MAX

  # LoRA fine-tuning (lora_rank: 0 disables LoRA)
  # lora_rank: 0
  # lora_alpha: 16
  # lora_dropout: 0.1
  # lora_full_model: false

# ============================================================
# Module Freezing Configuration
# ============================================================
# Freezing logic: freeze_patterns are applied first, then keep_patterns override.
# Patterns are regex matched against full parameter names.
#
# Common patterns for GR00T N1.5:
#   - "_groot_model\\.backbone\\..*"       # Entire backbone (VLM + vision)
#   - "_groot_model\\.action_head\\..*"    # Action diffusion head
#
# freeze:
#   freeze_patterns:
#     - "_groot_model\\.backbone\\..*"

optimizer:
  name: AdamW
  lr: 1.0e-4
  betas: [0.95, 0.999]
  eps: 1.0e-08
  weight_decay: 1.0e-05
  scheduler:
    name: cosine_decay_with_warmup
    warmup_steps: 500
    decay_steps: 10000
    peak_lr: 1.0e-4
    decay_lr: 1.0e-5

data:
  dataset_type: lerobot
  data_path: /workspace/datasets/tailong-wu/libero_spatial_no_noops_1.0.0_lerobot_v3.0
  tolerance_s: 0.0001
  preprocessor:
    name: policy_preprocessor
    steps:
      # 1. Rename keys if needed (e.g., dataset-specific camera names)
      - registry_name: rename_observations_processor
        config:
          rename_map: {}
      # 2. Add batch dimension for single samples
      - registry_name: to_batch_processor
        config: {}
      # 3. Pack video/state/action/language/embodiment; apply optional min-max normalization before padding
      - registry_name: groot_pack_inputs
        config:
          state_horizon: 1
          # Keep action_horizon / max dims in sync with model.chunk_size,
          # model.max_state_dim and model.max_action_dim above.
          action_horizon: 16
          max_state_dim: 64
          max_action_dim: 32
          language_key: task
          embodiment_tag: new_embodiment
          normalize_min_max: true
      # 4. Eagle encode (creates eagle_content)
      - registry_name: groot_eagle_encode
        config: {}
      # 5. Collate eagle_content -> eagle_* tensors
      - registry_name: groot_eagle_collate
        config: {}
      # 6. Move to device
      - registry_name: device_processor
        config:
          device: cuda
          float_dtype: null
  postprocessor:
    name: policy_postprocessor
    steps:
      - registry_name: groot_action_unpack_unnormalize
        config:
          env_action_dim: 7
          normalize_min_max: true
      - registry_name: device_processor
        config:
          device: cpu
          float_dtype: null
2 changes: 1 addition & 1 deletion examples/pi0_5/conf/serve/pi0_5.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
host: 0.0.0.0
port: 5000
model_variant: "pi0.5"
model: /workspace/models/pi0_5_train/checkpoints/last/pretrained_model
model: ./outputs/pi0_5_train/checkpoints/last/pretrained_model
device: "cuda"
# Maps client-sent observation keys to the keys the model was trained with.
# Format: {key_from_client: key_expected_by_model}
Expand Down
2 changes: 1 addition & 1 deletion examples/pi0_5/conf/train.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
defaults:
- _self_
- train: pi0_5
- train: libero_spatial_demo

experiment:
exp_name: pi0_5_train
Expand Down
48 changes: 48 additions & 0 deletions examples/pi0_5/conf/train/libero_spatial_demo.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Demo fine-tuning config: Pi0.5 on the LIBERO-Spatial LeRobot dataset.
# NOTE(review): indentation/nesting below was reconstructed from a flattened
# copy of this file — confirm the section boundaries (system / checkpoint /
# model / optimizer / data) against the original before relying on them.

system:
  batch_size: 16
  train_steps: 2000
  log_freq: 10
  grad_clip_norm: 1.0
  use_amp: true
  shuffle: true
  num_workers: 4

checkpoint:
  output_directory: ${experiment.exp_dir}
  # Whether to save checkpoint
  save_checkpoint: true
  # Number of steps between checkpoints
  save_freq: 500
  # TODO(yupu): Support resuming from checkpoint

model:
  model_name: pi0.5
  # Path to the pretrained pi05_base model checkpoint
  checkpoint_dir: /workspace/models/lerobot/pi05_libero_base
  # Path to paligemma tokenizer
  tokenizer_path: /workspace/models/google/paligemma-3b-pt-224
  tokenizer_max_length: 200
  gradient_checkpointing: true
  freeze_vision_encoder: false

optimizer:
  name: AdamW
  lr: 2.5e-5
  betas: [0.9, 0.95]
  eps: 1.0e-8
  weight_decay: 0.01
  scheduler:
    warmup_steps: 1000
    decay_steps: 30000
    decay_lr: 2.5e-6

data:
  # Path to the training data
  data_path: /workspace/datasets/tailong-wu/libero_spatial_no_noops_1.0.0_lerobot_v3.0
  tolerance_s: 0.0001
  use_imagenet_stats: true
  # To match the input features naming from the dataset to the policy config
  rename_map:
    "observation.images.wrist_image": "observation.images.image2"
  # By default, Pi0.5 uses quantiles for state and action normalization, if false, it uses mean and std instead
  use_quantiles: false
12 changes: 6 additions & 6 deletions examples/qwen_gr00t/conf/serve/qwen_gr00t.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
host: 0.0.0.0
port: 5000
model_variant: QwenGr00t
model: /share/project/fengyupu/github/FlagScale_2/outputs/260320_qwen_gr00t_train_libero_goal_old_dataset/checkpoints/last
model: ./outputs/qwen_gr00t_train/checkpoints/last/pretrained_model
device: "cuda"
# Maps client-sent observation keys to the keys the model was trained with.
# Format: {key_from_client: key_expected_by_model}
rename_map:
"observation/image": "observation.images.image"
"observation/wrist_image": "observation.images.wrist_image"
"observation/state": "observation.state"
"prompt": "task"
# rename_map:
# "observation/image": "observation.images.image"
# "observation/wrist_image": "observation.images.wrist_image"
# "observation/state": "observation.state"
# "prompt": "task"
serve_preprocessor:
steps:
- registry_name: image_resize_processor
Expand Down
2 changes: 1 addition & 1 deletion examples/qwen_gr00t/conf/train.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
defaults:
- _self_
- train: qwen_gr00t
- train: libero_spatial_demo

experiment:
exp_name: qwen_gr00t_train
Expand Down
150 changes: 150 additions & 0 deletions examples/qwen_gr00t/conf/train/libero_spatial_demo.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
# Demo fine-tuning config: QwenGR00T (Qwen3-VL backbone + GR00T action head)
# on the LIBERO-Spatial LeRobot dataset.
# NOTE(review): indentation/nesting below was reconstructed from a flattened
# copy of this file — confirm the section boundaries against the original
# before relying on them.

system:
  batch_size: 8
  train_steps: 4000
  log_freq: 1
  grad_clip_norm: 1.0
  use_amp: true
  shuffle: true
  num_workers: 4
  # Weight applied to VLM language modelling loss when co-training with vlm_data_path.
  # Set to 0 or omit vlm_data_path to disable co-training.
  vlm_loss_scale: 0.1

checkpoint:
  output_directory: ${experiment.exp_dir}
  # Whether to save checkpoint
  save_checkpoint: true
  # Number of steps between checkpoints
  save_freq: 1000
  # Path to a checkpoint directory to resume training from (e.g. /path/to/checkpoints/005000)
  # resume_from:

model:
  model_name: qwen_gr00t
  vlm:
    type: qwen3-vl
    base_vlm: /workspace/models/Qwen/Qwen3-VL-4B-Instruct/
    attn_implementation: flash_attention_2
  action_model:
    # Whether to condition the action model on proprioceptive state (observation.state)
    use_state: false
    type: gr00t_action_head
    action_model_type: DiT-B
    hidden_size: 1024
    # Canonical lowercase booleans (was `True`, which yamllint flags and which
    # differs from the lowercase style used everywhere else in this file).
    add_pos_embed: true
    max_seq_len: 1024
    action_dim: 7
    state_dim: 7
    future_action_window_size: 7
    action_horizon: 8
    repeated_diffusion_steps: 4
    noise_beta_alpha: 1.5
    noise_beta_beta: 1.0
    noise_s: 0.999
    num_timestep_buckets: 1000
    num_inference_timesteps: 4
    num_target_vision_tokens: 32
    diffusion_model_cfg:
      cross_attention_dim: 2048
      dropout: 0.2
      final_dropout: true
      interleave_self_attention: true
      norm_type: ada_norm
      # null disables positional embeddings. NOTE(review): original read
      # `None`, which YAML parses as the *string* "None", not a null — verify
      # the consumer expected a real null here.
      positional_embeddings: null

  prompt_template: "Your task is {instruction}. To identify the key objects for your task. Locate their bounding boxes in [x1,y1,x2,y2] format."

  normalization_mapping:
    VISUAL: IDENTITY
    STATE: MIN_MAX
    ACTION: MIN_MAX

optimizer:
  name: AdamW
  lr: 2.5e-5
  betas: [0.9, 0.95]
  eps: 1.0e-08
  weight_decay: 1.0e-08
  # Per-module learning-rate overrides.
  param_groups:
    vlm:
      lr: 1.0e-05
    action_model:
      lr: 1.0e-04
  scheduler:
    name: cosine_with_min_lr
    warmup_steps: 200
    scheduler_kwargs:
      min_lr: 1.0e-06
    # Legacy fields kept for BC
    decay_steps: 30000
    decay_lr: 2.5e-6

# ============================================================
# Module Freezing Configuration
# ============================================================
# Freezing logic: freeze_patterns are applied first, then keep_patterns override.
# Patterns are regex matched against full parameter names.
#
# Common patterns for QwenGR00T:
#   - "qwen_vl_interface\\..*"                               # Entire VLM
#   - "qwen_vl_interface\\.model\\.visual\\..*"              # Vision encoder
#   - "qwen_vl_interface\\.model\\.model\\..*"               # Language model
#   - "qwen_vl_interface\\.model\\.model\\.layers\\.[0-9]\\."  # LLM layers 0-9
#   - "action_model\\..*"                                    # Action head
#   - "action_model\\.model\\.transformer_blocks\\.[0-7]\\." # DiT blocks 0-7
#
# freeze:
#   # SCENARIO A: Freeze VLM, train only action head
#   freeze_patterns:
#     - "qwen_vl_interface\\..*"
#
#   # SCENARIO B: Freeze VLM but keep projector trainable
#   # freeze_patterns:
#   #   - "qwen_vl_interface\\..*"
#   # keep_patterns:
#   #   - "qwen_vl_interface\\.model\\.visual\\.merger\\..*"
#
#   # SCENARIO C: Freeze everything except action decoder
#   # freeze_patterns:
#   #   - ".*"
#   # keep_patterns:
#   #   - "action_model\\.action_decoder\\..*"

data:
  dataset_type: lerobot
  wds:
    vision_root: ""
    action_key: eepose
    state_key: eepose
  # Path to the training data
  data_path: /workspace/datasets/tailong-wu/libero_spatial_no_noops_1.0.0_lerobot_v3.0
  # Path to VLM co-training data (WDS/Energon format). Leave unset to disable co-training.
  # vlm_data_path: /workspace/datasets/vlm_cotrain/
  tolerance_s: 0.0001
  preprocessor:
    name: policy_preprocessor
    steps:
      - registry_name: rename_observations_processor
        config:
          rename_map: {}
      - registry_name: to_batch_processor
        config: {}
      - registry_name: device_processor
        config:
          device: cuda
          float_dtype: null
      - registry_name: normalizer_processor
        config:
          # Written with a decimal point (was `1e-8`) so strict YAML 1.1
          # loaders parse it as a float rather than a string.
          eps: 1.0e-8
          features: {}
          # norm_map is injected at runtime from model.normalization_mapping
  postprocessor:
    name: policy_postprocessor
    steps:
      - registry_name: unnormalizer_processor
        config:
          eps: 1.0e-8
          features: {}
          # norm_map is injected at runtime from model.normalization_mapping
Loading
Loading