Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/gr00t_n1_5/conf/serve.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ experiment:
exp_dir: outputs/${experiment.exp_name}
task:
type: serve
entrypoint: flagscale/serve/run_serve_gr00t_n1_5.py
entrypoint: flagscale/serve/run_serve_qwen_gr00t.py
runner:
hostfile: null
deploy:
Expand Down
2 changes: 1 addition & 1 deletion examples/gr00t_n1_5/conf/serve/gr00t_n1_5.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
host: 0.0.0.0
port: 5000
model_variant: Gr00tN15
model: /workspace/models/gr00t_n1_5_train/checkpoints/last
model: ./outputs/gr00t_n1_5_train/checkpoints/last/pretrained_model
device: "cuda"
2 changes: 1 addition & 1 deletion examples/gr00t_n1_5/conf/train.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
defaults:
- _self_
- train: gr00t_n1_5
- train: libero_spatial_demo

experiment:
exp_name: gr00t_n1_5_train
Expand Down
123 changes: 123 additions & 0 deletions examples/gr00t_n1_5/conf/train/libero_spatial_demo.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# Demo fine-tuning config: GR00T N1.5 on the LIBERO-Spatial LeRobot dataset.
# NOTE(review): indentation/nesting below was reconstructed from a flattened
# copy of this file — confirm the section boundaries (system / checkpoint /
# model / optimizer / data) against the original before relying on them.

system:
  batch_size: 32
  train_steps: 1000
  log_freq: 10
  grad_clip_norm: 10.0
  use_amp: true
  shuffle: true
  num_workers: 8

checkpoint:
  output_directory: ${experiment.exp_dir}
  save_checkpoint: true
  save_freq: 500
  # Path to a checkpoint directory to resume training from (e.g. outputs/gr00t_n1_5_train/checkpoints/001000)
  # resume_from:

model:
  model_name: gr00t_n1_5
  # Path or HuggingFace model ID for the pretrained GR00T N1.5 model
  checkpoint_dir: /workspace/models/nvidia/GR00T-N1.5-3B

  # Fine-tuning control
  tune_llm: true
  tune_visual: true
  tune_projector: true
  tune_diffusion_model: true
  compute_dtype: bfloat16

  # Embodiment tag to use for training (e.g. 'new_embodiment', 'gr1')
  embodiment_tag: new_embodiment

  # Number of future action steps predicted per forward pass.
  # Determines action_delta_indices = [0, 1, ..., chunk_size - 1].
  chunk_size: 16

  # Padding dimensions — shorter state/action sequences are zero-padded to these sizes
  max_state_dim: 64
  max_action_dim: 32

  normalization_mapping:
    VISUAL: IDENTITY
    STATE: MIN_MAX
    ACTION: MIN_MAX

  # LoRA fine-tuning (lora_rank: 0 disables LoRA)
  # lora_rank: 0
  # lora_alpha: 16
  # lora_dropout: 0.1
  # lora_full_model: false

# ============================================================
# Module Freezing Configuration
# ============================================================
# Freezing logic: freeze_patterns are applied first, then keep_patterns override.
# Patterns are regex matched against full parameter names.
#
# Common patterns for GR00T N1.5:
#   - "_groot_model\\.backbone\\..*"       # Entire backbone (VLM + vision)
#   - "_groot_model\\.action_head\\..*"    # Action diffusion head
#
# freeze:
#   freeze_patterns:
#     - "_groot_model\\.backbone\\..*"

optimizer:
  name: AdamW
  lr: 1.0e-4
  betas: [0.95, 0.999]
  eps: 1.0e-08
  weight_decay: 1.0e-05
  scheduler:
    name: cosine_decay_with_warmup
    warmup_steps: 500
    decay_steps: 10000
    peak_lr: 1.0e-4
    decay_lr: 1.0e-5

data:
  dataset_type: lerobot
  data_path: /workspace/datasets/tailong-wu/libero_spatial_no_noops_1.0.0_lerobot_v3.0
  tolerance_s: 0.0001
  preprocessor:
    name: policy_preprocessor
    steps:
      # 1. Rename keys if needed (e.g., dataset-specific camera names)
      - registry_name: rename_observations_processor
        config:
          rename_map: {}
      # 2. Add batch dimension for single samples
      - registry_name: to_batch_processor
        config: {}
      # 3. Pack video/state/action/language/embodiment; apply optional min-max normalization before padding
      - registry_name: groot_pack_inputs
        config:
          state_horizon: 1
          # Keep action_horizon / max dims in sync with model.chunk_size,
          # model.max_state_dim and model.max_action_dim above.
          action_horizon: 16
          max_state_dim: 64
          max_action_dim: 32
          language_key: task
          embodiment_tag: new_embodiment
          normalize_min_max: true
      # 4. Eagle encode (creates eagle_content)
      - registry_name: groot_eagle_encode
        config: {}
      # 5. Collate eagle_content -> eagle_* tensors
      - registry_name: groot_eagle_collate
        config: {}
      # 6. Move to device
      - registry_name: device_processor
        config:
          device: cuda
          float_dtype: null
  postprocessor:
    name: policy_postprocessor
    steps:
      - registry_name: groot_action_unpack_unnormalize
        config:
          env_action_dim: 7
          normalize_min_max: true
      - registry_name: device_processor
        config:
          device: cpu
          float_dtype: null
2 changes: 1 addition & 1 deletion examples/pi0_5/conf/serve/pi0_5.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
host: 0.0.0.0
port: 5000
model_variant: "pi0.5"
model: /workspace/models/pi0_5_train/checkpoints/last/pretrained_model
model: ./outputs/pi0_5_train/checkpoints/last/pretrained_model
device: "cuda"
# Maps client-sent observation keys to the keys the model was trained with.
# Format: {key_from_client: key_expected_by_model}
Expand Down
2 changes: 1 addition & 1 deletion examples/pi0_5/conf/train.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
defaults:
- _self_
- train: pi0_5
- train: libero_spatial_demo

experiment:
exp_name: pi0_5_train
Expand Down
48 changes: 48 additions & 0 deletions examples/pi0_5/conf/train/libero_spatial_demo.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Demo fine-tuning config: Pi0.5 on the LIBERO-Spatial LeRobot dataset.
# NOTE(review): indentation/nesting below was reconstructed from a flattened
# copy of this file — confirm the section boundaries (system / checkpoint /
# model / optimizer / data) against the original before relying on them.

system:
  batch_size: 16
  train_steps: 2000
  log_freq: 10
  grad_clip_norm: 1.0
  use_amp: true
  shuffle: true
  num_workers: 4

checkpoint:
  output_directory: ${experiment.exp_dir}
  # Whether to save checkpoint
  save_checkpoint: true
  # Number of steps between checkpoints
  save_freq: 500
  # TODO(yupu): Support resuming from checkpoint

model:
  model_name: pi0.5
  # Path to the pretrained pi05_base model checkpoint
  checkpoint_dir: /workspace/models/lerobot/pi05_libero_base
  # Path to paligemma tokenizer
  tokenizer_path: /workspace/models/google/paligemma-3b-pt-224
  tokenizer_max_length: 200
  gradient_checkpointing: true
  freeze_vision_encoder: false

optimizer:
  name: AdamW
  lr: 2.5e-5
  betas: [0.9, 0.95]
  eps: 1.0e-8
  weight_decay: 0.01
  scheduler:
    warmup_steps: 1000
    decay_steps: 30000
    decay_lr: 2.5e-6

data:
  # Path to the training data
  data_path: /workspace/datasets/tailong-wu/libero_spatial_no_noops_1.0.0_lerobot_v3.0
  tolerance_s: 0.0001
  use_imagenet_stats: true
  # To match the input features naming from the dataset to the policy config
  rename_map:
    "observation.images.wrist_image": "observation.images.image2"
  # By default, Pi0.5 uses quantiles for state and action normalization, if false, it uses mean and std instead
  use_quantiles: false
12 changes: 6 additions & 6 deletions examples/qwen_gr00t/conf/serve/qwen_gr00t.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
host: 0.0.0.0
port: 5000
model_variant: QwenGr00t
model: /share/project/fengyupu/github/FlagScale_2/outputs/260320_qwen_gr00t_train_libero_goal_old_dataset/checkpoints/last
model: ./outputs/qwen_gr00t_train/checkpoints/last/pretrained_model
device: "cuda"
# Maps client-sent observation keys to the keys the model was trained with.
# Format: {key_from_client: key_expected_by_model}
rename_map:
"observation/image": "observation.images.image"
"observation/wrist_image": "observation.images.wrist_image"
"observation/state": "observation.state"
"prompt": "task"
# rename_map:
# "observation/image": "observation.images.image"
# "observation/wrist_image": "observation.images.wrist_image"
# "observation/state": "observation.state"
# "prompt": "task"
serve_preprocessor:
steps:
- registry_name: image_resize_processor
Expand Down
2 changes: 1 addition & 1 deletion examples/qwen_gr00t/conf/train.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
defaults:
- _self_
- train: qwen_gr00t
- train: libero_spatial_demo

experiment:
exp_name: qwen_gr00t_train
Expand Down
150 changes: 150 additions & 0 deletions examples/qwen_gr00t/conf/train/libero_spatial_demo.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
# Demo fine-tuning config: QwenGR00T (Qwen3-VL backbone + GR00T action head)
# on the LIBERO-Spatial LeRobot dataset.
# NOTE(review): indentation/nesting below was reconstructed from a flattened
# copy of this file — confirm the section boundaries against the original
# before relying on them.

system:
  batch_size: 8
  train_steps: 4000
  log_freq: 1
  grad_clip_norm: 1.0
  use_amp: true
  shuffle: true
  num_workers: 4
  # Weight applied to VLM language modelling loss when co-training with vlm_data_path.
  # Set to 0 or omit vlm_data_path to disable co-training.
  vlm_loss_scale: 0.1

checkpoint:
  output_directory: ${experiment.exp_dir}
  # Whether to save checkpoint
  save_checkpoint: true
  # Number of steps between checkpoints
  save_freq: 1000
  # Path to a checkpoint directory to resume training from (e.g. /path/to/checkpoints/005000)
  # resume_from:

model:
  model_name: qwen_gr00t
  vlm:
    type: qwen3-vl
    base_vlm: /workspace/models/Qwen/Qwen3-VL-4B-Instruct/
    attn_implementation: flash_attention_2
  action_model:
    # Whether to condition the action model on proprioceptive state (observation.state)
    use_state: false
    type: gr00t_action_head
    action_model_type: DiT-B
    hidden_size: 1024
    # Canonical lowercase booleans (was `True`, which yamllint flags and which
    # differs from the lowercase style used everywhere else in this file).
    add_pos_embed: true
    max_seq_len: 1024
    action_dim: 7
    state_dim: 7
    future_action_window_size: 7
    action_horizon: 8
    repeated_diffusion_steps: 4
    noise_beta_alpha: 1.5
    noise_beta_beta: 1.0
    noise_s: 0.999
    num_timestep_buckets: 1000
    num_inference_timesteps: 4
    num_target_vision_tokens: 32
    diffusion_model_cfg:
      cross_attention_dim: 2048
      dropout: 0.2
      final_dropout: true
      interleave_self_attention: true
      norm_type: ada_norm
      # null disables positional embeddings. NOTE(review): original read
      # `None`, which YAML parses as the *string* "None", not a null — verify
      # the consumer expected a real null here.
      positional_embeddings: null

  prompt_template: "Your task is {instruction}. To identify the key objects for your task. Locate their bounding boxes in [x1,y1,x2,y2] format."

  normalization_mapping:
    VISUAL: IDENTITY
    STATE: MIN_MAX
    ACTION: MIN_MAX

optimizer:
  name: AdamW
  lr: 2.5e-5
  betas: [0.9, 0.95]
  eps: 1.0e-08
  weight_decay: 1.0e-08
  # Per-module learning-rate overrides.
  param_groups:
    vlm:
      lr: 1.0e-05
    action_model:
      lr: 1.0e-04
  scheduler:
    name: cosine_with_min_lr
    warmup_steps: 200
    scheduler_kwargs:
      min_lr: 1.0e-06
    # Legacy fields kept for BC
    decay_steps: 30000
    decay_lr: 2.5e-6

# ============================================================
# Module Freezing Configuration
# ============================================================
# Freezing logic: freeze_patterns are applied first, then keep_patterns override.
# Patterns are regex matched against full parameter names.
#
# Common patterns for QwenGR00T:
#   - "qwen_vl_interface\\..*"                               # Entire VLM
#   - "qwen_vl_interface\\.model\\.visual\\..*"              # Vision encoder
#   - "qwen_vl_interface\\.model\\.model\\..*"               # Language model
#   - "qwen_vl_interface\\.model\\.model\\.layers\\.[0-9]\\."  # LLM layers 0-9
#   - "action_model\\..*"                                    # Action head
#   - "action_model\\.model\\.transformer_blocks\\.[0-7]\\." # DiT blocks 0-7
#
# freeze:
#   # SCENARIO A: Freeze VLM, train only action head
#   freeze_patterns:
#     - "qwen_vl_interface\\..*"
#
#   # SCENARIO B: Freeze VLM but keep projector trainable
#   # freeze_patterns:
#   #   - "qwen_vl_interface\\..*"
#   # keep_patterns:
#   #   - "qwen_vl_interface\\.model\\.visual\\.merger\\..*"
#
#   # SCENARIO C: Freeze everything except action decoder
#   # freeze_patterns:
#   #   - ".*"
#   # keep_patterns:
#   #   - "action_model\\.action_decoder\\..*"

data:
  dataset_type: lerobot
  wds:
    vision_root: ""
    action_key: eepose
    state_key: eepose
  # Path to the training data
  data_path: /workspace/datasets/tailong-wu/libero_spatial_no_noops_1.0.0_lerobot_v3.0
  # Path to VLM co-training data (WDS/Energon format). Leave unset to disable co-training.
  # vlm_data_path: /workspace/datasets/vlm_cotrain/
  tolerance_s: 0.0001
  preprocessor:
    name: policy_preprocessor
    steps:
      - registry_name: rename_observations_processor
        config:
          rename_map: {}
      - registry_name: to_batch_processor
        config: {}
      - registry_name: device_processor
        config:
          device: cuda
          float_dtype: null
      - registry_name: normalizer_processor
        config:
          # Written with a decimal point (was `1e-8`) so strict YAML 1.1
          # loaders parse it as a float rather than a string.
          eps: 1.0e-8
          features: {}
          # norm_map is injected at runtime from model.normalization_mapping
  postprocessor:
    name: policy_postprocessor
    steps:
      - registry_name: unnormalizer_processor
        config:
          eps: 1.0e-8
          features: {}
          # norm_map is injected at runtime from model.normalization_mapping
Loading
Loading