PanoSharp/main.py at main · Constannnnnt/PanoSharp · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
#!/usr/bin/env python3
"""
PanoSharp: Unified 3DGS Panorama Alignment Pipeline

A unified command-line interface for aligning and merging 3D Gaussian Splatting
point clouds from Apple's SHARP model. Automatically handles both real photographs
and AI-generated images with appropriate alignment strategies.

Usage Examples:
    # Align real photographs (full pipeline: image -> SHARP -> alignment)
    python main.py --src photo1.jpg --tgt photo2.jpg --type real --output outputs/

    # Align AI-generated images
    python main.py --src ai1.png --tgt ai2.png --type ai --output outputs/

    # Use pre-existing PLY files (skip SHARP generation)
    python main.py --src_ply outputs/1.ply --tgt_ply outputs/2.ply \\
                   --src_img img1.jpg --tgt_img img2.jpg \\
                   --type real --output outputs/

    # Customize alignment parameters
    python main.py --src img1.jpg --tgt img2.jpg --type real --output outputs/ \\
                   --smoothing 1.0 --warp-strength 0.5 --falloff 0.5

Alignment Strategies:
    --type real: For photographs from cameras
        - Uses Sim3 transform (rotation + translation + scale)
        - Depth affine correction for monocular depth bias
        - Non-rigid RBF warping for residual correction

    --type ai: For AI-generated images (DALL-E, Midjourney, Gemini, etc.)
        - Uses 2D homography (robust to depth inconsistencies)
        - Scale estimation from depth ratios
        - 2D RBF smoothing

Output Files:
    {output_dir}/
    ├── ply/
    │   ├── {src_name}.ply    # SHARP output for source image
    │   └── {tgt_name}.ply    # SHARP output for target image
    ├── aligned.ply           # Source PLY aligned to target
    └── merged.ply            # Combined target + aligned source

Author: PanoSharp Team
License: See LICENSE file
"""

from __future__ import annotations

import argparse
import sys
from pathlib import Path

import torch

from panosharp.core.data_types import ImageType, PipelineInput
from panosharp.pipeline.orchestrator import PanoSharpPipeline


def parse_args() -> argparse.Namespace:
    """
    Parse command-line arguments.

    Returns:
        Parsed arguments namespace
    """
    parser = argparse.ArgumentParser(
        description="PanoSharp: Unified 3DGS Panorama Alignment Pipeline",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Full pipeline with real photos
  python main.py --src photo1.jpg --tgt photo2.jpg --type real --output outputs/

  # AI-generated images with custom parameters
  python main.py --src ai1.png --tgt ai2.png --type ai --output outputs/ \\
                 --smoothing 1.0 --warp-strength 0.5

  # Use existing PLY files (skip SHARP)
  python main.py --src_ply src.ply --tgt_ply tgt.ply \\
                 --src_img src.jpg --tgt_img tgt.jpg \\
                 --type real --output outputs/
        """,
    )

    # Input images (required for full pipeline)
    input_group = parser.add_argument_group("Input Images")
    input_group.add_argument(
        "--src",
        "--src_img",
        dest="src_img",
        type=Path,
        required=True,
        help="Source image path (to be aligned)",
    )
    input_group.add_argument(
        "--tgt",
        "--tgt_img",
        dest="tgt_img",
        type=Path,
        required=True,
        help="Target image path (reference)",
    )

    # Pre-existing PLY files (optional, skip SHARP if provided)
    ply_group = parser.add_argument_group("PLY Files (Optional)")
    ply_group.add_argument(
        "--src_ply",
        type=Path,
        default=None,
        help="Pre-existing source PLY file (skip SHARP generation)",
    )
    ply_group.add_argument(
        "--tgt_ply",
        type=Path,
        default=None,
        help="Pre-existing target PLY file (skip SHARP generation)",
    )

    # Output
    output_group = parser.add_argument_group("Output")
    output_group.add_argument(
        "--output",
        "-o",
        type=Path,
        required=True,
        help="Output directory for results",
    )

    # Image type (required)
    type_group = parser.add_argument_group("Image Type")
    type_group.add_argument(
        "--type",
        "-t",
        choices=["real", "ai"],
        required=True,
        help="Image type: 'real' for photographs, 'ai' for AI-generated",
    )

    # Device
    device_group = parser.add_argument_group("Computation")
    device_group.add_argument(
        "--device",
        choices=["cuda", "cpu", "mps"],
        default="cuda",
        help="Computation device (default: cuda)",
    )

    # Alignment parameters
    align_group = parser.add_argument_group("Alignment Parameters")
    align_group.add_argument(
        "--smoothing",
        type=float,
        default=0.3,
        help="RBF smoothing (higher = smoother warp, default: 0.3)",
    )
    align_group.add_argument(
        "--warp-strength",
        type=float,
        default=1.0,
        help="Warp intensity (0 = rigid only, default: 1.0)",
    )
    align_group.add_argument(
        "--falloff",
        type=float,
        default=0.5,
        help="Distance falloff (0 = no falloff, default: 0.0)",
    )
    align_group.add_argument(
        "--scale",
        type=float,
        default=0.0,
        help="Manual depth scale for AI images (0 = auto, default: 0)",
    )

    # SHARP options
    sharp_group = parser.add_argument_group("SHARP Options")
    sharp_group.add_argument(
        "--sharp-checkpoint",
        type=Path,
        default=None,
        help="Path to SHARP model checkpoint (optional)",
    )

    return parser.parse_args()


def validate_args(args: argparse.Namespace) -> None:
    """
    Check parsed arguments for consistency before the pipeline starts.

    Both image paths must exist, any PLY path that was supplied must exist,
    and the two PLY options must be given together or not at all. If the
    requested device is "cuda" or "mps" and torch reports it as unavailable,
    a warning is printed and ``args.device`` is overwritten with "cpu".

    Args:
        args: Parsed arguments; ``args.device`` may be modified in place

    Raises:
        SystemExit: With exit code 1 when a path or PLY-pairing check fails
    """

    def abort(message: str) -> None:
        # Fatal validation error: report on stderr and stop with status 1
        print(message, file=sys.stderr)
        sys.exit(1)

    def fall_back_to_cpu(warning: str) -> None:
        # Non-fatal: keep going on the CPU instead of the missing backend
        print(warning, file=sys.stderr)
        args.device = "cpu"

    # The two input images are mandatory
    if not args.src_img.exists():
        abort(f"Error: Source image not found: {args.src_img}")
    if not args.tgt_img.exists():
        abort(f"Error: Target image not found: {args.tgt_img}")

    # PLY paths are optional; only the ones that were given are checked
    if args.src_ply and not args.src_ply.exists():
        abort(f"Error: Source PLY not found: {args.src_ply}")
    if args.tgt_ply and not args.tgt_ply.exists():
        abort(f"Error: Target PLY not found: {args.tgt_ply}")

    # A lone PLY file is rejected: supply both or neither
    src_ply_given = args.src_ply is not None
    tgt_ply_given = args.tgt_ply is not None
    if src_ply_given != tgt_ply_given:
        abort("Error: Must provide both --src_ply and --tgt_ply or neither")

    # Downgrade to CPU when the requested accelerator is not usable
    if args.device == "cuda":
        if not torch.cuda.is_available():
            fall_back_to_cpu("Warning: CUDA not available, falling back to CPU")
    elif args.device == "mps":
        if not torch.backends.mps.is_available():
            fall_back_to_cpu("Warning: MPS not available, falling back to CPU")


def main() -> int:
    """
    Run the PanoSharp command-line workflow from arguments to summary.

    Parses and validates the command line, builds a ``PipelineInput`` from it,
    runs the default ``PanoSharpPipeline`` on that input, and prints a report
    of the outcome.

    Returns:
        0 when the pipeline result reports success, 1 otherwise
    """
    args = parse_args()
    validate_args(args)

    # Map the CLI choice ("real" / "ai") onto the pipeline's enum
    if args.type == "real":
        image_type = ImageType.REAL
    else:
        image_type = ImageType.AI_GENERATED

    # Bundle everything the pipeline needs for this run
    pipeline_input = PipelineInput(
        src_image_path=args.src_img,
        tgt_image_path=args.tgt_img,
        output_dir=args.output,
        image_type=image_type,
        src_ply_path=args.src_ply,
        tgt_ply_path=args.tgt_ply,
        device=args.device,
        smoothing=args.smoothing,
        warp_strength=args.warp_strength,
        falloff=args.falloff,
        scale_factor=args.scale,
    )

    pipeline = PanoSharpPipeline.create_default(
        device=args.device,
        sharp_checkpoint=args.sharp_checkpoint,
    )
    result = pipeline.run(pipeline_input)

    # Report: a blank line, then a banner around the status word
    banner = "=" * 60
    print("\n" + banner)

    # Failure path first so the success report reads straight through
    if not result.success:
        print("FAILED!")
        print(banner)
        print(f"Error: {result.error_message}")
        return 1

    print("SUCCESS!")
    print(banner)
    print(f"Source PLY: {result.src_ply_path}")
    print(f"Target PLY: {result.tgt_ply_path}")
    print(f"Aligned: {result.aligned_ply_path}")

    # The merged output is only listed when the result carries a path for it
    merged_path = result.merged_ply_path
    if merged_path:
        print(f"Merged: {merged_path}")

    stats = result.alignment_result.statistics
    print("\nAlignment Statistics:")
    print(f"  Matches: {stats.num_matches}")
    print(f"  Inliers: {stats.num_inliers}")
    print(f"  Transform: {stats.transform_type}")
    print(f"  Residual: mean={stats.residual_mean:.4f}, max={stats.residual_max:.4f}")
    print(f"  Warp: mean={stats.warp_mean:.4f}, max={stats.warp_max:.4f}")

    # Depth-affine coefficients are printed only when present
    depth_affine = stats.depth_affine
    if depth_affine:
        print(f"  Depth affine: a={depth_affine[0]:.4f}, b={depth_affine[1]:.4f}")

    return 0


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())