diff --git a/cmd/desync/cache.go b/cmd/desync/cache.go index b7b981a..db538fd 100644 --- a/cmd/desync/cache.go +++ b/cmd/desync/cache.go @@ -22,10 +22,11 @@ func newCacheCommand(ctx context.Context) *cobra.Command { cmd := &cobra.Command{ Use: "cache <index> [<index>...]", Short: "Read indexes and copy the referenced chunks", - Long: `Read chunk IDs from caibx or caidx files from one or more stores without -writing to disk. Can be used (with -c) to populate a store with desired chunks -either to be used as cache, or to populate a store with chunks referenced in an -index file. Use '-' to read (a single) index from STDIN. + Long: `Read chunk IDs from one or more index files (caibx or caidx) and copy the +referenced chunks from the source store(s) into the target store given with +-c, without assembling any blob on disk. This can be used to pre-populate a +cache, or to replicate the chunks referenced by indexes into another store. +Use '-' to read (a single) index from STDIN. To exclude chunks that are known to exist in the target store already, use --ignore <index> which will skip any chunks from the given index. 
The same can @@ -40,9 +41,9 @@ file with --ignore-chunks .`, } flags := cmd.Flags() flags.StringSliceVarP(&opt.stores, "store", "s", nil, "source store(s)") - flags.StringVarP(&opt.cache, "cache", "c", "", "target store") - flags.StringSliceVarP(&opt.ignoreIndexes, "ignore", "", nil, "indexes to ignore chunks from") - flags.StringSliceVarP(&opt.ignoreChunks, "ignore-chunks", "", nil, "ignore chunks from text file") + flags.StringVarP(&opt.cache, "cache", "c", "", "target store the chunks are copied to") + flags.StringSliceVarP(&opt.ignoreIndexes, "ignore", "", nil, "index(es) with chunks to be excluded") + flags.StringSliceVarP(&opt.ignoreChunks, "ignore-chunks", "", nil, "text file with chunk IDs to be excluded") addStoreOptions(&opt.cmdStoreOptions, flags) return cmd } diff --git a/cmd/desync/cat.go b/cmd/desync/cat.go index 7206cb2..2ba5665 100644 --- a/cmd/desync/cat.go +++ b/cmd/desync/cat.go @@ -33,8 +33,9 @@ This is inherently slower than extract as while multiple chunks can be retrieved concurrently, writing to stdout cannot be parallelized. 
Use '-' to read the index from STDIN.`, - Example: ` desync cat -s http://192.168.1.1/ file.caibx | grep something`, - Args: cobra.RangeArgs(1, 2), + Example: ` desync cat -s http://192.168.1.1/ file.caibx | grep something + desync cat -s /path/to/store -o 1048576 -l 4096 file.caibx slice.bin`, + Args: cobra.RangeArgs(1, 2), RunE: func(cmd *cobra.Command, args []string) error { return runCat(ctx, opt, args) }, @@ -44,7 +45,7 @@ Use '-' to read the index from STDIN.`, flags.StringSliceVarP(&opt.stores, "store", "s", nil, "source store(s)") flags.StringVarP(&opt.cache, "cache", "c", "", "store to be used as cache") flags.IntVarP(&opt.offset, "offset", "o", 0, "offset in bytes to seek to before reading") - flags.IntVarP(&opt.length, "length", "l", 0, "number of bytes to read") + flags.IntVarP(&opt.length, "length", "l", 0, "number of bytes to read (0 reads to the end)") addStoreOptions(&opt.cmdStoreOptions, flags) return cmd } diff --git a/cmd/desync/chop.go b/cmd/desync/chop.go index 21eebcc..4b8226e 100644 --- a/cmd/desync/chop.go +++ b/cmd/desync/chop.go @@ -43,8 +43,8 @@ Use '-' to read the index from STDIN.`, } flags := cmd.Flags() flags.StringVarP(&opt.store, "store", "s", "", "target store") - flags.StringSliceVarP(&opt.ignoreIndexes, "ignore", "", nil, "index(s) to ignore chunks from") - flags.StringSliceVarP(&opt.ignoreChunks, "ignore-chunks", "", nil, "ignore chunks from text file") + flags.StringSliceVarP(&opt.ignoreIndexes, "ignore", "", nil, "index(es) with chunks to be excluded") + flags.StringSliceVarP(&opt.ignoreChunks, "ignore-chunks", "", nil, "text file with chunk IDs to be excluded") addStoreOptions(&opt.cmdStoreOptions, flags) return cmd } diff --git a/cmd/desync/chunk.go b/cmd/desync/chunk.go index 795f48f..b226e15 100644 --- a/cmd/desync/chunk.go +++ b/cmd/desync/chunk.go @@ -19,9 +19,11 @@ func newChunkCommand(ctx context.Context) *cobra.Command { var opt chunkOptions cmd := &cobra.Command{ - Use: "chunk ", - Short: "Chunk input file and print 
chunk points plus chunk ID", - Long: `Write start/length/hash pairs for each chunk a file would be split into.`, + Use: "chunk ", + Short: "Chunk input file and print chunk boundaries and IDs", + Long: `Chunks the input file without storing anything, and prints a start/length/hash +triple for each chunk the file would be split into. Useful to inspect or tune +chunking parameters before running 'make'.`, Example: ` desync chunk file.bin`, Args: cobra.ExactArgs(1), RunE: func(cmd *cobra.Command, args []string) error { @@ -30,7 +32,7 @@ func newChunkCommand(ctx context.Context) *cobra.Command { SilenceUsage: true, } flags := cmd.Flags() - flags.Uint64VarP(&opt.startPos, "start", "S", 0, "starting position") + flags.Uint64VarP(&opt.startPos, "start", "S", 0, "starting position in bytes") flags.StringVarP(&opt.chunkSize, "chunk-size", "m", "16:64:256", "min:avg:max chunk size in kb") return cmd } diff --git a/cmd/desync/chunkserver.go b/cmd/desync/chunkserver.go index 4e682ff..fd48a41 100644 --- a/cmd/desync/chunkserver.go +++ b/cmd/desync/chunkserver.go @@ -51,8 +51,10 @@ This command supports the --store-file option which can be used to define the st and caches in a JSON file. The config can then be reloaded by sending a SIGHUP without needing to restart the server. This can be done under load as well. `, - Example: ` desync chunk-server -s sftp://192.168.1.1/store -c /path/to/cache -l :8080`, - Args: cobra.NoArgs, + Example: ` desync chunk-server -s sftp://192.168.1.1/store -c /path/to/cache -l :8080 + desync chunk-server -s /path/to/store -w -l :8080 + desync chunk-server -s /path/to/store --cert cert.pem --key key.pem -l :8443`, + Args: cobra.NoArgs, RunE: func(cmd *cobra.Command, args []string) error { return runChunkServer(ctx, opt, args) }, @@ -62,7 +64,7 @@ needing to restart the server. This can be done under load as well. 
flags.StringVar(&opt.storeFile, "store-file", "", "read store arguments from a file, supports reload on SIGHUP") flags.StringSliceVarP(&opt.stores, "store", "s", nil, "upstream source store(s)") flags.StringVarP(&opt.cache, "cache", "c", "", "store to be used as cache") - flags.StringSliceVarP(&opt.listenAddresses, "listen", "l", []string{":http"}, "listen address") + flags.StringSliceVarP(&opt.listenAddresses, "listen", "l", []string{":http"}, "listen address(es), can be repeated") flags.BoolVarP(&opt.writable, "writable", "w", false, "support writing") flags.BoolVar(&opt.writable, "writeable", false, "support writing") _ = flags.MarkDeprecated("writeable", "use --writable instead") diff --git a/cmd/desync/extract.go b/cmd/desync/extract.go index 013279c..3b75ed1 100644 --- a/cmd/desync/extract.go +++ b/cmd/desync/extract.go @@ -32,22 +32,26 @@ func newExtractCommand(ctx context.Context) *cobra.Command { Use: "extract ", Short: "Read an index and build a blob from it", Long: `Reads an index and builds a blob reading chunks from one or more chunk stores. +Use '-' to read the index from STDIN. + When using -k, the blob will be extracted in-place utilizing existing data and the target file will not be deleted on error. This can be used to restart a failed prior extraction without having to retrieve completed chunks again. -Multiple optional seed indexes can be given with --seed. The matching blob should -have the same name as the index file without the .caibx extension. Instead, if the -matching blob data is in another location, or with a different name, you can explicitly -set the path by writing the index file path, followed by a colon and the data path. -If several seed files and indexes are available, the --seed-dir option can be used -to automatically select all .caibx files in a directory as seeds. Use '-' to read -the index from STDIN. If a seed is invalid, by default the extract operation will be -aborted. 
With --skip-invalid-seeds, the invalid seeds will be discarded and the -extraction will continue without them. Otherwise with --regenerate-invalid-seeds, -any invalid seed indexes will be regenerated, in memory, by using the -available data, and neither data nor indexes will be changed on disk. Also, if the seed changes -while processing, its invalid chunks will be taken from the self seed, or the store, instead -of aborting.`, + +Multiple optional seed indexes can be given with --seed. The matching blob +should have the same name as the index file without the .caibx extension. If +the blob data is in another location, or has a different name, the path can be +set explicitly by appending a colon and the data path to the index path, as in +--seed <index>:<data>. If several seed files and indexes are available, the +--seed-dir option can be used to automatically select all .caibx files in a +directory as seeds, expecting the matching blobs next to them. + +If a seed is invalid, the extract operation is aborted by default. With +--skip-invalid-seeds, invalid seeds are discarded and the extraction continues +without them. Alternatively, --regenerate-invalid-seeds regenerates invalid +seed indexes in memory from the available data; neither data nor indexes are +changed on disk. 
Also, if a seed changes while processing, its invalid chunks +will be taken from the self seed, or the store, instead of aborting.`, Example: ` desync extract -s http://192.168.1.1/ -c /path/to/local file.caibx largefile.bin desync extract -s /mnt/store -s /tmp/other/store file.tar.caibx file.tar desync extract -s /mnt/store --seed /mnt/v1.caibx v2.caibx v2.vmdk @@ -62,8 +66,8 @@ of aborting.`, flags.StringSliceVarP(&opt.stores, "store", "s", nil, "source store(s)") flags.StringSliceVar(&opt.seeds, "seed", nil, "seed indexes") flags.StringSliceVar(&opt.seedDirs, "seed-dir", nil, "directory with seed index files") - flags.BoolVar(&opt.skipInvalidSeeds, "skip-invalid-seeds", false, "Skip seeds with invalid chunks") - flags.BoolVar(&opt.regenerateInvalidSeeds, "regenerate-invalid-seeds", false, "Regenerate seed indexes with invalid chunks") + flags.BoolVar(&opt.skipInvalidSeeds, "skip-invalid-seeds", false, "skip seeds with invalid chunks") + flags.BoolVar(&opt.regenerateInvalidSeeds, "regenerate-invalid-seeds", false, "regenerate seed indexes with invalid chunks") flags.StringVarP(&opt.cache, "cache", "c", "", "store to be used as cache") flags.BoolVarP(&opt.inPlace, "in-place", "k", false, "extract the file in place and keep it in case of error") flags.BoolVarP(&opt.printStats, "print-stats", "", false, "print extraction statistics to stdout when done") diff --git a/cmd/desync/indexserver.go b/cmd/desync/indexserver.go index 61eb98f..292f8b0 100644 --- a/cmd/desync/indexserver.go +++ b/cmd/desync/indexserver.go @@ -35,8 +35,9 @@ func newIndexServerCommand(ctx context.Context) *cobra.Command { reading from a single local store or proxying to a remote store. If --cert and --key are provided, the server will serve over HTTPS. 
The -w option enables writing to this store.`, - Example: ` desync index-server -s sftp://192.168.1.1/indexes -l :8080`, - Args: cobra.NoArgs, + Example: ` desync index-server -s sftp://192.168.1.1/indexes -l :8080 + desync index-server -s /path/to/indexes -w -l :8080`, + Args: cobra.NoArgs, RunE: func(cmd *cobra.Command, args []string) error { return runIndexServer(ctx, opt, args) }, @@ -44,7 +45,7 @@ enables writing to this store.`, } flags := cmd.Flags() flags.StringVarP(&opt.store, "store", "s", "", "upstream source index store") - flags.StringSliceVarP(&opt.listenAddresses, "listen", "l", []string{":http"}, "listen address") + flags.StringSliceVarP(&opt.listenAddresses, "listen", "l", []string{":http"}, "listen address(es), can be repeated") flags.BoolVarP(&opt.writable, "writable", "w", false, "support writing") flags.BoolVar(&opt.writable, "writeable", false, "support writing") _ = flags.MarkDeprecated("writeable", "use --writable instead") diff --git a/cmd/desync/make.go b/cmd/desync/make.go index 3bd8665..d5aef46 100644 --- a/cmd/desync/make.go +++ b/cmd/desync/make.go @@ -28,8 +28,9 @@ func newMakeCommand(ctx context.Context) *cobra.Command { provided with -s, such as a local directory or S3 store, it splits the input file according to the index and stores the chunks. 
Use '-' to write the index to STDOUT.`, - Example: ` desync make -s /path/to/local file.caibx largefile.bin`, - Args: cobra.ExactArgs(2), + Example: ` desync make -s /path/to/local file.caibx largefile.bin + desync make -m 8:32:128 - largefile.bin > file.caibx`, + Args: cobra.ExactArgs(2), RunE: func(cmd *cobra.Command, args []string) error { return runMake(ctx, opt, args) }, diff --git a/cmd/desync/manpage.go b/cmd/desync/manpage.go index d8b28be..96ccedc 100644 --- a/cmd/desync/manpage.go +++ b/cmd/desync/manpage.go @@ -17,6 +17,7 @@ func newManpageCommand(ctx context.Context, root *cobra.Command) *cobra.Command cmd := &cobra.Command{ Use: "manpage ", Short: "Generate manpages for desync", + Long: `Generates man pages for desync and all of its commands into the given directory.`, Example: ` desync manpage /tmp/man`, Args: cobra.ExactArgs(1), RunE: func(cmd *cobra.Command, args []string) error { diff --git a/cmd/desync/mount-index.go b/cmd/desync/mount-index.go index 72a0fe4..2df30be 100644 --- a/cmd/desync/mount-index.go +++ b/cmd/desync/mount-index.go @@ -40,14 +40,14 @@ the index from STDIN. When a Copy-on-Read file is given (with --cor-file), the file is used as a fast cache. All chunks that are accessed by the mount are retrieved from the store and written into the file as read operations are performed. Once all chunks have been accessed, the COR -file is fully populated. On termination, a .state file is written containing -information about which chunks of the index have or have not been read. A state file is -only valid for one cache file and one index. When re-using it with a different index, -data corruption can occur. +file is fully populated. If --cor-state-save is given, a state file is written on +termination (and on SIGHUP) containing information about which chunks of the index have +or have not been read. A state file is only valid for one cache file and one index. +When re-using it with a different index, data corruption can occur. 
This command supports the --store-file option which can be used to define the stores and caches in a JSON file. The config can then be reloaded by sending a SIGHUP without -needing to restart the server. This can be done under load as well. +having to unmount and remount. This can be done under load as well. `, Example: ` desync mount-index -s http://192.168.1.1/ file.caibx /mnt/blob desync mount-index -s /path/to/store --cor-file /var/tmp/blob.cor blob.caibx /mnt/blob @@ -63,8 +63,8 @@ needing to restart the server. This can be done under load as well. flags.StringVarP(&opt.cache, "cache", "c", "", "store to be used as cache") flags.StringVar(&opt.storeFile, "store-file", "", "read store arguments from a file, supports reload on SIGHUP") flags.StringVarP(&opt.corFile, "cor-file", "", "", "use a copy-on-read sparse file as cache") - flags.StringVarP(&opt.StateSaveFile, "cor-state-save", "", "", "file to store the state for copy-on-read") - flags.StringVarP(&opt.StateInitFile, "cor-state-init", "", "", "copy-on-read state init file") + flags.StringVarP(&opt.StateSaveFile, "cor-state-save", "", "", "file to store the copy-on-read state in on exit or SIGHUP") + flags.StringVarP(&opt.StateInitFile, "cor-state-init", "", "", "state file to initialize the copy-on-read cache from") flags.IntVarP(&opt.StateInitConcurrency, "cor-init-n", "", 10, "number of goroutines to use for initialization (with --cor-state-init)") addStoreOptions(&opt.cmdStoreOptions, flags) return cmd diff --git a/cmd/desync/prune.go b/cmd/desync/prune.go index 57aa2e3..aac2aa5 100644 --- a/cmd/desync/prune.go +++ b/cmd/desync/prune.go @@ -20,21 +20,23 @@ func newPruneCommand(ctx context.Context) *cobra.Command { var opt pruneOptions cmd := &cobra.Command{ - Use: "prune <index> [<index>..]", + Use: "prune <index> [<index>...]", Short: "Remove unreferenced chunks from a store", - Long: `Read chunk IDs in from index files and delete any chunks from a store -that are not referenced in the provided index files. 
Use '-' to read a single index -from STDIN.`, - Example: ` desync prune -s /path/to/local --yes file.caibx`, - Args: cobra.MinimumNArgs(1), + Long: `Read chunk IDs from index files and delete all chunks from a store +that are not referenced in any of the provided index files. This is a +destructive operation; a confirmation prompt is shown before any chunks are +deleted unless --yes is used. Use '-' to read a single index from STDIN.`, + Example: ` desync prune -s /path/to/local --yes file.caibx + desync prune -s /path/to/local current.caibx previous.caibx`, + Args: cobra.MinimumNArgs(1), RunE: func(cmd *cobra.Command, args []string) error { return runPrune(ctx, opt, args) }, SilenceUsage: true, } flags := cmd.Flags() - flags.StringVarP(&opt.store, "store", "s", "", "target store") - flags.BoolVarP(&opt.yes, "yes", "y", false, "do not ask for confirmation") + flags.StringVarP(&opt.store, "store", "s", "", "store to prune") + flags.BoolVarP(&opt.yes, "yes", "y", false, "do not ask for confirmation before deleting chunks") addStoreOptions(&opt.cmdStoreOptions, flags) return cmd } diff --git a/cmd/desync/root.go b/cmd/desync/root.go index 5d84f73..c96e74c 100644 --- a/cmd/desync/root.go +++ b/cmd/desync/root.go @@ -8,6 +8,22 @@ func newRootCommand() *cobra.Command { cmd := &cobra.Command{ Use: "desync", Short: "Content-addressed binary distribution system", + Long: `desync is a content-addressed binary distribution system. It chunks files +into reusable, compressed pieces kept in chunk stores, and reassembles them +efficiently using indexes, seeds and caches. It is compatible with casync +archives, indexes and stores. 
+ +Store locations, used with options like -s/--store and -c/--cache, can be: + /path/to/store local directory store + http(s)://host/path/ chunk/index server (see chunk-server command) + s3+http(s)://host/bucket S3-compatible object store + gs://bucket/prefix Google Cloud Storage bucket + sftp://user@host/path SFTP store + ssh://user@host/path casync protocol over SSH (read-only) + +Commands that accept multiple stores try them in the order given. Several +stores can also be combined into one failover group by separating them with +'|', for example -s "http://server1/store|http://server2/store".`, } cmd.PersistentFlags().StringVar(&cfgFile, "config", "", "config file (default $HOME/.config/desync/config.json)") cmd.PersistentFlags().StringVar(&digestAlgorithm, "digest", "sha512-256", "digest algorithm, sha512-256 or sha256") diff --git a/cmd/desync/verify.go b/cmd/desync/verify.go index 50e7324..2e6653e 100644 --- a/cmd/desync/verify.go +++ b/cmd/desync/verify.go @@ -23,15 +23,16 @@ func newVerifyCommand(ctx context.Context) *cobra.Command { Short: "Read chunks in a store and verify their integrity", Long: `Reads all chunks in a local store and verifies their integrity. If -r is used, invalid chunks are deleted from the store.`, - Example: ` desync verify -s /path/to/store`, - Args: cobra.NoArgs, + Example: ` desync verify -s /path/to/store + desync verify -s /path/to/store -r`, + Args: cobra.NoArgs, RunE: func(cmd *cobra.Command, args []string) error { return runVerify(ctx, opt, args) }, SilenceUsage: true, } flags := cmd.Flags() - flags.StringVarP(&opt.store, "store", "s", "", "target store") + flags.StringVarP(&opt.store, "store", "s", "", "local store to verify") flags.IntVarP(&opt.n, "concurrency", "n", 10, "number of concurrent goroutines") flags.BoolVarP(&opt.repair, "repair", "r", false, "remove invalid chunks from the store") return cmd