From df15ef85bdbeaadb37c362d471ff4a76f0340309 Mon Sep 17 00:00:00 2001 From: Mark Date: Wed, 13 Sep 2023 01:16:47 +0200 Subject: [PATCH] initial commit --- .gitignore | 2 ++ Cargo.toml | 9 ++++++ README.md | 55 ++++++++++++++++++++++++++++++++ src/apply_indexchanges.rs | 67 +++++++++++++++++++++++++++++++++++++++ src/args.rs | 14 ++++++++ src/indexchanges.rs | 11 +++++++ src/indexfile.rs | 53 +++++++++++++++++++++++++++++++ src/main.rs | 56 ++++++++++++++++++++++++++++++++ src/repr_file.rs | 46 +++++++++++++++++++++++++++ src/update_index.rs | 57 +++++++++++++++++++++++++++++++++ 10 files changed, 370 insertions(+) create mode 100755 .gitignore create mode 100644 Cargo.toml create mode 100755 README.md create mode 100755 src/apply_indexchanges.rs create mode 100755 src/args.rs create mode 100755 src/indexchanges.rs create mode 100755 src/indexfile.rs create mode 100755 src/main.rs create mode 100755 src/repr_file.rs create mode 100755 src/update_index.rs diff --git a/.gitignore b/.gitignore new file mode 100755 index 0000000..869df07 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +Cargo.lock \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..e8d4fe9 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "rembackup" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +clap = { version = "4.4.3", features = ["derive"] } diff --git a/README.md b/README.md new file mode 100755 index 0000000..ff56397 --- /dev/null +++ b/README.md @@ -0,0 +1,55 @@ +# rembackup + +A super simple yet fast backup solution, designed with slow connections in mind. + +## How it works + +Rembackup uses 3 directories: `source`, `index`, and `target`. + +```sh +rembackup $SOURCE $INDEX $TARGET +``` + +In *Step 1*, Rembackup recursively walks the `source` directory, comparing all entries with `index`. +It then shows a list of changes that would make `target` contain the same files as `source`. + +If you accept, it will then move on to *Step 2* and apply these changes. + +If you didn't get any warnings, `target` is now a backup of `source`. + +If you *did* get one or more warnings - don't worry! +You can just rerun the backup and the failed operations will be retried. + +## What makes it special + +If you want to back up your data to an external disk, you probably bought a large HDD. +If you want a remote backup, you may want to self-host something. +In both of these situations, the filesystem containing `target` is horribly slow. + +If a backup tool tries to compare `source` to `target` to figure out which files have changed, +it will always be affected by this slowness - even when working on unchanged files. + +Rembackup only performs read operations on `source` and `index` in *Step 1*. +Because of this, it can be surprisingly fast even when backing up large disks. + +In *Step 2*, where files are actually being copied to `target`, the slowness will still be noticeable, +but since only modified files are being copied, this usually takes a somewhat reasonable amount of time. + +## Usage + +To create a backup of your home directory `~` to `/mnt/backup`: + +```sh +rembackup ~ ~/index /mnt/backup +``` + +Note: `index` (`~/index`) doesn't need to be a subdirectory of `source` (`~`), but if it is, it will not be part of the backup to avoid problems. +Note 2: `~/index` and `/mnt/backup` don't need to exist yet - they will be created if their parent directories exist. + +If this is the first backup, you can try to maximize the speed of `/mnt/backup`. +If you want remote backups, you should probably connect the server's disk directly to your computer. +The backups after the initial one will be a lot faster, so you can switch to remote backups after this. + +## TODO + +detect files that have been removed diff --git a/src/apply_indexchanges.rs b/src/apply_indexchanges.rs new file mode 100755 index 0000000..6c21584 --- /dev/null +++ b/src/apply_indexchanges.rs @@ -0,0 +1,67 @@ +use std::{fs, io, path::Path}; + +use crate::{indexchanges::IndexChange, repr_file::ReprFile}; + +/// Only errors that happen when writing to the index are immediately returned. +/// Other errors are logged to stderr and the failed change will not be saved to the index, +/// so the next backup will try again. +pub fn apply_indexchanges( + source: &Path, + index: &Path, + target: &Path, + changes: &Vec, +) -> io::Result<()> { + let o = apply_indexchanges_int(source, index, target, changes); + eprintln!(); + o +} +pub fn apply_indexchanges_int( + source: &Path, + index: &Path, + target: &Path, + changes: &Vec, +) -> io::Result<()> { + let len_width = changes.len().to_string().len(); + let width = 80 - 3 - 2 - len_width - len_width; + eprint!( + "{}0/{} [>{}]", + " ".repeat(len_width - 1), + changes.len(), + " ".repeat(width) + ); + for (i, change) in changes.iter().enumerate() { + match change { + IndexChange::AddDir(dir) => { + let t = target.join(dir); + if let Err(e) = fs::create_dir(&t) { + eprintln!("\n[warn] couldn't create directory {t:?}: {e}"); + } else { + fs::create_dir(&index.join(dir))?; + } + } + IndexChange::AddFile(file, index_file) => { + let s = source.join(file); + let t = target.join(file); + if let Err(e) = fs::copy(&s, &t) { + eprintln!("\n[warn] couldn't copy file from {s:?} to {t:?}: {e}"); + } + fs::write(&index.join(file), index_file.save())?; + } + } + { + let i = i + 1; + let leftpad = width * i / changes.len(); + let rightpad = width - leftpad; + let prognum = i.to_string(); + eprint!( + "\r{}{}/{} [{}>{}]", + " ".repeat(len_width - prognum.len()), + prognum, + changes.len(), + "-".repeat(leftpad), + " ".repeat(rightpad) + ); + } + } + Ok(()) +} diff --git a/src/args.rs b/src/args.rs new file mode 100755 index 0000000..d05e89b --- /dev/null +++ b/src/args.rs @@ -0,0 +1,14 @@ +use std::path::PathBuf; + +use clap::Parser; + +#[derive(Parser)] +#[command(author, version)] +pub struct Args { + #[arg()] + pub source: PathBuf, + #[arg()] + pub index: PathBuf, + #[arg()] + pub target: PathBuf, +} diff --git a/src/indexchanges.rs b/src/indexchanges.rs new file mode 100755 index 0000000..a77508d --- /dev/null +++ b/src/indexchanges.rs @@ -0,0 +1,11 @@ +use std::path::PathBuf; + +use crate::indexfile::IndexFile; + +#[derive(Debug)] +pub enum IndexChange { + /// Ensure a directory with this path exists (at least if all its parent directories exist). + AddDir(PathBuf), + /// Add or update a file + AddFile(PathBuf, IndexFile), +} diff --git a/src/indexfile.rs b/src/indexfile.rs new file mode 100755 index 0000000..27fdafa --- /dev/null +++ b/src/indexfile.rs @@ -0,0 +1,53 @@ +use std::{ + collections::HashMap, + fs::{self, Metadata}, + io, + path::Path, + time::SystemTime, +}; + +use crate::repr_file::ReprFile; + +#[derive(Debug, PartialEq, Eq)] +pub struct IndexFile { + size: u64, + last_modified: Option, +} + +impl IndexFile { + pub fn new_from_metadata(metadata: &Metadata) -> Self { + Self { + size: metadata.len(), + last_modified: metadata + .modified() + .ok() + .and_then(|v| v.duration_since(SystemTime::UNIX_EPOCH).ok()) + .map(|v| v.as_secs()), + } + } + pub fn from_path(path: &Path) -> io::Result> { + Ok(Self::load(&fs::read_to_string(path)?)) + } +} + +impl ReprFile for IndexFile { + fn save(&self) -> String { + let mut o = format!("Len={}\n", self.size); + if let Some(age) = self.last_modified { + o.push_str(&format!("Age={}\n", age)); + } + o + } + fn load(src: &str) -> Result { + let hm = HashMap::load(src)?; + if let Some(len) = hm.get("Len").and_then(|len_str| len_str.parse().ok()) { + let age = hm.get("Age").and_then(|lm_str| lm_str.parse().ok()); + Ok(Self { + size: len, + last_modified: age, + }) + } else { + return Err(format!("no Len in IndexFile!")); + } + } +} diff --git a/src/main.rs b/src/main.rs new file mode 100755 index 0000000..96702d8 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,56 @@ +use std::process::exit; + +use clap::Parser; + +use crate::{ + apply_indexchanges::apply_indexchanges, indexchanges::IndexChange, + update_index::perform_index_diff, +}; + +mod apply_indexchanges; +mod args; +mod indexchanges; +mod indexfile; +mod repr_file; +mod update_index; + +fn main() { + // get args + let args = args::Args::parse(); + // index diff + eprintln!("performing index diff..."); + let changes = match perform_index_diff(&args.source, &args.index) { + Ok(c) => c, + Err(e) => { + eprintln!("Failed to generate index diff:\n {e}"); + exit(20); + } + }; + if changes.is_empty() { + eprintln!("done! found no changes."); + } else { + eprintln!("done! found {} changes.", changes.len()); + // display the changes + eprintln!(" - - - - -"); + for change in &changes { + match change { + IndexChange::AddDir(v) => eprintln!(" - Add the directory {v:?}"), + IndexChange::AddFile(v, _) => eprintln!(" - Add the file {v:?}"), + } + } + eprintln!( + "Press Enter to add these {} changes to the backup.", + changes.len() + ); + // apply changes + if std::io::stdin().read_line(&mut String::new()).is_ok() { + match apply_indexchanges(&args.source, &args.index, &args.target, &changes) { + Ok(()) => {} + Err(e) => { + eprintln!("Failed to apply index changes: {e}"); + exit(30); + } + } + } + } +} diff --git a/src/repr_file.rs b/src/repr_file.rs new file mode 100755 index 0000000..73546ae --- /dev/null +++ b/src/repr_file.rs @@ -0,0 +1,46 @@ +use std::collections::HashMap; + +pub trait ReprFile: Sized { + fn save(&self) -> String; + fn load(src: &str) -> Result; +} + +impl ReprFile for Vec { + fn save(&self) -> String { + let mut o = String::new(); + for line in self { + o.push_str(line); + } + o + } + fn load(src: &str) -> Result { + Ok(src.lines().map(|v| v.to_owned()).collect()) + } +} +impl ReprFile for HashMap { + fn save(&self) -> String { + let mut o = String::new(); + for (key, value) in self { + o.push_str(key); + o.push('='); + o.push_str(value); + o.push('\n'); + } + o + } + fn load(src: &str) -> Result { + let mut o = HashMap::new(); + for line in src.lines() { + if !line.is_empty() { + if let Some((key, value)) = line.split_once('=') { + o.insert(key.to_owned(), value.to_owned()); + } else { + return Err(format!( + "Nonempty line didn't contain the required = char! (line: {line:?})" + )); + } + } + } + Ok(o) + } +} diff --git a/src/update_index.rs b/src/update_index.rs new file mode 100755 index 0000000..4ed051f --- /dev/null +++ b/src/update_index.rs @@ -0,0 +1,57 @@ +use std::{fs, io, path::Path}; + +use crate::{indexchanges::IndexChange, indexfile::IndexFile}; + +pub fn perform_index_diff(source: &Path, index: &Path) -> io::Result> { + let mut changes = Vec::new(); + rec( + source.as_ref(), + Path::new(""), + index, + &mut changes, + index.strip_prefix(source).ok(), + )?; + Ok(changes) +} +fn rec( + source: &Path, + rel_path: &Path, + index_files: &Path, + changes: &mut Vec, + inner_index: Option<&Path>, +) -> Result<(), io::Error> { + if let Some(ii) = &inner_index { + if rel_path.starts_with(ii) { + eprintln!("[info] source contains index, but index will not be part of the backup."); + return Ok(()); + } + } + + if !index_files.join(rel_path).try_exists()? { + changes.push(IndexChange::AddDir(rel_path.to_path_buf())); + } + for entry in fs::read_dir(source.join(rel_path))? { + let entry = entry?; + let metadata = entry.metadata()?; + if metadata.is_dir() { + rec( + source, + &rel_path.join(entry.file_name()), + index_files, + changes, + inner_index, + )?; + } else { + let newif = IndexFile::new_from_metadata(&metadata); + let oldif = IndexFile::from_path(&index_files.join(rel_path).join(entry.file_name())); + match oldif { + Ok(Ok(oldif)) if oldif == newif => {} + _ => changes.push(IndexChange::AddFile( + rel_path.join(entry.file_name()), + newif, + )), + } + } + } + Ok(()) +}