From 0092627cff01fb5e650fa935d40bf9a1242f512d Mon Sep 17 00:00:00 2001 From: Mark <> Date: Thu, 27 Jun 2024 10:59:04 +0200 Subject: [PATCH] change ignore to use a custom file instead of cli arguments --- Cargo.toml | 1 + src/args.rs | 8 +- src/config.rs | 199 ++++++++++++++++++++++++++++++++++++++++++++ src/indexfile.rs | 4 +- src/main.rs | 73 +++++++++++++--- src/update_index.rs | 47 +++++++---- 6 files changed, 298 insertions(+), 34 deletions(-) mode change 100755 => 100644 Cargo.toml create mode 100644 src/config.rs diff --git a/Cargo.toml b/Cargo.toml old mode 100755 new mode 100644 index e8d4fe9..28069d8 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,3 +7,4 @@ edition = "2021" [dependencies] clap = { version = "4.4.3", features = ["derive"] } +glob-match = "0.2.1" diff --git a/src/args.rs b/src/args.rs index 7ced6a7..1880530 100755 --- a/src/args.rs +++ b/src/args.rs @@ -16,15 +16,13 @@ pub struct Args { /// where your backup will be stored #[arg()] pub target: Option, - /// directories to ignore. - /// can be paths relative to (like backups/) or paths starting with (like my_source/backups/). - /// if starts with , it is automatically ignored and doesn't need to be specified. - #[arg(long, num_args(0..))] - pub ignore: Vec, /// don't ask for confirmation, just apply the changes. #[arg(long)] pub noconfirm: bool, + #[arg(long)] + pub ignore: Option, + #[command(flatten)] pub settings: Settings, } diff --git a/src/config.rs b/src/config.rs new file mode 100644 index 0000000..1382bf5 --- /dev/null +++ b/src/config.rs @@ -0,0 +1,199 @@ +use std::{ + iter::{Enumerate, Peekable}, + path::{Path, PathBuf}, + str::Lines, +}; + +#[derive(Clone, Copy, Debug)] +pub struct FsEntry<'a> { + pub path: &'a Path, + pub is_directory: bool, +} + +#[derive(Debug)] +pub struct Ignore(pub Vec); +#[derive(Debug)] +pub enum Specifier { + /// Everything in this config-part will be explicitly ignored. 
+ /// Files in ignored directories, even if they would later match, + /// will not be seen by rembackup. + Except(Ignore), + Entries(Match), + Files(Match), + /// Descend into a directory. + /// Contains an inner config, which uses paths relative to the matched directory. + InDir { + dir: Match, + inner: Ignore, + }, +} +#[derive(Debug)] +pub enum Match { + Any, + Eq(PathBuf), + Glob(String), +} + +impl Match { + pub fn matches(&self, path: &Path) -> bool { + match self { + Self::Any => true, + Self::Eq(v) => v.as_path() == path, + Self::Glob(v) => path + .to_str() + .is_some_and(|path| glob_match::glob_match(v, path)), + } + } +} + +impl Ignore { + /// If `self.matches(entry)` is `Some(v)`, returns `v`, if not, returns `false`. + pub fn matches_or_default(&self, entry: &FsEntry) -> bool { + self.matches(entry).unwrap_or(false) + } + pub fn matches(&self, entry: &FsEntry) -> Option { + self.0.iter().rev().filter_map(|v| v.matches(entry)).next() + } + /// applies each specifier to each element of the `entries()` iterator. + /// any specifier overrides all earlier ones, + /// but the first entry that produces true or false will determine the output. 
+ pub fn matches_of<'a, I: Iterator>>( + &self, + entries: impl Fn() -> I, + ) -> Option { + self.0 + .iter() + .rev() + .filter_map(|v| entries().filter_map(|entry| v.matches(entry)).next()) + .next() + } +} +impl Specifier { + pub fn matches(&self, entry: &FsEntry) -> Option { + match self { + Self::Except(inner) => inner.matches(entry).map(std::ops::Not::not), + Self::Entries(path) => path.matches(entry.path).then_some(true), + Self::Files(path) => (!entry.is_directory && path.matches(entry.path)).then_some(true), + Self::InDir { dir, inner } => { + dbg!(&inner); + if dbg!(inner.0.is_empty()) { + // this has no inner things, so we just check for this directory + // if this is a directory and it matches, then return true + (entry.is_directory && dir.matches(entry.path)).then_some(true) + } else { + // this has inner things, so, for every matching parent, + // get the relative path (by removing the parent), ... + let mut path = entry.path; + let mut paths = vec![]; + while let Some(p) = path.parent() { + if dir.matches(p) { + if let Ok(p) = entry.path.strip_prefix(p) { + let mut e = *entry; + e.path = p; + paths.push(e); + } else { + eprintln!("[WARN] Parent {p:?} of path {:?} could not be removed... this is probably be a bug.", entry.path); + } + } + path = p; + } + // ... 
and then check if any match + inner.matches_of(|| paths.iter()) + } + } + } + } +} + +impl Ignore { + pub fn parse(config: &str) -> Result { + Self::parsei(&mut config.lines().enumerate().peekable(), 0) + } + /// min_indent = parent_indent + 1, or 0 if no parent + fn parsei(lines: &mut Peekable>, min_indent: usize) -> Result { + let mut indent = None; + let mut specifiers = vec![]; + loop { + if let Some((line_nr, full_line)) = lines.peek() { + let line_nr = *line_nr; + let indent = { + let line = full_line.trim_start(); + // check indentation + let line_start_whitespace = &full_line[0..full_line.len() - line.len()]; + if let Some(c) = line_start_whitespace.chars().find(|c| *c != ' ') { + return Err(format!( + "Lines must start with any number of spaces, and no other whitespace character, but line {} contained the '{c:?}' character (Unicode {}).", + line_nr + 1, c.escape_unicode())); + } + let line_indent = line_start_whitespace.len(); + if line_indent < min_indent { + // less indentation than before, go up one layer of recursion + break; + } + if let Some(indent) = indent { + // check if we indent more/less than on the first line + if line_indent != indent { + return Err(format!( + "Lines in one part of a config may must all have the same indentation! 
(expected {indent} spaces, but found {line_indent})" + )); + } + } else { + // store the first line's indent + indent = Some(line_indent); + } + line_indent + }; + // -- indentation is ok, this line belongs to us -- + // because we only used `lines.peek()` until now + let line = lines.next().unwrap().1.trim_start(); + if line.starts_with("#") { + // comment, ignore + } else { + let (line_type, args) = + line.split_once(char::is_whitespace).unwrap_or((line, "")); + specifiers.push(match line_type.to_lowercase().trim() { + "except" => Specifier::Except(Ignore::parsei(lines, indent + 1)?), + line_type => match ( + line_type.chars().next().unwrap_or(' '), + line_type.chars().skip(1).next().unwrap_or(' '), + ) { + ('*', m) => Specifier::Entries(Match::parse_m(args, m, line_nr)?), + ('+', m) => Specifier::Files(Match::parse_m(args, m, line_nr)?), + ('/', m) => Specifier::InDir { + dir: Match::parse_m(args, m, line_nr)?, + inner: Ignore::parsei(lines, indent + 1)?, + }, + _ => { + return Err(format!( + "Got '{line}' (Line {}), but expected one of [[*+/][a=*], except]", + line_nr + 1 + )) + } + }, + }); + } + } else { + break; + } + } + Ok(Self(specifiers)) + } +} +impl Match { + fn parse_m(text: &str, m: char, line_nr: usize) -> Result { + Ok(match m { + 'a' => Self::Any, + '=' => Self::Eq(text.into()), + '*' => Self::parse_glob(text, line_nr)?, + _ => { + return Err(format!( + "[Line {}] unknown match-type '{m}', expected one of [a=*]", + line_nr + 1 + )) + } + }) + } + fn parse_glob(text: &str, _line_nr: usize) -> Result { + Ok(Self::Glob(text.to_owned())) + } +} diff --git a/src/indexfile.rs b/src/indexfile.rs index 8d03dcc..c1e05bc 100755 --- a/src/indexfile.rs +++ b/src/indexfile.rs @@ -29,8 +29,8 @@ impl IndexFile { false } } - (Some(new), None) => !settings.dont_replace_if_timestamp_found, - (None, Some(old)) => settings.replace_if_timestamp_lost, + (Some(_), None) => !settings.dont_replace_if_timestamp_found, + (None, Some(_)) => 
settings.replace_if_timestamp_lost, (None, None) => settings.replace_if_timestamp_unknown, }) } diff --git a/src/main.rs b/src/main.rs index bd922ef..343e63d 100755 --- a/src/main.rs +++ b/src/main.rs @@ -3,34 +3,85 @@ use std::process::exit; use clap::Parser; use crate::{ - apply_indexchanges::apply_indexchanges, indexchanges::IndexChange, + apply_indexchanges::apply_indexchanges, config::Ignore, indexchanges::IndexChange, update_index::perform_index_diff, }; mod apply_indexchanges; mod args; +mod config; mod indexchanges; mod indexfile; mod repr_file; mod update_index; +const EXIT_IGNORE_FAILED: u8 = 200; +const EXIT_DIFF_FAILED: u8 = 20; +const EXIT_APPLY_FAILED: u8 = 30; + fn main() { // get args let args = args::Args::parse(); // index diff eprintln!("performing index diff..."); - let source = &args.source; - let index = &args.index; - let ignore_subdirs = args - .ignore - .iter() - .map(|path| path.strip_prefix(source).unwrap_or(path)) - .collect(); - let changes = match perform_index_diff(source, index, ignore_subdirs, &args.settings) { + let cwd = match std::env::current_dir() { + Ok(v) => Some(v), + Err(e) => { + eprintln!("[WARN] Couldn't get current directory (CWD): {e}"); + None + } + }; + let source = if args.source.is_absolute() { + args.source.clone() + } else { + cwd.as_ref() + .expect("tried to use a relative path when there is no valid CWD") + .join(&args.source) + }; + let index = if args.index.is_absolute() { + args.index.clone() + } else { + cwd.as_ref() + .expect("tried to use a relative path when there is no valid CWD") + .join(&args.index) + }; + let target = args.target.as_ref().map(|target| { + if target.is_absolute() { + target.clone() + } else { + cwd.as_ref() + .expect("tried to use a relative path when there is no valid CWD") + .join(target) + } + }); + let ignore = if let Some(path) = &args.ignore { + match std::fs::read_to_string(path) { + Ok(text) => match Ignore::parse(&text) { + Ok(config) => config, + Err(e) => { + 
eprintln!("Couldn't parse ignore-file {path:?}: {e}"); + exit(EXIT_IGNORE_FAILED as _); + } + }, + Err(e) => { + eprintln!("Couldn't load ignore-file {path:?}: {e}"); + exit(EXIT_IGNORE_FAILED as _); + } + } + } else { + Ignore(vec![]) + }; + let changes = match perform_index_diff( + &source, + &index, + target.as_ref().map(|v| v.as_path()), + ignore, + &args.settings, + ) { Ok(c) => c, Err(e) => { eprintln!("Failed to generate index diff:\n {e}"); - exit(20); + exit(EXIT_DIFF_FAILED as _); } }; if changes.is_empty() { @@ -98,7 +149,7 @@ fn main() { Ok(()) => {} Err(e) => { eprintln!("Failed to apply: {e}"); - exit(30); + exit(EXIT_APPLY_FAILED as _); } } } diff --git a/src/update_index.rs b/src/update_index.rs index e2c30b4..03e1bbb 100755 --- a/src/update_index.rs +++ b/src/update_index.rs @@ -2,7 +2,11 @@ use std::{collections::HashMap, fs, io, path::Path}; use clap::Args; -use crate::{indexchanges::IndexChange, indexfile::IndexFile}; +use crate::{ + config::{FsEntry, Ignore, Match, Specifier}, + indexchanges::IndexChange, + indexfile::IndexFile, +}; #[derive(Clone, Default, Args)] pub struct Settings { @@ -26,20 +30,33 @@ pub struct Settings { pub fn perform_index_diff<'a>( source: &Path, index: &'a Path, - mut ignore_paths: Vec<&'a Path>, + target: Option<&'a Path>, + mut ignore: Ignore, settings: &Settings, ) -> io::Result> { let mut changes = Vec::new(); if let Ok(inner_index) = index.strip_prefix(source) { - eprintln!("[info] source contains index, but index will not be part of the backup."); - ignore_paths.push(inner_index); + eprintln!("[info] source contains index at {inner_index:?}, but index will not be part of the backup."); + ignore.0.push(Specifier::InDir { + dir: Match::Eq(inner_index.to_owned()), + inner: Ignore(vec![]), + }); + } + if let Some(target) = target { + if let Ok(inner_target) = target.strip_prefix(source) { + eprintln!("[info] source contains target at {inner_target:?}, but target will not be part of the backup."); + 
ignore.0.push(Specifier::InDir { + dir: Match::Eq(inner_target.to_owned()), + inner: Ignore(vec![]), + }); + } } rec( source.as_ref(), Path::new(""), index, &mut changes, - &ignore_paths, + &ignore, settings, )?; Ok(changes) @@ -53,7 +70,7 @@ fn rec( index_files: &Path, // list of changes to be made changes: &mut Vec, - ignore_paths: &Vec<&Path>, + ignore: &Ignore, settings: &Settings, ) -> Result<(), io::Error> { // used to find removals @@ -75,26 +92,24 @@ fn rec( for entry in source_files { let entry = entry?; let rel_path = rel_path.join(entry.file_name()); + let metadata = entry.metadata()?; + // ignore entries - if ignore_paths.iter().any(|ii| &rel_path == ii) { + let fs_entry = FsEntry { + path: &rel_path, + is_directory: metadata.is_dir(), + }; + if ignore.matches_or_default(&fs_entry) { continue; } - let metadata = entry.metadata()?; let in_index_and_is_dir = index_entries.remove(&entry.file_name()); if metadata.is_dir() { if let Some(false) = in_index_and_is_dir { // is dir, but was file -> remove file changes.push(IndexChange::RemoveFile(rel_path.clone())); } - rec( - source, - &rel_path, - index_files, - changes, - ignore_paths, - settings, - )?; + rec(source, &rel_path, index_files, changes, ignore, settings)?; } else { if let Some(true) = in_index_and_is_dir { // is file, but was dir -> remove dir