change ignore to use a custom file

instead of cli arguments
This commit is contained in:
Mark 2024-06-27 10:59:04 +02:00
parent c9c85ea6ad
commit 0092627cff
6 changed files with 298 additions and 34 deletions

1
Cargo.toml Executable file → Normal file
View File

@ -7,3 +7,4 @@ edition = "2021"
[dependencies] [dependencies]
clap = { version = "4.4.3", features = ["derive"] } clap = { version = "4.4.3", features = ["derive"] }
glob-match = "0.2.1"

View File

@ -16,15 +16,13 @@ pub struct Args {
/// where your backup will be stored /// where your backup will be stored
#[arg()] #[arg()]
pub target: Option<PathBuf>, pub target: Option<PathBuf>,
/// directories to ignore.
/// can be paths relative to <source> (like backups/) or paths starting with <source> (like my_source/backups/).
/// if <index> starts with <source>, it is automatically ignored and doesn't need to be specified.
#[arg(long, num_args(0..))]
pub ignore: Vec<PathBuf>,
/// don't ask for confirmation, just apply the changes. /// don't ask for confirmation, just apply the changes.
#[arg(long)] #[arg(long)]
pub noconfirm: bool, pub noconfirm: bool,
#[arg(long)]
pub ignore: Option<PathBuf>,
#[command(flatten)] #[command(flatten)]
pub settings: Settings, pub settings: Settings,
} }

199
src/config.rs Normal file
View File

@ -0,0 +1,199 @@
use std::{
iter::{Enumerate, Peekable},
path::{Path, PathBuf},
str::Lines,
};
/// A single file-system entry, as presented to the ignore rules.
///
/// The value only borrows its path, which keeps it cheap to copy while
/// rules rewrite the path relative to a parent directory.
#[derive(Debug, Clone, Copy)]
pub struct FsEntry<'a> {
    /// Path the rules are matched against.
    pub path: &'a Path,
    /// `true` when the entry is a directory, `false` otherwise.
    pub is_directory: bool,
}
/// An ordered list of ignore rules.
///
/// The rules are evaluated from last to first, so when several rules have an
/// opinion about an entry, the one listed last decides.
#[derive(Debug)]
pub struct Ignore(pub Vec<Specifier>);
/// One rule of an [`Ignore`] config.
///
/// Evaluating a rule against an entry yields `Some(true)` (ignore it),
/// `Some(false)` (explicitly keep it) or `None` (the rule has no opinion).
#[derive(Debug)]
pub enum Specifier {
    /// Inverts the verdict of the nested config: whatever the inner rules
    /// would ignore is explicitly kept instead.
    ///
    /// Files inside an ignored directory are never seen by rembackup, even
    /// if a rule would match them later on.
    Except(Ignore),
    /// Ignores every entry, file or directory, whose path matches.
    Entries(Match),
    /// Ignores matching entries that are not directories.
    Files(Match),
    /// Descend into a directory.
    ///
    /// With an empty `inner` config the matching directory itself is ignored.
    /// Otherwise `inner` is applied to the entry's path relative to every
    /// matching parent directory.
    InDir {
        /// Selects the directories this rule applies to.
        dir: Match,
        /// Rules that use paths relative to the matched directory.
        inner: Ignore,
    },
}
/// How a rule selects the paths it applies to.
#[derive(Debug)]
pub enum Match {
    /// Selects every path.
    Any,
    /// Selects a path that is equal to the stored one.
    Eq(PathBuf),
    /// Selects paths matching the stored glob pattern.
    Glob(String),
}
impl Match {
    /// Tests `path` against this matcher.
    ///
    /// * `Any` accepts every path.
    /// * `Eq` accepts a path that compares equal to the stored one.
    /// * `Glob` passes the pattern and the path to `glob_match::glob_match`;
    ///   a path that is not valid UTF-8 never matches a glob.
    pub fn matches(&self, path: &Path) -> bool {
        match self {
            Self::Any => true,
            Self::Eq(expected) => path == expected.as_path(),
            Self::Glob(pattern) => match path.to_str() {
                Some(text) => glob_match::glob_match(pattern, text),
                None => false,
            },
        }
    }
}
impl Ignore {
    /// Like [`Ignore::matches`], but treats "no rule has an opinion" as
    /// `false`, i.e. the entry is not ignored.
    pub fn matches_or_default(&self, entry: &FsEntry) -> bool {
        matches!(self.matches(entry), Some(true))
    }

    /// Asks the rules, from last to first, about `entry` and returns the
    /// first verdict found, or `None` if no rule applies.
    pub fn matches(&self, entry: &FsEntry) -> Option<bool> {
        self.0
            .iter()
            .rev()
            .find_map(|specifier| specifier.matches(entry))
    }

    /// Applies each rule to each element of the `entries()` iterator.
    ///
    /// Later rules override earlier ones, so the rules are walked from last
    /// to first. Within one rule, the first entry that produces `true` or
    /// `false` determines the output. `entries` is called once per rule that
    /// gets examined.
    pub fn matches_of<'a, I: Iterator<Item = &'a FsEntry<'a>>>(
        &self,
        entries: impl Fn() -> I,
    ) -> Option<bool> {
        for specifier in self.0.iter().rev() {
            for entry in entries() {
                if let Some(verdict) = specifier.matches(entry) {
                    return Some(verdict);
                }
            }
        }
        None
    }
}
impl Specifier {
pub fn matches(&self, entry: &FsEntry) -> Option<bool> {
match self {
Self::Except(inner) => inner.matches(entry).map(std::ops::Not::not),
Self::Entries(path) => path.matches(entry.path).then_some(true),
Self::Files(path) => (!entry.is_directory && path.matches(entry.path)).then_some(true),
Self::InDir { dir, inner } => {
dbg!(&inner);
if dbg!(inner.0.is_empty()) {
// this has no inner things, so we just check for this directory
// if this is a directory and it matches, then return true
(entry.is_directory && dir.matches(entry.path)).then_some(true)
} else {
// this has inner things, so, for every matching parent,
// get the relative path (by removing the parent), ...
let mut path = entry.path;
let mut paths = vec![];
while let Some(p) = path.parent() {
if dir.matches(p) {
if let Ok(p) = entry.path.strip_prefix(p) {
let mut e = *entry;
e.path = p;
paths.push(e);
} else {
eprintln!("[WARN] Parent {p:?} of path {:?} could not be removed... this is probably be a bug.", entry.path);
}
}
path = p;
}
// ... and then check if any match
inner.matches_of(|| paths.iter())
}
}
}
}
}
impl Ignore {
    /// Parses the text of an ignore-file.
    ///
    /// Every non-blank line consists of optional leading spaces followed by
    /// one of:
    ///
    /// * `#...` - a comment,
    /// * `except` - starts a nested block whose verdicts are inverted,
    /// * a two-character marker plus an argument separated by whitespace.
    ///   The first character selects the rule (`*` entries, `+` files,
    ///   `/` directory with an optional nested block), the second the kind
    ///   of match (`a` any, `=` exact path, `*` glob).
    ///
    /// Keywords and markers are case-insensitive. A nested block consists of
    /// the following lines that are indented deeper than the line opening it;
    /// all lines of one block must share the same indentation. Blank lines
    /// are skipped.
    ///
    /// # Errors
    ///
    /// Returns a human-readable message if a line is indented with anything
    /// but spaces, if the indentation within a block is inconsistent, or if a
    /// line does not start with a known keyword or marker.
    pub fn parse(config: &str) -> Result<Self, String> {
        Self::parsei(&mut config.lines().enumerate().peekable(), 0)
    }

    /// Parses one block of the config, leaving `lines` at the first line
    /// that belongs to an enclosing block.
    ///
    /// min_indent = parent_indent + 1, or 0 if no parent
    fn parsei(lines: &mut Peekable<Enumerate<Lines>>, min_indent: usize) -> Result<Self, String> {
        // indentation shared by every line of this block, fixed by its first line
        let mut block_indent: Option<usize> = None;
        let mut specifiers = vec![];
        // only peek here: the line may turn out to belong to a parent block
        while let Some(&(line_nr, full_line)) = lines.peek() {
            let line = full_line.trim_start();
            if line.is_empty() {
                // a blank line carries no indentation information, so it must
                // neither close a nested block nor be parsed as a rule
                lines.next();
                continue;
            }
            // check indentation
            let line_start_whitespace = &full_line[..full_line.len() - line.len()];
            if let Some(c) = line_start_whitespace.chars().find(|c| *c != ' ') {
                return Err(format!(
                    "Lines must start with any number of spaces, and no other whitespace character, but line {} contained the '{c:?}' character (Unicode {}).",
                    line_nr + 1, c.escape_unicode()));
            }
            let indent = line_start_whitespace.len();
            if indent < min_indent {
                // less indentation than this block needs, go up one layer of recursion
                break;
            }
            // the first line stores the block's indentation, later lines must agree
            let expected = *block_indent.get_or_insert(indent);
            if indent != expected {
                return Err(format!(
                    "[Line {}] Lines in one part of a config must all have the same indentation! (expected {expected} spaces, but found {indent})",
                    line_nr + 1
                ));
            }
            // -- indentation is ok, this line belongs to us --
            lines.next();
            if line.starts_with('#') {
                // comment, ignore
                continue;
            }
            let (line_type, args) = line.split_once(char::is_whitespace).unwrap_or((line, ""));
            let line_type = line_type.to_lowercase();
            let specifier = if line_type == "except" {
                Specifier::Except(Self::parsei(lines, indent + 1)?)
            } else {
                let mut marker = line_type.chars();
                match (marker.next().unwrap_or(' '), marker.next().unwrap_or(' ')) {
                    ('*', m) => Specifier::Entries(Match::parse_m(args, m, line_nr)?),
                    ('+', m) => Specifier::Files(Match::parse_m(args, m, line_nr)?),
                    ('/', m) => Specifier::InDir {
                        dir: Match::parse_m(args, m, line_nr)?,
                        inner: Self::parsei(lines, indent + 1)?,
                    },
                    _ => {
                        return Err(format!(
                            "Got '{line}' (Line {}), but expected one of [[*+/][a=*], except]",
                            line_nr + 1
                        ))
                    }
                }
            };
            specifiers.push(specifier);
        }
        Ok(Self(specifiers))
    }
}
impl Match {
fn parse_m(text: &str, m: char, line_nr: usize) -> Result<Self, String> {
Ok(match m {
'a' => Self::Any,
'=' => Self::Eq(text.into()),
'*' => Self::parse_glob(text, line_nr)?,
_ => {
return Err(format!(
"[Line {}] unknown match-type '{m}', expected one of [a=*]",
line_nr + 1
))
}
})
}
fn parse_glob(text: &str, _line_nr: usize) -> Result<Self, String> {
Ok(Self::Glob(text.to_owned()))
}
}

View File

@ -29,8 +29,8 @@ impl IndexFile {
false false
} }
} }
(Some(new), None) => !settings.dont_replace_if_timestamp_found, (Some(_), None) => !settings.dont_replace_if_timestamp_found,
(None, Some(old)) => settings.replace_if_timestamp_lost, (None, Some(_)) => settings.replace_if_timestamp_lost,
(None, None) => settings.replace_if_timestamp_unknown, (None, None) => settings.replace_if_timestamp_unknown,
}) })
} }

View File

@ -3,34 +3,85 @@ use std::process::exit;
use clap::Parser; use clap::Parser;
use crate::{ use crate::{
apply_indexchanges::apply_indexchanges, indexchanges::IndexChange, apply_indexchanges::apply_indexchanges, config::Ignore, indexchanges::IndexChange,
update_index::perform_index_diff, update_index::perform_index_diff,
}; };
mod apply_indexchanges; mod apply_indexchanges;
mod args; mod args;
mod config;
mod indexchanges; mod indexchanges;
mod indexfile; mod indexfile;
mod repr_file; mod repr_file;
mod update_index; mod update_index;
const EXIT_IGNORE_FAILED: u8 = 200;
const EXIT_DIFF_FAILED: u8 = 20;
const EXIT_APPLY_FAILED: u8 = 30;
fn main() { fn main() {
// get args // get args
let args = args::Args::parse(); let args = args::Args::parse();
// index diff // index diff
eprintln!("performing index diff..."); eprintln!("performing index diff...");
let source = &args.source; let cwd = match std::env::current_dir() {
let index = &args.index; Ok(v) => Some(v),
let ignore_subdirs = args Err(e) => {
.ignore eprintln!("[WARN] Couldn't get current directory (CWD): {e}");
.iter() None
.map(|path| path.strip_prefix(source).unwrap_or(path)) }
.collect(); };
let changes = match perform_index_diff(source, index, ignore_subdirs, &args.settings) { let source = if args.source.is_absolute() {
args.source.clone()
} else {
cwd.as_ref()
.expect("tried to use a relative path when there is no valid CWD")
.join(&args.source)
};
let index = if args.index.is_absolute() {
args.index.clone()
} else {
cwd.as_ref()
.expect("tried to use a relative path when there is no valid CWD")
.join(&args.index)
};
let target = args.target.as_ref().map(|target| {
if target.is_absolute() {
target.clone()
} else {
cwd.as_ref()
.expect("tried to use a relative path when there is no valid CWD")
.join(target)
}
});
let ignore = if let Some(path) = &args.ignore {
match std::fs::read_to_string(path) {
Ok(text) => match Ignore::parse(&text) {
Ok(config) => config,
Err(e) => {
eprintln!("Couldn't parse ignore-file {path:?}: {e}");
exit(EXIT_IGNORE_FAILED as _);
}
},
Err(e) => {
eprintln!("Couldn't load ignore-file {path:?}: {e}");
exit(EXIT_IGNORE_FAILED as _);
}
}
} else {
Ignore(vec![])
};
let changes = match perform_index_diff(
&source,
&index,
target.as_ref().map(|v| v.as_path()),
ignore,
&args.settings,
) {
Ok(c) => c, Ok(c) => c,
Err(e) => { Err(e) => {
eprintln!("Failed to generate index diff:\n {e}"); eprintln!("Failed to generate index diff:\n {e}");
exit(20); exit(EXIT_DIFF_FAILED as _);
} }
}; };
if changes.is_empty() { if changes.is_empty() {
@ -98,7 +149,7 @@ fn main() {
Ok(()) => {} Ok(()) => {}
Err(e) => { Err(e) => {
eprintln!("Failed to apply: {e}"); eprintln!("Failed to apply: {e}");
exit(30); exit(EXIT_APPLY_FAILED as _);
} }
} }
} }

View File

@ -2,7 +2,11 @@ use std::{collections::HashMap, fs, io, path::Path};
use clap::Args; use clap::Args;
use crate::{indexchanges::IndexChange, indexfile::IndexFile}; use crate::{
config::{FsEntry, Ignore, Match, Specifier},
indexchanges::IndexChange,
indexfile::IndexFile,
};
#[derive(Clone, Default, Args)] #[derive(Clone, Default, Args)]
pub struct Settings { pub struct Settings {
@ -26,20 +30,33 @@ pub struct Settings {
pub fn perform_index_diff<'a>( pub fn perform_index_diff<'a>(
source: &Path, source: &Path,
index: &'a Path, index: &'a Path,
mut ignore_paths: Vec<&'a Path>, target: Option<&'a Path>,
mut ignore: Ignore,
settings: &Settings, settings: &Settings,
) -> io::Result<Vec<IndexChange>> { ) -> io::Result<Vec<IndexChange>> {
let mut changes = Vec::new(); let mut changes = Vec::new();
if let Ok(inner_index) = index.strip_prefix(source) { if let Ok(inner_index) = index.strip_prefix(source) {
eprintln!("[info] source contains index, but index will not be part of the backup."); eprintln!("[info] source contains index at {inner_index:?}, but index will not be part of the backup.");
ignore_paths.push(inner_index); ignore.0.push(Specifier::InDir {
dir: Match::Eq(inner_index.to_owned()),
inner: Ignore(vec![]),
});
}
if let Some(target) = target {
if let Ok(inner_target) = target.strip_prefix(source) {
eprintln!("[info] source contains target at {inner_target:?}, but target will not be part of the backup.");
ignore.0.push(Specifier::InDir {
dir: Match::Eq(inner_target.to_owned()),
inner: Ignore(vec![]),
});
}
} }
rec( rec(
source.as_ref(), source.as_ref(),
Path::new(""), Path::new(""),
index, index,
&mut changes, &mut changes,
&ignore_paths, &ignore,
settings, settings,
)?; )?;
Ok(changes) Ok(changes)
@ -53,7 +70,7 @@ fn rec(
index_files: &Path, index_files: &Path,
// list of changes to be made // list of changes to be made
changes: &mut Vec<IndexChange>, changes: &mut Vec<IndexChange>,
ignore_paths: &Vec<&Path>, ignore: &Ignore,
settings: &Settings, settings: &Settings,
) -> Result<(), io::Error> { ) -> Result<(), io::Error> {
// used to find removals // used to find removals
@ -75,26 +92,24 @@ fn rec(
for entry in source_files { for entry in source_files {
let entry = entry?; let entry = entry?;
let rel_path = rel_path.join(entry.file_name()); let rel_path = rel_path.join(entry.file_name());
let metadata = entry.metadata()?;
// ignore entries // ignore entries
if ignore_paths.iter().any(|ii| &rel_path == ii) { let fs_entry = FsEntry {
path: &rel_path,
is_directory: metadata.is_dir(),
};
if ignore.matches_or_default(&fs_entry) {
continue; continue;
} }
let metadata = entry.metadata()?;
let in_index_and_is_dir = index_entries.remove(&entry.file_name()); let in_index_and_is_dir = index_entries.remove(&entry.file_name());
if metadata.is_dir() { if metadata.is_dir() {
if let Some(false) = in_index_and_is_dir { if let Some(false) = in_index_and_is_dir {
// is dir, but was file -> remove file // is dir, but was file -> remove file
changes.push(IndexChange::RemoveFile(rel_path.clone())); changes.push(IndexChange::RemoveFile(rel_path.clone()));
} }
rec( rec(source, &rel_path, index_files, changes, ignore, settings)?;
source,
&rel_path,
index_files,
changes,
ignore_paths,
settings,
)?;
} else { } else {
if let Some(true) = in_index_and_is_dir { if let Some(true) = in_index_and_is_dir {
// is file, but was dir -> remove dir // is file, but was dir -> remove dir