diff options
author | Nick Gerace <nickagerace@gmail.com> | 2022-01-14 22:12:21 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-01-14 22:12:21 -0500 |
commit | 8d650d2ae1caa9b832ec1caca94a1ec6cec2ff62 (patch) | |
tree | 8aa9e909e55ad0a40e8034c45de54eabf2da31ab | |
parent | 677acbe145299932a456bef049fe460334fa1296 (diff) | |
download | gfold-8d650d2ae1caa9b832ec1caca94a1ec6cec2ff62.zip |
Make target generation work with parallel iterators (#170)
- Remove Targets and use a recursive function instead
- Use nested rayon parallel iterators within recursive target generation
- Remove "target_gen" module since targets and reports are now generated
together
- Move "bench-loosely" logic to its own script
- Convert Reports to type alias for ease of use (e.g. not having to use
"foo.0" or "bar.1" for its fields)
Signed-off-by: Nick Gerace <nickagerace@gmail.com>
-rw-r--r-- | CHANGELOG.md | 6 | ||||
-rw-r--r-- | Cargo.lock | 4 | ||||
-rw-r--r-- | Makefile | 11 | ||||
-rwxr-xr-x | scripts/bench-loosely.sh | 22 | ||||
-rw-r--r-- | src/config.rs | 35 | ||||
-rw-r--r-- | src/display.rs | 6 | ||||
-rw-r--r-- | src/main.rs | 16 | ||||
-rw-r--r-- | src/report.rs | 130 | ||||
-rw-r--r-- | src/run.rs | 11 | ||||
-rw-r--r-- | src/target_gen.rs | 54 |
10 files changed, 150 insertions, 145 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 6561ef2..97b09e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## [Unreleased] -The latest version contains all changes. +<!-- The latest version contains all changes. --> + +### Changed + +- Major performance improvements due to moving from sequential target generation to nested, parallel iterators for target generation ### [3.0.0] - 2022-01-06 @@ -149,9 +149,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.3" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" +checksum = "418d37c8b1d42553c93648be529cb70f920d3baf8ef469b74b9638df426e0b4c" dependencies = [ "cfg-if", "libc", @@ -1,5 +1,4 @@ MAKEPATH := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST)))) -NEW := $(MAKEPATH)/target/release/gfold .DEFAULT_GOAL := prepare @@ -49,14 +48,10 @@ msrv: .PHONY: msrv bench-loosely: - @echo "=============================================================" - @time $(shell which gfold) ~/ - @echo "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -" - @time $(NEW) -i ~/ - @echo "=============================================================" + REPOPATH=$(MAKEPATH) $(MAKEPATH)/scripts/bench-loosely.sh .PHONY: bench-loosely -compare: +compare: release @du -h $(shell which gfold) - @du -h $(NEW) + @du -h $(MAKEPATH)/target/release/gfold .PHONY: compare diff --git a/scripts/bench-loosely.sh b/scripts/bench-loosely.sh new file mode 100755 index 0000000..820a72f --- /dev/null +++ b/scripts/bench-loosely.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +if [ "$REPOPATH" = "" ]; then + echo "must execute script via make target from repository root" + exit 1 +fi + +( cd $REPOPATH; cargo build --release ) + +OLD=$(which gfold) +NEW=$REPOPATH/target/release/gfold + +function run { + echo "=============================================================" + time $OLD $1 + echo "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -" + time $NEW -i $1 + echo "=============================================================" +} + +run "/" +run "$HOME/" +run "$HOME/src" diff --git a/src/config.rs b/src/config.rs index f074d3d..573e4e2 100644 --- a/src/config.rs +++ b/src/config.rs @@ -52,7 +52,7 @@ impl Config { // Within this method, we check if the config file is empty before deserializing it. Users // should be able to proceed with empty config files. If empty, then we fall back to the // "EntryConfig" default before conversion. - pub fn try_config() -> Result<Config> { + pub fn try_config() -> Result<Self> { let home = dirs::home_dir().ok_or(Error::HomeDirNotFound)?; let entry_config = match File::open(home.join(".config").join("gfold").join("gfold.json")) { Ok(o) => match o.metadata()?.len() { @@ -68,13 +68,13 @@ impl Config { EntryConfig::default() } }; - entry_config_to_config(&entry_config) + Self::from_entry_config(&entry_config) } // This method does not look for the config file and uses "EntryConfig"'s defaults instead. // It is best for testing use and when the user wishes to skip config file lookup. - pub fn new() -> Result<Config> { - entry_config_to_config(&EntryConfig::default()) + pub fn new() -> Result<Self> { + Self::from_entry_config(&EntryConfig::default()) } // This method prints the full config (merged with config file, as needed) as valid JSON. @@ -82,19 +82,18 @@ impl Config { println!("{}", serde_json::to_string_pretty(&self)?); Ok(()) } -} -// Internal conversion function for private "EntryConfig" objects to "Config" objects. -fn entry_config_to_config(entry_config: &EntryConfig) -> Result<Config> { - Ok(Config { - path: match &entry_config.path { - Some(s) => s.clone(), - None => env::current_dir()?.canonicalize()?, - }, - display_mode: match &entry_config.display_mode { - Some(s) => s.clone(), - None => DisplayMode::Standard, - }, - git_path: entry_config.git_path.clone(), - }) + fn from_entry_config(entry_config: &EntryConfig) -> Result<Self> { + Ok(Config { + path: match &entry_config.path { + Some(s) => s.clone(), + None => env::current_dir()?.canonicalize()?, + }, + display_mode: match &entry_config.display_mode { + Some(s) => s.clone(), + None => DisplayMode::Standard, + }, + git_path: entry_config.git_path.clone(), + }) + } } diff --git a/src/display.rs b/src/display.rs index 0b1fb53..8371b0b 100644 --- a/src/display.rs +++ b/src/display.rs @@ -8,9 +8,9 @@ use std::path::Path; const PAD: usize = 2; pub fn classic(reports: &Reports) -> Result<()> { - let length = reports.0.keys().len(); + let length = reports.keys().len(); let mut first = true; - for group in &reports.0 { + for group in reports { // FIXME: make group title matching less cumbersome. if length > 1 { match first { @@ -58,7 +58,7 @@ pub fn classic(reports: &Reports) -> Result<()> { pub fn standard(reports: &Reports) -> Result<()> { let mut all_reports = Vec::new(); - for grouped_report in &reports.0 { + for grouped_report in reports { all_reports.append(&mut grouped_report.1.clone()); } all_reports.sort_by(|a, b| a.path.cmp(&b.path)); diff --git a/src/main.rs b/src/main.rs index 07a1f55..daf4f03 100644 --- a/src/main.rs +++ b/src/main.rs @@ -10,7 +10,6 @@ mod logging; mod report; mod run; mod status; -mod target_gen; fn main() -> Result<()> { cli::parse() @@ -25,28 +24,25 @@ mod tests { #[test] fn current_directory() { - let config = Config::new().unwrap(); - + let config = Config::new().expect("could not create new Config"); assert!(run::run(&config).is_ok()); } #[test] fn parent_directory() { - let mut config = Config::new().unwrap(); - + let mut config = Config::new().expect("could not create new Config"); let mut parent = env::current_dir().expect("failed to get current working directory"); parent.pop(); config.path = parent; - assert!(run::run(&config).is_ok()); } #[test] fn home_directory() { - let mut config = Config::new().unwrap(); - - config.path = dirs::home_dir().ok_or(Error::HomeDirNotFound).unwrap(); - + let mut config = Config::new().expect("could not create new Config"); + config.path = dirs::home_dir() + .ok_or(Error::HomeDirNotFound) + .expect("could not find home directory"); assert!(run::run(&config).is_ok()); } } diff --git a/src/report.rs b/src/report.rs index df09b3e..c984546 100644 --- a/src/report.rs +++ b/src/report.rs @@ -2,19 +2,23 @@ use crate::config::DisplayMode; use crate::consts::NONE; use crate::error::Error; use crate::status::Status; -use crate::target_gen::Targets; use anyhow::Result; -use log::debug; +use log::{debug, error, warn}; use rayon::prelude::*; use std::collections::BTreeMap; +use std::fs::DirEntry; use std::path::{Path, PathBuf}; use std::process::Command; +use std::{fs, io}; #[cfg(target_os = "windows")] const NEWLINE: &str = "\r\n"; #[cfg(not(target_os = "windows"))] const NEWLINE: &str = "\n"; +// Use a BTreeMap over a HashMap for sorted keys. +pub type Reports = BTreeMap<String, Vec<Report>>; + #[derive(Clone, Debug)] pub struct Report { pub path: String, @@ -28,48 +32,39 @@ pub struct Report { pub email: Option<String>, } -pub struct Reports(pub BTreeMap<String, Vec<Report>>); - -impl Reports { - pub fn new( - targets: Targets, - display_mode: &DisplayMode, - git_path: &Option<PathBuf>, - ) -> Result<Reports> { - let include_email = match display_mode { - DisplayMode::Standard => true, - DisplayMode::Classic => false, - }; - let git_path = match git_path { - Some(s) => s.canonicalize()?, - None => Path::new("git").to_path_buf(), - }; - - let unprocessed = targets - .0 - .into_par_iter() - .map(|path| generate_report(&path, &git_path, include_email)) - .collect::<Vec<Result<Report>>>(); - - // Use a BTreeMap over a HashMap for sorted keys. - let mut processed: Reports = Reports(BTreeMap::new()); - for wrapped_report in unprocessed { - match wrapped_report { - Ok(o) => { - let report = o.clone(); - if let Some(mut s) = processed - .0 - .insert(report.clone().parent, vec![report.clone()]) - { - s.push(report.clone()); - processed.0.insert(report.parent, s); - } +pub fn generate_reports( + path: &Path, + display_mode: &DisplayMode, + git_path: &Option<PathBuf>, +) -> Result<Reports> { + let include_email = match display_mode { + DisplayMode::Standard => true, + DisplayMode::Classic => false, + }; + let git_path = match git_path { + Some(s) => s.canonicalize()?, + None => Path::new("git").to_path_buf(), + }; + + let unprocessed = recursive_target_gen(path)? + .into_par_iter() + .map(|path| generate_report(&path, &git_path, include_email)) + .collect::<Vec<Result<Report>>>(); + + let mut processed = Reports::new(); + for wrapped_report in unprocessed { + match wrapped_report { + Ok(o) => { + let report = o.clone(); + if let Some(mut s) = processed.insert(report.clone().parent, vec![report.clone()]) { + s.push(report.clone()); + processed.insert(report.parent, s); } - Err(e) => return Err(e), } + Err(e) => return Err(e), } - Ok(processed) } + Ok(processed) } fn generate_report(repo_path: &Path, git_path: &Path, include_email: bool) -> Result<Report> { @@ -190,3 +185,58 @@ impl GitShim { Ok(String::from_utf8(output.stdout)?) } } + +fn recursive_target_gen(path: &Path) -> Result<Vec<PathBuf>> { + let mut results = vec![]; + let entries: Vec<DirEntry> = match fs::read_dir(&path) { + Ok(o) => o.filter_map(|r| r.ok()).collect(), + Err(e) if e.kind() == io::ErrorKind::PermissionDenied => { + warn!("{}: {}", e, &path.display()); + return Ok(results); + } + Err(e) => { + error!("{}: {}", e, &path.display()); + return Ok(results); + } + }; + + let targets = entries + .par_iter() + .map(|entry| { + if entry.file_type()?.is_dir() && !is_hidden(entry) { + return match has_git_subdir(entry) { + true => Ok(Some(vec![entry.path()])), + false => Ok(Some(recursive_target_gen(&entry.path())?)), + }; + } + Ok(None) + }) + .collect::<Vec<Result<Option<Vec<PathBuf>>>>>(); + + for target in targets { + match target { + Ok(o) => { + if let Some(mut s) = o { + if !s.is_empty() { + results.append(&mut s); + } + } + } + Err(e) => return Err(e), + } + } + Ok(results) +} + +fn is_hidden(entry: &DirEntry) -> bool { + entry + .file_name() + .to_str() + .map(|s| s.starts_with('.')) + .unwrap_or(false) +} + +fn has_git_subdir(entry: &DirEntry) -> bool { + let suspect = entry.path().join(".git"); + suspect.exists() && suspect.is_dir() +} @@ -1,19 +1,12 @@ use crate::config::{Config, DisplayMode}; -use crate::display; -use crate::report::Reports; -use crate::target_gen::Targets; +use crate::{display, report}; use anyhow::Result; // This function is the primary entrypoint for the crate. It takes a given config and performs // the end-to-end workflow using it. At this point, all CLI and config file options should be // set, merged, ignored, etc. pub fn run(config: &Config) -> Result<()> { - let reports = Reports::new( - Targets::new(&config.path)?, - &config.display_mode, - &config.git_path, - )?; - + let reports = report::generate_reports(&config.path, &config.display_mode, &config.git_path)?; match config.display_mode { DisplayMode::Standard => display::standard(&reports), DisplayMode::Classic => display::classic(&reports), diff --git a/src/target_gen.rs b/src/target_gen.rs deleted file mode 100644 index 42d3614..0000000 --- a/src/target_gen.rs +++ /dev/null @@ -1,54 +0,0 @@ -use anyhow::Result; -use log::{error, warn}; -use std::fs::DirEntry; -use std::path::{Path, PathBuf}; -use std::{fs, io}; - -pub struct Targets(pub Vec<PathBuf>); - -impl Targets { - pub fn new(path: &Path) -> Result<Targets> { - let mut targets = Targets(Vec::new()); - targets.generate_targets(path)?; - Ok(targets) - } - - fn generate_targets(&mut self, path: &Path) -> Result<()> { - let entries = match fs::read_dir(&path) { - Ok(o) => o, - Err(e) if e.kind() == io::ErrorKind::PermissionDenied => { - warn!("{}: {}", e, &path.display()); - return Ok(()); - } - Err(e) => { - error!("{}: {}", e, &path.display()); - return Ok(()); - } - }; - for wrapped_entry in entries { - let entry = wrapped_entry?; - if entry.file_type()?.is_dir() && !is_hidden(&entry) { - match has_git_subdir(&entry) { - true => self.0.push(entry.path()), - false => { - self.generate_targets(&entry.path())?; - } - } - } - } - Ok(()) - } -} - -fn is_hidden(entry: &DirEntry) -> bool { - entry - .file_name() - .to_str() - .map(|s| s.starts_with('.')) - .unwrap_or(false) -} - -fn has_git_subdir(entry: &DirEntry) -> bool { - let suspect = entry.path().join(".git"); - suspect.exists() && suspect.is_dir() -} |