summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Gerace <nickagerace@gmail.com>2022-01-14 22:12:21 -0500
committerGitHub <noreply@github.com>2022-01-14 22:12:21 -0500
commit8d650d2ae1caa9b832ec1caca94a1ec6cec2ff62 (patch)
tree8aa9e909e55ad0a40e8034c45de54eabf2da31ab
parent677acbe145299932a456bef049fe460334fa1296 (diff)
downloadgfold-8d650d2ae1caa9b832ec1caca94a1ec6cec2ff62.zip
Make target generation work with parallel iterators (#170)
- Remove Targets and use a recursive function instead - Use nested rayon parallel iterators within recursive target generation - Remove "target_gen" module since targets and reports are now generated together - Move "bench-loosely" logic to its own script - Convert Reports to type alias for ease of use (e.g. not having to use "foo.0" or "bar.1" for its fields) Signed-off-by: Nick Gerace <nickagerace@gmail.com>
-rw-r--r--CHANGELOG.md6
-rw-r--r--Cargo.lock4
-rw-r--r--Makefile11
-rwxr-xr-xscripts/bench-loosely.sh22
-rw-r--r--src/config.rs35
-rw-r--r--src/display.rs6
-rw-r--r--src/main.rs16
-rw-r--r--src/report.rs130
-rw-r--r--src/run.rs11
-rw-r--r--src/target_gen.rs54
10 files changed, 150 insertions, 145 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6561ef2..97b09e6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,7 +7,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
## [Unreleased]
-The latest version contains all changes.
+<!-- The latest version contains all changes. -->
+
+### Changed
+
+- Major performance improvements due to moving from sequential target generation to nested, parallel iterators for target generation
### [3.0.0] - 2022-01-06
diff --git a/Cargo.lock b/Cargo.lock
index 58e5763..4920415 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -149,9 +149,9 @@ dependencies = [
[[package]]
name = "getrandom"
-version = "0.2.3"
+version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753"
+checksum = "418d37c8b1d42553c93648be529cb70f920d3baf8ef469b74b9638df426e0b4c"
dependencies = [
"cfg-if",
"libc",
diff --git a/Makefile b/Makefile
index e2830a5..c56bbc7 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,4 @@
MAKEPATH := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
-NEW := $(MAKEPATH)/target/release/gfold
.DEFAULT_GOAL := prepare
@@ -49,14 +48,10 @@ msrv:
.PHONY: msrv
bench-loosely:
- @echo "============================================================="
- @time $(shell which gfold) ~/
- @echo "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -"
- @time $(NEW) -i ~/
- @echo "============================================================="
+ REPOPATH=$(MAKEPATH) $(MAKEPATH)/scripts/bench-loosely.sh
.PHONY: bench-loosely
-compare:
+compare: release
@du -h $(shell which gfold)
- @du -h $(NEW)
+ @du -h $(MAKEPATH)/target/release/gfold
.PHONY: compare
diff --git a/scripts/bench-loosely.sh b/scripts/bench-loosely.sh
new file mode 100755
index 0000000..820a72f
--- /dev/null
+++ b/scripts/bench-loosely.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+if [ "$REPOPATH" = "" ]; then
+ echo "must execute script via make target from repository root"
+ exit 1
+fi
+
+( cd $REPOPATH; cargo build --release )
+
+OLD=$(which gfold)
+NEW=$REPOPATH/target/release/gfold
+
+function run {
+ echo "============================================================="
+ time $OLD $1
+ echo "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -"
+ time $NEW -i $1
+ echo "============================================================="
+}
+
+run "/"
+run "$HOME/"
+run "$HOME/src"
diff --git a/src/config.rs b/src/config.rs
index f074d3d..573e4e2 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -52,7 +52,7 @@ impl Config {
// Within this method, we check if the config file is empty before deserializing it. Users
// should be able to proceed with empty config files. If empty, then we fall back to the
// "EntryConfig" default before conversion.
- pub fn try_config() -> Result<Config> {
+ pub fn try_config() -> Result<Self> {
let home = dirs::home_dir().ok_or(Error::HomeDirNotFound)?;
let entry_config = match File::open(home.join(".config").join("gfold").join("gfold.json")) {
Ok(o) => match o.metadata()?.len() {
@@ -68,13 +68,13 @@ impl Config {
EntryConfig::default()
}
};
- entry_config_to_config(&entry_config)
+ Self::from_entry_config(&entry_config)
}
// This method does not look for the config file and uses "EntryConfig"'s defaults instead.
// It is best for testing use and when the user wishes to skip config file lookup.
- pub fn new() -> Result<Config> {
- entry_config_to_config(&EntryConfig::default())
+ pub fn new() -> Result<Self> {
+ Self::from_entry_config(&EntryConfig::default())
}
// This method prints the full config (merged with config file, as needed) as valid JSON.
@@ -82,19 +82,18 @@ impl Config {
println!("{}", serde_json::to_string_pretty(&self)?);
Ok(())
}
-}
-// Internal conversion function for private "EntryConfig" objects to "Config" objects.
-fn entry_config_to_config(entry_config: &EntryConfig) -> Result<Config> {
- Ok(Config {
- path: match &entry_config.path {
- Some(s) => s.clone(),
- None => env::current_dir()?.canonicalize()?,
- },
- display_mode: match &entry_config.display_mode {
- Some(s) => s.clone(),
- None => DisplayMode::Standard,
- },
- git_path: entry_config.git_path.clone(),
- })
+ fn from_entry_config(entry_config: &EntryConfig) -> Result<Self> {
+ Ok(Config {
+ path: match &entry_config.path {
+ Some(s) => s.clone(),
+ None => env::current_dir()?.canonicalize()?,
+ },
+ display_mode: match &entry_config.display_mode {
+ Some(s) => s.clone(),
+ None => DisplayMode::Standard,
+ },
+ git_path: entry_config.git_path.clone(),
+ })
+ }
}
diff --git a/src/display.rs b/src/display.rs
index 0b1fb53..8371b0b 100644
--- a/src/display.rs
+++ b/src/display.rs
@@ -8,9 +8,9 @@ use std::path::Path;
const PAD: usize = 2;
pub fn classic(reports: &Reports) -> Result<()> {
- let length = reports.0.keys().len();
+ let length = reports.keys().len();
let mut first = true;
- for group in &reports.0 {
+ for group in reports {
// FIXME: make group title matching less cumbersome.
if length > 1 {
match first {
@@ -58,7 +58,7 @@ pub fn classic(reports: &Reports) -> Result<()> {
pub fn standard(reports: &Reports) -> Result<()> {
let mut all_reports = Vec::new();
- for grouped_report in &reports.0 {
+ for grouped_report in reports {
all_reports.append(&mut grouped_report.1.clone());
}
all_reports.sort_by(|a, b| a.path.cmp(&b.path));
diff --git a/src/main.rs b/src/main.rs
index 07a1f55..daf4f03 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -10,7 +10,6 @@ mod logging;
mod report;
mod run;
mod status;
-mod target_gen;
fn main() -> Result<()> {
cli::parse()
@@ -25,28 +24,25 @@ mod tests {
#[test]
fn current_directory() {
- let config = Config::new().unwrap();
-
+ let config = Config::new().expect("could not create new Config");
assert!(run::run(&config).is_ok());
}
#[test]
fn parent_directory() {
- let mut config = Config::new().unwrap();
-
+ let mut config = Config::new().expect("could not create new Config");
let mut parent = env::current_dir().expect("failed to get current working directory");
parent.pop();
config.path = parent;
-
assert!(run::run(&config).is_ok());
}
#[test]
fn home_directory() {
- let mut config = Config::new().unwrap();
-
- config.path = dirs::home_dir().ok_or(Error::HomeDirNotFound).unwrap();
-
+ let mut config = Config::new().expect("could not create new Config");
+ config.path = dirs::home_dir()
+ .ok_or(Error::HomeDirNotFound)
+ .expect("could not find home directory");
assert!(run::run(&config).is_ok());
}
}
diff --git a/src/report.rs b/src/report.rs
index df09b3e..c984546 100644
--- a/src/report.rs
+++ b/src/report.rs
@@ -2,19 +2,23 @@ use crate::config::DisplayMode;
use crate::consts::NONE;
use crate::error::Error;
use crate::status::Status;
-use crate::target_gen::Targets;
use anyhow::Result;
-use log::debug;
+use log::{debug, error, warn};
use rayon::prelude::*;
use std::collections::BTreeMap;
+use std::fs::DirEntry;
use std::path::{Path, PathBuf};
use std::process::Command;
+use std::{fs, io};
#[cfg(target_os = "windows")]
const NEWLINE: &str = "\r\n";
#[cfg(not(target_os = "windows"))]
const NEWLINE: &str = "\n";
+// Use a BTreeMap over a HashMap for sorted keys.
+pub type Reports = BTreeMap<String, Vec<Report>>;
+
#[derive(Clone, Debug)]
pub struct Report {
pub path: String,
@@ -28,48 +32,39 @@ pub struct Report {
pub email: Option<String>,
}
-pub struct Reports(pub BTreeMap<String, Vec<Report>>);
-
-impl Reports {
- pub fn new(
- targets: Targets,
- display_mode: &DisplayMode,
- git_path: &Option<PathBuf>,
- ) -> Result<Reports> {
- let include_email = match display_mode {
- DisplayMode::Standard => true,
- DisplayMode::Classic => false,
- };
- let git_path = match git_path {
- Some(s) => s.canonicalize()?,
- None => Path::new("git").to_path_buf(),
- };
-
- let unprocessed = targets
- .0
- .into_par_iter()
- .map(|path| generate_report(&path, &git_path, include_email))
- .collect::<Vec<Result<Report>>>();
-
- // Use a BTreeMap over a HashMap for sorted keys.
- let mut processed: Reports = Reports(BTreeMap::new());
- for wrapped_report in unprocessed {
- match wrapped_report {
- Ok(o) => {
- let report = o.clone();
- if let Some(mut s) = processed
- .0
- .insert(report.clone().parent, vec![report.clone()])
- {
- s.push(report.clone());
- processed.0.insert(report.parent, s);
- }
+pub fn generate_reports(
+ path: &Path,
+ display_mode: &DisplayMode,
+ git_path: &Option<PathBuf>,
+) -> Result<Reports> {
+ let include_email = match display_mode {
+ DisplayMode::Standard => true,
+ DisplayMode::Classic => false,
+ };
+ let git_path = match git_path {
+ Some(s) => s.canonicalize()?,
+ None => Path::new("git").to_path_buf(),
+ };
+
+ let unprocessed = recursive_target_gen(path)?
+ .into_par_iter()
+ .map(|path| generate_report(&path, &git_path, include_email))
+ .collect::<Vec<Result<Report>>>();
+
+ let mut processed = Reports::new();
+ for wrapped_report in unprocessed {
+ match wrapped_report {
+ Ok(o) => {
+ let report = o.clone();
+ if let Some(mut s) = processed.insert(report.clone().parent, vec![report.clone()]) {
+ s.push(report.clone());
+ processed.insert(report.parent, s);
}
- Err(e) => return Err(e),
}
+ Err(e) => return Err(e),
}
- Ok(processed)
}
+ Ok(processed)
}
fn generate_report(repo_path: &Path, git_path: &Path, include_email: bool) -> Result<Report> {
@@ -190,3 +185,58 @@ impl GitShim {
Ok(String::from_utf8(output.stdout)?)
}
}
+
+fn recursive_target_gen(path: &Path) -> Result<Vec<PathBuf>> {
+ let mut results = vec![];
+ let entries: Vec<DirEntry> = match fs::read_dir(&path) {
+ Ok(o) => o.filter_map(|r| r.ok()).collect(),
+ Err(e) if e.kind() == io::ErrorKind::PermissionDenied => {
+ warn!("{}: {}", e, &path.display());
+ return Ok(results);
+ }
+ Err(e) => {
+ error!("{}: {}", e, &path.display());
+ return Ok(results);
+ }
+ };
+
+ let targets = entries
+ .par_iter()
+ .map(|entry| {
+ if entry.file_type()?.is_dir() && !is_hidden(entry) {
+ return match has_git_subdir(entry) {
+ true => Ok(Some(vec![entry.path()])),
+ false => Ok(Some(recursive_target_gen(&entry.path())?)),
+ };
+ }
+ Ok(None)
+ })
+ .collect::<Vec<Result<Option<Vec<PathBuf>>>>>();
+
+ for target in targets {
+ match target {
+ Ok(o) => {
+ if let Some(mut s) = o {
+ if !s.is_empty() {
+ results.append(&mut s);
+ }
+ }
+ }
+ Err(e) => return Err(e),
+ }
+ }
+ Ok(results)
+}
+
+fn is_hidden(entry: &DirEntry) -> bool {
+ entry
+ .file_name()
+ .to_str()
+ .map(|s| s.starts_with('.'))
+ .unwrap_or(false)
+}
+
+fn has_git_subdir(entry: &DirEntry) -> bool {
+ let suspect = entry.path().join(".git");
+ suspect.exists() && suspect.is_dir()
+}
diff --git a/src/run.rs b/src/run.rs
index 0741ff0..e3a74ba 100644
--- a/src/run.rs
+++ b/src/run.rs
@@ -1,19 +1,12 @@
use crate::config::{Config, DisplayMode};
-use crate::display;
-use crate::report::Reports;
-use crate::target_gen::Targets;
+use crate::{display, report};
use anyhow::Result;
// This function is the primary entrypoint for the crate. It takes a given config and performs
// the end-to-end workflow using it. At this point, all CLI and config file options should be
// set, merged, ignored, etc.
pub fn run(config: &Config) -> Result<()> {
- let reports = Reports::new(
- Targets::new(&config.path)?,
- &config.display_mode,
- &config.git_path,
- )?;
-
+ let reports = report::generate_reports(&config.path, &config.display_mode, &config.git_path)?;
match config.display_mode {
DisplayMode::Standard => display::standard(&reports),
DisplayMode::Classic => display::classic(&reports),
diff --git a/src/target_gen.rs b/src/target_gen.rs
deleted file mode 100644
index 42d3614..0000000
--- a/src/target_gen.rs
+++ /dev/null
@@ -1,54 +0,0 @@
-use anyhow::Result;
-use log::{error, warn};
-use std::fs::DirEntry;
-use std::path::{Path, PathBuf};
-use std::{fs, io};
-
-pub struct Targets(pub Vec<PathBuf>);
-
-impl Targets {
- pub fn new(path: &Path) -> Result<Targets> {
- let mut targets = Targets(Vec::new());
- targets.generate_targets(path)?;
- Ok(targets)
- }
-
- fn generate_targets(&mut self, path: &Path) -> Result<()> {
- let entries = match fs::read_dir(&path) {
- Ok(o) => o,
- Err(e) if e.kind() == io::ErrorKind::PermissionDenied => {
- warn!("{}: {}", e, &path.display());
- return Ok(());
- }
- Err(e) => {
- error!("{}: {}", e, &path.display());
- return Ok(());
- }
- };
- for wrapped_entry in entries {
- let entry = wrapped_entry?;
- if entry.file_type()?.is_dir() && !is_hidden(&entry) {
- match has_git_subdir(&entry) {
- true => self.0.push(entry.path()),
- false => {
- self.generate_targets(&entry.path())?;
- }
- }
- }
- }
- Ok(())
- }
-}
-
-fn is_hidden(entry: &DirEntry) -> bool {
- entry
- .file_name()
- .to_str()
- .map(|s| s.starts_with('.'))
- .unwrap_or(false)
-}
-
-fn has_git_subdir(entry: &DirEntry) -> bool {
- let suspect = entry.path().join(".git");
- suspect.exists() && suspect.is_dir()
-}