ftzz

File Tree Fuzzer creates a pseudo-random directory hierarchy filled with some number of files.

APACHE-2.0 License

Downloads
11.4K
Stars
45
Committers
2

Bot releases are hidden (Show)

ftzz -

Published by SUPERCILEX over 1 year ago

The quality of directory shapes was dramatically improved.

Results

  • Per directory file count follows a much broader and non-normal distribution resulting in more varied directories.
  • Total file count now converges tightly on the true mean instead of skewing upwards in proportion to max depth (atypical parameters still result in some skew, but the distribution is at least roughly normal).
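|                     | Old   | New   |
|---------------------|-------|-------|
| Files per directory | image | image |
| Total file count    | image | image |

| Total file count | Old   | New   |
|------------------|-------|-------|
| Low file count   | image | image |
| High depth       | image | image |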

Appendix

Sample collection code:

Subject: [PATCH] Upgrade deps
---
Index: src/core/scheduler.rs
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/core/scheduler.rs b/src/core/scheduler.rs
--- a/src/core/scheduler.rs	(revision 6b7e88c4d05ed8fd247178bd9ec86440fa923d5f)
+++ b/src/core/scheduler.rs	(date 1679085314728)
@@ -11,6 +11,7 @@
     core::{
         files::GeneratorTaskOutcome,
         tasks::{QueueErrors, QueueOutcome, TaskGenerator},
+        TOTAL_FILE_COUNT,
     },
     generator::Error,
     utils::{with_dir_name, FastPathBuf},
@@ -165,6 +166,11 @@
         handle_task_result(task, &mut stats)?;
     }
 
+    {
+        use std::io::Write;
+        let mut count = TOTAL_FILE_COUNT.lock().unwrap();
+        writeln!(count, "{}", stats.files).unwrap();
+    }
     Ok(stats)
 }
 
Index: src/core/mod.rs
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/core/mod.rs b/src/core/mod.rs
--- a/src/core/mod.rs	(revision 6b7e88c4d05ed8fd247178bd9ec86440fa923d5f)
+++ b/src/core/mod.rs	(date 1679085426452)
@@ -1,3 +1,9 @@
+use std::{
+    cell::LazyCell,
+    fs::File,
+    sync::{LazyLock, Mutex},
+};
+
 pub use scheduler::{run, GeneratorStats};
 pub use tasks::{DynamicGenerator, GeneratorBytes, StaticGenerator};
 
@@ -5,3 +11,31 @@
 mod files;
 mod scheduler;
 mod tasks;
+
+static FILE_COUNT: LazyLock<Mutex<File>, fn() -> Mutex<File>> = LazyLock::new(|| {
+    Mutex::new(
+        File::options()
+            .create(true)
+            .append(true)
+            .open("file_count.samples")
+            .unwrap(),
+    )
+});
+static DIR_COUNT: LazyLock<Mutex<File>, fn() -> Mutex<File>> = LazyLock::new(|| {
+    Mutex::new(
+        File::options()
+            .create(true)
+            .append(true)
+            .open("dir_count.samples")
+            .unwrap(),
+    )
+});
+static TOTAL_FILE_COUNT: LazyLock<Mutex<File>, fn() -> Mutex<File>> = LazyLock::new(|| {
+    Mutex::new(
+        File::options()
+            .create(true)
+            .append(true)
+            .open("total_file_count.samples")
+            .unwrap(),
+    )
+});
Index: src/lib.rs
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/lib.rs b/src/lib.rs
--- a/src/lib.rs	(revision 6b7e88c4d05ed8fd247178bd9ec86440fa923d5f)
+++ b/src/lib.rs	(date 1679084880740)
@@ -3,6 +3,7 @@
 #![feature(let_chains)]
 #![feature(const_option)]
 #![feature(inline_const)]
+#![feature(once_cell)]
 #![allow(clippy::multiple_crate_versions)]
 #![allow(clippy::module_name_repetitions)]
 
Index: src/core/tasks.rs
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/core/tasks.rs b/src/core/tasks.rs
--- a/src/core/tasks.rs	(revision 6b7e88c4d05ed8fd247178bd9ec86440fa923d5f)
+++ b/src/core/tasks.rs	(date 1679085314852)
@@ -12,6 +12,7 @@
             PreDefinedGeneratedFileContents,
         },
         files::{create_files_and_dirs, GeneratorTaskOutcome, GeneratorTaskParams},
+        DIR_COUNT, FILE_COUNT,
     },
     utils::FastPathBuf,
 };
@@ -122,6 +123,16 @@
         } else {
             0
         };
+        {
+            use std::io::Write;
+            let mut count = FILE_COUNT.lock().unwrap();
+            writeln!(count, "{}", num_files).unwrap();
+        }
+        {
+            use std::io::Write;
+            let mut count = DIR_COUNT.lock().unwrap();
+            writeln!(count, "{}", num_dirs).unwrap();
+        }
 
         macro_rules! build_params {
             ($file_contents:expr) => {{

Collection:

cargo b --features dry_run
for d in [0, 5, 20] { for n in [10, 10_000, 1_000_000] { for i in 0..1000 { ./target/debug/ftzz g -n $n /tmp/foo -d $d --seed $i out> /dev/null }; mkdir $"($d)d_($n)n"; mv *.samples $"($d)d_($n)n/" } }

Plots:

!unzip samples.zip
import numpy as np
import matplotlib.pyplot as plt
import os

# Plot one histogram per samples file for every sample directory found in the
# current working directory. The collection step names those directories like
# "5d_10000n", which is why entries are filtered on a trailing 'n'.
sample_file_names = ['file_count.samples', 'total_file_count.samples']

for sample_dir in os.listdir():
    # Skip anything that is not a sample directory (e.g. a stray file whose
    # name happens to end in 'n').
    if not sample_dir.endswith('n') or not os.path.isdir(sample_dir):
        continue

    for sample_file_name in sample_file_names:
        sample_path = os.path.join(sample_dir, sample_file_name)

        # Load one integer sample per line; blank lines (such as a trailing
        # empty line) are ignored instead of crashing int().
        with open(sample_path, 'r') as f:
            samples = [int(line) for line in f if line.strip()]

        # Nothing to plot for an empty samples file.
        if not samples:
            continue

        mean = np.mean(samples)

        # Plot the histogram of the samples
        plt.hist(samples, bins=50, density=True, alpha=0.5)

        # Add a vertical line at the mean of the distribution
        plt.axvline(x=mean, color='red', linestyle='--')

        # Add labels and title to the plot
        plt.xlabel('Values')
        plt.ylabel('Frequency')
        plt.title(f'Distribution of samples from {sample_path}')

        # Show each file's plot on its own figure
        plt.show()
ftzz -

Published by SUPERCILEX almost 2 years ago

  • Added the fill-byte flag, which lets you fill files with a specific byte instead of pseudo-random bytes.
ftzz -

Published by SUPERCILEX about 2 years ago

Tweak help output to be slightly clearer.

ftzz -

Published by SUPERCILEX about 2 years ago

Re-release of 1.1.1 with fixed binary builds. Binaries are now statically linked.

ftzz -

Published by SUPERCILEX about 2 years ago

Improvements across internal infrastructure, testing, and error handling.