| Crates.io | fsindex |
| lib.rs | fsindex |
| version | 0.3.1 |
| created_at | 2025-12-06 03:25:50.050814+00 |
| updated_at | 2025-12-06 07:48:15.350229+00 |
| description | Fast, powerful filesystem indexing with .gitignore support and iterator-based API |
| homepage | https://github.com/xandwr/fsindex |
| repository | https://github.com/xandwr/fsindex |
| max_upload_size | |
| id | 1969572 |
| size | 125,811 |
Fast, powerful filesystem indexing with .gitignore support and an iterator-based API.
Features: parallel file processing via rayon; .gitignore support — respects .gitignore, .git/info/exclude, and the global gitignore; real-time file watching via notify. Add to your Cargo.toml:
[dependencies]
fsindex = "0.3"
use fsindex::FileIndexer;
let indexer = FileIndexer::new("./src");
for file in indexer.files() {
println!("{}: {} bytes", file.path.display(), file.metadata.size);
}
Use the builder pattern for custom configuration:
use fsindex::{FileIndexer, Config};
let config = Config::builder()
.respect_gitignore(true)
.include_hidden(false)
.max_depth(Some(10))
.extensions(vec!["rs", "toml"])
.follow_symlinks(false)
.parallel(true)
.build();
let indexer = FileIndexer::with_config("./", config);
for file in indexer.files() {
println!("{}", file.path.display());
}
| Option | Default | Description |
|---|---|---|
| respect_gitignore | true | Honor .gitignore files |
| include_hidden | false | Include hidden files/directories |
| max_depth | None | Maximum traversal depth (unlimited by default) |
| extensions | [] | Filter by file extensions (empty = all) |
| exclude_patterns | [] | Glob patterns to exclude |
| include_patterns | [] | Glob patterns to include |
| follow_symlinks | false | Follow symbolic links |
| read_contents | true | Read file contents into memory |
| max_content_size | 10MB | Maximum file size for content reading |
| custom_ignore_files | [] | Additional ignore files |
| parallel | true | Use parallel traversal |
| threads | 0 | Thread count (0 = auto) |
| parse_structure | false | Parse code structure (functions, classes, etc.) |
For large directories, use parallel file processing:
use fsindex::FileIndexer;
let indexer = FileIndexer::new("./");
let files = indexer.files_parallel(); // Collects and processes in parallel
for file in files {
if let Some(hash) = file.hash_hex() {
println!("{}: {}", file.path.display(), hash);
}
}
Monitor filesystem changes in real-time:
use fsindex::FileIndexer;
let indexer = FileIndexer::new("./src");
let watcher = indexer.watch().expect("Failed to create watcher");
for event in watcher.filtered_events() {
match event {
Ok(e) => println!("{:?}: {:?}", e.kind, e.path()),
Err(e) => eprintln!("Error: {}", e),
}
}
Files are automatically tagged with their detected programming language:
use fsindex::FileIndexer;
let indexer = FileIndexer::new("./src");
for file in indexer.files() {
if let Some(lang) = file.metadata.language {
println!("{}: {}", file.path.display(), lang);
}
}
Supports 50+ languages including Rust, Python, JavaScript, TypeScript, Go, C/C++, Java, and many more.
Each file includes an XXH3 hash for efficient change detection:
use fsindex::FileIndexer;
let indexer = FileIndexer::new("./");
for file in indexer.files() {
if let Some(hash) = file.hash {
println!("{}: {:016x}", file.path.display(), hash);
}
}
Use files_result() for explicit error handling:
use fsindex::FileIndexer;
let indexer = FileIndexer::new("./");
for result in indexer.files_result() {
match result {
Ok(file) => println!("{}", file.path.display()),
Err(e) => eprintln!("Error: {}", e),
}
}
Split file content into chunks suitable for embedding models with token limits:
use fsindex::FileIndexer;
let indexer = FileIndexer::new("./src");
for file in indexer.files() {
// Split into ~512 token chunks (using 4 chars ≈ 1 token)
if let Some(chunks) = file.chunks(512) {
for chunk in chunks {
println!("Chunk {}: {} bytes at offset {}",
chunk.index, chunk.content.len(), chunk.byte_offset);
}
}
}
For different tokenization ratios (e.g., code has more tokens per character):
// Use 2.5 chars/token for code
if let Some(chunks) = file.chunks_with_ratio(512, 2.5) {
// Process chunks...
}
Save and load index state to avoid re-scanning unchanged files:
use fsindex::FileIndexer;
use std::path::Path;
let indexer = FileIndexer::new("./src");
// Save current state
indexer.save_state(Path::new(".fsindex-state.json")).unwrap();
// Later, load and compare
let old_state = FileIndexer::load_state(Path::new(".fsindex-state.json")).unwrap();
let diff = indexer.diff_with_state(&old_state);
println!("Added: {} files", diff.added.len());
println!("Modified: {} files", diff.modified.len());
println!("Removed: {} files", diff.removed.len());
if diff.has_changes() {
// Only process changed files
for path in diff.modified.iter().chain(diff.added.iter()) {
println!("Changed: {}", path.display());
}
}
For very large files, stream content in chunks instead of loading entirely into memory:
use fsindex::{StreamExt, ContentStream};
use std::path::Path;
// Stream in 64KB chunks
let mut stream = Path::new("large_file.txt").stream_chunks(64 * 1024).unwrap();
for chunk in stream {
let chunk = chunk.unwrap();
println!("Read {} bytes at offset {}", chunk.data.len(), chunk.offset);
// Cumulative hash available at each chunk
if chunk.is_last {
println!("Final hash: {:016x}", chunk.cumulative_hash);
}
}
Stream line by line:
use fsindex::StreamExt;
use std::path::Path;
for line in Path::new("file.txt").stream_lines().unwrap() {
let line = line.unwrap();
println!("{}: {}", line.line_number, line.content);
}
Extract functions, classes, imports, and other symbols from source files:
use fsindex::{FileIndexer, Config, SymbolKind};
let config = Config::builder()
.extensions(vec!["rs", "py", "ts", "go"])
.parse_structure(true) // Enable structure parsing
.build();
let indexer = FileIndexer::with_config("./src", config);
for file in indexer.files() {
if let Some(structure) = &file.structure {
println!("\n{}:", file.path.display());
// List all functions
for func in structure.functions() {
let visibility = if func.is_public { "pub " } else { "" };
println!(" {}fn {} (line {})", visibility, func.name, func.line);
}
// List all types (structs, classes, enums, etc.)
for typ in structure.types() {
println!(" {} {} (line {})", typ.kind, typ.name, typ.line);
}
// List imports
for import in structure.imports() {
println!(" import: {}", import.name);
}
}
}
Supported languages for structure parsing include at least Rust, Python, TypeScript, and Go (as shown in the example above); see the crate documentation for the complete list.
MIT