tellaro-query-language

Crates.io: tellaro-query-language
lib.rs: tellaro-query-language
version: 0.2.8
created_at: 2025-10-23 21:43:56.516208+00
updated_at: 2026-01-21 14:10:16.016844+00
description: A flexible, human-friendly query language for searching and filtering structured data
homepage: https://github.com/tellaro/tellaro-query-language
repository: https://github.com/tellaro/tellaro-query-language
size: 490,739
owner: JustinHendersonSMAPPER

documentation

https://docs.rs/tellaro-query-language

README

Tellaro Query Language (TQL) - Rust

Crates.io | Documentation | License: Source Available

A blazing-fast, human-friendly query language for searching and filtering structured data in Rust.

TQL provides an intuitive SQL-like syntax for querying JSON, JSONL, CSV files, and OpenSearch indices with:

  • 300x faster than Python for large file processing
  • First-class file support with CLI and programmatic API
  • OpenSearch integration with automatic DSL translation
  • 25+ field mutators for data transformation (string, encoding, DNS, GeoIP, network)
  • Statistical aggregations for data analysis

Quick Example

use tellaro_query_language::Tql;
use serde_json::json;

let tql = Tql::new();
let records = vec![
    json!({"name": "Alice", "age": 30, "city": "NYC"}),
    json!({"name": "Bob", "age": 25, "city": "LA"}),
];

// Simple query
let results = tql.query(&records, "age > 27").unwrap();
assert_eq!(results.len(), 1);

// With field mutators
let results = tql.query(&records, "name | lowercase = 'alice'").unwrap();

🚀 Quick Start

Installation

Add this to your Cargo.toml:

[dependencies]
tellaro-query-language = "0.1"
serde_json = "1.0"

# Optional: Enable OpenSearch backend
# tellaro-query-language = { version = "0.1", features = ["opensearch"] }

CLI Installation

Install the high-performance command-line tool:

cargo install tellaro-query-language

# Query files directly
tql 'status = "active"' users.json
tql 'age > 25 AND city = "NYC"' data.jsonl

# Statistical aggregations
tql '| stats count() by status' events.jsonl
tql 'status = 200 | stats avg(response_time) by endpoint' logs.jsonl

📁 Query Files (First-Class Support)

CLI Usage

TQL treats files as first-class data sources:

# Query JSON/JSONL files
tql 'status = "active"' users.json
tql 'age > 25 AND city = "NYC"' data.jsonl

# Query CSV files (auto-detects headers)
tql 'price > 100 AND category = "electronics"' products.csv

# Statistical aggregations
tql '| stats count() by status' events.jsonl
tql 'status = 200 | stats average(response_time) by endpoint' logs.jsonl

# Process folders recursively
tql 'level = "ERROR"' logs/ --pattern "*.jsonl" --recursive

# Stream data from stdin
cat large-file.jsonl | tql 'score > 90'

# Output formats
tql 'age > 30' users.json --output results.json   # JSON
tql 'age > 30' users.json --output results.jsonl  # JSONL
tql 'age > 30' users.json                         # Table (console)

Performance: Streaming processes a 50MB file in ~200ms without loading it fully into memory.

Programmatic File Queries

use tellaro_query_language::Tql;
use std::fs::File;
use std::io::{BufRead, BufReader};
use serde_json::Value;

let tql = Tql::new();

// Read and query JSON file
let file = File::open("data.json")?;
let reader = BufReader::new(file);
let records: Vec<Value> = serde_json::from_reader(reader)?;
let results = tql.query(&records, "status = 'active' AND age > 25")?;

// Stream JSONL for large files
let file = File::open("large.jsonl")?;
let reader = BufReader::new(file);
for line in reader.lines() {
    let record: Value = serde_json::from_str(&line?)?;
    if tql.matches(&record, "level = 'ERROR'")? {
        println!("Error found: {}", record);
    }
}

🗄️ OpenSearch Integration

TQL seamlessly integrates with OpenSearch/Elasticsearch:

Automatic DSL Translation

use tellaro_query_language::{Tql, opensearch::{OpenSearchClient, OpenSearchConfig, QueryBuilder}};

// Configure OpenSearch (reads from environment)
std::env::set_var("OPENSEARCH_HOSTS", "http://localhost:9200");
std::env::set_var("OPENSEARCH_USERNAME", "admin");
std::env::set_var("OPENSEARCH_PASSWORD", "admin");

// Create client
let config = OpenSearchConfig::from_env()?;
let client = OpenSearchClient::new(config)?;

// Parse TQL query
let tql = Tql::new();
let ast = tql.parse("age > 25 AND status = 'active'")?;

// Build OpenSearch DSL
let builder = QueryBuilder::new(None);
let opensearch_query = builder.build_query(&ast)?;

// Execute search
let response = client.client()
    .search(opensearch::SearchParts::Index(&["users"]))
    .body(opensearch_query)
    .send()
    .await?;

TQL → OpenSearch Query DSL

TQL automatically translates to optimized OpenSearch queries:

| TQL Operator      | OpenSearch Query         | Example                          |
|-------------------|--------------------------|----------------------------------|
| eq, =             | term or match            | status = "active"                |
| ne, !=            | bool + must_not          | status != "deleted"              |
| gt, gte, lt, lte  | range                    | age > 25                         |
| contains          | wildcard or match_phrase | email contains "@example.com"    |
| startswith        | prefix                   | name startswith "John"           |
| endswith          | wildcard                 | filename endswith ".pdf"         |
| matches (regexp)  | regexp                   | email matches "^\\w+@\\w+"       |
| in                | terms                    | status in ["active", "pending"]  |
| between           | range with gte + lte     | age between [18, 65]             |
| cidr              | IP range matching        | ip cidr "192.168.0.0/16"         |
| AND               | bool + must              | age > 25 AND city = "NYC"        |
| OR                | bool + should            | city = "NYC" OR city = "LA"      |
| NOT               | bool + must_not          | NOT status = "deleted"           |
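For orientation, here is a minimal sketch of the kind of DSL the table above implies for the query age > 25 AND city = "NYC". This is illustrative only; the exact JSON produced by QueryBuilder::build_query may differ in structure and in how fields are mapped (e.g. .keyword selection).

use serde_json::json;

// Illustrative shape only -- not necessarily the exact output of QueryBuilder.
let expected_shape = json!({
    "query": {
        "bool": {
            "must": [
                { "range": { "age": { "gt": 25 } } },
                { "term": { "city": "NYC" } }
            ]
        }
    }
});
println!("{}", serde_json::to_string_pretty(&expected_shape).unwrap());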

Field Mapping Intelligence

use tellaro_query_language::opensearch::FieldMappings;

// Get mappings from OpenSearch
let mappings_response = client.client()
    .indices()
    .get_mapping()
    .index(&["users"])
    .send()
    .await?;

let mappings = FieldMappings::from_opensearch_response(
    mappings_response.json().await?
)?;

// Use mappings for intelligent query generation
let builder = QueryBuilder::new(Some(mappings));
let query = builder.build_query(&ast)?;
// Automatically selects .keyword for exact matches on text fields

📖 Syntax Guide

Comparison Operators

// Equality
"status = 'active'"           // Exact match (alias: eq)
"status != 'inactive'"        // Not equal (alias: ne)

// Numeric comparisons
"age > 25"                    // Greater than
"age >= 18"                   // Greater or equal
"age < 65"                    // Less than
"age <= 100"                  // Less or equal

// String operations
"email contains '@example.com'" // Substring match
"name startswith 'John'"      // Prefix match
"filename endswith '.pdf'"    // Suffix match

// Pattern matching
"email matches '^\\w+@\\w+\\.\\w+$'"  // Regex match

// Range and membership
"age between [18, 65]"        // Inclusive range
"status in ['active', 'pending']"     // Value in list
"status not in ['deleted', 'archived']" // Value not in list

// IP operations
"ip cidr '192.168.0.0/16'"    // IP in CIDR range

// Null checks
"field is null"               // Field is null or missing
"field is not null"           // Field exists and is not null

Logical Operators

// AND (all conditions must be true)
"age > 25 AND city = 'NYC'"
"status = 'active' AND role in ['admin', 'moderator']"

// OR (at least one condition must be true)
"city = 'NYC' OR city = 'LA'"
"status = 'admin' OR role = 'superuser'"

// NOT (negates condition)
"NOT (age < 18)"
"NOT status = 'deleted'"

// Complex expressions with parentheses
"(age > 25 AND city = 'NYC') OR (status = 'vip' AND score > 90)"

Collection Operators

// ANY - at least one array element matches
"ANY tags = 'premium'"
"ANY user.roles = 'admin'"

// ALL - every array element matches
"ALL scores >= 80"
"ALL status = 'active'"

// NONE - no array elements match
"NONE flags = 'spam'"
"NONE violations.severity = 'critical'"

Nested Field Access

// Dot notation for nested objects
"user.profile.email contains '@example.com'"
"metadata.tags.priority = 'high'"

// Array indexing
"tags[0] = 'urgent'"
"history[5].status = 'completed'"

🔄 Field Mutators (25+)

Transform field values inline before comparison:

String Mutators

// Case conversion
"email | lowercase contains '@example.com'"
"name | uppercase = 'JOHN DOE'"

// Whitespace handling
"message | trim = 'hello'"

// String manipulation
"text | length > 100"
"path | split('/') | length = 3"
"text | replace('old', 'new') contains 'new'"

Encoding Mutators

// Base64
"data | b64encode = 'aGVsbG8='"
"encoded | b64decode contains 'secret'"

// URL encoding
"param | urldecode = 'hello world'"

// Hexadecimal encoding
"data | hexencode = '68656c6c6f'"
"encoded | hexdecode = 'hello'"

// Cryptographic hashing
"password | md5 = '5f4dcc3b5aa765d61d8327deb882cf99'"
"data | sha256 = '2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824'"

Network/Security Mutators

// Defang/Refang URLs (security analysis)
"url | defang contains 'hxxp://example[.]com'"
"indicator | refang = 'http://malicious.com'"

// IP address classification
"source_ip | is_private = true"      // Check if IP is RFC 1918 private
"dest_ip | is_global = true"         // Check if IP is globally routable

// Use cases
"source_ip | is_private = true and port = 22"  // Internal SSH connections
"dest_ip | is_global = false"                    // Non-routable destinations

DNS Mutators

// DNS lookups with caching
"hostname | nslookup contains '8.8.8.8'"
"domain | nslookup = '1.1.1.1'"

Performance: DNS results are cached in memory to avoid repeated lookups.

GeoIP Mutators

// GeoIP enrichment (MaxMind and DB-IP support)
"ip | geoip.country_name = 'United States'"
"ip | geoip.city_name = 'New York'"
"ip | geoip.continent_code = 'NA'"

// Configure with environment variables
// TQL_GEOIP_DB_PATH=/path/to/GeoLite2-City.mmdb
// TQL_GEOIP_MMDB_PATH=/usr/share/GeoIP/

Supported fields (added to the record under the geo object):

  • geo.country_name, geo.country_iso_code
  • geo.city_name
  • geo.location (lat/lon)
  • geo.continent_code, geo.continent_name
  • geo.region_name, geo.region_iso_code
  • geo.postal_code, geo.timezone

Performance: Uses memory-mapped I/O for efficient database access (200,000+ lookups/sec).

List Mutators

// Boolean aggregations
"tags | any = true"              // Check if any element is truthy
"flags | all = true"             // Check if all elements are truthy

// Numeric aggregations
"scores | avg > 80"              // Calculate average
"values | sum > 1000"            // Calculate sum
"prices | min >= 10"             // Find minimum value
"ratings | max <= 5"             // Find maximum value

// Example with nested arrays
"users.scores | avg > 75"         // Average of nested array
"metrics.values | sum between [100, 500]"  // Sum within range

Chaining Mutators

// Multiple transformations in sequence
"email | trim | lowercase contains '@example.com'"
"data | b64decode | lowercase = 'secret'"
"ip | geoip.country_name | lowercase = 'united states'"

📊 Statistical Aggregations

TQL includes powerful stats functions for data analysis:

Available Functions

use tellaro_query_language::{StatsEvaluator, StatsQuery, AggregationSpec};
use serde_json::json;
use std::collections::HashMap;

let evaluator = StatsEvaluator::new();
let records = vec![
    json!({"city": "NYC", "sales": 100, "product": "laptop"}),
    json!({"city": "LA", "sales": 150, "product": "phone"}),
    json!({"city": "NYC", "sales": 200, "product": "tablet"}),
];

// Count records
let query = StatsQuery {
    aggregations: vec![AggregationSpec {
        function: "count".to_string(),
        field: "*".to_string(),
        alias: Some("total".to_string()),
        params: HashMap::new(),
    }],
    group_by: vec![],
};
let result = evaluator.evaluate_stats(&records, &query)?;
// result["value"] = 3

// Sum with grouping
let query = StatsQuery {
    aggregations: vec![AggregationSpec {
        function: "sum".to_string(),
        field: "sales".to_string(),
        alias: Some("total_sales".to_string()),
        params: HashMap::new(),
    }],
    group_by: vec!["city".to_string()],
};
let result = evaluator.evaluate_stats(&records, &query)?;
// Groups by city: {"NYC": {"total_sales": 300}, "LA": {"total_sales": 150}}

CLI Stats Queries

# Simple aggregations
tql '| stats count()' data.jsonl
tql '| stats sum(revenue), avg(price)' sales.json

# Grouped analysis
tql '| stats count() by status' events.jsonl
tql '| stats sum(sales) by region, category' data.json

# Top N analysis
tql '| stats sum(revenue, top 10) by product' sales.json

# Combined filtering and stats
tql 'region = "west" | stats avg(revenue) by category' data.json

Aggregation Functions

  • count: Count records (count(*) or count(field))
  • unique_count: Count distinct values
  • sum: Sum numeric values
  • avg/average/mean: Calculate mean
  • min/max: Find minimum/maximum values
  • median/med: Calculate median
  • std/stdev/standard_deviation: Calculate standard deviation
  • percentile/p/pct: Calculate percentiles
  • values/unique: Return unique values
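Several of these functions can be combined in one StatsQuery. A minimal sketch using only the struct fields shown above (the shape of the returned value is whatever evaluate_stats produces for your version):

use tellaro_query_language::{StatsEvaluator, StatsQuery, AggregationSpec};
use serde_json::json;
use std::collections::HashMap;

let evaluator = StatsEvaluator::new();
let records = vec![
    json!({"endpoint": "/api/users", "response_time": 120}),
    json!({"endpoint": "/api/users", "response_time": 180}),
    json!({"endpoint": "/api/orders", "response_time": 95}),
];

let query = StatsQuery {
    aggregations: vec![
        AggregationSpec {
            function: "unique_count".to_string(),
            field: "endpoint".to_string(),
            alias: Some("endpoints".to_string()),
            params: HashMap::new(),
        },
        AggregationSpec {
            function: "median".to_string(),
            field: "response_time".to_string(),
            alias: Some("median_rt".to_string()),
            params: HashMap::new(),
        },
    ],
    group_by: vec![],
};
let result = evaluator.evaluate_stats(&records, &query)?;
// Expected values: 2 distinct endpoints, median response_time of 120.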

🎯 API Reference

Basic Usage

use tellaro_query_language::Tql;
use serde_json::json;

// Create TQL instance
let tql = Tql::new();

// Or with custom depth limits
let tql = Tql::with_max_depth(200);

// Query records
let records = vec![
    json!({"name": "Alice", "age": 30, "city": "NYC"}),
    json!({"name": "Bob", "age": 25, "city": "LA"}),
];

// Execute query
let results = tql.query(&records, "age > 27").unwrap();
println!("Found {} matching records", results.len());

// Count matches
let count = tql.count(&records, "city = 'NYC'").unwrap();
println!("NYC residents: {}", count);

// Check single record
let user = json!({"age": 30, "status": "active"});
if tql.matches(&user, "age >= 18 AND status = 'active'").unwrap() {
    println!("Valid adult user");
}

Query Pre-compilation

For queries executed multiple times, parse once and reuse the AST:

use tellaro_query_language::{Tql, TqlEvaluator};

let tql = Tql::new();

// Parse query once
let ast = tql.parse("age > 25 AND status = 'active'").unwrap();

// Reuse AST for multiple datasets
let evaluator = TqlEvaluator::new();
let results1 = evaluator.filter(&ast, &dataset1).unwrap();
let results2 = evaluator.filter(&ast, &dataset2).unwrap();

Error Handling

use tellaro_query_language::{Tql, TqlError};

let tql = Tql::new();

match tql.query(&records, "invalid query syntax") {
    Ok(results) => println!("Found {} records", results.len()),
    Err(TqlError::SyntaxError(msg)) => eprintln!("Syntax error: {}", msg),
    Err(TqlError::EvaluationError(msg)) => eprintln!("Evaluation error: {}", msg),
    Err(e) => eprintln!("Error: {}", e),
}

⚡ Performance

Benchmarks

Rust Implementation:

  • In-memory queries: ~3,000,000 records/sec
  • File parsing (JSON): ~150MB/sec
  • GeoIP lookups: ~200,000 lookups/sec (memory-mapped)
  • DNS lookups: ~10,000 lookups/sec (with caching)
  • Large file streaming: Process 50MB in ~200ms

vs Python Implementation:

  • 300x faster for file processing
  • 500x faster for GeoIP lookups (memory-mapped vs Python)
  • 100x faster for DNS lookups (async + caching)

Performance Features

  • Zero-copy deserialization where possible
  • Memory-mapped I/O for GeoIP databases
  • In-memory caching for DNS and GeoIP results
  • Streaming file processing with constant memory usage
  • Parallel query evaluation (planned)

Optimization Tips

// Pre-compile queries for reuse
let ast = tql.parse("age > 25").unwrap();
let results1 = evaluator.filter(&ast, &dataset1).unwrap();
let results2 = evaluator.filter(&ast, &dataset2).unwrap();

// Use streaming for large files
let file = File::open("large.jsonl")?;
let reader = BufReader::new(file);
for line in reader.lines() {
    let record: Value = serde_json::from_str(&line?)?;
    if tql.matches(&record, "level = 'ERROR'")? {
        // Process match without loading entire file
    }
}

// Configure cache sizes for mutators
std::env::set_var("TQL_DNS_CACHE_SIZE", "10000");
std::env::set_var("TQL_GEOIP_CACHE_SIZE", "50000");

🗺️ Roadmap

✅ Implemented Features

  • ✅ Core query engine with all operators
  • ✅ 25+ field mutators (string, encoding, network, DNS, GeoIP, list)
  • ✅ Statistical aggregations with grouping
  • ✅ File support (JSON, JSONL, CSV) with CLI
  • ✅ OpenSearch backend with automatic DSL translation
  • ✅ Memory-mapped GeoIP lookups
  • ✅ DNS resolution with caching
  • ✅ High-performance streaming

🚧 In Progress

  • 🚧 OpenSearch stats aggregation translation
  • 🚧 Post-processing for complex mutator chains
  • 🚧 Additional hash functions (SHA1, SHA512)

📋 Planned Features

  • 📋 Parallel record evaluation
  • 📋 Query optimization engine
  • 📋 JSON parsing mutator
  • 📋 Timestamp conversion mutators
  • 📋 PostgreSQL/MySQL backends
  • 📋 Custom mutator plugins via traits

🔮 Future Considerations

  • 🔮 Distributed query execution
  • 🔮 Query result caching
  • 🔮 Real-time data streaming
  • 🔮 WASM compilation for browser usage

🔧 Development

Setup

# Clone repository
git clone https://github.com/tellaro/tellaro-query-language.git
cd tellaro-query-language/tql

# Build
cargo build

# Run tests
cargo test

# Build release
cargo build --release

# Build with OpenSearch feature
cargo build --features opensearch

Testing

# Run all tests
cargo test

# Run with output
cargo test -- --nocapture

# Run integration tests (requires OpenSearch)
export OPENSEARCH_HOSTS=http://localhost:9200
export OPENSEARCH_USERNAME=admin
export OPENSEARCH_PASSWORD=admin
export OPENSEARCH_INTEGRATION_TEST=true
cargo test --features opensearch -- --ignored --test-threads=1

# Run benchmarks
cargo bench

Code Quality

# Format code
cargo fmt

# Linting
cargo clippy -- -D warnings

# Check compilation
cargo check

🤝 Contributing

Contributions are welcome! Please see CONTRIBUTING.md for guidelines.


📄 License

Tellaro Query Language (TQL) is source-available software with specific usage terms:

Permitted Uses:

  • Personal use (individual, non-commercial)
  • Organizational use (within your company/organization)
  • Integration into your applications and services
  • Internal tools and automation

Restricted Uses:

  • Creating derivative query language products
  • Commercial redistribution or resale
  • Offering TQL-based commercial services to third parties
  • Using source code to build competing products

For commercial licensing inquiries, contact: support@tellaro.io

See LICENSE for complete terms and conditions.


🌟 Advanced Examples

Security Log Analysis

use tellaro_query_language::Tql;
use serde_json::json;

let tql = Tql::new();
let logs = vec![
    json!({
        "timestamp": "2024-01-15T10:30:00Z",
        "source_ip": "192.168.1.100",
        "url": "hxxp://malicious[.]com/payload",
        "severity": "high",
        "tags": ["suspicious", "malware"]
    }),
    json!({
        "timestamp": "2024-01-15T10:31:00Z",
        "source_ip": "10.0.0.50",
        "url": "https://safe-site.com",
        "severity": "low",
        "tags": ["normal"]
    }),
];

// Find high-severity events with malicious indicators
let query = r#"
    severity in ['high', 'critical'] AND
    source_ip | is_private = true AND
    (ANY tags = 'malware' OR url | refang contains 'malicious')
"#;

let results = tql.query(&logs, query).unwrap();
assert_eq!(results.len(), 1);

E-commerce Product Search

let tql = Tql::new();
let products = vec![
    json!({
        "name": "Laptop Pro 15",
        "price": 1299.99,
        "tags": ["electronics", "computers", "premium"],
        "rating": {"average": 4.5, "count": 128},
        "in_stock": true
    }),
    json!({
        "name": "Budget Mouse",
        "price": 9.99,
        "tags": ["electronics", "accessories"],
        "rating": {"average": 3.8, "count": 45},
        "in_stock": false
    }),
];

// Find in-stock electronics with good ratings under $1500
let query = r#"
    in_stock = true AND
    price < 1500 AND
    rating.average >= 4.0 AND
    ANY tags = 'electronics'
"#;

let results = tql.query(&products, query).unwrap();
assert_eq!(results.len(), 1);

GeoIP Enrichment Pipeline

// Set GeoIP database path
std::env::set_var("TQL_GEOIP_DB_PATH", "/usr/share/GeoIP/GeoLite2-City.mmdb");

let tql = Tql::new();
let events = vec![
    json!({"ip": "8.8.8.8", "event": "login"}),
    json!({"ip": "1.1.1.1", "event": "api_call"}),
];

// Query with GeoIP enrichment
let query = "ip | geoip.country_name = 'United States'";
let results = tql.query(&events, query).unwrap();

// Results include enriched geo data
println!("{}", serde_json::to_string_pretty(&results[0]).unwrap());
// {
//   "ip": "8.8.8.8",
//   "event": "login",
//   "geo": {
//     "country_name": "United States",
//     "country_iso_code": "US",
//     "city_name": "Mountain View",
//     "location": {"lat": 37.386, "lon": -122.0838}
//   }
// }

Made with ❤️ by the Tellaro Team
