| Crates.io | html-linter |
| lib.rs | html-linter |
| version | 0.1.1 |
| created_at | 2025-01-08 18:10:16.487543+00 |
| updated_at | 2025-01-08 18:13:44.954366+00 |
| description | An HTML linting library for checking HTML structure and semantics |
| homepage | |
| repository | |
| max_upload_size | |
| id | 1508893 |
| size | 662,772 |
html-linter is a Rust library for linting HTML content. It checks for various common errors, best practices, semantic issues, and more. You can supply rules that specify what to check (element presence, attribute presence, ordering constraints, semantic constraints, etc.) and how severe each issue should be when found.
Each check is defined as a Rule (e.g., "Images must have alt attributes"). Semantic rules encourage semantic HTML elements (e.g., <header> instead of <div class="header">).
Add this to your Cargo.toml:
[dependencies]
html-linter = "0.1.0"
And then bring it into scope in your code:
use html_linter::{HtmlLinter, LinterOptions, Rule, RuleType, Severity, LintResult};
You can define rules either programmatically or using JSON configuration:
use std::collections::HashMap;
use html_linter::{Rule, RuleType, Severity};
let rules = vec![
Rule {
name: "img-alt".to_string(),
rule_type: RuleType::AttributePresence,
severity: Severity::Error,
selector: "img".to_string(),
condition: "alt-missing".to_string(),
message: "Images must have alt attributes".to_string(),
options: HashMap::new(),
},
];
[
{
"name": "img-alt",
"rule_type": "AttributePresence",
"severity": "Error",
"selector": "img",
"condition": "alt-missing",
"message": "Images must have alt attributes",
"options": {}
},
{
"name": "meta-tags",
"rule_type": "ElementContent",
"severity": "Error",
"selector": "head",
"condition": "meta-tags",
"message": "Meta tags validation failed",
"options": {
"required_meta_tags": "[{\"name\":\"description\",\"pattern\":{\"type\":\"MinLength\",\"value\":50},\"required\":true}]"
}
}
]
You can load JSON rules either from a string or from a file:
// Load from JSON string
let linter = HtmlLinter::from_json(json_str, None)?;
// Load from JSON file
let linter = HtmlLinter::from_json_file("path/to/rules.json", None)?;
Each rule in the JSON configuration must follow this structure:
{
"name": "string", // Unique identifier for the rule
"rule_type": "string", // One of the supported rule types
"severity": "string", // "Error", "Warning", or "Info"
"selector": "string", // CSS-like selector
"condition": "string", // Rule-specific condition
"message": "string", // Error message to display
"options": {
// Optional additional configuration
"key": "value"
}
}
ElementPresence
{
"name": "require-main",
"rule_type": "ElementPresence",
"severity": "Error",
"selector": "main",
"condition": "required",
"message": "Page must have a main content area"
}
AttributePresence
{
"name": "img-alt",
"rule_type": "AttributePresence",
"severity": "Error",
"selector": "img",
"condition": "alt-missing",
"message": "Images must have alt attributes"
}
AttributeValue
{
"name": "valid-email",
"rule_type": "AttributeValue",
"selector": "input[type='email']",
"severity": "Error",
"condition": "pattern-match",
"message": "Invalid email pattern",
"options": {
"pattern": "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$"
}
}
ElementOrder
{
"name": "heading-order",
"rule_type": "ElementOrder",
"severity": "Warning",
"selector": "h1, h2, h3, h4, h5, h6",
"condition": "sequential-order",
"message": "Heading levels should not skip levels"
}
ElementContent
{
"name": "meta-description",
"rule_type": "ElementContent",
"severity": "Error",
"selector": "head",
"condition": "meta-tags",
"message": "Required meta tags are missing",
"options": {
"required_meta_tags": [
{
"name": "description",
"pattern": {
"type": "MinLength",
"value": 50
},
"required": true
}
]
}
}
WhiteSpace
Nesting
{
"name": "input-label",
"rule_type": "Nesting",
"severity": "Error",
"selector": "input",
"condition": "parent-label-or-for",
"message": "Input elements must be associated with a label"
}
Semantics
{
"name": "semantic-html",
"rule_type": "Semantics",
"severity": "Warning",
"selector": "div",
"condition": "semantic-structure",
"message": "Use semantic HTML elements instead of divs where appropriate"
}
Custom
{
"name": "no-empty-links",
"rule_type": "Custom",
"severity": "Error",
"selector": "a",
"condition": "no-empty-links",
"message": "Links must have content"
}
When using ElementContent with meta-tags, the following pattern types are supported:
- Regex: Match content against a regular expression
- MinLength: Require minimum character length
- MaxLength: Limit maximum character length
- NonEmpty: Ensure content is not empty
- Exact: Match exact text
- OneOf: Match one of several options
- Contains: Check if content contains substring
- StartsWith: Check if content starts with string
- EndsWith: Check if content ends with string
Example meta tag pattern:
{
"pattern": {
"type": "MinLength",
"value": 50
}
}
Using HtmlLinter:
use html_linter::{HtmlLinter, LinterOptions};
// Optional: specify linter-wide options (e.g., max line length, ignoring inline styles, etc.)
let options = LinterOptions {
// For example, ignore lines longer than 80 characters
max_line_length: Some(80),
// ...other options...
..Default::default()
};
// Build the linter with your rules and options
let linter = HtmlLinter::new(rules, Some(options));
let html = r#"<html><body><img src="test.jpg"></body></html>"#;
// Lint returns a Result containing either a vector of `LintResult` or a `LinterError`.
let lint_results = linter.lint(html).unwrap();
// Each `LintResult` contains:
// - the triggered rule's name
// - the severity level
// - a descriptive message
// - location info (line, column, and element name)
// - partial source snippet of the element
fn main() {
// Define rules
let rules = vec![
Rule {
name: "img-alt".to_string(),
rule_type: RuleType::AttributePresence,
severity: Severity::Error,
selector: "img".to_string(),
condition: "alt-missing".to_string(),
message: "Images must have alt attributes".to_string(),
options: HashMap::new(),
},
];
// Create the linter
let linter = HtmlLinter::new(rules, None);
// Some HTML to check
let html = r#"<html><body><img src="test.jpg"></body></html>"#;
// Perform linting
match linter.lint(html) {
Ok(results) => {
for result in results {
println!(
"Rule: {}, Severity: {:?}, Message: {}, Location: line {}, column {}",
result.rule,
result.severity,
result.message,
result.location.line,
result.location.column
);
}
}
Err(e) => eprintln!("Linter error: {}", e),
}
}
If your crate has a tests/ directory, you can add integration tests for your linting rules there. See the code examples above for how to organize them. Run them with:
cargo test
The library supports several RuleTypes, each controlling how the rule is evaluated:
Checks if certain elements exist (or do not exist).
{
"name": "require-main",
"rule_type": "ElementPresence",
"severity": "Error",
"selector": "main",
"condition": "required",
"message": "Page must have a main content area"
}
Checks if specific attributes are present (or missing).
{
"name": "img-alt",
"rule_type": "AttributePresence",
"severity": "Error",
"selector": "img",
"condition": "alt-missing",
"message": "Images must have alt attributes"
}
Validates attribute values against a regex or other criteria.
{
"name": "valid-email",
"rule_type": "AttributeValue",
"selector": "input[type='email']",
"severity": "Error",
"condition": "pattern-match",
"message": "Invalid email pattern",
"options": {
"pattern": "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$"
}
}
Ensures elements follow a certain order (e.g., heading levels).
{
"name": "heading-order",
"rule_type": "ElementOrder",
"severity": "Warning",
"selector": "h1, h2, h3, h4, h5, h6",
"condition": "sequential-order",
"message": "Heading levels should not skip levels"
}
Validates text content or checks for empty content.
{
"name": "meta-description",
"rule_type": "ElementContent",
"severity": "Error",
"selector": "head",
"condition": "meta-tags",
"message": "Required meta tags are missing",
"options": {
"required_meta_tags": [
{
"name": "description",
"pattern": {
"type": "MinLength",
"value": 50
},
"required": true
}
]
}
}
WhiteSpace: Not implemented.
Ensures certain elements are nested within parent elements or properly associated.
{
"name": "input-label",
"rule_type": "Nesting",
"severity": "Error",
"selector": "input",
"condition": "parent-label-or-for",
"message": "Input elements must be associated with a label"
}
Encourages semantic HTML usage (e.g., <header> instead of <div class="header">).
{
"name": "semantic-html",
"rule_type": "Semantics",
"severity": "Warning",
"selector": "div",
"condition": "semantic-structure",
"message": "Use semantic HTML elements instead of divs where appropriate"
}
Custom rule logic with a built-in function key (e.g., "no-empty-links").
{
"name": "no-empty-links",
"rule_type": "Custom",
"severity": "Error",
"selector": "a",
"condition": "no-empty-links",
"message": "Links must have content"
}
Allows combining multiple conditions that must all be satisfied. Supports various check modes and condition types.
{
"name": "accessible-button",
"rule_type": "Compound",
"severity": "Error",
"selector": "button",
"condition": "compound",
"message": "Button must meet accessibility requirements",
"options": {
"check_mode": "all",
"conditions": [
{
"type": "AttributeValue",
"attribute": "aria-label",
"pattern": ".+"
},
{
"type": "AttributeValue",
"attribute": "role",
"pattern": "button"
}
]
}
}
- all: All conditions must match (default)
- any: Any condition must match
- none: No conditions should match
- exactly_one: Exactly one condition should match
- at_least_one: At least one condition must match
- majority: More than half of conditions must match
- ratio: Specified ratio of conditions must match (requires "ratio" option)
- range: Number of matching conditions must fall within specified range (requires "min" and "max" options)
- consecutive: Specified number of consecutive conditions must match (requires "count" option)
- exclusive_groups: Only one group of conditions should match (requires "groups" option)
- weighted: Sum of weights for matching conditions must meet threshold (requires "weights" and "threshold" options)
- dependency_chain: Conditions must match in sequence without gaps
- alternating: Conditions must alternate between matching and non-matching
- subset_match: Matching conditions must form a valid subset (requires "valid_sets" option)
Example with advanced check mode:
{
"name": "weighted-conditions",
"rule_type": "Compound",
"severity": "Error",
"selector": "form",
"condition": "compound",
"message": "Form must meet weighted accessibility requirements",
"options": {
"check_mode": "weighted",
"weights": [0.5, 1.0, 0.8],
"threshold": 1.5,
"conditions": [
{
"type": "AttributeValue",
"attribute": "aria-label",
"pattern": ".+"
},
{
"type": "AttributeValue",
"attribute": "role",
"pattern": "form"
},
{
"type": "AttributeValue",
"attribute": "name",
"pattern": ".+"
}
]
}
}
Compound rules support three types of conditions: TextContent, AttributeValue, and AttributeReference, shown in that order below:
{
"type": "TextContent",
"pattern": "^[A-Za-z0-9\\s]{10,}$"
}
{
"type": "AttributeValue",
"attribute": "class",
"pattern": "^btn-[a-z]+$"
}
{
"type": "AttributeReference",
"attribute": "aria-describedby",
"reference_must_exist": true
}
Validates the text content of elements against patterns.
{
"name": "min-heading-length",
"rule_type": "TextContent",
"severity": "Warning",
"selector": "h1, h2, h3",
"condition": "text-content",
"message": "Heading text should be descriptive",
"options": {
"pattern": ".{10,}",
"check_mode": "ensure_existence"
}
}
When validating content (especially with TextContent or ElementContent), the following pattern types are supported:
- Regex: Match content against a regular expression
- MinLength: Require minimum character length
- MaxLength: Limit maximum character length
- NonEmpty: Ensure content is not empty
- Exact: Match exact text
- OneOf: Match one of several options
- Contains: Check if content contains substring
- StartsWith: Check if content starts with string
- EndsWith: Check if content ends with string
Example using different pattern types:
{
"name": "meta-tags",
"rule_type": "ElementContent",
"severity": "Error",
"selector": "head",
"condition": "meta-tags",
"message": "Meta tags validation failed",
"options": {
"required_meta_tags": [
{
"name": "description",
"pattern": {
"type": "MinLength",
"value": 50
},
"required": true
},
{
"name": "keywords",
"pattern": {
"type": "Contains",
"value": "important-keyword"
},
"required": true
},
{
"property": "og:type",
"pattern": {
"type": "OneOf",
"value": ["website", "article", "product"]
},
"required": true
}
]
}
}
Many rule types support different check modes that modify how the rule is evaluated:
- normal: Default behavior - report when pattern matches
- ensure_existence: Report when pattern doesn't match (inverse)
- ensure_nonexistence: Report when pattern matches (same as normal)
- any: For compound rules - any condition must match
- all: For compound rules - all conditions must match
Example using check modes:
{
"name": "no-placeholder-images",
"rule_type": "AttributeValue",
"severity": "Warning",
"selector": "img",
"condition": "src-check",
"message": "Avoid using placeholder image services",
"options": {
"check_mode": "ensure_nonexistence",
"pattern": "placeholder\\.com|placekitten\\.com"
}
}
Validates the overall structure of the HTML document.
{
"name": "require-doctype",
"rule_type": "DocumentStructure",
"severity": "Error",
"selector": "html",
"condition": "doctype-present",
"message": "HTML document must have a DOCTYPE declaration"
}
Enforces limits on the number of specific elements.
{
"name": "single-h1",
"rule_type": "ElementCount",
"severity": "Error",
"selector": "h1",
"condition": "max-count",
"message": "Page should have only one h1 element",
"options": {
"max": "1"
}
}
Enforces consistent casing for element and attribute names.
{
"name": "lowercase-elements",
"rule_type": "ElementCase",
"severity": "Warning",
"selector": "*",
"condition": "lowercase",
"message": "HTML elements and attributes should be lowercase",
"options": {}
}
Enforces consistent use of single or double quotes for attribute values.
{
"name": "quote-style",
"rule_type": "AttributeQuotes",
"severity": "Warning",
"selector": "*",
"condition": "quote-style",
"message": "Use double quotes for attribute values",
"options": {
"style": "double"
}
}
Pull requests, bug reports, and feature requests are welcome! Feel free to open an issue or submit a PR if you have ideas to improve the library.
This project is licensed under the MIT license. Enjoy responsibly.