//! # Select HTML
//!
//! > **Extract HTML using CSS selectors on the command line.**
use std::{
fs::File,
io::{stdin, Read},
path::PathBuf,
};
use {
clap::Parser,
color_eyre::{eyre::eyre, install, Result},
scraper::{Html, Selector},
};
/// CLI arguments struct using [`clap`]'s Derive API.
#[derive(Debug, Parser)]
#[clap(about, author, version)]
pub struct Args {
/// Output the attribute's value from the selected element, can be used
/// multiple times.
#[clap(short, long, group = "output")]
pub attribute: Vec,
/// A HTML file to read, if not specified stdin will be used instead.
#[clap(long)]
pub file: Option,
/// The CSS selector to use.
pub selector: String,
/// Output inner text of the selected elements.
#[clap(short, long, group = "output")]
pub text: bool,
/// Trim whitespace from selected items.
#[clap(long)]
pub trim: bool,
}
/// The main CLI function.
///
/// Parses the command-line arguments, reads the HTML document from `--file`
/// (or from stdin when no file is given), selects every element matching the
/// CSS selector and prints one line per result:
///
/// * with `--text`: the concatenated text nodes of each matched element;
/// * with `--attribute`: the value of each requested attribute that is
///   present on the matched element (missing attributes are skipped);
/// * otherwise: the HTML of each matched element.
///
/// With `--trim`, surrounding whitespace is removed from every printed item.
///
/// # Errors
///
/// Returns an error if the error-report handler cannot be installed, the
/// selector fails to parse, or the input cannot be opened or read as UTF-8.
fn main() -> Result<()> {
    install()?;
    let args = Args::parse();

    // The underlying parse error is intentionally replaced by a short message.
    let selector = Selector::parse(&args.selector)
        .map_err(|_| eyre!("Failed to parse selector"))?;

    let document = {
        let mut html = String::new();
        // Borrow the path rather than moving it out of `args`, so `args`
        // stays fully usable below.
        match &args.file {
            Some(path) => File::open(path)?.read_to_string(&mut html)?,
            None => stdin().read_to_string(&mut html)?,
        };
        Html::parse_document(&html)
    };

    let mut to_print: Vec<String> = Vec::new();
    for element in document.select(&selector) {
        if args.text {
            // Join all text nodes of the element into a single string.
            to_print.push(element.text().collect::<String>());
        } else if !args.attribute.is_empty() {
            let node = element.value();
            // Only attributes actually present on the element produce output.
            to_print.extend(
                args.attribute
                    .iter()
                    .filter_map(|name| node.attr(name))
                    .map(str::to_string),
            );
        } else {
            to_print.push(element.html());
        }
    }

    for value in &to_print {
        let value = if args.trim { value.trim() } else { value.as_str() };
        println!("{}", value);
    }

    Ok(())
}