Crates.io | sitescraper |
lib.rs | sitescraper |
version | 0.2.1 |
source | src |
created_at | 2021-11-01 21:57:41.105288 |
updated_at | 2023-12-04 18:12:25.553569 |
description | Scraping Websites in Rust! |
homepage | |
repository | https://github.com/floscodes/rust-sitescraper |
max_upload_size | |
id | 475450 |
size | 33,705 |
let html = "<html><body><div>Hello World!</div></body></html>";
let dom = sitescraper::parse_html(html).unwrap();
let filtered_dom = dom.filter("body");
println!("{}", filtered_dom.get_inner_html());
//Output: <div>Hello World!</div>
let html = "<html><body><div>Hello World!</div></body></html>";
let dom = sitescraper::parse_html(html).unwrap();
let filtered_dom = dom.filter("body");
println!("{}", filtered_dom.get_text());
//Output: Hello World!
use sitescraper;
let html = "<html><body><div>Hello World!</div></body></html>";
let dom = sitescraper::parse_html(html).unwrap();
let filtered_dom = dom.filter("div");
println!("{}", filtered_dom.tag[0].get_text());
//Output: Hello World!
This also works with `get_inner_html()`.
use sitescraper;
let html = "<html><body><div id='hello'>Hello World!</div></body></html>";
let dom = sitescraper::parse_html(html).unwrap();
let filtered_dom = dom.filter(("div", "id", "hello"));
println!("{}", filtered_dom.tag[0].get_text());
//Output: Hello World!
This also works with a tuple consisting of two string literals:
let filtered_dom = dom.filter(("div", "id"));
use sitescraper;
let html = "<html><body><div id='hello'>Hello World!</div></body></html>";
let dom = sitescraper::parse_html(html).unwrap();
let filtered_dom = dom.filter(("*", "id", "hello"));
println!("{}", filtered_dom.tag[0].get_text());
//Output: Hello World!
or
use sitescraper;
let html = "<html><body><div id='hello'>Hello World!</div></body></html>";
let dom = sitescraper::parse_html(html).unwrap();
let filtered_dom = dom.filter(("", "", "hello"));
println!("{}", filtered_dom.tag[0].get_text());
//Output: Hello World!
use sitescraper;
let html = sitescraper::http::get("http://example.com/").await.unwrap();
let dom = sitescraper::parse_html(html).unwrap();
let filtered_dom = sitescraper::filter!(dom, "div");
println!("{}", filtered_dom.get_inner_html());