| Crates.io | sitescraper |
| lib.rs | sitescraper |
| version | 0.2.1 |
| created_at | 2021-11-01 21:57:41.105288+00 |
| updated_at | 2023-12-04 18:12:25.553569+00 |
| description | Scraping Websites in Rust! |
| homepage | |
| repository | https://github.com/floscodes/rust-sitescraper |
| max_upload_size | |
| id | 475450 |
| size | 33,705 |
let html = "<html><body><div>Hello World!</div></body></html>";
let dom = sitescraper::parse_html(html).unwrap();
let filtered_dom = dom.filter("body");
println!("{}", filtered_dom.get_inner_html());
//Output: <div>Hello World!</div>
let html = "<html><body><div>Hello World!</div></body></html>";
let dom = sitescraper::parse_html(html).unwrap();
let filtered_dom = dom.filter("body");
println!("{}", filtered_dom.get_text());
//Output: Hello World!
use sitescraper;
let html = "<html><body><div>Hello World!</div></body></html>";
let dom = sitescraper::parse_html(html).unwrap();
let filtered_dom = dom.filter("div");
println!("{}", filtered_dom.tag[0].get_text());
//Output: Hello World!
This also works with get_inner_html().
use sitescraper;
let html = "<html><body><div id='hello'>Hello World!</div></body></html>";
let dom = sitescraper::parse_html(html).unwrap();
let filtered_dom = dom.filter(("div", "id", "hello"));
println!("{}", filtered_dom.tag[0].get_text());
//Output: Hello World!
This also works with a tuple consisting of two string literals:
let filtered_dom = dom.filter(("div", "id"));
use sitescraper;
let html = "<html><body><div id='hello'>Hello World!</div></body></html>";
let dom = sitescraper::parse_html(html).unwrap();
let filtered_dom = dom.filter(("*", "id", "hello"));
println!("{}", filtered_dom.tag[0].get_text());
//Output: Hello World!
or
use sitescraper;
let html = "<html><body><div id='hello'>Hello World!</div></body></html>";
let dom = sitescraper::parse_html(html).unwrap();
let filtered_dom = dom.filter(("", "", "hello"));
println!("{}", filtered_dom.tag[0].get_text());
//Output: Hello World!
use sitescraper;
let html = sitescraper::http::get("http://example.com/").await.unwrap();
let dom = sitescraper::parse_html(html).unwrap();
let filtered_dom = sitescraper::filter!(dom, "div");
println!("{}", filtered_dom.get_inner_html());