diff --git a/Cargo.lock b/Cargo.lock index b854ef2b..50df5256 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -390,7 +390,7 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "scraper" -version = "0.23.0" +version = "0.23.1" dependencies = [ "cssparser", "ego-tree", diff --git a/README.md b/README.md deleted file mode 100644 index 39450ec2..00000000 --- a/README.md +++ /dev/null @@ -1,152 +0,0 @@ -# scraper - -[![crates.io](https://img.shields.io/crates/v/scraper?color=dark-green)][crate] -[![downloads](https://img.shields.io/crates/d/scraper)][crate] -[![test](https://github.com/causal-agent/scraper/actions/workflows/test.yml/badge.svg)][tests] - -HTML parsing and querying with CSS selectors. - -`scraper` is on [Crates.io][crate] and [GitHub][github]. - -[crate]: https://crates.io/crates/scraper -[github]: https://github.com/causal-agent/scraper -[tests]: https://github.com/causal-agent/scraper/actions/workflows/test.yml - -Scraper provides an interface to Servo's `html5ever` and `selectors` crates, for browser-grade parsing and querying. - -## Examples - -### Parsing a document - -```rust -use scraper::Html; - -let html = r#" - - - Hello, world! -

Hello, world!

-"#; - -let document = Html::parse_document(html); -``` - -### Parsing a fragment - -```rust -use scraper::Html; -let fragment = Html::parse_fragment("

<h1>Hello, <i>world!</i></h1>

"); -``` - -### Parsing a selector - -```rust -use scraper::Selector; -let selector = Selector::parse("h1.foo").unwrap(); -``` - -### Selecting elements - -```rust -use scraper::{Html, Selector}; - -let html = r#" - -"#; - -let fragment = Html::parse_fragment(html); -let selector = Selector::parse("li").unwrap(); - -for element in fragment.select(&selector) { - assert_eq!("li", element.value().name()); -} -``` - -### Selecting descendent elements - -```rust -use scraper::{Html, Selector}; - -let html = r#" - -"#; - -let fragment = Html::parse_fragment(html); -let ul_selector = Selector::parse("ul").unwrap(); -let li_selector = Selector::parse("li").unwrap(); - -let ul = fragment.select(&ul_selector).next().unwrap(); -for element in ul.select(&li_selector) { - assert_eq!("li", element.value().name()); -} -``` - -### Accessing element attributes - -```rust -use scraper::{Html, Selector}; - -let fragment = Html::parse_fragment(r#""#); -let selector = Selector::parse(r#"input[name="foo"]"#).unwrap(); - -let input = fragment.select(&selector).next().unwrap(); -assert_eq!(Some("bar"), input.value().attr("value")); -``` - -### Serializing HTML and inner HTML - -```rust -use scraper::{Html, Selector}; - -let fragment = Html::parse_fragment("

<h1>Hello, <i>world!</i></h1>

"); -let selector = Selector::parse("h1").unwrap(); - -let h1 = fragment.select(&selector).next().unwrap(); - -assert_eq!("

<h1>Hello, <i>world!</i></h1>

", h1.html()); -assert_eq!("Hello, <i>world!</i>", h1.inner_html()); -``` - -### Accessing descendent text - -```rust -use scraper::{Html, Selector}; - -let fragment = Html::parse_fragment("

<h1>Hello, <i>world!</i></h1>

"); -let selector = Selector::parse("h1").unwrap(); - -let h1 = fragment.select(&selector).next().unwrap(); -let text = h1.text().collect::<Vec<_>>(); - -assert_eq!(vec!["Hello, ", "world!"], text); -``` - -### Manipulating the DOM - -```rust -use html5ever::tree_builder::TreeSink; -use scraper::{Html, Selector}; - -let html = "hello

REMOVE ME

"; -let selector = Selector::parse(".hello").unwrap(); -let mut document = Html::parse_document(html); -let node_ids: Vec<_> = document.select(&selector).map(|x| x.id()).collect(); -for id in node_ids { - document.remove_from_parent(&id); -} -assert_eq!(document.html(), "hello"); -``` - -## Contributing - -Please feel free to open pull requests. If you're planning on implementing -something big (i.e. not fixing a typo, a small bug fix, minor refactor, etc) -then please open an issue first. diff --git a/README.md b/README.md new file mode 120000 index 00000000..a6541ddb --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +scraper/README.md \ No newline at end of file diff --git a/scraper/Cargo.toml b/scraper/Cargo.toml index 6c53b45d..b88d6f49 100644 --- a/scraper/Cargo.toml +++ b/scraper/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "scraper" -version = "0.23.0" +version = "0.23.1" edition = "2021" description = "HTML parsing and querying with CSS selectors" diff --git a/scraper/README.md b/scraper/README.md index 32d46ee8..39450ec2 120000 --- a/scraper/README.md +++ b/scraper/README.md @@ -1 +1,152 @@ -../README.md \ No newline at end of file +# scraper + +[![crates.io](https://img.shields.io/crates/v/scraper?color=dark-green)][crate] +[![downloads](https://img.shields.io/crates/d/scraper)][crate] +[![test](https://github.com/causal-agent/scraper/actions/workflows/test.yml/badge.svg)][tests] + +HTML parsing and querying with CSS selectors. + +`scraper` is on [Crates.io][crate] and [GitHub][github]. + +[crate]: https://crates.io/crates/scraper +[github]: https://github.com/causal-agent/scraper +[tests]: https://github.com/causal-agent/scraper/actions/workflows/test.yml + +Scraper provides an interface to Servo's `html5ever` and `selectors` crates, for browser-grade parsing and querying. + +## Examples + +### Parsing a document + +```rust +use scraper::Html; + +let html = r#" + + + Hello, world! +

Hello, world!

+"#; + +let document = Html::parse_document(html); +``` + +### Parsing a fragment + +```rust +use scraper::Html; +let fragment = Html::parse_fragment("

<h1>Hello, <i>world!</i></h1>

"); +``` + +### Parsing a selector + +```rust +use scraper::Selector; +let selector = Selector::parse("h1.foo").unwrap(); +``` + +### Selecting elements + +```rust +use scraper::{Html, Selector}; + +let html = r#" + +"#; + +let fragment = Html::parse_fragment(html); +let selector = Selector::parse("li").unwrap(); + +for element in fragment.select(&selector) { + assert_eq!("li", element.value().name()); +} +``` + +### Selecting descendent elements + +```rust +use scraper::{Html, Selector}; + +let html = r#" + +"#; + +let fragment = Html::parse_fragment(html); +let ul_selector = Selector::parse("ul").unwrap(); +let li_selector = Selector::parse("li").unwrap(); + +let ul = fragment.select(&ul_selector).next().unwrap(); +for element in ul.select(&li_selector) { + assert_eq!("li", element.value().name()); +} +``` + +### Accessing element attributes + +```rust +use scraper::{Html, Selector}; + +let fragment = Html::parse_fragment(r#""#); +let selector = Selector::parse(r#"input[name="foo"]"#).unwrap(); + +let input = fragment.select(&selector).next().unwrap(); +assert_eq!(Some("bar"), input.value().attr("value")); +``` + +### Serializing HTML and inner HTML + +```rust +use scraper::{Html, Selector}; + +let fragment = Html::parse_fragment("

<h1>Hello, <i>world!</i></h1>

"); +let selector = Selector::parse("h1").unwrap(); + +let h1 = fragment.select(&selector).next().unwrap(); + +assert_eq!("

<h1>Hello, <i>world!</i></h1>

", h1.html()); +assert_eq!("Hello, <i>world!</i>", h1.inner_html()); +``` + +### Accessing descendent text + +```rust +use scraper::{Html, Selector}; + +let fragment = Html::parse_fragment("

<h1>Hello, <i>world!</i></h1>

"); +let selector = Selector::parse("h1").unwrap(); + +let h1 = fragment.select(&selector).next().unwrap(); +let text = h1.text().collect::<Vec<_>>(); + +assert_eq!(vec!["Hello, ", "world!"], text); +``` + +### Manipulating the DOM + +```rust +use html5ever::tree_builder::TreeSink; +use scraper::{Html, Selector}; + +let html = "hello

REMOVE ME

"; +let selector = Selector::parse(".hello").unwrap(); +let mut document = Html::parse_document(html); +let node_ids: Vec<_> = document.select(&selector).map(|x| x.id()).collect(); +for id in node_ids { + document.remove_from_parent(&id); +} +assert_eq!(document.html(), "hello"); +``` + +## Contributing + +Please feel free to open pull requests. If you're planning on implementing +something big (i.e. not fixing a typo, a small bug fix, minor refactor, etc) +then please open an issue first. pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy