From 9bd68689a918c36f0e18daa63be56a2373b08650 Mon Sep 17 00:00:00 2001 From: Carlo Federico Vescovo Date: Mon, 24 Feb 2025 10:14:57 +0100 Subject: [PATCH 1/2] Fix README.md for crates.io --- README.md | 153 +--------------------------------------------- scraper/README.md | 153 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 153 insertions(+), 153 deletions(-) mode change 100644 => 120000 README.md diff --git a/README.md b/README.md deleted file mode 100644 index 39450ec2..00000000 --- a/README.md +++ /dev/null @@ -1,152 +0,0 @@ -# scraper - -[![crates.io](https://img.shields.io/crates/v/scraper?color=dark-green)][crate] -[![downloads](https://img.shields.io/crates/d/scraper)][crate] -[![test](https://github.com/causal-agent/scraper/actions/workflows/test.yml/badge.svg)][tests] - -HTML parsing and querying with CSS selectors. - -`scraper` is on [Crates.io][crate] and [GitHub][github]. - -[crate]: https://crates.io/crates/scraper -[github]: https://github.com/causal-agent/scraper -[tests]: https://github.com/causal-agent/scraper/actions/workflows/test.yml - -Scraper provides an interface to Servo's `html5ever` and `selectors` crates, for browser-grade parsing and querying. - -## Examples - -### Parsing a document - -```rust -use scraper::Html; - -let html = r#" - - - Hello, world! -

Hello, world!

-"#; - -let document = Html::parse_document(html); -``` - -### Parsing a fragment - -```rust -use scraper::Html; -let fragment = Html::parse_fragment("

Hello, world!

"); -``` - -### Parsing a selector - -```rust -use scraper::Selector; -let selector = Selector::parse("h1.foo").unwrap(); -``` - -### Selecting elements - -```rust -use scraper::{Html, Selector}; - -let html = r#" - -"#; - -let fragment = Html::parse_fragment(html); -let selector = Selector::parse("li").unwrap(); - -for element in fragment.select(&selector) { - assert_eq!("li", element.value().name()); -} -``` - -### Selecting descendent elements - -```rust -use scraper::{Html, Selector}; - -let html = r#" - -"#; - -let fragment = Html::parse_fragment(html); -let ul_selector = Selector::parse("ul").unwrap(); -let li_selector = Selector::parse("li").unwrap(); - -let ul = fragment.select(&ul_selector).next().unwrap(); -for element in ul.select(&li_selector) { - assert_eq!("li", element.value().name()); -} -``` - -### Accessing element attributes - -```rust -use scraper::{Html, Selector}; - -let fragment = Html::parse_fragment(r#""#); -let selector = Selector::parse(r#"input[name="foo"]"#).unwrap(); - -let input = fragment.select(&selector).next().unwrap(); -assert_eq!(Some("bar"), input.value().attr("value")); -``` - -### Serializing HTML and inner HTML - -```rust -use scraper::{Html, Selector}; - -let fragment = Html::parse_fragment("

Hello, world!

"); -let selector = Selector::parse("h1").unwrap(); - -let h1 = fragment.select(&selector).next().unwrap(); - -assert_eq!("

Hello, world!

", h1.html()); -assert_eq!("Hello, world!", h1.inner_html()); -``` - -### Accessing descendent text - -```rust -use scraper::{Html, Selector}; - -let fragment = Html::parse_fragment("

Hello, world!

"); -let selector = Selector::parse("h1").unwrap(); - -let h1 = fragment.select(&selector).next().unwrap(); -let text = h1.text().collect::>(); - -assert_eq!(vec!["Hello, ", "world!"], text); -``` - -### Manipulating the DOM - -```rust -use html5ever::tree_builder::TreeSink; -use scraper::{Html, Selector}; - -let html = "hello

REMOVE ME

"; -let selector = Selector::parse(".hello").unwrap(); -let mut document = Html::parse_document(html); -let node_ids: Vec<_> = document.select(&selector).map(|x| x.id()).collect(); -for id in node_ids { - document.remove_from_parent(&id); -} -assert_eq!(document.html(), "hello"); -``` - -## Contributing - -Please feel free to open pull requests. If you're planning on implementing -something big (i.e. not fixing a typo, a small bug fix, minor refactor, etc) -then please open an issue first. diff --git a/README.md b/README.md new file mode 120000 index 00000000..a6541ddb --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +scraper/README.md \ No newline at end of file diff --git a/scraper/README.md b/scraper/README.md index 32d46ee8..39450ec2 120000 --- a/scraper/README.md +++ b/scraper/README.md @@ -1 +1,152 @@ -../README.md \ No newline at end of file +# scraper + +[![crates.io](https://img.shields.io/crates/v/scraper?color=dark-green)][crate] +[![downloads](https://img.shields.io/crates/d/scraper)][crate] +[![test](https://github.com/causal-agent/scraper/actions/workflows/test.yml/badge.svg)][tests] + +HTML parsing and querying with CSS selectors. + +`scraper` is on [Crates.io][crate] and [GitHub][github]. + +[crate]: https://crates.io/crates/scraper +[github]: https://github.com/causal-agent/scraper +[tests]: https://github.com/causal-agent/scraper/actions/workflows/test.yml + +Scraper provides an interface to Servo's `html5ever` and `selectors` crates, for browser-grade parsing and querying. + +## Examples + +### Parsing a document + +```rust +use scraper::Html; + +let html = r#" + + + Hello, world! +

Hello, world!

+"#; + +let document = Html::parse_document(html); +``` + +### Parsing a fragment + +```rust +use scraper::Html; +let fragment = Html::parse_fragment("

Hello, world!

"); +``` + +### Parsing a selector + +```rust +use scraper::Selector; +let selector = Selector::parse("h1.foo").unwrap(); +``` + +### Selecting elements + +```rust +use scraper::{Html, Selector}; + +let html = r#" + +"#; + +let fragment = Html::parse_fragment(html); +let selector = Selector::parse("li").unwrap(); + +for element in fragment.select(&selector) { + assert_eq!("li", element.value().name()); +} +``` + +### Selecting descendent elements + +```rust +use scraper::{Html, Selector}; + +let html = r#" + +"#; + +let fragment = Html::parse_fragment(html); +let ul_selector = Selector::parse("ul").unwrap(); +let li_selector = Selector::parse("li").unwrap(); + +let ul = fragment.select(&ul_selector).next().unwrap(); +for element in ul.select(&li_selector) { + assert_eq!("li", element.value().name()); +} +``` + +### Accessing element attributes + +```rust +use scraper::{Html, Selector}; + +let fragment = Html::parse_fragment(r#""#); +let selector = Selector::parse(r#"input[name="foo"]"#).unwrap(); + +let input = fragment.select(&selector).next().unwrap(); +assert_eq!(Some("bar"), input.value().attr("value")); +``` + +### Serializing HTML and inner HTML + +```rust +use scraper::{Html, Selector}; + +let fragment = Html::parse_fragment("

Hello, world!

"); +let selector = Selector::parse("h1").unwrap(); + +let h1 = fragment.select(&selector).next().unwrap(); + +assert_eq!("

Hello, world!

", h1.html()); +assert_eq!("Hello, world!", h1.inner_html()); +``` + +### Accessing descendent text + +```rust +use scraper::{Html, Selector}; + +let fragment = Html::parse_fragment("

Hello, world!

"); +let selector = Selector::parse("h1").unwrap(); + +let h1 = fragment.select(&selector).next().unwrap(); +let text = h1.text().collect::>(); + +assert_eq!(vec!["Hello, ", "world!"], text); +``` + +### Manipulating the DOM + +```rust +use html5ever::tree_builder::TreeSink; +use scraper::{Html, Selector}; + +let html = "hello

REMOVE ME

"; +let selector = Selector::parse(".hello").unwrap(); +let mut document = Html::parse_document(html); +let node_ids: Vec<_> = document.select(&selector).map(|x| x.id()).collect(); +for id in node_ids { + document.remove_from_parent(&id); +} +assert_eq!(document.html(), "hello"); +``` + +## Contributing + +Please feel free to open pull requests. If you're planning on implementing +something big (i.e. not fixing a typo, a small bug fix, minor refactor, etc) +then please open an issue first. From d4a9eaf6f102c03e88c92bb2e2481a61ad7d2502 Mon Sep 17 00:00:00 2001 From: Carlo Federico Vescovo Date: Mon, 24 Feb 2025 10:15:22 +0100 Subject: [PATCH 2/2] Version 0.23.1 --- Cargo.lock | 2 +- scraper/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b854ef2b..50df5256 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -390,7 +390,7 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "scraper" -version = "0.23.0" +version = "0.23.1" dependencies = [ "cssparser", "ego-tree", diff --git a/scraper/Cargo.toml b/scraper/Cargo.toml index 6c53b45d..b88d6f49 100644 --- a/scraper/Cargo.toml +++ b/scraper/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "scraper" -version = "0.23.0" +version = "0.23.1" edition = "2021" description = "HTML parsing and querying with CSS selectors" pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy