From b3570f31e1e3346de9a13ee9fa054101e82bf429 Mon Sep 17 00:00:00 2001 From: Carlo Federico Vescovo <26970569+cfvescovo@users.noreply.github.com> Date: Tue, 16 Jul 2024 13:14:32 +0200 Subject: [PATCH 1/4] `is` and `has` support --- src/selector.rs | 26 ++++++++++++++++++++++++++ src/test.rs | 24 ++++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/src/selector.rs b/src/selector.rs index 7ef13f2f..1e6da7df 100644 --- a/src/selector.rs +++ b/src/selector.rs @@ -86,6 +86,14 @@ pub struct Parser; impl<'i> parser::Parser<'i> for Parser { type Impl = Simple; type Error = SelectorParseErrorKind<'i>; + + fn parse_is_and_where(&self) -> bool { + true + } + + fn parse_has(&self) -> bool { + true + } } /// A simple implementation of `SelectorImpl` with no pseudo-classes or pseudo-elements. @@ -222,4 +230,22 @@ mod tests { let s = ""; let _sel: Selector = s.try_into().unwrap(); } + + #[test] + fn has_selector() { + let s = ":has(a)"; + let _sel: Selector = s.try_into().unwrap(); + } + + #[test] + fn is_selector() { + let s = ":is(a)"; + let _sel: Selector = s.try_into().unwrap(); + } + + #[test] + fn where_selector() { + let s = ":where(a)"; + let _sel: Selector = s.try_into().unwrap(); + } } diff --git a/src/test.rs b/src/test.rs index 199d450f..86d498dc 100644 --- a/src/test.rs +++ b/src/test.rs @@ -20,3 +20,27 @@ fn tag_with_newline() { Some("https://github.com/causal-agent/scraper") ); } + +#[test] +fn has_selector() { + let document = Html::parse_fragment( + r#"
+            <div>
+                <div id="foo">
+                    Hi There!
+                </div>
+            </div>
+            <ul>
+                <li>first</li>
+                <li>second</li>
+                <li>third</li>
+            </ul>
+ "#, + ); + + let selector = Selector::parse("div:has(div#foo) + ul > li:nth-child(2)").unwrap(); + + let mut iter = document.select(&selector); + let li = iter.next().unwrap(); + assert_eq!(li.inner_html(), "second"); +} From f5cc684a0648a11891d2fa86d501b4f41d65da6a Mon Sep 17 00:00:00 2001 From: Gavin Rohrer Date: Tue, 30 Jul 2024 16:09:15 -0400 Subject: [PATCH 2/4] Make ElementRef Debug impl use Element The derived impl would print out the whole tree. This caused selectors to log many GBs per minute. --- src/element_ref/mod.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/element_ref/mod.rs b/src/element_ref/mod.rs index 7357f620..accccbfd 100644 --- a/src/element_ref/mod.rs +++ b/src/element_ref/mod.rs @@ -1,6 +1,6 @@ //! Element references. -use std::fmt; +use std::fmt::{self, Debug}; use std::ops::Deref; use ego_tree::iter::{Edge, Traverse}; @@ -15,7 +15,7 @@ use crate::{Node, Selector}; /// /// This wrapper implements the `Element` trait from the `selectors` crate, which allows it to be /// matched against CSS selectors. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Clone, Copy, PartialEq, Eq)] pub struct ElementRef<'a> { node: NodeRef<'a, Node>, } @@ -116,6 +116,12 @@ impl<'a> ElementRef<'a> { } } +impl<'a> Debug for ElementRef<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + Debug::fmt(self.value(), f) + } +} + impl<'a> Deref for ElementRef<'a> { type Target = NodeRef<'a, Node>; fn deref(&self) -> &NodeRef<'a, Node> { @@ -131,7 +137,7 @@ pub struct Select<'a, 'b> { nth_index_cache: NthIndexCache, } -impl fmt::Debug for Select<'_, '_> { +impl Debug for Select<'_, '_> { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { fmt.debug_struct("Select") .field("scope", &self.scope) From fd01b100ead0fbfc7d5819b8c9c28bd443b3ba09 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 5 Aug 2024 06:29:21 +0000 Subject: [PATCH 3/4] Bump indexmap from 2.2.6 to 2.3.0 Bumps [indexmap](https://github.com/indexmap-rs/indexmap) from 2.2.6 to 2.3.0. - [Changelog](https://github.com/indexmap-rs/indexmap/blob/master/RELEASES.md) - [Commits](https://github.com/indexmap-rs/indexmap/compare/2.2.6...2.3.0) --- updated-dependencies: - dependency-name: indexmap dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 61dc1405..0d6ceb12 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -167,9 +167,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.2.6" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" +checksum = "de3fc2e30ba82dd1b3911c8de1ffc143c74a914a14e99514d7637e3099df5ea0" dependencies = [ "equivalent", "hashbrown", diff --git a/Cargo.toml b/Cargo.toml index bdf64242..62eb8346 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ html5ever = "0.27" selectors = "0.25.0" tendril = "0.4.3" ahash = "0.8" -indexmap = { version = "2.2.6", optional = true } +indexmap = { version = "2.3.0", optional = true } once_cell = "1.19" [dependencies.getopts] From 4d33a55f59c945663b732f6ab2cb0d5dae351e69 Mon Sep 17 00:00:00 2001 From: Carlo Federico Vescovo <26970569+cfvescovo@users.noreply.github.com> Date: Mon, 5 Aug 2024 10:25:13 +0200 Subject: [PATCH 4/4] Version 0.20.0 --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0d6ceb12..1b94ff1a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -425,7 +425,7 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "scraper" -version = "0.19.1" +version = "0.20.0" dependencies = [ "ahash", "cssparser", diff --git a/Cargo.toml b/Cargo.toml index 62eb8346..3e4e356f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "scraper" -version = "0.19.1" +version = "0.20.0" edition = "2021" description = "HTML parsing and querying with CSS selectors" pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy