diff --git a/Cargo.lock b/Cargo.lock index d0ca7422..5937ff36 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,24 +2,11 @@ # It is not intended for manual editing. version = 4 -[[package]] -name = "ahash" -version = "0.8.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" -dependencies = [ - "cfg-if", - "getrandom", - "once_cell", - "version_check", - "zerocopy", -] - [[package]] name = "autocfg" -version = "1.3.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "bitflags" @@ -90,9 +77,9 @@ dependencies = [ [[package]] name = "ego-tree" -version = "0.9.0" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c6ba7d4eec39eaa9ab24d44a0e73a7949a1095a8b3f3abb11eddf27dbb56a53" +checksum = "b2972feb8dffe7bc8c5463b1dacda1b0dfbed3710e50f977d965429692d74cd8" [[package]] name = "equivalent" @@ -141,9 +128,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.15.0" +version = "0.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb" +checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" [[package]] name = "html5ever" @@ -161,9 +148,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.6.0" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" +checksum = "62f822373a4fe84d4bb149bf54e584a7f4abec90e072ed49cda0edea5b95471f" dependencies = [ "equivalent", "hashbrown", @@ -171,15 +158,15 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.11" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" [[package]] name = "libc" -version = "0.2.158" +version = "0.2.167" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439" +checksum = "09d6582e104315a817dff97f75133544b2e094ee22447d2acf4a74e189ba06fc" [[package]] name = "lock_api" @@ -225,9 +212,9 @@ checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" [[package]] name = "once_cell" -version = "1.19.0" +version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" [[package]] name = "parking_lot" @@ -340,9 +327,9 @@ checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" [[package]] name = "proc-macro2" -version = "1.0.86" +version = "1.0.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" dependencies = [ "unicode-ident", ] @@ -388,9 +375,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.3" +version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a908a6e00f1fdd0dfd9c0eb08ce85126f6d8bbda50017e74bc4a4b7d4a926a4" +checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" dependencies = [ "bitflags", ] @@ -403,9 +390,8 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "scraper" -version = "0.21.0" +version = "0.22.0" dependencies = [ - "ahash", "cssparser", "ego-tree", "getopts", @@ -437,18 +423,18 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.209" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99fce0ffe7310761ca6bf9faf5115afbc19688edd00171d81b1bb1b116c63e09" +checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.209" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5831b979fd7b5439637af1752d535ff49f4860c0f341d1baeb6faf0f4242170" +checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" dependencies = [ "proc-macro2", "quote", @@ -510,9 +496,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.76" +version = "2.0.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "578e081a14e0cefc3279b0472138c513f37b41a08d5a3cca9b6e4e8ceb6cd525" +checksum = "919d3b74a5dd0ccd15aeb8f93e7006bd9e14c295087c9896a110f490752bcf31" dependencies = [ "proc-macro2", "quote", @@ -532,15 +518,15 @@ dependencies = [ [[package]] name = "unicode-ident" -version = "1.0.12" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" [[package]] name = "unicode-width" -version = "0.1.13" +version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" [[package]] name = "utf-8" @@ -548,12 +534,6 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" -[[package]] -name = "version_check" -version = "0.9.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" - [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" diff --git a/scraper/Cargo.toml b/scraper/Cargo.toml index 7a0549e3..0144f3a0 100644 --- a/scraper/Cargo.toml +++ b/scraper/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "scraper" -version = "0.21.0" +version = "0.22.0" edition = "2021" description = "HTML parsing and querying with CSS selectors" @@ -13,11 +13,10 @@ repository = "https://github.com/causal-agent/scraper" readme = "README.md" [dependencies] -ahash = "0.8.0" cssparser = "0.34.0" -ego-tree = "0.9.0" +ego-tree = "0.10.0" html5ever = "0.29.0" -indexmap = { version = "2.6.0", optional = true } +indexmap = { version = "2.7.0", optional = true } precomputed-hash = "0.1.1" selectors = "0.26.0" tendril = "0.4.3" diff --git a/scraper/src/element_ref/element.rs b/scraper/src/element_ref/element.rs index e804d81e..23d2a16e 100644 --- a/scraper/src/element_ref/element.rs +++ b/scraper/src/element_ref/element.rs @@ -9,7 +9,7 @@ use super::ElementRef; use crate::selector::{CssLocalName, CssString, NonTSPseudoClass, PseudoElement, Simple}; /// Note: will never match against non-tree-structure pseudo-classes. -impl<'a> Element for ElementRef<'a> { +impl Element for ElementRef<'_> { type Impl = Simple; fn opaque(&self) -> OpaqueElement { @@ -135,7 +135,7 @@ impl<'a> Element for ElementRef<'a> { fn is_root(&self) -> bool { self.parent() - .map_or(false, |parent| parent.value().is_document()) + .is_some_and(|parent| parent.value().is_document()) } fn apply_selector_flags(&self, _flags: matching::ElementSelectorFlags) {} diff --git a/scraper/src/element_ref/mod.rs b/scraper/src/element_ref/mod.rs index 4e8500e2..cfe2a3d9 100644 --- a/scraper/src/element_ref/mod.rs +++ b/scraper/src/element_ref/mod.rs @@ -117,7 +117,7 @@ impl<'a> ElementRef<'a> { } } -impl<'a> Debug for ElementRef<'a> { +impl Debug for ElementRef<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { Debug::fmt(self.value(), f) } @@ -160,7 +160,7 @@ impl Clone for Select<'_, '_> { } } -impl<'a, 'b> Iterator for Select<'a, 'b> { +impl<'a> Iterator for Select<'a, '_> { type Item = ElementRef<'a>; fn next(&mut self) -> Option> { diff --git a/scraper/src/element_ref/serializable.rs b/scraper/src/element_ref/serializable.rs index 98dda704..0b88c8d3 100644 --- a/scraper/src/element_ref/serializable.rs +++ b/scraper/src/element_ref/serializable.rs @@ -4,7 +4,7 @@ use html5ever::serialize::{Serialize, Serializer, TraversalScope}; use crate::ElementRef; -impl<'a> Serialize for ElementRef<'a> { +impl Serialize for ElementRef<'_> { fn serialize( &self, serializer: &mut S, diff --git a/scraper/src/error.rs b/scraper/src/error.rs index 15141eed..ef27dea1 100644 --- a/scraper/src/error.rs +++ b/scraper/src/error.rs @@ -73,7 +73,7 @@ impl<'a> From> for SelectorErrorKind<'a> { } } -impl<'a> Display for SelectorErrorKind<'a> { +impl Display for SelectorErrorKind<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, @@ -103,7 +103,7 @@ impl<'a> Display for SelectorErrorKind<'a> { } } -impl<'a> Error for SelectorErrorKind<'a> { +impl Error for SelectorErrorKind<'_> { fn description(&self) -> &str { match self { Self::UnexpectedToken(_) => "Token was not expected", diff --git a/scraper/src/html/mod.rs b/scraper/src/html/mod.rs index 39ad74cf..f64578bb 100644 --- a/scraper/src/html/mod.rs +++ b/scraper/src/html/mod.rs @@ -153,7 +153,7 @@ impl Clone for Select<'_, '_> { } } -impl<'a, 'b> Iterator for Select<'a, 'b> { +impl<'a> Iterator for Select<'a, '_> { type Item = ElementRef<'a>; fn next(&mut self) -> Option> { @@ -178,7 +178,7 @@ impl<'a, 'b> Iterator for Select<'a, 'b> { } } -impl<'a, 'b> DoubleEndedIterator for Select<'a, 'b> { +impl DoubleEndedIterator for Select<'_, '_> { fn next_back(&mut self) -> Option { for node in self.inner.by_ref().rev() { if let Some(element) = ElementRef::wrap(node) { diff --git a/scraper/src/html/tree_sink.rs b/scraper/src/html/tree_sink.rs index f9e18720..49b30b49 100644 --- a/scraper/src/html/tree_sink.rs +++ b/scraper/src/html/tree_sink.rs @@ -141,7 +141,7 @@ impl TreeSink for HtmlTreeSink { NodeOrText::AppendText(text) => { let text = make_tendril(text); - let did_concat = parent.last_child().map_or(false, |mut n| match n.value() { + let did_concat = parent.last_child().is_some_and(|mut n| match n.value() { Node::Text(t) => { t.text.push_tendril(&text); true @@ -181,16 +181,13 @@ impl TreeSink for HtmlTreeSink { NodeOrText::AppendText(text) => { let text = make_tendril(text); - let did_concat = - sibling - .prev_sibling() - .map_or(false, |mut n| match n.value() { - Node::Text(t) => { - t.text.push_tendril(&text); - true - } - _ => false, - }); + let did_concat = sibling.prev_sibling().is_some_and(|mut n| match n.value() { + Node::Text(t) => { + t.text.push_tendril(&text); + true + } + _ => false, + }); if !did_concat { sibling.insert_before(Node::Text(Text { text })); @@ -226,6 +223,17 @@ impl TreeSink for HtmlTreeSink { }; for attr in attrs { + #[cfg(not(feature = "deterministic"))] + if let Err(idx) = element + .attrs + .binary_search_by(|(name, _)| name.cmp(&attr.name)) + { + element + .attrs + .insert(idx, (attr.name, make_tendril(attr.value))); + } + + #[cfg(feature = "deterministic")] element .attrs .entry(attr.name) diff --git a/scraper/src/main.rs b/scraper/src/main.rs index 9d4684b3..8f9de9fa 100644 --- a/scraper/src/main.rs +++ b/scraper/src/main.rs @@ -125,8 +125,7 @@ fn main() { .iter() .map(File::open) .map(Result::unwrap) - .map(|mut f| query(&input, &output, &selector, &mut f)) - .any(|m| m) + .any(|mut f| query(&input, &output, &selector, &mut f)) }; process::exit(i32::from(!matched)); diff --git a/scraper/src/node.rs b/scraper/src/node.rs index f2390c3e..4f900857 100644 --- a/scraper/src/node.rs +++ b/scraper/src/node.rs @@ -1,9 +1,5 @@ //! HTML nodes. -#[cfg(not(feature = "deterministic"))] -use ahash::AHashMap as HashMap; -#[cfg(not(feature = "deterministic"))] -use std::collections::hash_map; use std::fmt; use std::ops::Deref; use std::slice::Iter as SliceIter; @@ -219,7 +215,7 @@ pub type Attributes = indexmap::IndexMap; /// Please enable the `deterministic` feature for order-preserving /// (de)serialization. #[cfg(not(feature = "deterministic"))] -pub type Attributes = HashMap; +pub type Attributes = Vec<(QualName, StrTendril)>; /// An HTML element. #[derive(Clone, PartialEq, Eq)] @@ -232,16 +228,20 @@ pub struct Element { id: OnceCell>, - classes: OnceCell>, + classes: OnceCell>, } impl Element { #[doc(hidden)] pub fn new(name: QualName, attributes: Vec) -> Self { - let attrs = attributes + #[allow(unused_mut)] + let mut attrs = attributes .into_iter() - .map(|a| (a.name, crate::tendril_util::make(a.value))) - .collect(); + .map(|attr| (attr.name, crate::tendril_util::make(attr.value))) + .collect::(); + + #[cfg(not(feature = "deterministic"))] + attrs.sort_unstable_by(|lhs, rhs| lhs.0.cmp(&rhs.0)); Element { attrs, @@ -277,17 +277,17 @@ impl Element { /// Returns an iterator over the element's classes. pub fn classes(&self) -> Classes { let classes = self.classes.get_or_init(|| { - let mut classes: Vec = self + let mut classes = self .attrs .iter() .filter(|(name, _)| name.local.as_ref() == "class") - .flat_map(|(_, value)| value.split_whitespace().map(LocalName::from)) - .collect(); + .flat_map(|(_, value)| value.split_ascii_whitespace().map(LocalName::from)) + .collect::>(); classes.sort_unstable(); classes.dedup(); - classes + classes.into_boxed_slice() }); Classes { @@ -298,7 +298,18 @@ impl Element { /// Returns the value of an attribute. pub fn attr(&self, attr: &str) -> Option<&str> { let qualname = QualName::new(None, ns!(), LocalName::from(attr)); - self.attrs.get(&qualname).map(Deref::deref) + + #[cfg(not(feature = "deterministic"))] + let value = self + .attrs + .binary_search_by(|attr| attr.0.cmp(&qualname)) + .ok() + .map(|idx| &*self.attrs[idx].1); + + #[cfg(feature = "deterministic")] + let value = self.attrs.get(&qualname).map(Deref::deref); + + value } /// Returns an iterator over the element's attributes. @@ -330,7 +341,7 @@ pub type AttributesIter<'a> = indexmap::map::Iter<'a, QualName, StrTendril>; /// An iterator over a node's attributes. #[cfg(not(feature = "deterministic"))] -pub type AttributesIter<'a> = hash_map::Iter<'a, QualName, StrTendril>; +pub type AttributesIter<'a> = SliceIter<'a, (QualName, StrTendril)>; /// Iterator over attributes. #[allow(missing_debug_implementations)] pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy