Skip to content

Commit 2ede12e

Browse files
authored
Merge pull request #214 from rust-scraper/bump-selectors
Bump selectors, cssparser and html5ever
2 parents e0d4ea7 + fddd90e commit 2ede12e

File tree

8 files changed

+160
-120
lines changed

8 files changed

+160
-120
lines changed

Cargo.lock

Lines changed: 17 additions & 35 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

scraper/Cargo.toml

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,14 @@ repository = "https://github.com/causal-agent/scraper"
1313
readme = "README.md"
1414

1515
[dependencies]
16-
cssparser = "0.31.0"
16+
ahash = "0.8.0"
17+
cssparser = "0.34.0"
1718
ego-tree = "0.9.0"
18-
html5ever = "0.27"
19-
selectors = "0.25.0"
20-
tendril = "0.4.3"
21-
ahash = "0.8"
19+
html5ever = "0.29.0"
2220
indexmap = { version = "2.6.0", optional = true }
21+
precomputed-hash = "0.1.1"
22+
selectors = "0.26.0"
23+
tendril = "0.4.3"
2324

2425
[dependencies.getopts]
2526
version = "0.2.21"

scraper/src/element_ref/element.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use html5ever::Namespace;
22
use selectors::{
33
attr::{AttrSelectorOperation, CaseSensitivity, NamespaceConstraint},
4+
bloom::BloomFilter,
45
matching, Element, OpaqueElement,
56
};
67

@@ -122,6 +123,10 @@ impl<'a> Element for ElementRef<'a> {
122123
self.value().has_class(&name.0, case_sensitivity)
123124
}
124125

126+
fn has_custom_state(&self, _name: &CssLocalName) -> bool {
127+
false
128+
}
129+
125130
fn is_empty(&self) -> bool {
126131
!self
127132
.children()
@@ -134,6 +139,11 @@ impl<'a> Element for ElementRef<'a> {
134139
}
135140

136141
fn apply_selector_flags(&self, _flags: matching::ElementSelectorFlags) {}
142+
143+
fn add_element_unique_hashes(&self, _filter: &mut BloomFilter) -> bool {
144+
// FIXME: Do we want to add `self.node.id()` here?
145+
false
146+
}
137147
}
138148

139149
#[cfg(test)]

scraper/src/element_ref/mod.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use std::ops::Deref;
77
use ego_tree::iter::{Edge, Traverse};
88
use ego_tree::NodeRef;
99
use html5ever::serialize::{serialize, SerializeOpts, TraversalScope};
10-
use selectors::NthIndexCache;
10+
use selectors::matching::SelectorCaches;
1111

1212
use crate::node::Element;
1313
use crate::{Node, Selector};
@@ -49,7 +49,7 @@ impl<'a> ElementRef<'a> {
4949
scope: *self,
5050
inner,
5151
selector,
52-
nth_index_cache: NthIndexCache::default(),
52+
caches: Default::default(),
5353
}
5454
}
5555

@@ -135,7 +135,7 @@ pub struct Select<'a, 'b> {
135135
scope: ElementRef<'a>,
136136
inner: Traverse<'a, Node>,
137137
selector: &'b Selector,
138-
nth_index_cache: NthIndexCache,
138+
caches: SelectorCaches,
139139
}
140140

141141
impl Debug for Select<'_, '_> {
@@ -144,7 +144,7 @@ impl Debug for Select<'_, '_> {
144144
.field("scope", &self.scope)
145145
.field("inner", &self.inner)
146146
.field("selector", &self.selector)
147-
.field("nth_index_cache", &"..")
147+
.field("caches", &"..")
148148
.finish()
149149
}
150150
}
@@ -155,7 +155,7 @@ impl Clone for Select<'_, '_> {
155155
scope: self.scope,
156156
inner: self.inner.clone(),
157157
selector: self.selector,
158-
nth_index_cache: NthIndexCache::default(),
158+
caches: Default::default(),
159159
}
160160
}
161161
}
@@ -170,7 +170,7 @@ impl<'a, 'b> Iterator for Select<'a, 'b> {
170170
if self.selector.matches_with_scope_and_cache(
171171
&element,
172172
Some(self.scope),
173-
&mut self.nth_index_cache,
173+
&mut self.caches,
174174
) {
175175
return Some(element);
176176
}

scraper/src/html/mod.rs

Lines changed: 18 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,14 @@ use ego_tree::Tree;
1010
use html5ever::serialize::SerializeOpts;
1111
use html5ever::tree_builder::QuirksMode;
1212
use html5ever::{driver, serialize, QualName};
13-
use selectors::NthIndexCache;
13+
use selectors::matching::SelectorCaches;
1414
use tendril::TendrilSink;
1515

1616
use crate::selector::Selector;
1717
use crate::{ElementRef, Node};
1818

19+
pub use tree_sink::HtmlTreeSink;
20+
1921
/// An HTML tree.
2022
///
2123
/// Parsing does not fail hard. Instead, the `quirks_mode` is set and errors are added to the
@@ -67,22 +69,23 @@ impl Html {
6769
/// # fn main() {
6870
/// # let document = "";
6971
/// use html5ever::driver::{self, ParseOpts};
70-
/// use scraper::Html;
72+
/// use scraper::{Html, HtmlTreeSink};
7173
/// use tendril::TendrilSink;
7274
///
73-
/// let parser = driver::parse_document(Html::new_document(), ParseOpts::default());
75+
/// let parser = driver::parse_document(HtmlTreeSink::new(Html::new_document()), ParseOpts::default());
7476
/// let html = parser.one(document);
7577
/// # }
7678
/// ```
7779
pub fn parse_document(document: &str) -> Self {
78-
let parser = driver::parse_document(Self::new_document(), Default::default());
80+
let parser =
81+
driver::parse_document(HtmlTreeSink::new(Self::new_document()), Default::default());
7982
parser.one(document)
8083
}
8184

8285
/// Parses a string of HTML as a fragment.
8386
pub fn parse_fragment(fragment: &str) -> Self {
8487
let parser = driver::parse_fragment(
85-
Self::new_fragment(),
88+
HtmlTreeSink::new(Self::new_fragment()),
8689
Default::default(),
8790
QualName::new(None, ns!(html), local_name!("body")),
8891
Vec::new(),
@@ -95,7 +98,7 @@ impl Html {
9598
Select {
9699
inner: self.tree.nodes(),
97100
selector,
98-
nth_index_cache: NthIndexCache::default(),
101+
caches: Default::default(),
99102
}
100103
}
101104

@@ -127,15 +130,15 @@ impl Html {
127130
pub struct Select<'a, 'b> {
128131
inner: Nodes<'a, Node>,
129132
selector: &'b Selector,
130-
nth_index_cache: NthIndexCache,
133+
caches: SelectorCaches,
131134
}
132135

133136
impl fmt::Debug for Select<'_, '_> {
134137
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
135138
fmt.debug_struct("Select")
136139
.field("inner", &self.inner)
137140
.field("selector", &self.selector)
138-
.field("nth_index_cache", &"..")
141+
.field("caches", &"..")
139142
.finish()
140143
}
141144
}
@@ -145,7 +148,7 @@ impl Clone for Select<'_, '_> {
145148
Self {
146149
inner: self.inner.clone(),
147150
selector: self.selector,
148-
nth_index_cache: NthIndexCache::default(),
151+
caches: Default::default(),
149152
}
150153
}
151154
}
@@ -157,11 +160,9 @@ impl<'a, 'b> Iterator for Select<'a, 'b> {
157160
for node in self.inner.by_ref() {
158161
if let Some(element) = ElementRef::wrap(node) {
159162
if element.parent().is_some()
160-
&& self.selector.matches_with_scope_and_cache(
161-
&element,
162-
None,
163-
&mut self.nth_index_cache,
164-
)
163+
&& self
164+
.selector
165+
.matches_with_scope_and_cache(&element, None, &mut self.caches)
165166
{
166167
return Some(element);
167168
}
@@ -182,11 +183,9 @@ impl<'a, 'b> DoubleEndedIterator for Select<'a, 'b> {
182183
for node in self.inner.by_ref().rev() {
183184
if let Some(element) = ElementRef::wrap(node) {
184185
if element.parent().is_some()
185-
&& self.selector.matches_with_scope_and_cache(
186-
&element,
187-
None,
188-
&mut self.nth_index_cache,
189-
)
186+
&& self
187+
.selector
188+
.matches_with_scope_and_cache(&element, None, &mut self.caches)
190189
{
191190
return Some(element);
192191
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

pFad - The Proxy pFad © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy