rustc_span/
span_encoding.rs

1use rustc_data_structures::fx::FxIndexSet;
2// This code is very hot and uses lots of arithmetic, avoid overflow checks for performance.
3// See https://github.com/rust-lang/rust/pull/119440#issuecomment-1874255727
4use rustc_serialize::int_overflow::DebugStrictAdd;
5
6use crate::def_id::{DefIndex, LocalDefId};
7use crate::hygiene::SyntaxContext;
8use crate::{BytePos, SPAN_TRACK, SpanData};
9
10/// A compressed span.
11///
12/// [`SpanData`] is 16 bytes, which is too big to stick everywhere. `Span` only
13/// takes up 8 bytes, with less space for the length, parent and context. The
14/// vast majority (99.9%+) of `SpanData` instances can be made to fit within
15/// those 8 bytes. Any `SpanData` whose fields don't fit into a `Span` are
16/// stored in a separate interner table, and the `Span` will index into that
17/// table. Interning is rare enough that the cost is low, but common enough
18/// that the code is exercised regularly.
19///
20/// An earlier version of this code used only 4 bytes for `Span`, but that was
21/// slower because only 80--90% of spans could be stored inline (even less in
22/// very large crates) and so the interner was used a lot more. That version of
23/// the code also predated the storage of parents.
24///
25/// There are four different span forms.
26///
27/// Inline-context format (requires non-huge length, non-huge context, and no parent):
28/// - `span.lo_or_index == span_data.lo`
29/// - `span.len_with_tag_or_marker == len == span_data.hi - span_data.lo` (must be `<= MAX_LEN`)
30/// - `span.ctxt_or_parent_or_marker == span_data.ctxt` (must be `<= MAX_CTXT`)
31///
32/// Inline-parent format (requires non-huge length, root context, and non-huge parent):
33/// - `span.lo_or_index == span_data.lo`
34/// - `span.len_with_tag_or_marker & !PARENT_TAG == len == span_data.hi - span_data.lo`
35///   (must be `<= MAX_LEN`)
36/// - `span.len_with_tag_or_marker` has top bit (`PARENT_TAG`) set
37/// - `span.ctxt_or_parent_or_marker == span_data.parent` (must be `<= MAX_CTXT`)
38///
39/// Partially-interned format (requires non-huge context):
40/// - `span.lo_or_index == index` (indexes into the interner table)
41/// - `span.len_with_tag_or_marker == BASE_LEN_INTERNED_MARKER`
42/// - `span.ctxt_or_parent_or_marker == span_data.ctxt` (must be `<= MAX_CTXT`)
43///
44/// Fully-interned format (all cases not covered above):
45/// - `span.lo_or_index == index` (indexes into the interner table)
46/// - `span.len_with_tag_or_marker == BASE_LEN_INTERNED_MARKER`
47/// - `span.ctxt_or_parent_or_marker == CTXT_INTERNED_MARKER`
48///
49/// The partially-interned form requires looking in the interning table for
50/// lo and length, but the context is stored inline as well as interned.
51/// This is useful because context lookups are often done in isolation, and
52/// inline lookups are quicker.
53///
54/// Notes about the choice of field sizes:
55/// - `lo` is 32 bits in both `Span` and `SpanData`, which means that `lo`
56///   values never cause interning. The number of bits needed for `lo`
57///   depends on the crate size. 32 bits allows up to 4 GiB of code in a crate.
58///   Having no compression on this field means there is no performance cliff
59///   if a crate exceeds a particular size.
60/// - `len` is ~15 bits in `Span` (a u16, minus 1 bit for PARENT_TAG) and 32
61///   bits in `SpanData`, which means that large `len` values will cause
62///   interning. The number of bits needed for `len` does not depend on the
63///   crate size. The most common numbers of bits for `len` are from 0 to 7,
64///   with a peak usually at 3 or 4, and then it drops off quickly from 8
65///   onwards. 15 bits is enough for 99.99%+ of cases, but larger values
66///   (sometimes 20+ bits) might occur dozens of times in a typical crate.
67/// - `ctxt_or_parent_or_marker` is 16 bits in `Span` and two 32 bit fields in
68///   `SpanData`, which means intering will happen if `ctxt` is large, if
69///   `parent` is large, or if both values are non-zero. The number of bits
70///   needed for `ctxt` values depend partly on the crate size and partly on
71///   the form of the code. No crates in `rustc-perf` need more than 15 bits
72///   for `ctxt_or_parent_or_marker`, but larger crates might need more than 16
73///   bits. The number of bits needed for `parent` hasn't been measured,
74///   because `parent` isn't currently used by default.
75///
76/// In order to reliably use parented spans in incremental compilation,
77/// the dependency to the parent definition's span. This is performed
78/// using the callback `SPAN_TRACK` to access the query engine.
79///
80#[derive(Clone, Copy, Eq, PartialEq, Hash)]
81#[rustc_pass_by_value]
82pub struct Span {
83    lo_or_index: u32,
84    len_with_tag_or_marker: u16,
85    ctxt_or_parent_or_marker: u16,
86}
87
88// Convenience structures for all span formats.
89#[derive(Clone, Copy)]
90struct InlineCtxt {
91    lo: u32,
92    len: u16,
93    ctxt: u16,
94}
95
96#[derive(Clone, Copy)]
97struct InlineParent {
98    lo: u32,
99    len_with_tag: u16,
100    parent: u16,
101}
102
103#[derive(Clone, Copy)]
104struct PartiallyInterned {
105    index: u32,
106    ctxt: u16,
107}
108
109#[derive(Clone, Copy)]
110struct Interned {
111    index: u32,
112}
113
114impl InlineCtxt {
115    #[inline]
116    fn data(self) -> SpanData {
117        let len = self.len as u32;
118        debug_assert!(len <= MAX_LEN);
119        SpanData {
120            lo: BytePos(self.lo),
121            hi: BytePos(self.lo.debug_strict_add(len)),
122            ctxt: SyntaxContext::from_u16(self.ctxt),
123            parent: None,
124        }
125    }
126    #[inline]
127    fn span(lo: u32, len: u16, ctxt: u16) -> Span {
128        Span { lo_or_index: lo, len_with_tag_or_marker: len, ctxt_or_parent_or_marker: ctxt }
129    }
130    #[inline]
131    fn from_span(span: Span) -> InlineCtxt {
132        let (lo, len, ctxt) =
133            (span.lo_or_index, span.len_with_tag_or_marker, span.ctxt_or_parent_or_marker);
134        InlineCtxt { lo, len, ctxt }
135    }
136}
137
138impl InlineParent {
139    #[inline]
140    fn data(self) -> SpanData {
141        let len = (self.len_with_tag & !PARENT_TAG) as u32;
142        debug_assert!(len <= MAX_LEN);
143        SpanData {
144            lo: BytePos(self.lo),
145            hi: BytePos(self.lo.debug_strict_add(len)),
146            ctxt: SyntaxContext::root(),
147            parent: Some(LocalDefId { local_def_index: DefIndex::from_u16(self.parent) }),
148        }
149    }
150    #[inline]
151    fn span(lo: u32, len: u16, parent: u16) -> Span {
152        let (lo_or_index, len_with_tag_or_marker, ctxt_or_parent_or_marker) =
153            (lo, PARENT_TAG | len, parent);
154        Span { lo_or_index, len_with_tag_or_marker, ctxt_or_parent_or_marker }
155    }
156    #[inline]
157    fn from_span(span: Span) -> InlineParent {
158        let (lo, len_with_tag, parent) =
159            (span.lo_or_index, span.len_with_tag_or_marker, span.ctxt_or_parent_or_marker);
160        InlineParent { lo, len_with_tag, parent }
161    }
162}
163
164impl PartiallyInterned {
165    #[inline]
166    fn data(self) -> SpanData {
167        SpanData {
168            ctxt: SyntaxContext::from_u16(self.ctxt),
169            ..with_span_interner(|interner| interner.spans[self.index as usize])
170        }
171    }
172    #[inline]
173    fn span(index: u32, ctxt: u16) -> Span {
174        let (lo_or_index, len_with_tag_or_marker, ctxt_or_parent_or_marker) =
175            (index, BASE_LEN_INTERNED_MARKER, ctxt);
176        Span { lo_or_index, len_with_tag_or_marker, ctxt_or_parent_or_marker }
177    }
178    #[inline]
179    fn from_span(span: Span) -> PartiallyInterned {
180        PartiallyInterned { index: span.lo_or_index, ctxt: span.ctxt_or_parent_or_marker }
181    }
182}
183
184impl Interned {
185    #[inline]
186    fn data(self) -> SpanData {
187        with_span_interner(|interner| interner.spans[self.index as usize])
188    }
189    #[inline]
190    fn span(index: u32) -> Span {
191        let (lo_or_index, len_with_tag_or_marker, ctxt_or_parent_or_marker) =
192            (index, BASE_LEN_INTERNED_MARKER, CTXT_INTERNED_MARKER);
193        Span { lo_or_index, len_with_tag_or_marker, ctxt_or_parent_or_marker }
194    }
195    #[inline]
196    fn from_span(span: Span) -> Interned {
197        Interned { index: span.lo_or_index }
198    }
199}
200
201// This code is very hot, and converting span to an enum and matching on it doesn't optimize away
202// properly. So we are using a macro emulating such a match, but expand it directly to an if-else
203// chain.
204macro_rules! match_span_kind {
205    (
206        $span:expr,
207        InlineCtxt($span1:ident) => $arm1:expr,
208        InlineParent($span2:ident) => $arm2:expr,
209        PartiallyInterned($span3:ident) => $arm3:expr,
210        Interned($span4:ident) => $arm4:expr,
211    ) => {
212        if $span.len_with_tag_or_marker != BASE_LEN_INTERNED_MARKER {
213            if $span.len_with_tag_or_marker & PARENT_TAG == 0 {
214                // Inline-context format.
215                let $span1 = InlineCtxt::from_span($span);
216                $arm1
217            } else {
218                // Inline-parent format.
219                let $span2 = InlineParent::from_span($span);
220                $arm2
221            }
222        } else if $span.ctxt_or_parent_or_marker != CTXT_INTERNED_MARKER {
223            // Partially-interned format.
224            let $span3 = PartiallyInterned::from_span($span);
225            $arm3
226        } else {
227            // Interned format.
228            let $span4 = Interned::from_span($span);
229            $arm4
230        }
231    };
232}
233
234// `MAX_LEN` is chosen so that `PARENT_TAG | MAX_LEN` is distinct from
235// `BASE_LEN_INTERNED_MARKER`. (If `MAX_LEN` was 1 higher, this wouldn't be true.)
236const MAX_LEN: u32 = 0b0111_1111_1111_1110;
237const MAX_CTXT: u32 = 0b0111_1111_1111_1110;
238const PARENT_TAG: u16 = 0b1000_0000_0000_0000;
239const BASE_LEN_INTERNED_MARKER: u16 = 0b1111_1111_1111_1111;
240const CTXT_INTERNED_MARKER: u16 = 0b1111_1111_1111_1111;
241
242/// The dummy span has zero position, length, and context, and no parent.
243pub const DUMMY_SP: Span =
244    Span { lo_or_index: 0, len_with_tag_or_marker: 0, ctxt_or_parent_or_marker: 0 };
245
246impl Span {
247    #[inline]
248    pub fn new(
249        mut lo: BytePos,
250        mut hi: BytePos,
251        ctxt: SyntaxContext,
252        parent: Option<LocalDefId>,
253    ) -> Self {
254        if lo > hi {
255            std::mem::swap(&mut lo, &mut hi);
256        }
257
258        // Small len and ctxt may enable one of fully inline formats (or may not).
259        let (len, ctxt32) = (hi.0 - lo.0, ctxt.as_u32());
260        if len <= MAX_LEN && ctxt32 <= MAX_CTXT {
261            match parent {
262                None => return InlineCtxt::span(lo.0, len as u16, ctxt32 as u16),
263                Some(parent) => {
264                    let parent32 = parent.local_def_index.as_u32();
265                    if ctxt32 == 0 && parent32 <= MAX_CTXT {
266                        return InlineParent::span(lo.0, len as u16, parent32 as u16);
267                    }
268                }
269            }
270        }
271
272        // Otherwise small ctxt may enable the partially inline format.
273        let index = |ctxt| {
274            with_span_interner(|interner| interner.intern(&SpanData { lo, hi, ctxt, parent }))
275        };
276        if ctxt32 <= MAX_CTXT {
277            // Interned ctxt should never be read, so it can use any value.
278            PartiallyInterned::span(index(SyntaxContext::from_u32(u32::MAX)), ctxt32 as u16)
279        } else {
280            Interned::span(index(ctxt))
281        }
282    }
283
284    #[inline]
285    pub fn data(self) -> SpanData {
286        let data = self.data_untracked();
287        if let Some(parent) = data.parent {
288            (*SPAN_TRACK)(parent);
289        }
290        data
291    }
292
293    /// Internal function to translate between an encoded span and the expanded representation.
294    /// This function must not be used outside the incremental engine.
295    #[inline]
296    pub fn data_untracked(self) -> SpanData {
297        match_span_kind! {
298            self,
299            InlineCtxt(span) => span.data(),
300            InlineParent(span) => span.data(),
301            PartiallyInterned(span) => span.data(),
302            Interned(span) => span.data(),
303        }
304    }
305
306    /// Returns `true` if this span comes from any kind of macro, desugaring or inlining.
307    #[inline]
308    pub fn from_expansion(self) -> bool {
309        let ctxt = match_span_kind! {
310            self,
311            // All branches here, except `InlineParent`, actually return `span.ctxt_or_parent_or_marker`.
312            // Since `Interned` is selected if the field contains `CTXT_INTERNED_MARKER` returning that value
313            // as the context allows the compiler to optimize out the branch that selects between either
314            // `Interned` and `PartiallyInterned`.
315            //
316            // Interned contexts can never be the root context and `CTXT_INTERNED_MARKER` has a different value
317            // than the root context so this works for checking is this is an expansion.
318            InlineCtxt(span) => SyntaxContext::from_u16(span.ctxt),
319            InlineParent(_span) => SyntaxContext::root(),
320            PartiallyInterned(span) => SyntaxContext::from_u16(span.ctxt),
321            Interned(_span) => SyntaxContext::from_u16(CTXT_INTERNED_MARKER),
322        };
323        !ctxt.is_root()
324    }
325
326    /// Returns `true` if this is a dummy span with any hygienic context.
327    #[inline]
328    pub fn is_dummy(self) -> bool {
329        if self.len_with_tag_or_marker != BASE_LEN_INTERNED_MARKER {
330            // Inline-context or inline-parent format.
331            let lo = self.lo_or_index;
332            let len = (self.len_with_tag_or_marker & !PARENT_TAG) as u32;
333            debug_assert!(len <= MAX_LEN);
334            lo == 0 && len == 0
335        } else {
336            // Fully-interned or partially-interned format.
337            let index = self.lo_or_index;
338            let data = with_span_interner(|interner| interner.spans[index as usize]);
339            data.lo == BytePos(0) && data.hi == BytePos(0)
340        }
341    }
342
343    #[inline]
344    pub fn map_ctxt(self, map: impl FnOnce(SyntaxContext) -> SyntaxContext) -> Span {
345        let data = match_span_kind! {
346            self,
347            InlineCtxt(span) => {
348                // This format occurs 1-2 orders of magnitude more often than others (#125017),
349                // so it makes sense to micro-optimize it to avoid `span.data()` and `Span::new()`.
350                let new_ctxt = map(SyntaxContext::from_u16(span.ctxt));
351                let new_ctxt32 = new_ctxt.as_u32();
352                return if new_ctxt32 <= MAX_CTXT {
353                    // Any small new context including zero will preserve the format.
354                    InlineCtxt::span(span.lo, span.len, new_ctxt32 as u16)
355                } else {
356                    span.data().with_ctxt(new_ctxt)
357                };
358            },
359            InlineParent(span) => span.data(),
360            PartiallyInterned(span) => span.data(),
361            Interned(span) => span.data(),
362        };
363
364        data.with_ctxt(map(data.ctxt))
365    }
366
367    // Returns either syntactic context, if it can be retrieved without taking the interner lock,
368    // or an index into the interner if it cannot.
369    #[inline]
370    fn inline_ctxt(self) -> Result<SyntaxContext, usize> {
371        match_span_kind! {
372            self,
373            InlineCtxt(span) => Ok(SyntaxContext::from_u16(span.ctxt)),
374            InlineParent(_span) => Ok(SyntaxContext::root()),
375            PartiallyInterned(span) => Ok(SyntaxContext::from_u16(span.ctxt)),
376            Interned(span) => Err(span.index as usize),
377        }
378    }
379
380    /// This function is used as a fast path when decoding the full `SpanData` is not necessary.
381    /// It's a cut-down version of `data_untracked`.
382    #[cfg_attr(not(test), rustc_diagnostic_item = "SpanCtxt")]
383    #[inline]
384    pub fn ctxt(self) -> SyntaxContext {
385        self.inline_ctxt()
386            .unwrap_or_else(|index| with_span_interner(|interner| interner.spans[index].ctxt))
387    }
388
389    #[inline]
390    pub fn eq_ctxt(self, other: Span) -> bool {
391        match (self.inline_ctxt(), other.inline_ctxt()) {
392            (Ok(ctxt1), Ok(ctxt2)) => ctxt1 == ctxt2,
393            // If `inline_ctxt` returns `Ok` the context is <= MAX_CTXT.
394            // If it returns `Err` the span is fully interned and the context is > MAX_CTXT.
395            // As these do not overlap an `Ok` and `Err` result cannot have an equal context.
396            (Ok(_), Err(_)) | (Err(_), Ok(_)) => false,
397            (Err(index1), Err(index2)) => with_span_interner(|interner| {
398                interner.spans[index1].ctxt == interner.spans[index2].ctxt
399            }),
400        }
401    }
402
403    #[inline]
404    pub fn with_parent(self, parent: Option<LocalDefId>) -> Span {
405        let data = match_span_kind! {
406            self,
407            InlineCtxt(span) => {
408                // This format occurs 1-2 orders of magnitude more often than others (#126544),
409                // so it makes sense to micro-optimize it to avoid `span.data()` and `Span::new()`.
410                // Copypaste from `Span::new`, the small len & ctxt conditions are known to hold.
411                match parent {
412                    None => return self,
413                    Some(parent) => {
414                        let parent32 = parent.local_def_index.as_u32();
415                        if span.ctxt == 0 && parent32 <= MAX_CTXT {
416                            return InlineParent::span(span.lo, span.len, parent32 as u16);
417                        }
418                    }
419                }
420                span.data()
421            },
422            InlineParent(span) => span.data(),
423            PartiallyInterned(span) => span.data(),
424            Interned(span) => span.data(),
425        };
426
427        if let Some(old_parent) = data.parent {
428            (*SPAN_TRACK)(old_parent);
429        }
430        data.with_parent(parent)
431    }
432
433    #[inline]
434    pub fn parent(self) -> Option<LocalDefId> {
435        let interned_parent =
436            |index: u32| with_span_interner(|interner| interner.spans[index as usize].parent);
437        match_span_kind! {
438            self,
439            InlineCtxt(_span) => None,
440            InlineParent(span) => Some(LocalDefId { local_def_index: DefIndex::from_u16(span.parent) }),
441            PartiallyInterned(span) => interned_parent(span.index),
442            Interned(span) => interned_parent(span.index),
443        }
444    }
445}
446
447#[derive(Default)]
448pub(crate) struct SpanInterner {
449    spans: FxIndexSet<SpanData>,
450}
451
452impl SpanInterner {
453    fn intern(&mut self, span_data: &SpanData) -> u32 {
454        let (index, _) = self.spans.insert_full(*span_data);
455        index as u32
456    }
457}
458
459// If an interner exists, return it. Otherwise, prepare a fresh one.
460#[inline]
461fn with_span_interner<T, F: FnOnce(&mut SpanInterner) -> T>(f: F) -> T {
462    crate::with_session_globals(|session_globals| f(&mut session_globals.span_interner.lock()))
463}
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy