From d1b4172486960613b91421183d5f7e32d9ae76c2 Mon Sep 17 00:00:00 2001 From: Asger F Date: Tue, 1 Jul 2025 14:08:48 +0200 Subject: [PATCH 1/9] Shared: Factor out some helper predicates in alert filtering --- shared/util/codeql/util/AlertFiltering.qll | 23 ++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/shared/util/codeql/util/AlertFiltering.qll b/shared/util/codeql/util/AlertFiltering.qll index 1bc366c0416d..f264c20aeda7 100644 --- a/shared/util/codeql/util/AlertFiltering.qll +++ b/shared/util/codeql/util/AlertFiltering.qll @@ -75,19 +75,30 @@ extensible predicate restrictAlertsToExactLocation( /** Module for applying alert location filtering. */ module AlertFilteringImpl { + pragma[nomagic] + private predicate restrictAlertsToEntireFile(string filePath) { restrictAlertsTo(filePath, 0, 0) } + + pragma[nomagic] + private predicate restrictAlertsToStartLine(string filePath, int line) { + exists(int startLineStart, int startLineEnd | + restrictAlertsTo(filePath, startLineStart, startLineEnd) and + line = [startLineStart .. startLineEnd] + ) + } + /** Applies alert filtering to the given location. */ bindingset[location] predicate filterByLocation(Location location) { not restrictAlertsTo(_, _, _) and not restrictAlertsToExactLocation(_, _, _, _, _) or - exists(string filePath, int startLineStart, int startLineEnd | - restrictAlertsTo(filePath, startLineStart, startLineEnd) - | - startLineStart = 0 and - startLineEnd = 0 and + exists(string filePath | + restrictAlertsToEntireFile(filePath) and location.hasLocationInfo(filePath, _, _, _, _) or - location.hasLocationInfo(filePath, [startLineStart .. startLineEnd], _, _, _) + exists(int line | + restrictAlertsToStartLine(filePath, line) and + location.hasLocationInfo(filePath, line, _, _, _) + ) ) or exists(string filePath, int startLine, int startColumn, int endLine, int endColumn | From 8b345518f4f2dc3cdcdcb9a9339a0b3ea2175536 Mon Sep 17 00:00:00 2001 From: Asger F Date: Tue, 1 Jul 2025 14:54:41 +0200 Subject: [PATCH 2/9] Shared: Add approximate version of getASelected{Source,Sink}Location --- shared/dataflow/codeql/dataflow/DataFlow.qll | 36 +++++++++++++++++++ .../codeql/dataflow/internal/DataFlowImpl.qll | 4 +++ .../dataflow/internal/DataFlowImplStage1.qll | 8 +++-- shared/util/codeql/util/AlertFiltering.qll | 36 +++++++++++++++++++ 4 files changed, 82 insertions(+), 2 deletions(-) diff --git a/shared/dataflow/codeql/dataflow/DataFlow.qll b/shared/dataflow/codeql/dataflow/DataFlow.qll index 3483287e3b39..93803af552bc 100644 --- a/shared/dataflow/codeql/dataflow/DataFlow.qll +++ b/shared/dataflow/codeql/dataflow/DataFlow.qll @@ -459,6 +459,15 @@ module Configs Lang> { */ default Location getASelectedSourceLocation(Node source) { result = source.getLocation() } + /** + * Like `getASelectedSourceLocation`, but only has to get a location _containing_ the + * actual location associated with `source`. + * + * This prunes fewer sources than `getASelectedSourceLocation` but leaves room for the possibility + * that a more precise location can be selected in the query. + */ + default Location getASelectedSourceLocationApprox(Node source) { none() } + /** * Gets a location that will be associated with the given `sink` in a * diff-informed query that uses this configuration (see @@ -469,6 +478,15 @@ module Configs Lang> { * report the sink at all, this predicate can be `none()`. */ default Location getASelectedSinkLocation(Node sink) { result = sink.getLocation() } + + /** + * Like `getASelectedSinkLocation`, but only has to get a location _containing_ the + * actual location associated with `sink`. + * + * This prunes fewer sinks than `getASelectedSinkLocation` but leaves room for the possibility + * that a more precise location can be selected in the query. + */ + default Location getASelectedSinkLocationApprox(Node sink) { none() } } /** An input configuration for data flow using flow state. */ @@ -608,6 +626,15 @@ module Configs Lang> { */ default Location getASelectedSourceLocation(Node source) { result = source.getLocation() } + /** + * Like `getASelectedSourceLocation`, but only has to get a location _containing_ the + * actual location associated with `source`. + * + * This prunes fewer sources than `getASelectedSourceLocation` but leaves room for the possibility + * that a more precise location can be selected in the query. + */ + default Location getASelectedSourceLocationApprox(Node source) { none() } + /** * Gets a location that will be associated with the given `sink` in a * diff-informed query that uses this configuration (see @@ -618,6 +645,15 @@ module Configs Lang> { * report the sink at all, this predicate can be `none()`. */ default Location getASelectedSinkLocation(Node sink) { result = sink.getLocation() } + + /** + * Like `getASelectedSinkLocation`, but only has to get a location _containing_ the + * actual location associated with `sink`. + * + * This prunes fewer sinks than `getASelectedSinkLocation` but leaves room for the possibility + * that a more precise location can be selected in the query. + */ + default Location getASelectedSinkLocationApprox(Node sink) { none() } } } diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index a7e0736432ac..b90512bdb237 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -145,7 +145,11 @@ module MakeImpl Lang> { Location getASelectedSourceLocation(Node source); + Location getASelectedSourceLocationApprox(Node source); + Location getASelectedSinkLocation(Node sink); + + Location getASelectedSinkLocationApprox(Node sink); } /** diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll index c7883df0de18..5e85cab00d9d 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll @@ -133,7 +133,9 @@ module MakeImplStage1 Lang> { private predicate isFilteredSource(Node source) { Config::isSource(source, _) and if Config::observeDiffInformedIncrementalMode() - then AlertFiltering::filterByLocation(Config::getASelectedSourceLocation(source)) + then + AlertFiltering::filterByLocation(Config::getASelectedSourceLocation(source)) or + AlertFiltering::filterByLocationApprox(Config::getASelectedSourceLocationApprox(source)) else any() } @@ -144,7 +146,9 @@ module MakeImplStage1 Lang> { Config::isSink(sink) ) and if Config::observeDiffInformedIncrementalMode() - then AlertFiltering::filterByLocation(Config::getASelectedSinkLocation(sink)) + then + AlertFiltering::filterByLocation(Config::getASelectedSinkLocation(sink)) or + AlertFiltering::filterByLocationApprox(Config::getASelectedSinkLocationApprox(sink)) else any() } diff --git a/shared/util/codeql/util/AlertFiltering.qll b/shared/util/codeql/util/AlertFiltering.qll index f264c20aeda7..97e6198a16cf 100644 --- a/shared/util/codeql/util/AlertFiltering.qll +++ b/shared/util/codeql/util/AlertFiltering.qll @@ -107,4 +107,40 @@ module AlertFilteringImpl { location.hasLocationInfo(filePath, startLine, startColumn, endLine, endColumn) ) } + + /** + * Holds if some subrange within `location` would be accepted by alert filtering. + * + * There does not need to exist a `Location` corresponding to that subrange. + */ + bindingset[location] + predicate filterByLocationApprox(Location location) { + not restrictAlertsTo(_, _, _) and not restrictAlertsToExactLocation(_, _, _, _, _) + or + exists(string filePath | + restrictAlertsToEntireFile(filePath) and + location.hasLocationInfo(filePath, _, _, _, _) + or + exists(int locStartLine, int locEndLine | + location.hasLocationInfo(filePath, locStartLine, _, locEndLine, _) + | + restrictAlertsToStartLine(filePath, [locStartLine .. locEndLine]) + ) + ) + or + // Check if an exact filter-location is fully contained in `location`. + // This is slow but only used for testing. + exists( + string filePath, int startLine, int startColumn, int endLine, int endColumn, + int filterStartLine, int filterStartColumn, int filterEndLine, int filterEndColumn + | + location.hasLocationInfo(filePath, startLine, startColumn, endLine, endColumn) and + restrictAlertsToExactLocation(filePath, filterStartLine, filterStartColumn, filterEndLine, + filterEndColumn) and + startLine <= filterStartLine and + (startLine != filterStartLine or startColumn <= filterStartColumn) and + endLine >= filterEndLine and + (endLine != filterEndLine or endColumn >= filterEndColumn) + ) + } } From d65da1f8a1bc92966c5ac1e6a67cc9a2d4f59233 Mon Sep 17 00:00:00 2001 From: Asger F Date: Tue, 1 Jul 2025 15:52:37 +0200 Subject: [PATCH 3/9] Ruby: enable for PolyReDos but document why it still doesnt work --- .../ruby/security/regexp/PolynomialReDoSQuery.qll | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ruby/ql/lib/codeql/ruby/security/regexp/PolynomialReDoSQuery.qll b/ruby/ql/lib/codeql/ruby/security/regexp/PolynomialReDoSQuery.qll index 98a42fcf5e7c..0d955d1eb245 100644 --- a/ruby/ql/lib/codeql/ruby/security/regexp/PolynomialReDoSQuery.qll +++ b/ruby/ql/lib/codeql/ruby/security/regexp/PolynomialReDoSQuery.qll @@ -18,6 +18,18 @@ private module PolynomialReDoSConfig implements DataFlow::ConfigSig { predicate isSink(DataFlow::Node sink) { sink instanceof Sink } predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer } + + // Diff-informedness is disabled because of RegExpTerms having incorrect locations when + // the regexp is parsed from a string arising from constant folding. + predicate observeDiffInformedIncrementalMode() { none() } + + Location getASelectedSinkLocation(DataFlow::Node sink) { + result = sink.(Sink).getHighlight().getLocation() + } + + Location getASelectedSinkLocationApprox(DataFlow::Node sink) { + result = sink.(Sink).getRegExp().getRootTerm().getLocation() + } } /** From a46b5f9529e7cffb7cd1ceacdbbc766c7630362c Mon Sep 17 00:00:00 2001 From: Asger F Date: Tue, 1 Jul 2025 16:05:22 +0200 Subject: [PATCH 4/9] Python: enable diff-informedness for poly redos using approximate related locations --- .../security/dataflow/PolynomialReDoSQuery.qll | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/python/ql/lib/semmle/python/security/dataflow/PolynomialReDoSQuery.qll b/python/ql/lib/semmle/python/security/dataflow/PolynomialReDoSQuery.qll index 0e52764c1950..a6602b30b16d 100644 --- a/python/ql/lib/semmle/python/security/dataflow/PolynomialReDoSQuery.qll +++ b/python/ql/lib/semmle/python/security/dataflow/PolynomialReDoSQuery.qll @@ -18,21 +18,13 @@ private module PolynomialReDoSConfig implements DataFlow::ConfigSig { predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer } - // Diff-informed incremental mode is currently disabled for this query due to - // API limitations. The query exposes sink.getABacktrackingTerm() as an alert - // location, but there is no way to express that information through - // getASelectedSinkLocation() because there is no @location in the CodeQL - // database that corresponds to a term inside a regular expression. As a - // result, this query could miss alerts in diff-informed incremental mode. - // - // To address this problem, we need to have a version of - // getASelectedSinkLocation() that uses hasLocationInfo() instead of - // returning Location objects. - predicate observeDiffInformedIncrementalMode() { none() } + predicate observeDiffInformedIncrementalMode() { any() } Location getASelectedSinkLocation(DataFlow::Node sink) { result = sink.(Sink).getHighlight().getLocation() - or + } + + Location getASelectedSinkLocationApprox(DataFlow::Node sink) { result = sink.(Sink).getABacktrackingTerm().getLocation() } } From 82d190f4bf298423b26bf9f2729e534d275638f4 Mon Sep 17 00:00:00 2001 From: Asger F Date: Tue, 1 Jul 2025 16:17:24 +0200 Subject: [PATCH 5/9] Java: use approximate related sink locations in polynomial redos --- .../security/regexp/PolynomialReDoSQuery.qll | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/java/ql/lib/semmle/code/java/security/regexp/PolynomialReDoSQuery.qll b/java/ql/lib/semmle/code/java/security/regexp/PolynomialReDoSQuery.qll index ba65e13dd611..f5d0190068b9 100644 --- a/java/ql/lib/semmle/code/java/security/regexp/PolynomialReDoSQuery.qll +++ b/java/ql/lib/semmle/code/java/security/regexp/PolynomialReDoSQuery.qll @@ -47,6 +47,24 @@ module PolynomialRedosConfig implements DataFlow::ConfigSig { node instanceof SimpleTypeSanitizer or node.asExpr().(MethodCall).getMethod() instanceof LengthRestrictedMethod } + + predicate observeDiffInformedIncrementalMode() { any() } + + Location getASelectedSinkLocation(DataFlow::Node sink) { + exists(SuperlinearBackTracking::PolynomialBackTrackingTerm regexp | + regexp.getRootTerm() = sink.(PolynomialRedosSink).getRegExp() + | + result = sink.getLocation() + ) + } + + Location getASelectedSinkLocationApprox(DataFlow::Node sink) { + exists(SuperlinearBackTracking::PolynomialBackTrackingTerm regexp | + regexp.getRootTerm() = sink.(PolynomialRedosSink).getRegExp() + | + result = regexp.getLocation() + ) + } } module PolynomialRedosFlow = TaintTracking::Global; From 4a2d7950760f783441057970f2b1f62b6c029336 Mon Sep 17 00:00:00 2001 From: Asger F Date: Wed, 2 Jul 2025 14:38:10 +0200 Subject: [PATCH 6/9] Shared: Make approximate location filtering the default behaviour --- .../security/regexp/PolynomialReDoSQuery.qll | 8 +---- .../dataflow/PolynomialReDoSQuery.qll | 4 +-- .../security/regexp/PolynomialReDoSQuery.qll | 4 +-- shared/dataflow/codeql/dataflow/DataFlow.qll | 36 ------------------- .../codeql/dataflow/internal/DataFlowImpl.qll | 4 --- .../dataflow/internal/DataFlowImplStage1.qll | 8 ++--- shared/util/codeql/util/AlertFiltering.qll | 26 -------------- 7 files changed, 5 insertions(+), 85 deletions(-) diff --git a/java/ql/lib/semmle/code/java/security/regexp/PolynomialReDoSQuery.qll b/java/ql/lib/semmle/code/java/security/regexp/PolynomialReDoSQuery.qll index f5d0190068b9..767ebc97437b 100644 --- a/java/ql/lib/semmle/code/java/security/regexp/PolynomialReDoSQuery.qll +++ b/java/ql/lib/semmle/code/java/security/regexp/PolynomialReDoSQuery.qll @@ -55,13 +55,7 @@ module PolynomialRedosConfig implements DataFlow::ConfigSig { regexp.getRootTerm() = sink.(PolynomialRedosSink).getRegExp() | result = sink.getLocation() - ) - } - - Location getASelectedSinkLocationApprox(DataFlow::Node sink) { - exists(SuperlinearBackTracking::PolynomialBackTrackingTerm regexp | - regexp.getRootTerm() = sink.(PolynomialRedosSink).getRegExp() - | + or result = regexp.getLocation() ) } diff --git a/python/ql/lib/semmle/python/security/dataflow/PolynomialReDoSQuery.qll b/python/ql/lib/semmle/python/security/dataflow/PolynomialReDoSQuery.qll index a6602b30b16d..89aa4961e6ef 100644 --- a/python/ql/lib/semmle/python/security/dataflow/PolynomialReDoSQuery.qll +++ b/python/ql/lib/semmle/python/security/dataflow/PolynomialReDoSQuery.qll @@ -22,9 +22,7 @@ private module PolynomialReDoSConfig implements DataFlow::ConfigSig { Location getASelectedSinkLocation(DataFlow::Node sink) { result = sink.(Sink).getHighlight().getLocation() - } - - Location getASelectedSinkLocationApprox(DataFlow::Node sink) { + or result = sink.(Sink).getABacktrackingTerm().getLocation() } } diff --git a/ruby/ql/lib/codeql/ruby/security/regexp/PolynomialReDoSQuery.qll b/ruby/ql/lib/codeql/ruby/security/regexp/PolynomialReDoSQuery.qll index 0d955d1eb245..81179717e01e 100644 --- a/ruby/ql/lib/codeql/ruby/security/regexp/PolynomialReDoSQuery.qll +++ b/ruby/ql/lib/codeql/ruby/security/regexp/PolynomialReDoSQuery.qll @@ -25,9 +25,7 @@ private module PolynomialReDoSConfig implements DataFlow::ConfigSig { Location getASelectedSinkLocation(DataFlow::Node sink) { result = sink.(Sink).getHighlight().getLocation() - } - - Location getASelectedSinkLocationApprox(DataFlow::Node sink) { + or result = sink.(Sink).getRegExp().getRootTerm().getLocation() } } diff --git a/shared/dataflow/codeql/dataflow/DataFlow.qll b/shared/dataflow/codeql/dataflow/DataFlow.qll index 93803af552bc..3483287e3b39 100644 --- a/shared/dataflow/codeql/dataflow/DataFlow.qll +++ b/shared/dataflow/codeql/dataflow/DataFlow.qll @@ -459,15 +459,6 @@ module Configs Lang> { */ default Location getASelectedSourceLocation(Node source) { result = source.getLocation() } - /** - * Like `getASelectedSourceLocation`, but only has to get a location _containing_ the - * actual location associated with `source`. - * - * This prunes fewer sources than `getASelectedSourceLocation` but leaves room for the possibility - * that a more precise location can be selected in the query. - */ - default Location getASelectedSourceLocationApprox(Node source) { none() } - /** * Gets a location that will be associated with the given `sink` in a * diff-informed query that uses this configuration (see @@ -478,15 +469,6 @@ module Configs Lang> { * report the sink at all, this predicate can be `none()`. */ default Location getASelectedSinkLocation(Node sink) { result = sink.getLocation() } - - /** - * Like `getASelectedSinkLocation`, but only has to get a location _containing_ the - * actual location associated with `sink`. - * - * This prunes fewer sinks than `getASelectedSinkLocation` but leaves room for the possibility - * that a more precise location can be selected in the query. - */ - default Location getASelectedSinkLocationApprox(Node sink) { none() } } /** An input configuration for data flow using flow state. */ @@ -626,15 +608,6 @@ module Configs Lang> { */ default Location getASelectedSourceLocation(Node source) { result = source.getLocation() } - /** - * Like `getASelectedSourceLocation`, but only has to get a location _containing_ the - * actual location associated with `source`. - * - * This prunes fewer sources than `getASelectedSourceLocation` but leaves room for the possibility - * that a more precise location can be selected in the query. - */ - default Location getASelectedSourceLocationApprox(Node source) { none() } - /** * Gets a location that will be associated with the given `sink` in a * diff-informed query that uses this configuration (see @@ -645,15 +618,6 @@ module Configs Lang> { * report the sink at all, this predicate can be `none()`. */ default Location getASelectedSinkLocation(Node sink) { result = sink.getLocation() } - - /** - * Like `getASelectedSinkLocation`, but only has to get a location _containing_ the - * actual location associated with `sink`. - * - * This prunes fewer sinks than `getASelectedSinkLocation` but leaves room for the possibility - * that a more precise location can be selected in the query. - */ - default Location getASelectedSinkLocationApprox(Node sink) { none() } } } diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index b90512bdb237..a7e0736432ac 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -145,11 +145,7 @@ module MakeImpl Lang> { Location getASelectedSourceLocation(Node source); - Location getASelectedSourceLocationApprox(Node source); - Location getASelectedSinkLocation(Node sink); - - Location getASelectedSinkLocationApprox(Node sink); } /** diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll index 5e85cab00d9d..c7883df0de18 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll @@ -133,9 +133,7 @@ module MakeImplStage1 Lang> { private predicate isFilteredSource(Node source) { Config::isSource(source, _) and if Config::observeDiffInformedIncrementalMode() - then - AlertFiltering::filterByLocation(Config::getASelectedSourceLocation(source)) or - AlertFiltering::filterByLocationApprox(Config::getASelectedSourceLocationApprox(source)) + then AlertFiltering::filterByLocation(Config::getASelectedSourceLocation(source)) else any() } @@ -146,9 +144,7 @@ module MakeImplStage1 Lang> { Config::isSink(sink) ) and if Config::observeDiffInformedIncrementalMode() - then - AlertFiltering::filterByLocation(Config::getASelectedSinkLocation(sink)) or - AlertFiltering::filterByLocationApprox(Config::getASelectedSinkLocationApprox(sink)) + then AlertFiltering::filterByLocation(Config::getASelectedSinkLocation(sink)) else any() } diff --git a/shared/util/codeql/util/AlertFiltering.qll b/shared/util/codeql/util/AlertFiltering.qll index 97e6198a16cf..091c2a89d00b 100644 --- a/shared/util/codeql/util/AlertFiltering.qll +++ b/shared/util/codeql/util/AlertFiltering.qll @@ -89,32 +89,6 @@ module AlertFilteringImpl { /** Applies alert filtering to the given location. */ bindingset[location] predicate filterByLocation(Location location) { - not restrictAlertsTo(_, _, _) and not restrictAlertsToExactLocation(_, _, _, _, _) - or - exists(string filePath | - restrictAlertsToEntireFile(filePath) and - location.hasLocationInfo(filePath, _, _, _, _) - or - exists(int line | - restrictAlertsToStartLine(filePath, line) and - location.hasLocationInfo(filePath, line, _, _, _) - ) - ) - or - exists(string filePath, int startLine, int startColumn, int endLine, int endColumn | - restrictAlertsToExactLocation(filePath, startLine, startColumn, endLine, endColumn) - | - location.hasLocationInfo(filePath, startLine, startColumn, endLine, endColumn) - ) - } - - /** - * Holds if some subrange within `location` would be accepted by alert filtering. - * - * There does not need to exist a `Location` corresponding to that subrange. - */ - bindingset[location] - predicate filterByLocationApprox(Location location) { not restrictAlertsTo(_, _, _) and not restrictAlertsToExactLocation(_, _, _, _, _) or exists(string filePath | From f1e9f0e323cf070dd20e3457ec6dc5d2d5bcdc3c Mon Sep 17 00:00:00 2001 From: Jonas Jensen Date: Wed, 9 Jul 2025 09:24:26 +0200 Subject: [PATCH 7/9] Shared: improve join order in filterByLocation It's better to join with the range expression first since that will only multiply tuple counts by the number of lines in an average source/sink. Joining with `restrictAlertsToStartLine` first would multiply tuple counts by the number of sources/sinks in a given file. --- shared/util/codeql/util/AlertFiltering.qll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shared/util/codeql/util/AlertFiltering.qll b/shared/util/codeql/util/AlertFiltering.qll index 091c2a89d00b..4064e4f31a86 100644 --- a/shared/util/codeql/util/AlertFiltering.qll +++ b/shared/util/codeql/util/AlertFiltering.qll @@ -98,7 +98,7 @@ module AlertFilteringImpl { exists(int locStartLine, int locEndLine | location.hasLocationInfo(filePath, locStartLine, _, locEndLine, _) | - restrictAlertsToStartLine(filePath, [locStartLine .. locEndLine]) + restrictAlertsToStartLine(pragma[only_bind_into](filePath), [locStartLine .. locEndLine]) ) ) or From 0d7a842e2feb92462cb53de185e51e64e5bf1aa7 Mon Sep 17 00:00:00 2001 From: Jonas Jensen Date: Wed, 9 Jul 2025 09:43:49 +0200 Subject: [PATCH 8/9] Shared: improve documentation in AlertFiltering --- shared/util/codeql/util/AlertFiltering.qll | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/shared/util/codeql/util/AlertFiltering.qll b/shared/util/codeql/util/AlertFiltering.qll index 4064e4f31a86..a956693f9c7c 100644 --- a/shared/util/codeql/util/AlertFiltering.qll +++ b/shared/util/codeql/util/AlertFiltering.qll @@ -79,14 +79,27 @@ module AlertFilteringImpl { private predicate restrictAlertsToEntireFile(string filePath) { restrictAlertsTo(filePath, 0, 0) } pragma[nomagic] - private predicate restrictAlertsToStartLine(string filePath, int line) { + private predicate restrictAlertsToLine(string filePath, int line) { exists(int startLineStart, int startLineEnd | restrictAlertsTo(filePath, startLineStart, startLineEnd) and line = [startLineStart .. startLineEnd] ) } - /** Applies alert filtering to the given location. */ + /** + * Holds if the given location intersects the diff range. Testing for full + * intersection rather than only matching the start line means that this + * predicate is more broadly useful than just checking whether a specific + * element is considered to be in the diff range of GitHub Code Scanning: + * - If it's inconvenient to pass the exact `Location` of the element of + * interest, it's valid to use a `Location` of an enclosing element. + * - This predicate could be useful for other systems of alert presentation + * where the rules don't exactly match GitHub Code Scanning. + * + * If there is no diff range, this predicate holds for all locations. Note + * that this predicate has a bindingset and will therefore be inlined; + * callers should include enough context to ensure efficient evaluation. + */ bindingset[location] predicate filterByLocation(Location location) { not restrictAlertsTo(_, _, _) and not restrictAlertsToExactLocation(_, _, _, _, _) @@ -98,7 +111,7 @@ module AlertFilteringImpl { exists(int locStartLine, int locEndLine | location.hasLocationInfo(filePath, locStartLine, _, locEndLine, _) | - restrictAlertsToStartLine(pragma[only_bind_into](filePath), [locStartLine .. locEndLine]) + restrictAlertsToLine(pragma[only_bind_into](filePath), [locStartLine .. locEndLine]) ) ) or From 3ffda2f341bdcc8e7337a076701473a3cd870568 Mon Sep 17 00:00:00 2001 From: Jonas Jensen Date: Wed, 9 Jul 2025 14:32:18 +0200 Subject: [PATCH 9/9] Shared: Overhaul the AlertFiltering QLDoc The documentation is now up-to-date with the new and more relaxed rules that allow overapproximating the results. I have also attempted to make a clearer distinction between the requirements of the specification and the behaviour of the implementation. --- shared/util/codeql/util/AlertFiltering.qll | 57 +++++++++++----------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/shared/util/codeql/util/AlertFiltering.qll b/shared/util/codeql/util/AlertFiltering.qll index a956693f9c7c..bfc9d901b2fa 100644 --- a/shared/util/codeql/util/AlertFiltering.qll +++ b/shared/util/codeql/util/AlertFiltering.qll @@ -8,15 +8,14 @@ module; private import codeql.util.Location /** - * Holds if the query should produce alerts that match the given line ranges. + * Holds if the query may restrict its computation to only produce alerts that match the given line + * ranges. This predicate is used for implementing _diff-informed queries_ for pull requests in + * GitHub Code Scanning. * * This predicate is active if and only if it is nonempty. If this predicate is inactive, it has no - * effect. If it is active, it accepts any alert that has at least one matching location. - * - * Note that an alert that is not accepted by this filtering predicate may still be included in the - * query results if it is accepted by another active filtering predicate in this module. An alert is - * excluded from the query results if only if (1) there is at least one active filtering predicate, - * and (2) it is not accepted by any active filtering predicate. + * effect. If it is active, queries may omit alerts that don't have a _primary_ or _related_ + * location (in SARIF terminology) whose start line is within a specified range. Queries are allowed + * to produce alerts outside this range. * * An alert location is a match if it matches a row in this predicate. If `startLineStart` and * `startLineEnd` are both 0, the row specifies a whole-file match, and a location is a match if @@ -29,26 +28,24 @@ private import codeql.util.Location * - startLineStart: inclusive start of the range for alert location start line number (1-based). * - startLineEnd: inclusive end of the range for alert location start line number (1-based). * - * A query should either perform no alert filtering, or adhere to all the filtering rules in this - * module and return all and only the accepted alerts. - * - * This predicate is suitable for situations where we want to filter alerts at line granularity, - * such as based on the pull request diff. + * Note that an alert that is not accepted by this filtering predicate may still be included in the + * query results if it is accepted by another active filtering predicate in this module. An alert is + * excluded from the query results if only if (1) there is at least one active filtering predicate, + * and (2) it is not accepted by any active filtering predicate. * * See also: `restrictAlertsToExactLocation`. */ extensible predicate restrictAlertsTo(string filePath, int startLineStart, int startLineEnd); /** - * Holds if the query should produce alerts that match the given locations. + * Holds if the query may restrict its computation to only produce alerts that match the given + * character ranges. This predicate is suitable for testing, where we want to filter by the exact + * alert location, distinguishing between alerts on the same line. * * This predicate is active if and only if it is nonempty. If this predicate is inactive, it has no - * effect. If it is active, it accepts any alert that has at least one matching location. - * - * Note that an alert that is not accepted by this filtering predicate may still be included in the - * query results if it is accepted by another active filtering predicate in this module. An alert is - * excluded from the query results if only if (1) there is at least one active filtering predicate, - * and (2) it is not accepted by any active filtering predicate. + * effect. If it is active, queries may omit alerts that don't have a _primary_ or _related_ + * location (in SARIF terminology) whose location equals a tuple from this predicate. Queries are + * allowed to produce alerts outside this range. * * An alert location is a match if it matches a row in this predicate. Each row specifies an exact * location: an alert location is a match if its file path matches `filePath`, its start line and @@ -61,11 +58,10 @@ extensible predicate restrictAlertsTo(string filePath, int startLineStart, int s * - endLine: alert location end line number (1-based). * - endColumn: alert location end column number (1-based). * - * A query should either perform no alert filtering, or adhere to all the filtering rules in this - * module and return all and only the accepted alerts. - * - * This predicate is suitable for situations where we want to filter by the exact alert location, - * distinguishing between alerts on the same line. + * Note that an alert that is not accepted by this filtering predicate may still be included in the + * query results if it is accepted by another active filtering predicate in this module. An alert is + * excluded from the query results if only if (1) there is at least one active filtering predicate, + * and (2) it is not accepted by any active filtering predicate. * * See also: `restrictAlertsTo`. */ @@ -87,10 +83,15 @@ module AlertFilteringImpl { } /** - * Holds if the given location intersects the diff range. Testing for full - * intersection rather than only matching the start line means that this - * predicate is more broadly useful than just checking whether a specific - * element is considered to be in the diff range of GitHub Code Scanning: + * Holds if the given location intersects the diff range. When that is the + * case, ensuring that alerts mentioning this location are included in the + * query results is a valid overapproximation of the requirements for + * _diff-informed queries_ as documented under `restrictAlertsTo`. + * + * Testing for full intersection rather than only matching the start line + * means that this predicate is more broadly useful than just checking whether + * a specific element is considered to be in the diff range of GitHub Code + * Scanning: * - If it's inconvenient to pass the exact `Location` of the element of * interest, it's valid to use a `Location` of an enclosing element. * - This predicate could be useful for other systems of alert presentation pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy