Skip to content

Commit 3ae4848

Browse files
authored
Merge pull request #15326 from github/max-schaefer/automodel-negative-sink-models
Automodel: Apply negative characteristics only to endpoints of the right kind.
2 parents 4660a25 + 8614d7b commit 3ae4848

17 files changed

+348
-264
lines changed

java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll

Lines changed: 90 additions & 97 deletions
Large diffs are not rendered by default.

java/ql/automodel/src/AutomodelApplicationModeExtractCandidates.ql

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -25,20 +25,20 @@ private import AutomodelJavaUtil
2525
bindingset[limit]
2626
private Endpoint getSampleForSignature(
2727
int limit, string package, string type, string subtypes, string name, string signature,
28-
string input, string output, string isVarargs, string extensibleType
28+
string input, string output, string isVarargs, string extensibleType, string alreadyAiModeled
2929
) {
3030
exists(int n, int num_endpoints, ApplicationModeMetadataExtractor meta |
3131
num_endpoints =
3232
count(Endpoint e |
33-
e.getExtensibleType() = extensibleType and
34-
meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs)
33+
meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs,
34+
alreadyAiModeled, extensibleType)
3535
)
3636
|
3737
result =
3838
rank[n](Endpoint e, Location loc |
3939
loc = e.asTop().getLocation() and
40-
e.getExtensibleType() = extensibleType and
41-
meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs)
40+
meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs,
41+
alreadyAiModeled, extensibleType)
4242
|
4343
e
4444
order by
@@ -63,22 +63,18 @@ where
6363
not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
6464
u.appliesToEndpoint(endpoint)
6565
) and
66-
CharacteristicsImpl::isSinkCandidate(endpoint, _) and
66+
CharacteristicsImpl::isCandidate(endpoint, _) and
6767
endpoint =
6868
getSampleForSignature(9, package, type, subtypes, name, signature, input, output,
69-
isVarargsArray, extensibleType) and
69+
isVarargsArray, extensibleType, alreadyAiModeled) and
70+
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
71+
isVarargsArray, alreadyAiModeled, extensibleType) and
7072
// If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a
7173
// candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's
7274
// already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We
7375
// assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink
7476
// types, and we don't need to reexamine it.
75-
(
76-
not CharacteristicsImpl::isModeled(endpoint, _, _, _) and alreadyAiModeled = ""
77-
or
78-
alreadyAiModeled.matches("%ai-%") and
79-
CharacteristicsImpl::isModeled(endpoint, _, _, alreadyAiModeled)
80-
) and
81-
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray) and
77+
alreadyAiModeled.matches(["", "%ai-%"]) and
8278
includeAutomodelCandidate(package, type, name, signature)
8379
select endpoint.asNode(),
8480
"Related locations: $@, $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@.", //

java/ql/automodel/src/AutomodelApplicationModeExtractNegativeExamples.ql

Lines changed: 35 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -40,27 +40,45 @@ Endpoint getSampleForCharacteristic(EndpointCharacteristic c, int limit) {
4040
)
4141
}
4242

43+
predicate candidate(
44+
Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string package,
45+
string type, string subtypes, string name, string signature, string input, string output,
46+
string isVarargsArray, string extensibleType
47+
) {
48+
// the node is known not to be an endpoint of any appropriate type
49+
forall(EndpointType tp | tp = CharacteristicsImpl::getAPotentialType(endpoint) |
50+
characteristic.hasImplications(tp, false, _)
51+
) and
52+
// the lowest confidence across all endpoint types should be at least highConfidence
53+
confidence =
54+
min(float c |
55+
characteristic.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), false, c)
56+
) and
57+
confidence >= SharedCharacteristics::highConfidence() and
58+
any(ApplicationModeMetadataExtractor meta)
59+
.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
60+
isVarargsArray, _, extensibleType) and
61+
// It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes
62+
// as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly exclude them here.
63+
not exists(EndpointCharacteristic characteristic2, float confidence2 |
64+
characteristic2 != characteristic
65+
|
66+
characteristic2.appliesToEndpoint(endpoint) and
67+
confidence2 >= SharedCharacteristics::maximalConfidence() and
68+
characteristic2
69+
.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), true, confidence2)
70+
)
71+
}
72+
4373
from
4474
Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string message,
45-
ApplicationModeMetadataExtractor meta, DollarAtString package, DollarAtString type,
46-
DollarAtString subtypes, DollarAtString name, DollarAtString signature, DollarAtString input,
47-
DollarAtString output, DollarAtString isVarargsArray, DollarAtString extensibleType
75+
DollarAtString package, DollarAtString type, DollarAtString subtypes, DollarAtString name,
76+
DollarAtString signature, DollarAtString input, DollarAtString output,
77+
DollarAtString isVarargsArray, DollarAtString extensibleType
4878
where
4979
endpoint = getSampleForCharacteristic(characteristic, 100) and
50-
extensibleType = endpoint.getExtensibleType() and
51-
confidence >= SharedCharacteristics::highConfidence() and
52-
characteristic.hasImplications(any(NegativeSinkType negative), true, confidence) and
53-
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray) and
54-
// It's valid for a node to satisfy the logic for both `isSink` and `isSanitizer`, but in that case it will be
55-
// treated by the actual query as a sanitizer, since the final logic is something like
56-
// `isSink(n) and not isSanitizer(n)`. We don't want to include such nodes as negative examples in the prompt, because
57-
// they're ambiguous and might confuse the model, so we explicitly exclude all known sinks from the negative examples.
58-
not exists(EndpointCharacteristic characteristic2, float confidence2, SinkType positiveType |
59-
not positiveType instanceof NegativeSinkType and
60-
characteristic2.appliesToEndpoint(endpoint) and
61-
confidence2 >= SharedCharacteristics::maximalConfidence() and
62-
characteristic2.hasImplications(positiveType, true, confidence2)
63-
) and
80+
candidate(endpoint, characteristic, confidence, package, type, subtypes, name, signature, input,
81+
output, isVarargsArray, extensibleType) and
6482
message = characteristic
6583
select endpoint.asNode(),
6684
message + "\nrelated locations: $@, $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@.", //

java/ql/automodel/src/AutomodelApplicationModeExtractPositiveExamples.ql

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,8 @@ from
1818
DollarAtString signature, DollarAtString input, DollarAtString output,
1919
DollarAtString isVarargsArray, DollarAtString extensibleType
2020
where
21-
extensibleType = endpoint.getExtensibleType() and
22-
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray) and
23-
// Extract positive examples of sinks belonging to the existing ATM query configurations.
21+
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
22+
isVarargsArray, _, extensibleType) and
2423
CharacteristicsImpl::isKnownAs(endpoint, endpointType, _) and
2524
exists(CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()))
2625
select endpoint.asNode(),

java/ql/automodel/src/AutomodelEndpointTypes.qll

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,6 @@ abstract class SinkType extends EndpointType {
3030
SinkType() { any() }
3131
}
3232

33-
/** The `Negative` class for non-sinks. */
34-
class NegativeSinkType extends SinkType {
35-
NegativeSinkType() { this = "non-sink" }
36-
}
37-
3833
/** A sink relevant to the SQL injection query */
3934
class SqlInjectionSinkType extends SinkType {
4035
SqlInjectionSinkType() { this = "sql-injection" }

java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll

Lines changed: 75 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ private import semmle.code.java.security.QueryInjection
1515
private import semmle.code.java.security.RequestForgery
1616
private import semmle.code.java.dataflow.internal.ModelExclusions as ModelExclusions
1717
private import AutomodelJavaUtil as AutomodelJavaUtil
18-
private import AutomodelSharedGetCallable as AutomodelSharedGetCallable
1918
import AutomodelSharedCharacteristics as SharedCharacteristics
2019
import AutomodelEndpointTypes as AutomodelEndpointTypes
2120

@@ -84,7 +83,7 @@ abstract class FrameworkModeEndpoint extends TFrameworkModeEndpoint {
8483
/**
8584
* Returns the callable that contains the endpoint.
8685
*/
87-
abstract Callable getEnclosingCallable();
86+
abstract Callable getCallable();
8887

8988
abstract Top asTop();
9089

@@ -106,7 +105,7 @@ class ExplicitParameterEndpoint extends FrameworkModeEndpoint, TExplicitParamete
106105

107106
override string getParamName() { result = param.getName() }
108107

109-
override Callable getEnclosingCallable() { result = param.getCallable() }
108+
override Callable getCallable() { result = param.getCallable() }
110109

111110
override Top asTop() { result = param }
112111

@@ -126,7 +125,7 @@ class QualifierEndpoint extends FrameworkModeEndpoint, TQualifier {
126125

127126
override string getParamName() { result = "this" }
128127

129-
override Callable getEnclosingCallable() { result = callable }
128+
override Callable getCallable() { result = callable }
130129

131130
override Top asTop() { result = callable }
132131

@@ -144,7 +143,7 @@ class ReturnValue extends FrameworkModeEndpoint, TReturnValue {
144143

145144
override string getParamName() { none() }
146145

147-
override Callable getEnclosingCallable() { result = callable }
146+
override Callable getCallable() { result = callable }
148147

149148
override Top asTop() { result = callable }
150149

@@ -163,7 +162,7 @@ class OverridableParameter extends FrameworkModeEndpoint, TOverridableParameter
163162

164163
override string getParamName() { result = param.getName() }
165164

166-
override Callable getEnclosingCallable() { result = method }
165+
override Callable getCallable() { result = method }
167166

168167
override Top asTop() { result = param }
169168

@@ -181,7 +180,7 @@ class OverridableQualifier extends FrameworkModeEndpoint, TOverridableQualifier
181180

182181
override string getParamName() { result = "this" }
183182

184-
override Callable getEnclosingCallable() { result = m }
183+
override Callable getCallable() { result = m }
185184

186185
override Top asTop() { result = m }
187186

@@ -202,7 +201,9 @@ module FrameworkCandidatesImpl implements SharedCharacteristics::CandidateSig {
202201

203202
class EndpointType = AutomodelEndpointTypes::EndpointType;
204203

205-
class NegativeEndpointType = AutomodelEndpointTypes::NegativeSinkType;
204+
class SinkType = AutomodelEndpointTypes::SinkType;
205+
206+
class SourceType = AutomodelEndpointTypes::SourceType;
206207

207208
class RelatedLocation = Location::Top;
208209

@@ -244,8 +245,8 @@ module FrameworkCandidatesImpl implements SharedCharacteristics::CandidateSig {
244245
additional predicate sinkSpec(
245246
Endpoint e, string package, string type, string name, string signature, string ext, string input
246247
) {
247-
e.getEnclosingCallable().hasQualifiedName(package, type, name) and
248-
signature = ExternalFlow::paramsString(e.getEnclosingCallable()) and
248+
e.getCallable().hasQualifiedName(package, type, name) and
249+
signature = ExternalFlow::paramsString(e.getCallable()) and
249250
ext = "" and
250251
input = e.getMaDInput()
251252
}
@@ -254,8 +255,8 @@ module FrameworkCandidatesImpl implements SharedCharacteristics::CandidateSig {
254255
Endpoint e, string package, string type, string name, string signature, string ext,
255256
string output
256257
) {
257-
e.getEnclosingCallable().hasQualifiedName(package, type, name) and
258-
signature = ExternalFlow::paramsString(e.getEnclosingCallable()) and
258+
e.getCallable().hasQualifiedName(package, type, name) and
259+
signature = ExternalFlow::paramsString(e.getCallable()) and
259260
ext = "" and
260261
output = e.getMaDOutput()
261262
}
@@ -267,10 +268,10 @@ module FrameworkCandidatesImpl implements SharedCharacteristics::CandidateSig {
267268
*/
268269
RelatedLocation getRelatedLocation(Endpoint e, RelatedLocationType type) {
269270
type = MethodDoc() and
270-
result = e.getEnclosingCallable().(Documentable).getJavadoc()
271+
result = e.getCallable().(Documentable).getJavadoc()
271272
or
272273
type = ClassDoc() and
273-
result = e.getEnclosingCallable().getDeclaringType().(Documentable).getJavadoc()
274+
result = e.getCallable().getDeclaringType().(Documentable).getJavadoc()
274275
}
275276
}
276277

@@ -292,16 +293,27 @@ class FrameworkModeMetadataExtractor extends string {
292293

293294
predicate hasMetadata(
294295
Endpoint e, string package, string type, string subtypes, string name, string signature,
295-
string input, string output, string parameterName
296+
string input, string output, string parameterName, string alreadyAiModeled,
297+
string extensibleType
296298
) {
297-
(if exists(e.getParamName()) then parameterName = e.getParamName() else parameterName = "") and
298-
name = e.getEnclosingCallable().getName() and
299-
(if exists(e.getMaDInput()) then input = e.getMaDInput() else input = "") and
300-
(if exists(e.getMaDOutput()) then output = e.getMaDOutput() else output = "") and
301-
package = e.getEnclosingCallable().getDeclaringType().getPackage().getName() and
302-
type = e.getEnclosingCallable().getDeclaringType().getErasure().(RefType).nestedName() and
303-
subtypes = AutomodelJavaUtil::considerSubtypes(e.getEnclosingCallable()).toString() and
304-
signature = ExternalFlow::paramsString(e.getEnclosingCallable())
299+
exists(Callable callable | e.getCallable() = callable |
300+
(if exists(e.getMaDInput()) then input = e.getMaDInput() else input = "") and
301+
(if exists(e.getMaDOutput()) then output = e.getMaDOutput() else output = "") and
302+
package = callable.getDeclaringType().getPackage().getName() and
303+
// we're using the erased types because the MaD convention is to not specify type parameters.
304+
// Whether something is or isn't a sink doesn't usually depend on the type parameters.
305+
type = callable.getDeclaringType().getErasure().(RefType).nestedName() and
306+
subtypes = AutomodelJavaUtil::considerSubtypes(callable).toString() and
307+
name = callable.getName() and
308+
signature = ExternalFlow::paramsString(callable) and
309+
(if exists(e.getParamName()) then parameterName = e.getParamName() else parameterName = "") and
310+
e.getExtensibleType() = extensibleType
311+
) and
312+
(
313+
not CharacteristicsImpl::isModeled(e, _, extensibleType, _) and alreadyAiModeled = ""
314+
or
315+
CharacteristicsImpl::isModeled(e, _, extensibleType, alreadyAiModeled)
316+
)
305317
}
306318
}
307319

@@ -310,53 +322,79 @@ class FrameworkModeMetadataExtractor extends string {
310322
*/
311323

312324
/**
313-
* A negative characteristic that indicates that an is-style boolean method is unexploitable even if it is a sink.
325+
* A negative characteristic that indicates that parameters of an is-style boolean method should not be considered sinks,
326+
* and its return value should not be considered a source.
314327
*
315328
* A sink is highly unlikely to be exploitable if its callable's name starts with `is` and the callable has a boolean return
316329
* type (e.g. `isDirectory`). These kinds of calls normally do only checks, and appear before the proper call that does
317330
* the dangerous/interesting thing, so we want the latter to be modeled as the sink.
318331
*
319332
* TODO: this might filter too much, it's possible that methods with more than one parameter contain interesting sinks
320333
*/
321-
private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
334+
private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
335+
{
322336
UnexploitableIsCharacteristic() { this = "unexploitable (is-style boolean method)" }
323337

324338
override predicate appliesToEndpoint(Endpoint e) {
325-
not FrameworkCandidatesImpl::isSink(e, _, _) and
326-
e.getEnclosingCallable().getName().matches("is%") and
327-
e.getEnclosingCallable().getReturnType() instanceof BooleanType
339+
e.getCallable().getName().matches("is%") and
340+
e.getCallable().getReturnType() instanceof BooleanType and
341+
(
342+
e.getExtensibleType() = "sinkModel" and
343+
not FrameworkCandidatesImpl::isSink(e, _, _)
344+
or
345+
e.getExtensibleType() = "sourceModel" and
346+
not FrameworkCandidatesImpl::isSource(e, _, _) and
347+
e.getMaDOutput() = "ReturnValue"
348+
)
328349
}
329350
}
330351

331352
/**
332-
* A negative characteristic that indicates that an existence-checking boolean method is unexploitable even if it is a
333-
* sink.
353+
* A negative characteristic that indicates that parameters of an existence-checking boolean method should not be
354+
* considered sinks, and its return value should not be considered a source.
334355
*
335356
* A sink is highly unlikely to be exploitable if its callable's name is `exists` or `notExists` and the callable has a
336357
* boolean return type. These kinds of calls normally do only checks, and appear before the proper call that does the
337358
* dangerous/interesting thing, so we want the latter to be modeled as the sink.
338359
*/
339-
private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
360+
private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
361+
{
340362
UnexploitableExistsCharacteristic() { this = "unexploitable (existence-checking boolean method)" }
341363

342364
override predicate appliesToEndpoint(Endpoint e) {
343-
not FrameworkCandidatesImpl::isSink(e, _, _) and
344365
exists(Callable callable |
345-
callable = e.getEnclosingCallable() and
366+
callable = e.getCallable() and
346367
callable.getName().toLowerCase() = ["exists", "notexists"] and
347368
callable.getReturnType() instanceof BooleanType
369+
|
370+
e.getExtensibleType() = "sinkModel" and
371+
not FrameworkCandidatesImpl::isSink(e, _, _)
372+
or
373+
e.getExtensibleType() = "sourceModel" and
374+
not FrameworkCandidatesImpl::isSource(e, _, _) and
375+
e.getMaDOutput() = "ReturnValue"
348376
)
349377
}
350378
}
351379

352380
/**
353-
* A negative characteristic that indicates that an endpoint is an argument to an exception, which is not a sink.
381+
* A negative characteristic that indicates that parameters of an exception method or constructor should not be considered sinks,
382+
* and its return value should not be considered a source.
354383
*/
355-
private class ExceptionCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
384+
private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
385+
{
356386
ExceptionCharacteristic() { this = "exception" }
357387

358388
override predicate appliesToEndpoint(Endpoint e) {
359-
e.getEnclosingCallable().getDeclaringType().getASupertype*() instanceof TypeThrowable
389+
e.getCallable().getDeclaringType().getASupertype*() instanceof TypeThrowable and
390+
(
391+
e.getExtensibleType() = "sinkModel" and
392+
not FrameworkCandidatesImpl::isSink(e, _, _)
393+
or
394+
e.getExtensibleType() = "sourceModel" and
395+
not FrameworkCandidatesImpl::isSource(e, _, _) and
396+
e.getMaDOutput() = "ReturnValue"
397+
)
360398
}
361399
}
362400

@@ -368,6 +406,6 @@ private class NotAModelApi extends CharacteristicsImpl::UninterestingToModelChar
368406
NotAModelApi() { this = "not a model API" }
369407

370408
override predicate appliesToEndpoint(Endpoint e) {
371-
not e.getEnclosingCallable() instanceof ModelExclusions::ModelApi
409+
not e.getCallable() instanceof ModelExclusions::ModelApi
372410
}
373411
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy