Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Go: Make the models-as-data subtypes column do something more sensible for promoted methods #17618

Merged
merged 30 commits into from
Nov 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
91375d3
Update ExternalFlow.qll docs on `subtypes` column
owen-mc Sep 27, 2024
e46ec5a
Fix MaD inheritance
owen-mc Nov 11, 2024
e90e313
Simplify ensureCorrectTypeInfo
smowton Oct 23, 2024
d9126d3
Simplify interpretElement
smowton Oct 23, 2024
520acc4
Further simplify interpretElement to avoid 'm' only being used on one…
smowton Oct 23, 2024
20e8d48
Add comment noting that a Method or Field might have multiple SourceO…
smowton Oct 23, 2024
17a3429
Correct hasTypeInfo doc comment
smowton Oct 23, 2024
e290cec
Rename ensureCorrectTypeInfo and getSyntacticRecv
smowton Oct 23, 2024
fd40662
Rename getImplicitFieldReadInstruction
smowton Oct 23, 2024
ccc525a
Remove probably-unnecessary pragma
smowton Oct 23, 2024
cd99bea
Explain getSyntacticQual
smowton Oct 23, 2024
7ffff43
Improve comments
smowton Oct 23, 2024
f61251c
Document elementAppliesToQualifier
smowton Oct 23, 2024
4a1fce9
Test non-promoted fields and methods
owen-mc Nov 11, 2024
adc946e
Revert simplification to make tests pass
owen-mc Oct 24, 2024
e335737
Minor fixes to docs
owen-mc Oct 24, 2024
ede3b9f
Improve join order by making a separate predicate
owen-mc Oct 25, 2024
c2aee37
`getSyntacticQualifier` -> `getSyntacticQualifierBaseType`
owen-mc Oct 25, 2024
80e8015
Inline late and remove manual magic
owen-mc Oct 25, 2024
39afb18
Fix bad join order
owen-mc Oct 25, 2024
b2c2a94
Accept review comment - doc clarification
owen-mc Oct 25, 2024
8abb059
Combine two arms of newtype `TSourceOrSinkElement`
owen-mc Oct 25, 2024
e813fa3
Fix case in some test-related file names
owen-mc Oct 25, 2024
57192e8
Add even more tests
owen-mc Oct 29, 2024
b6a31b1
Restrict to Methods
owen-mc Oct 30, 2024
3986dff
Keep methods and fields separate in SourceOrSinkElement
owen-mc Oct 30, 2024
6af5b55
Refactor elementAppliesToQualifier
owen-mc Nov 6, 2024
d3a6452
Add missing QLDoc
owen-mc Nov 6, 2024
13fb92a
Rename `getIntermediateEmbeddedType`
owen-mc Nov 11, 2024
fd4a6d4
Change note
owen-mc Nov 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
category: fix
---
* The behaviour of the `subtypes` column in models-as-data now matches other languages more closely.
29 changes: 20 additions & 9 deletions go/ql/lib/semmle/go/dataflow/ExternalFlow.qll
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,12 @@
* packages in the group `<groupname>` according to the `packageGrouping`
* predicate.
* 2. The `type` column selects a type within that package.
* 3. The `subtypes` is a boolean that indicates whether to jump to an
* arbitrary subtype of that type.
* 3. The `subtypes` column is a boolean that controls what restrictions we
* place on the type `t` of the selector base when accessing a field or
* calling a method. When it is false, `t` must be the exact type specified
* by this row. When it is true, `t` may be a type which embeds the specified
* type, and for interface methods `t` may be a type which implements the
* interface.
* 4. The `name` column optionally selects a specific named member of the type.
* 5. The `signature` column is always empty.
* 6. The `ext` column is always empty.
Expand Down Expand Up @@ -470,17 +474,24 @@ SourceSinkInterpretationInput::SourceOrSinkElement interpretElement(
elementSpec(pkg, type, subtypes, name, signature, ext) and
// Go does not need to distinguish functions with signature
signature = "" and
(
exists(Field f | f.hasQualifiedName(interpretPackage(pkg), type, name) | result.asEntity() = f)
exists(string p | p = interpretPackage(pkg) |
exists(Entity e | result.hasFullInfo(e, p, type, subtypes) |
e.(Field).hasQualifiedName(p, type, name) or
e.(Method).hasQualifiedName(p, type, name)
)
or
exists(Method m | m.hasQualifiedName(interpretPackage(pkg), type, name) |
result.asEntity() = m
or
subtypes = true and result.asEntity().(Method).implementsIncludingInterfaceMethods(m)
subtypes = true and
// p.type is an interface and we include types which implement it
exists(Method m2, string pkg2, string type2 |
m2.getReceiverType().implements(p, type) and
m2.getName() = name and
m2.getReceiverBaseType().hasQualifiedName(pkg2, type2)
|
result.hasFullInfo(m2, pkg2, type2, subtypes)
)
or
type = "" and
exists(Entity e | e.hasQualifiedName(interpretPackage(pkg), name) | result.asEntity() = e)
exists(Entity e | e.hasQualifiedName(p, name) | result.asOtherEntity() = e)
)
}

Expand Down
174 changes: 165 additions & 9 deletions go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll
Original file line number Diff line number Diff line change
Expand Up @@ -149,21 +149,63 @@ module SourceSinkInterpretationInput implements
)
}

// Note that due to embedding, which is currently implemented via some
// Methods having multiple qualified names, a given Method is liable to have
// more than one SourceOrSinkElement, one for each of the names it claims.
private newtype TSourceOrSinkElement =
TEntityElement(Entity e) or
TMethodEntityElement(Method m, string pkg, string type, boolean subtypes) {
m.hasQualifiedName(pkg, type, _) and
subtypes = [true, false]
} or
TFieldEntityElement(Field f, string pkg, string type, boolean subtypes) {
f.hasQualifiedName(pkg, type, _) and
subtypes = [true, false]
} or
TOtherEntityElement(Entity e) {
not e instanceof Method and
not e instanceof Field
} or
TAstElement(AstNode n)

/** An element representable by CSV modeling. */
class SourceOrSinkElement extends TSourceOrSinkElement {
/** Gets this source or sink element as an entity, if it is one. */
Entity asEntity() { this = TEntityElement(result) }
Entity asEntity() {
result = [this.asMethodEntity(), this.asFieldEntity(), this.asOtherEntity()]
}

/** Gets this source or sink element as a method, if it is one. */
Method asMethodEntity() { this = TMethodEntityElement(result, _, _, _) }

/** Gets this source or sink element as a field, if it is one. */
Field asFieldEntity() { this = TFieldEntityElement(result, _, _, _) }

/** Gets this source or sink element as an entity which isn't a field or method, if it is one. */
Entity asOtherEntity() { this = TOtherEntityElement(result) }

/** Gets this source or sink element as an AST node, if it is one. */
AstNode asAstNode() { this = TAstElement(result) }

/**
* Holds if this source or sink element is a method or field that was specified
* with the given values for `e`, `pkg`, `type` and `subtypes`.
*/
predicate hasFullInfo(Entity e, string pkg, string type, boolean subtypes) {
this = TMethodEntityElement(e, pkg, type, subtypes) or
this = TFieldEntityElement(e, pkg, type, subtypes)
}

/** Gets a textual representation of this source or sink element. */
string toString() {
(this instanceof TOtherEntityElement or this instanceof TAstElement) and
result = "element representing " + [this.asEntity().toString(), this.asAstNode().toString()]
or
exists(Entity e, string pkg, string name, boolean subtypes |
this.hasFullInfo(e, pkg, name, subtypes) and
result =
"element representing " + e.toString() + " with receiver type " + pkg + "." + name +
" and subtypes=" + subtypes
)
}

/** Gets the location of this element. */
Expand Down Expand Up @@ -203,7 +245,17 @@ module SourceSinkInterpretationInput implements

/** Gets the target of this call, if any. */
SourceOrSinkElement getCallTarget() {
result.asEntity() = this.asCall().getNode().(DataFlow::CallNode).getTarget()
exists(DataFlow::CallNode cn, Function callTarget |
cn = this.asCall().getNode() and
Fixed Show fixed Hide fixed
callTarget = cn.getTarget()
|
(
result.asOtherEntity() = callTarget
or
callTarget instanceof Method and
result = getElementWithQualifier(callTarget, cn.getReceiver())
)
)
}

/** Gets a textual representation of this node. */
Expand All @@ -228,6 +280,105 @@ module SourceSinkInterpretationInput implements
}
}

/**
* Gets a method or field spec for `e` which applies in the context of
* qualifier `qual`.
*
* Note that naively checking `e`'s qualified name is not correct, because
* `Method`s and `Field`s may have multiple qualified names due to embedding.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I was wrong about this -- I think it's only Methods that can have multiple qnames like this, and field embedding is represented more explicitly

Suggested change
* `Method`s and `Field`s may have multiple qualified names due to embedding.
* `Method`s may have multiple qualified names due to embedding.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I looked into it and actually it is true for fields as well. In our code this is clear because Field.hasQualifiedName calls Type.getField, whose qldoc says "This includes fields promoted from an embedded field."

* We must instead check that the package and type name given by
* `result.hasFullInfo` refer to either `qual`'s type or to a type it embeds.
*/
bindingset[e, qual]
pragma[inline_late]
private SourceOrSinkElement getElementWithQualifier(Entity e, DataFlow::Node qual) {
exists(boolean subtypes, Type syntacticQualBaseType, Type targetType |
syntacticQualBaseType = getSyntacticQualifierBaseType(qual) and
result = constructElement(e, targetType, subtypes)
|
subtypes = [true, false] and
syntacticQualBaseType = targetType
or
subtypes = true and
(
// `syntacticQualBaseType`'s underlying type might be an interface type and `sse`
// might refer to a method defined on an interface embedded within it.
targetType =
syntacticQualBaseType.getUnderlyingType().(InterfaceType).getAnEmbeddedInterface()
or
// `syntacticQualBaseType`'s underlying type might be a struct type and `sse`
// might be a promoted method or field in it.
targetType = getAnIntermediateEmbeddedType(e, syntacticQualBaseType.getUnderlyingType())
)
)
}

bindingset[e, targetType, subtypes]
pragma[inline_late]
private SourceOrSinkElement constructElement(Entity e, Type targetType, boolean subtypes) {
exists(string pkg, string typename |
targetType.hasQualifiedName(pkg, typename) and
result.hasFullInfo(e, pkg, typename, subtypes)
)
}

/**
* Gets the type of an embedded field of `st` which is on the path to `e`,
* which is a promoted method or field of `st`, or its base type if it's a
* pointer type.
*/
private Type getAnIntermediateEmbeddedType(Entity e, StructType st) {
exists(Field field1, Field field2, int depth1, int depth2, Type t2 |
field1 = st.getFieldAtDepth(_, depth1) and
field2 = st.getFieldAtDepth(_, depth2) and
result = lookThroughPointerType(field1.getType()) and
t2 = lookThroughPointerType(field2.getType()) and
(
field1 = field2
or
field2 = result.getUnderlyingType().(StructType).getFieldAtDepth(_, depth2 - depth1 - 1)
)
|
e.(Method).getReceiverBaseType() = t2
or
e.(Field).getDeclaringType() = t2.getUnderlyingType()
)
}

/**
* Gets the base type of `underlying`, where `n` is of the form
* `implicitDeref?(underlying.implicitFieldRead1.implicitFieldRead2...)`
*
* For Go syntax like `qualifier.method()` or `qualifier.field`, this is the type of `qualifier`, before any
* implicit dereference is interposed because `qualifier` is of pointer type, or implicit field accesses
* navigate to any embedded struct types that truly host `field`.
*/
private Type getSyntacticQualifierBaseType(DataFlow::Node n) {
exists(DataFlow::Node n2 |
// look through implicit dereference, if there is one
not exists(n.asInstruction().(IR::EvalImplicitDerefInstruction).getOperand()) and
n2 = n
or
n2.asExpr() = n.asInstruction().(IR::EvalImplicitDerefInstruction).getOperand()
|
result = lookThroughPointerType(skipImplicitFieldReads(n2).getType())
)
}

private DataFlow::Node skipImplicitFieldReads(DataFlow::Node n) {
not exists(lookThroughImplicitFieldRead(n)) and result = n
or
result = skipImplicitFieldReads(lookThroughImplicitFieldRead(n))
}

private DataFlow::Node lookThroughImplicitFieldRead(DataFlow::Node n) {
result.asInstruction() =
n.(DataFlow::InstructionNode)
.asInstruction()
.(IR::ImplicitFieldReadInstruction)
.getBaseInstruction()
}

/** Provides additional sink specification logic. */
bindingset[c]
predicate interpretOutput(string c, InterpretNode mid, InterpretNode node) {
Expand All @@ -242,10 +393,12 @@ module SourceSinkInterpretationInput implements
e = mid.asElement()
|
(c = "Parameter" or c = "") and
node.asNode().asParameter() = e.asEntity()
n.asParameter() = pragma[only_bind_into](e).asEntity()
or
c = "" and
n.(DataFlow::FieldReadNode).getField() = e.asEntity()
exists(DataFlow::FieldReadNode frn | frn = n |
c = "" and
pragma[only_bind_into](e) = getElementWithQualifier(frn.getField(), frn.getBase())
)
)
}

Expand All @@ -259,10 +412,13 @@ module SourceSinkInterpretationInput implements
mid.asCallable() = getNodeEnclosingCallable(ret)
)
or
exists(DataFlow::Write fw, Field f |
exists(SourceOrSinkElement e, DataFlow::Write fw, DataFlow::Node base, Field f |
e = mid.asElement() and
f = e.asFieldEntity()
|
c = "" and
f = mid.asElement().asEntity() and
fw.writesField(_, f, node.asNode())
fw.writesField(base, f, node.asNode()) and
pragma[only_bind_into](e) = getElementWithQualifier(f, base)
)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ extensions:
pack: codeql/go-all
extensible: sourceModel
data:
- ["github.com/nonexistent/test", "I1", False, "Source", "", "", "ReturnValue", "remote", "manual"]
- ["github.com/nonexistent/test", "I1", False, "Source", "", "", "ReturnValue", "qltest", "manual"]
- addsTo:
pack: codeql/go-all
extensible: summaryModel
Expand All @@ -13,4 +13,4 @@ extensions:
pack: codeql/go-all
extensible: sinkModel
data:
- ["github.com/nonexistent/test", "I1", False, "Sink", "", "", "Argument[0]", "path-injection", "manual"]
- ["github.com/nonexistent/test", "I1", False, "Sink", "", "", "Argument[0]", "qltest", "manual"]
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
import go
import semmle.go.dataflow.ExternalFlow
import ModelValidation
import semmle.go.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
import TestUtilities.InlineExpectationsTest
import MakeTest<FlowTest>

module Config implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
predicate isSource(DataFlow::Node source) { sourceNode(source, "qltest") }

predicate isSink(DataFlow::Node sink) { sink = any(FileSystemAccess fsa).getAPathArgument() }
predicate isSink(DataFlow::Node sink) { sinkNode(sink, "qltest") }
}

module Flow = TaintTracking::Global<Config>;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ extensions:
pack: codeql/go-all
extensible: sourceModel
data:
- ["github.com/nonexistent/test", "I1", True, "Source", "", "", "ReturnValue", "remote", "manual"]
- ["github.com/nonexistent/test", "I1", True, "Source", "", "", "ReturnValue", "qltest", "manual"]
- addsTo:
pack: codeql/go-all
extensible: summaryModel
Expand All @@ -13,4 +13,4 @@ extensions:
pack: codeql/go-all
extensible: sinkModel
data:
- ["github.com/nonexistent/test", "I1", True, "Sink", "", "", "Argument[0]", "path-injection", "manual"]
- ["github.com/nonexistent/test", "I1", True, "Sink", "", "", "Argument[0]", "qltest", "manual"]
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
import go
import semmle.go.dataflow.ExternalFlow
import ModelValidation
import semmle.go.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
import TestUtilities.InlineExpectationsTest
import MakeTest<FlowTest>

module Config implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
predicate isSource(DataFlow::Node source) { sourceNode(source, "qltest") }

predicate isSink(DataFlow::Node sink) { sink = any(FileSystemAccess fsa).getAPathArgument() }
predicate isSink(DataFlow::Node sink) { sinkNode(sink, "qltest") }
}

module Flow = TaintTracking::Global<Config>;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ extensions:
pack: codeql/go-all
extensible: sourceModel
data:
- ["github.com/nonexistent/test", "I2", False, "Source", "", "", "ReturnValue", "remote", "manual"]
- ["github.com/nonexistent/test", "I2", False, "Source", "", "", "ReturnValue", "qltest", "manual"]
- addsTo:
pack: codeql/go-all
extensible: summaryModel
Expand All @@ -13,4 +13,4 @@ extensions:
pack: codeql/go-all
extensible: sinkModel
data:
- ["github.com/nonexistent/test", "I2", False, "Sink", "", "", "Argument[0]", "path-injection", "manual"]
- ["github.com/nonexistent/test", "I2", False, "Sink", "", "", "Argument[0]", "qltest", "manual"]
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
import go
import semmle.go.dataflow.ExternalFlow
import ModelValidation
import semmle.go.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
import TestUtilities.InlineExpectationsTest
import MakeTest<FlowTest>

module Config implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
predicate isSource(DataFlow::Node source) { sourceNode(source, "qltest") }

predicate isSink(DataFlow::Node sink) { sink = any(FileSystemAccess fsa).getAPathArgument() }
predicate isSink(DataFlow::Node sink) { sinkNode(sink, "qltest") }
}

module Flow = TaintTracking::Global<Config>;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ extensions:
pack: codeql/go-all
extensible: sourceModel
data:
- ["github.com/nonexistent/test", "I2", True, "Source", "", "", "ReturnValue", "remote", "manual"]
- ["github.com/nonexistent/test", "I2", True, "Source", "", "", "ReturnValue", "qltest", "manual"]
- addsTo:
pack: codeql/go-all
extensible: summaryModel
Expand All @@ -13,4 +13,4 @@ extensions:
pack: codeql/go-all
extensible: sinkModel
data:
- ["github.com/nonexistent/test", "I2", True, "Sink", "", "", "Argument[0]", "path-injection", "manual"]
- ["github.com/nonexistent/test", "I2", True, "Sink", "", "", "Argument[0]", "qltest", "manual"]
Loading
Loading