Skip to content

Commit 85ba647

Browse files
yoffCopilot
andcommitted
Python: switch dataflow library to new (shared) CFG + SSA
Flips the Python dataflow trunk from the legacy CFG (semmle/python/Flow.qll) and legacy ESSA SSA (semmle/python/essa/*) to the new shared CFG facade (semmle.python.controlflow.internal.Cfg) and the new SSA adapter (semmle.python.dataflow.new.internal.SsaImpl), both introduced additively in the preceding PRs in this stack. This is the trunk-flip equivalent of the original draft PR #21894 (kept around as documentation), rebased on top of the four preparatory PRs: P1: Remove AstNode.getAFlowNode() and rewrite callers (#21919). P2: Qualify Flow.qll's AST references with Py:: prefix (#21920). P3: Add new shared-CFG-backed control flow graph (#21921). P4: Add new shared-SSA-backed SSA adapter (#21923). The Python dataflow library (semmle/python/dataflow/new/) now imports the new CFG facade and SSA adapter. All CFG-typed predicates (ControlFlowNode, CallNode, BasicBlock, NameNode, AttrNode, ...) are qualified with the Cfg:: prefix; SSA references switch from EssaVariable/EssaDefinition to SsaImpl::Definition/SourceVariable. GuardNode is redesigned to use the new CFG's outcome-node model (isAfterTrue / isAfterFalse) instead of the legacy ConditionBlock + flipped indirection. Only BarrierGuard<...> is preserved as public API. Framework files (Bottle, FastApi, Django, Tornado, Pyramid, Stdlib, ...) are updated to take CFG nodes from the new facade. A handful of dataflow consistency tweaks for the new CFG: - Augmented-assignment targets are treated as both load and store. - 'from X import *' produces uncertain SSA writes for unknown names. - CFG nodes are canonicalised so dataflow does not see equivalent pre/post-order pairs as distinct nodes. Two AST tweaks for the new CFG: - AstNodeImpl: omit PEP 695 type-parameter names from FunctionDefExpr / ClassDefExpr children. - ImportResolution: drop the legacy essa import. Test churn (~175 files): reblessed library- and query-test .expected files reflect slightly different CFG granularity, different toString output, and a handful of true alert deltas in security queries. Verification: all 367 lib + src + consistency-queries compile clean. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 5ec5178 commit 85ba647

256 files changed

Lines changed: 6457 additions & 6176 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

python/ql/consistency-queries/DataFlowConsistency.ql

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ private import semmle.python.dataflow.new.internal.DataFlowImplSpecific
99
private import semmle.python.dataflow.new.internal.DataFlowDispatch
1010
private import semmle.python.dataflow.new.internal.TaintTrackingImplSpecific
1111
private import codeql.dataflow.internal.DataFlowImplConsistency
12+
private import semmle.python.controlflow.internal.Cfg as Cfg
1213

1314
private module Input implements InputSig<Location, PythonDataFlow> {
1415
private import Private
@@ -72,7 +73,7 @@ private module Input implements InputSig<Location, PythonDataFlow> {
7273
// resolve to multiple functions), but we only make _one_ ArgumentNode for each
7374
// argument in the CallNode, we end up violating this consistency check in those
7475
// cases. (see `getCallArg` in DataFlowDispatch.qll)
75-
exists(DataFlowCall other, CallNode cfgCall | other != call |
76+
exists(DataFlowCall other, Cfg::CallNode cfgCall | other != call |
7677
call.getNode() = cfgCall and
7778
other.getNode() = cfgCall and
7879
isArgumentNode(arg, call, _) and
@@ -88,16 +89,16 @@ private module Input implements InputSig<Location, PythonDataFlow> {
8889
// allow it instead.
8990
(
9091
call.getScope() = attr.getScope() and
91-
any(CfgNode n | n.asCfgNode() = call.getNode().(CallNode).getFunction()).getALocalSource() =
92-
attr
92+
any(CfgNode n | n.asCfgNode() = call.getNode().(Cfg::CallNode).getFunction())
93+
.getALocalSource() = attr
9394
or
9495
not exists(call.getScope().(Function).getDefinition()) and
9596
call.getScope().getScope+() = attr.getScope()
9697
) and
9798
(
9899
other.getScope() = attr.getScope() and
99-
any(CfgNode n | n.asCfgNode() = other.getNode().(CallNode).getFunction()).getALocalSource() =
100-
attr
100+
any(CfgNode n | n.asCfgNode() = other.getNode().(Cfg::CallNode).getFunction())
101+
.getALocalSource() = attr
101102
or
102103
not exists(other.getScope().(Function).getDefinition()) and
103104
other.getScope().getScope+() = attr.getScope()
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: breaking
3+
---
4+
* The deprecated `AstNode.getAFlowNode()` and `Function.getAReturnValueFlowNode()` predicates now return nodes from the new shared CFG (`Cfg::ControlFlowNode`) rather than from the legacy CFG (`ControlFlowNode`). Callers that still rely on these deprecated APIs and feed the result into legacy-CFG-aware predicates will no longer type-check; migrate to `n.getNode() = e` (or, for return values, the explicit `Return` pattern shown in the deprecation message) to get nodes from the dataflow library's current CFG.

python/ql/lib/semmle/python/ApiGraphs.qll

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,9 @@
66
* directed and labeled; they specify how the components represented by nodes relate to each other.
77
*/
88

9-
// Importing python under the `py` namespace to avoid importing `CallNode` from `Flow.qll` and thereby having a naming conflict with `API::CallNode`.
9+
// Importing python under the `PY` namespace to avoid pulling in `CallNode` from `Flow.qll` (via `import python`) and thereby having a naming conflict with `API::CallNode`.
1010
private import python as PY
11+
private import semmle.python.controlflow.internal.Cfg as Cfg
1112
import semmle.python.dataflow.new.DataFlow
1213
private import semmle.python.internal.CachedStages
1314

@@ -282,15 +283,15 @@ module API {
282283
index = this.getIndex() and
283284
(
284285
// subscripting
285-
exists(PY::SubscriptNode subscript |
286+
exists(Cfg::SubscriptNode subscript |
286287
subscript.getObject() = this.getAValueReachableFromSource().asCfgNode() and
287288
subscript.getIndex() = index.asSink().asCfgNode()
288289
|
289290
// reading
290291
subscript = result.asSource().asCfgNode()
291292
or
292293
// writing
293-
subscript.(PY::DefinitionNode).getValue() = result.asSink().asCfgNode()
294+
subscript.(Cfg::DefinitionNode).getValue() = result.asSink().asCfgNode()
294295
)
295296
or
296297
// dictionary literals
@@ -684,7 +685,7 @@ module API {
684685
* Ignores relative imports, such as `from ..foo.bar import baz`.
685686
*/
686687
private predicate imports(DataFlow::CfgNode imp, string name) {
687-
exists(PY::ImportExprNode iexpr |
688+
exists(Cfg::ImportExprNode iexpr |
688689
imp.getNode() = iexpr and
689690
not iexpr.getNode().isRelative() and
690691
name = iexpr.getNode().getImportedModuleName()
@@ -775,7 +776,7 @@ module API {
775776
// list literals, from `x` to `[x]`
776777
// TODO: once convenient, this should be done at a higher level than the AST,
777778
// at least at the CFG layer, to take splitting into account.
778-
// Also consider `SequenceNode for generality.
779+
// Also consider `Cfg::SequenceNode` for generality.
779780
exists(PY::List list | list = pred.(DataFlow::ExprNode).getNode().getNode() |
780781
rhs.(DataFlow::ExprNode).getNode().getNode() = list.getAnElt() and
781782
lbl = Label::subscript()
@@ -805,7 +806,7 @@ module API {
805806
subscript = trackUseNode(src).getSubscript(index)
806807
|
807808
// from `x` to a definition of `x[...]`
808-
rhs.asCfgNode() = subscript.asCfgNode().(PY::DefinitionNode).getValue() and
809+
rhs.asCfgNode() = subscript.asCfgNode().(Cfg::DefinitionNode).getValue() and
809810
lbl = Label::subscript()
810811
or
811812
// from `x` to `"key"` in `x["key"]`

python/ql/lib/semmle/python/AstExtended.qll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ module;
33

44
import python
55
private import semmle.python.internal.CachedStages
6+
private import semmle.python.controlflow.internal.Cfg as Cfg
67

78
/** A syntactic node (Class, Function, Module, Expr, Stmt or Comprehension) corresponding to a flow node */
89
abstract class AstNode extends AstNode_ {
@@ -23,17 +24,16 @@ abstract class AstNode extends AstNode_ {
2324
/**
2425
* DEPRECATED: use `ControlFlowNode.getNode()` from the other direction instead;
2526
* that is, replace `e.getAFlowNode() = n` with `n.getNode() = e`. This API is
26-
* being removed to untangle the AST and CFG hierarchies in preparation for
27-
* migrating the dataflow library off the legacy CFG.
27+
* being removed to untangle the AST and CFG hierarchies.
2828
*
29-
* Gets a flow node corresponding directly to this node.
30-
* NOTE: For some statements and other purely syntactic elements,
31-
* there may not be a `ControlFlowNode`.
29+
* Gets a flow node corresponding directly to this node, from the new
30+
* (shared) CFG. NOTE: For some statements and other purely syntactic
31+
* elements, there may not be a `ControlFlowNode`.
3232
*/
3333
cached
34-
deprecated ControlFlowNode getAFlowNode() {
34+
deprecated Cfg::ControlFlowNode getAFlowNode() {
3535
Stages::AST::ref() and
36-
py_flow_bb_node(result, this, _, _)
36+
result.getNode() = this
3737
}
3838

3939
/**

python/ql/lib/semmle/python/Concepts.qll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
*/
66

77
private import python
8+
private import semmle.python.controlflow.internal.Cfg as Cfg
89
private import semmle.python.dataflow.new.DataFlow
910
private import semmle.python.dataflow.new.internal.DataFlowImplSpecific
1011
private import semmle.python.dataflow.new.RemoteFlowSources
@@ -214,7 +215,7 @@ module Path {
214215
SafeAccessCheck() { this = DataFlow::BarrierGuard<safeAccessCheck/3>::getABarrierNode() }
215216
}
216217

217-
private predicate safeAccessCheck(DataFlow::GuardNode g, ControlFlowNode node, boolean branch) {
218+
private predicate safeAccessCheck(DataFlow::GuardNode g, Cfg::ControlFlowNode node, boolean branch) {
218219
g.(SafeAccessCheck::Range).checks(node, branch)
219220
}
220221

@@ -223,7 +224,7 @@ module Path {
223224
/** A data-flow node that checks that a path is safe to access in some way, for example by having a controlled prefix. */
224225
abstract class Range extends DataFlow::GuardNode {
225226
/** Holds if this guard validates `node` upon evaluating to `branch`. */
226-
abstract predicate checks(ControlFlowNode node, boolean branch);
227+
abstract predicate checks(Cfg::ControlFlowNode node, boolean branch);
227228
}
228229
}
229230
}

python/ql/lib/semmle/python/Exprs.qll

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ module;
33

44
private import python
55
private import semmle.python.internal.CachedStages
6+
private import semmle.python.controlflow.internal.Cfg as Cfg
67

78
/** An expression */
89
class Expr extends Expr_, AstNode {
@@ -70,7 +71,7 @@ class Attribute extends Attribute_ {
7071
/* syntax: Expr.name */
7172
override Expr getASubExpression() { result = this.getObject() }
7273

73-
deprecated override AttrNode getAFlowNode() { result = super.getAFlowNode() }
74+
deprecated override Cfg::AttrNode getAFlowNode() { result = super.getAFlowNode() }
7475

7576
/** Gets the name of this attribute. That is the `name` in `obj.name` */
7677
string getName() { result = Attribute_.super.getAttr() }
@@ -99,7 +100,7 @@ class Subscript extends Subscript_ {
99100

100101
Expr getObject() { result = Subscript_.super.getValue() }
101102

102-
deprecated override SubscriptNode getAFlowNode() { result = super.getAFlowNode() }
103+
deprecated override Cfg::SubscriptNode getAFlowNode() { result = super.getAFlowNode() }
103104
}
104105

105106
/** A call expression, such as `func(...)` */
@@ -115,7 +116,7 @@ class Call extends Call_ {
115116

116117
override string toString() { result = this.getFunc().toString() + "()" }
117118

118-
deprecated override CallNode getAFlowNode() { result = super.getAFlowNode() }
119+
deprecated override Cfg::CallNode getAFlowNode() { result = super.getAFlowNode() }
119120

120121
/** Gets a tuple (*) argument of this call. */
121122
Expr getStarargs() { result = this.getAPositionalArg().(Starred).getValue() }
@@ -203,7 +204,7 @@ class IfExp extends IfExp_ {
203204
result = this.getTest() or result = this.getBody() or result = this.getOrelse()
204205
}
205206

206-
deprecated override IfExprNode getAFlowNode() { result = super.getAFlowNode() }
207+
deprecated override Cfg::IfExprNode getAFlowNode() { result = super.getAFlowNode() }
207208
}
208209

209210
/** A starred expression, such as the `*rest` in the assignment `first, *rest = seq` */
@@ -413,7 +414,7 @@ class PlaceHolder extends PlaceHolder_ {
413414

414415
override string toString() { result = "$" + this.getId() }
415416

416-
deprecated override NameNode getAFlowNode() { result = super.getAFlowNode() }
417+
deprecated override Cfg::NameNode getAFlowNode() { result = super.getAFlowNode() }
417418
}
418419

419420
/** A tuple expression such as `( 1, 3, 5, 7, 9 )` */
@@ -480,7 +481,7 @@ class Name extends Name_ {
480481

481482
override string toString() { result = this.getId() }
482483

483-
deprecated override NameNode getAFlowNode() { result = super.getAFlowNode() }
484+
deprecated override Cfg::NameNode getAFlowNode() { result = super.getAFlowNode() }
484485

485486
override predicate isArtificial() {
486487
/* Artificial variable names in comprehensions all start with "." */
@@ -587,7 +588,7 @@ abstract class NameConstant extends Name, ImmutableLiteral {
587588

588589
override predicate isConstant() { any() }
589590

590-
deprecated override NameConstantNode getAFlowNode() { result = Name.super.getAFlowNode() }
591+
deprecated override Cfg::NameConstantNode getAFlowNode() { result = Name.super.getAFlowNode() }
591592

592593
override predicate isArtificial() { none() }
593594
}

python/ql/lib/semmle/python/Function.qll

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ overlay[local]
22
module;
33

44
import python
5+
private import semmle.python.controlflow.internal.Cfg as Cfg
56

67
/**
78
* A function, independent of defaults and binding.
@@ -157,12 +158,12 @@ class Function extends Function_, Scope, AstNode {
157158
* DEPRECATED: bind a `Return` node explicitly instead, e.g.
158159
* `exists(Return ret | ret.getScope() = this and n.getNode() = ret.getValue())`.
159160
* This API is being phased out together with `AstNode.getAFlowNode()` to
160-
* untangle the AST and CFG hierarchies in preparation for migrating the
161-
* dataflow library off the legacy CFG.
161+
* untangle the AST and CFG hierarchies.
162162
*
163-
* Gets a control flow node for a return value of this function.
163+
* Gets a control flow node for a return value of this function, from the
164+
* new (shared) CFG.
164165
*/
165-
deprecated ControlFlowNode getAReturnValueFlowNode() {
166+
deprecated Cfg::ControlFlowNode getAReturnValueFlowNode() {
166167
exists(Return ret |
167168
ret.getScope() = this and
168169
ret.getValue() = result.getNode()

python/ql/lib/semmle/python/Import.qll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ module;
44
import python
55
private import semmle.python.types.Builtins
66
private import semmle.python.internal.CachedStages
7+
private import semmle.python.controlflow.internal.Cfg as Cfg
78

89
/**
910
* An alias in an import statement, the `mod as name` part of `import mod as name`. May be artificial;
@@ -149,7 +150,7 @@ class ImportExpr extends ImportExpr_ {
149150
class ImportMember extends ImportMember_ {
150151
override Expr getASubExpression() { result = this.getModule() }
151152

152-
deprecated override ImportMemberNode getAFlowNode() { result = super.getAFlowNode() }
153+
deprecated override Cfg::ImportMemberNode getAFlowNode() { result = super.getAFlowNode() }
153154

154155
override predicate hasSideEffects() {
155156
/* Strictly this only has side-effects if the module is a package */

0 commit comments

Comments
 (0)