Skip to content

Commit 9dae567

Browse files
oxisto and KuechA authored
Extending value evaluator in the python frontend (#1947)
* Extending value evaluator in the python frontend

  This PR extends the value evaluator with python specifics. This also makes the `cpg-analysis` mandatory for the python language frontend.
* Added SystemInformation as an overlay node
* Added more tests
* Test
* Added arithmetic test
* ++
* Using handleHasInitializer for all things with initializers
* Update cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/SystemInformation.kt

  Co-authored-by: KuechA <31155350+KuechA@users.noreply.github.com>

---------

Co-authored-by: KuechA <31155350+KuechA@users.noreply.github.com>
1 parent 80d6756 commit 9dae567

File tree

11 files changed

+501
-46
lines changed

11 files changed

+501
-46
lines changed

cpg-analysis/src/main/kotlin/de/fraunhofer/aisec/cpg/analysis/MultiValueEvaluator.kt

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -64,14 +64,12 @@ class MultiValueEvaluator : ValueEvaluator() {
6464
this.path += node
6565

6666
when (node) {
67-
is FieldDeclaration -> {
68-
return evaluateInternal(node.initializer, depth + 1)
69-
}
70-
is NewArrayExpression -> return evaluateInternal(node.initializer, depth + 1)
71-
is VariableDeclaration -> return handleVariableDeclaration(node, depth)
67+
is FieldDeclaration -> return handleHasInitializer(node, depth)
68+
is NewArrayExpression -> return handleHasInitializer(node, depth)
69+
is VariableDeclaration -> return handleHasInitializer(node, depth)
7270
// For a literal, we can just take its value, and we are finished
7371
is Literal<*> -> return node.value
74-
is Reference -> return handleReference(node, depth)
72+
is Reference -> return handlePrevDFG(node, depth)
7573
is UnaryOperator -> return handleUnaryOp(node, depth)
7674
is AssignExpression -> return handleAssignExpression(node, depth)
7775
is BinaryOperator -> return handleBinaryOperator(node, depth)
@@ -206,25 +204,30 @@ class MultiValueEvaluator : ValueEvaluator() {
206204
}
207205

208206
/**
209-
* Tries to compute the value of a reference. It therefore checks the incoming data flow edges.
207+
* Tries to compute the value of a node based on its [Node.prevDFG].
210208
*
211209
* In contrast to the implementation of [ValueEvaluator], this one can handle more than one
212210
* value.
213211
*/
214-
override fun handleReference(expr: Reference, depth: Int): Collection<Any?> {
212+
override fun handlePrevDFG(node: Node, depth: Int): Collection<Any?> {
215213
// For a reference, we are interested in its last assignment into the reference
216214
// denoted by the previous DFG edge. We need to filter out any self-references for READWRITE
217215
// references.
218-
val prevDFG = filterSelfReferences(expr, expr.prevDFG.toList())
216+
val prevDFG =
217+
if (node is Reference) {
218+
filterSelfReferences(node, node.prevDFG.toList())
219+
} else {
220+
node.prevDFG
221+
}
219222

220223
if (prevDFG.size == 1) {
221224
// There's only one incoming DFG edge, so we follow this one.
222225
val internalRes = evaluateInternal(prevDFG.first(), depth + 1)
223226
return (internalRes as? Collection<*>) ?: mutableSetOf(internalRes)
224227
}
225228

226-
if (prevDFG.size == 2 && prevDFG.all(::isSimpleForLoop)) {
227-
return handleSimpleLoopVariable(expr, depth)
229+
if (node is Reference && prevDFG.size == 2 && prevDFG.all(::isSimpleForLoop)) {
230+
return handleSimpleLoopVariable(node, depth)
228231
}
229232

230233
val result = mutableSetOf<Any?>()

cpg-analysis/src/main/kotlin/de/fraunhofer/aisec/cpg/analysis/ValueEvaluator.kt

Lines changed: 54 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
package de.fraunhofer.aisec.cpg.analysis
2727

2828
import de.fraunhofer.aisec.cpg.graph.AccessValues
29+
import de.fraunhofer.aisec.cpg.graph.HasInitializer
2930
import de.fraunhofer.aisec.cpg.graph.HasOperatorCode
3031
import de.fraunhofer.aisec.cpg.graph.Node
3132
import de.fraunhofer.aisec.cpg.graph.declarations.VariableDeclaration
@@ -74,7 +75,7 @@ open class ValueEvaluator(
7475
open fun evaluate(node: Any?): Any? {
7576
if (node !is Node) return node
7677

77-
return evaluateInternal(node as? Node, 0)
78+
return evaluateInternal(node, 0)
7879
}
7980

8081
fun clearPath() {
@@ -87,11 +88,11 @@ open class ValueEvaluator(
8788
node?.let { this.path += it }
8889

8990
when (node) {
90-
is NewArrayExpression -> return evaluateInternal(node.initializer, depth)
91-
is VariableDeclaration -> return handleVariableDeclaration(node, depth)
91+
is NewArrayExpression -> return handleHasInitializer(node, depth)
92+
is VariableDeclaration -> return handleHasInitializer(node, depth)
9293
// For a literal, we can just take its value, and we are finished
9394
is Literal<*> -> return node.value
94-
is Reference -> return handleReference(node, depth)
95+
is Reference -> return handlePrevDFG(node, depth)
9596
is UnaryOperator -> return handleUnaryOp(node, depth)
9697
is BinaryOperator -> return handleBinaryOperator(node, depth)
9798
// Casts are just a wrapper in this case, we are interested in the inner expression
@@ -108,10 +109,17 @@ open class ValueEvaluator(
108109
return cannotEvaluate(node, this)
109110
}
110111

111-
protected fun handleVariableDeclaration(node: VariableDeclaration, depth: Int): Any? {
112-
// If we have an initializer, we can use it. However, we actually should just use the DFG
113-
// instead and do something similar to handleReference
114-
return evaluateInternal(node.initializer, depth + 1)
112+
/**
113+
* If a node declaration implements [HasInitializer], we can use the initializer to evaluate
114+
* its value. If the initializer is absent, we fall back to [handlePrevDFG].
115+
*/
116+
protected fun handleHasInitializer(node: HasInitializer, depth: Int): Any? {
117+
// If we have an initializer, we can use it. Otherwise, we can fall back to the prevDFG
118+
return if (node.initializer != null) {
119+
evaluateInternal(node.initializer, depth + 1)
120+
} else {
121+
handlePrevDFG(node as Node, depth)
122+
}
115123
}
116124

117125
/** Under certain circumstances, an assignment can also be used as an expression. */
@@ -259,51 +267,63 @@ open class ValueEvaluator(
259267
}
260268
}
261269

262-
private fun handleGreater(lhsValue: Any?, rhsValue: Any?, expr: Expression?): Any? {
270+
protected open fun handleGreater(lhsValue: Any?, rhsValue: Any?, expr: Expression?): Any? {
263271
return if (lhsValue is Number && rhsValue is Number) {
264272
lhsValue.compareTo(rhsValue) > 0
265273
} else {
266274
cannotEvaluate(expr, this)
267275
}
268276
}
269277

270-
private fun handleGEq(lhsValue: Any?, rhsValue: Any?, expr: Expression?): Any? {
278+
protected open fun handleGEq(lhsValue: Any?, rhsValue: Any?, expr: Expression?): Any? {
271279
return if (lhsValue is Number && rhsValue is Number) {
272280
lhsValue.compareTo(rhsValue) >= 0
273281
} else {
274282
cannotEvaluate(expr, this)
275283
}
276284
}
277285

278-
private fun handleLess(lhsValue: Any?, rhsValue: Any?, expr: Expression?): Any? {
286+
protected open fun handleLess(lhsValue: Any?, rhsValue: Any?, expr: Expression?): Any? {
279287
return if (lhsValue is Number && rhsValue is Number) {
280288
lhsValue.compareTo(rhsValue) < 0
281289
} else {
282290
cannotEvaluate(expr, this)
283291
}
284292
}
285293

286-
private fun handleLEq(lhsValue: Any?, rhsValue: Any?, expr: Expression?): Any? {
294+
protected open fun handleLEq(lhsValue: Any?, rhsValue: Any?, expr: Expression?): Any? {
287295
return if (lhsValue is Number && rhsValue is Number) {
288296
lhsValue.compareTo(rhsValue) <= 0
289297
} else {
290298
cannotEvaluate(expr, this)
291299
}
292300
}
293301

294-
private fun handleEq(lhsValue: Any?, rhsValue: Any?, expr: Expression?): Any? {
295-
return if (lhsValue is Number && rhsValue is Number) {
296-
lhsValue.compareTo(rhsValue) == 0
297-
} else {
298-
cannotEvaluate(expr, this)
302+
protected open fun handleEq(lhsValue: Any?, rhsValue: Any?, expr: Expression?): Any? {
303+
return when {
304+
lhsValue is Number && rhsValue is Number -> {
305+
lhsValue.compareTo(rhsValue) == 0
306+
}
307+
lhsValue is String && rhsValue is String -> {
308+
lhsValue == rhsValue
309+
}
310+
else -> {
311+
cannotEvaluate(expr, this)
312+
}
299313
}
300314
}
301315

302-
private fun handleNEq(lhsValue: Any?, rhsValue: Any?, expr: Expression?): Any? {
303-
return if (lhsValue is Number && rhsValue is Number) {
304-
lhsValue.compareTo(rhsValue) != 0
305-
} else {
306-
cannotEvaluate(expr, this)
316+
protected open fun handleNEq(lhsValue: Any?, rhsValue: Any?, expr: Expression?): Any? {
317+
return when {
318+
lhsValue is Number && rhsValue is Number -> {
319+
lhsValue.compareTo(rhsValue) != 0
320+
}
321+
lhsValue is String && rhsValue is String -> {
322+
lhsValue != rhsValue
323+
}
324+
else -> {
325+
cannotEvaluate(expr, this)
326+
}
307327
}
308328
}
309329

@@ -390,15 +410,17 @@ open class ValueEvaluator(
390410
return cannotEvaluate(expr, this)
391411
}
392412

393-
/**
394-
* Tries to compute the constant value of a reference. It therefore checks the incoming data
395-
* flow edges.
396-
*/
397-
protected open fun handleReference(expr: Reference, depth: Int): Any? {
413+
/** Tries to compute the constant value of a node based on its [Node.prevDFG]. */
414+
protected open fun handlePrevDFG(node: Node, depth: Int): Any? {
398415
// For a reference, we are interested in its last assignment into the reference
399416
// denoted by the previous DFG edge. We need to filter out any self-references for READWRITE
400417
// references.
401-
val prevDFG = filterSelfReferences(expr, expr.prevDFG.toList())
418+
val prevDFG =
419+
if (node is Reference) {
420+
filterSelfReferences(node, node.prevDFG.toList())
421+
} else {
422+
node.prevDFG
423+
}
402424

403425
return if (prevDFG.size == 1) {
404426
// There's only one incoming DFG edge, so we follow this one.
@@ -407,13 +429,13 @@ open class ValueEvaluator(
407429
// We cannot have more than ONE valid solution, so we need to abort
408430
log.warn(
409431
"We cannot evaluate {}: It has more than 1 previous DFG edges, meaning that the value is probably affected by a branch.",
410-
expr,
432+
node,
411433
)
412-
cannotEvaluate(expr, this)
434+
cannotEvaluate(node, this)
413435
} else {
414436
// No previous DFG node
415-
log.warn("We cannot evaluate {}: It has no previous DFG edges.", expr)
416-
cannotEvaluate(expr, this)
437+
log.warn("We cannot evaluate {}: It has no previous DFG edges.", node)
438+
cannotEvaluate(node, this)
417439
}
418440
}
419441

cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/expressions/NewArrayExpression.kt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
*/
2626
package de.fraunhofer.aisec.cpg.graph.statements.expressions
2727

28+
import de.fraunhofer.aisec.cpg.graph.HasInitializer
2829
import de.fraunhofer.aisec.cpg.graph.declarations.VariableDeclaration
2930
import de.fraunhofer.aisec.cpg.graph.edges.Edge.Companion.propertyEqualsList
3031
import de.fraunhofer.aisec.cpg.graph.edges.ast.astEdgesOf
@@ -38,14 +39,14 @@ import org.neo4j.ogm.annotation.Relationship
3839
* combination with a [VariableDeclaration].
3940
*/
4041
// TODO Merge and/or refactor with new Expression
41-
class NewArrayExpression : Expression() {
42+
class NewArrayExpression : Expression(), HasInitializer {
4243
@Relationship("INITIALIZER") var initializerEdge = astOptionalEdgeOf<Expression>()
4344

4445
/**
4546
* The initializer of the expression, if present. Many languages, such as Java, either specify
4647
* [dimensions] or an initializer.
4748
*/
48-
var initializer by unwrapping(NewArrayExpression::initializerEdge)
49+
override var initializer by unwrapping(NewArrayExpression::initializerEdge)
4950

5051
/**
5152
* Specifies the dimensions of the array that is to be created. Many languages, such as Java,

cpg-language-python/build.gradle.kts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ dependencies {
4343
// jep for python support
4444
implementation(libs.jep)
4545

46+
// the cpg-analysis project helps to support a dynamically invoked language
47+
implementation(projects.cpgAnalysis)
48+
4649
// to evaluate some test cases
47-
testImplementation(project(":cpg-analysis"))
50+
testImplementation(projects.cpgAnalysis)
4851
}

cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonLanguageFrontend.kt

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
*/
2626
package de.fraunhofer.aisec.cpg.frontends.python
2727

28+
import de.fraunhofer.aisec.cpg.TranslationConfiguration
2829
import de.fraunhofer.aisec.cpg.TranslationContext
2930
import de.fraunhofer.aisec.cpg.frontends.Language
3031
import de.fraunhofer.aisec.cpg.frontends.LanguageFrontend
@@ -48,6 +49,19 @@ import kotlin.io.path.pathString
4849
import kotlin.io.path.relativeToOrNull
4950
import kotlin.math.min
5051

52+
/**
53+
* The [LanguageFrontend] for Python. It uses the JEP library to interact with Python's AST.
54+
*
55+
* It requires the Python interpreter (and the JEP library) to be installed on the system. The
56+
* frontend registers two additional passes.
57+
*
58+
* ## Adding dynamic variable declarations
59+
*
60+
* The [PythonAddDeclarationsPass] adds dynamic declarations to the CPG. Python does not have the
61+
* concept of a "declaration", but rather values are assigned to variables and internally variables
62+
* are represented by a dictionary. This pass adds a declaration for each variable that is assigned
63+
* a value (on the first assignment).
64+
*/
5165
@RegisterExtraPass(PythonAddDeclarationsPass::class)
5266
class PythonLanguageFrontend(language: Language<PythonLanguageFrontend>, ctx: TranslationContext) :
5367
LanguageFrontend<Python.AST.AST, Python.AST.AST?>(language, ctx) {
@@ -76,10 +90,13 @@ class PythonLanguageFrontend(language: Language<PythonLanguageFrontend>, ctx: Tr
7690
it.set("content", fileContent)
7791
it.set("filename", file.absolutePath)
7892
it.exec("import ast")
93+
it.exec("import sys")
7994
it.exec("parsed = ast.parse(content, filename=filename, type_comments=True)")
8095

8196
val pyAST = it.getValue("parsed") as PyObject
97+
8298
val tud = pythonASTtoCPG(pyAST, file.toPath())
99+
populateSystemInformation(config, tud)
83100

84101
if (config.matchCommentsToNodes) {
85102
it.exec("import tokenize")
@@ -96,6 +113,7 @@ class PythonLanguageFrontend(language: Language<PythonLanguageFrontend>, ctx: Tr
96113
(it.getValue("tokenList") as? ArrayList<*>) ?: TODO("Cannot get tokens of $it")
97114
addCommentsToCPG(tud, pyTokens, pyCommentCode)
98115
}
116+
99117
return tud
100118
}
101119
}
@@ -334,6 +352,39 @@ class PythonLanguageFrontend(language: Language<PythonLanguageFrontend>, ctx: Tr
334352
}
335353
}
336354

355+
/**
356+
* Populate system information from defined symbols that represent our environment. We add it as an
357+
* overlay node to our [TranslationUnitDeclaration].
358+
*/
359+
fun populateSystemInformation(
360+
config: TranslationConfiguration,
361+
tu: TranslationUnitDeclaration,
362+
): SystemInformation {
363+
var sysInfo =
364+
SystemInformation(
365+
platform = config.symbols["PYTHON_PLATFORM"],
366+
// We need to populate the version info "in-order", to ensure that we do not
367+
// set the micro version if minor and major are not set, i.e., there must not be a
368+
// "gap" in the granularity of version numbers
369+
versionInfo =
370+
config.symbols["PYTHON_VERSION_MAJOR"]?.toLong()?.let { major ->
371+
val minor = config.symbols["PYTHON_VERSION_MINOR"]?.toLong()
372+
val micro =
373+
if (minor != null) config.symbols["PYTHON_VERSION_MICRO"]?.toLong()
374+
else null
375+
VersionInfo(major, minor, micro)
376+
},
377+
)
378+
sysInfo.underlyingNode = tu
379+
return sysInfo
380+
}
381+
382+
/** Returns the system information overlay node from the [TranslationUnitDeclaration]. */
383+
val TranslationUnitDeclaration.sysInfo: SystemInformation?
384+
get() {
385+
return this.overlays.firstOrNull { it is SystemInformation } as? SystemInformation
386+
}
387+
337388
/**
338389
* This function maps Python's `ast` objects to our internal [Python] representation.
339390
*

0 commit comments

Comments
 (0)