Skip to content

Commit

Permalink
Type inference (#233)
Browse files Browse the repository at this point in the history
  • Loading branch information
Lipen authored Dec 27, 2024
1 parent b6daa22 commit c599ea7
Show file tree
Hide file tree
Showing 95 changed files with 10,819 additions and 226 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build-and-run-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ jobs:
DEST_DIR="arkanalyzer"
MAX_RETRIES=10
RETRY_DELAY=3 # Delay between retries in seconds
BRANCH="neo/2024-08-07"
BRANCH="neo/2024-12-04"
for ((i=1; i<=MAX_RETRIES; i++)); do
git clone --depth=1 --branch $BRANCH $REPO_URL $DEST_DIR && break
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,6 @@ buildSrc/.gradle
# Ignore Python execution cache
__pycache__/
run_python_with_gdb.sh

# Ignore Kotlin build directory
.kotlin/
3 changes: 1 addition & 2 deletions buildSrc/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,8 @@ repositories {
maven("https://jitpack.io")
}


dependencies {
implementation("org.jetbrains.kotlin:kotlin-gradle-plugin:$kotlinVersion")
implementation("io.gitlab.arturbosch.detekt:detekt-gradle-plugin:$detektVersion")
implementation("org.glavo:gjavah:$gjavahVersion")
}
}
1 change: 0 additions & 1 deletion buildSrc/settings.gradle.kts
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
rootProject.name="usvm-conventions"

36 changes: 22 additions & 14 deletions buildSrc/src/main/kotlin/Dependencies.kt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
import org.gradle.plugin.use.PluginDependenciesSpec

object Versions {
const val detekt = "1.18.1"
const val clikt = "5.0.0"
const val detekt = "1.23.7"
const val ini4j = "0.5.4"
const val jacodb = "ad5e1f170e"
const val juliet = "1.3.2"
Expand All @@ -18,7 +19,7 @@ object Versions {
const val mockk = "1.13.4"
const val rd = "2023.2.0"
const val sarif4k = "0.5.0"
const val shadow = "8.1.1"
const val shadow = "8.3.3"
const val slf4j = "1.6.1"

// versions for jvm samples
Expand Down Expand Up @@ -121,6 +122,11 @@ object Libs {
name = "jacodb-core",
version = Versions.jacodb
)
val jacodb_api_common = dep(
group = jacodbPackage,
name = "jacodb-api-common",
version = Versions.jacodb
)
val jacodb_api_jvm = dep(
group = jacodbPackage,
name = "jacodb-api-jvm",
Expand All @@ -136,16 +142,6 @@ object Libs {
name = "jacodb-storage",
version = Versions.jacodb
)
val jacodb_ets = dep(
group = jacodbPackage,
name = "jacodb-ets",
version = Versions.jacodb
)
val jacodb_api_common = dep(
group = jacodbPackage,
name = "jacodb-api-common",
version = Versions.jacodb
)
val jacodb_approximations = dep(
group = jacodbPackage,
name = "jacodb-approximations",
Expand All @@ -156,6 +152,11 @@ object Libs {
name = "jacodb-taint-configuration",
version = Versions.jacodb
)
val jacodb_ets = dep(
group = jacodbPackage,
name = "jacodb-ets",
version = Versions.jacodb
)

// https://github.com/Kotlin/kotlinx.coroutines
val kotlinx_coroutines_core = dep(
Expand Down Expand Up @@ -240,6 +241,13 @@ object Libs {
name = "PythonTypesAPI",
version = Versions.pythonTypesAPI
)

// https://github.com/ajalt/clikt
val clikt = dep(
group = "com.github.ajalt.clikt",
name = "clikt",
version = Versions.clikt
)
}

object Plugins {
Expand All @@ -258,9 +266,9 @@ object Plugins {
version = Versions.rd
)

// https://github.com/johnrengelman/shadow
// https://github.com/GradleUp/shadow
object Shadow : ProjectPlugin(
id = "com.github.johnrengelman.shadow",
id = "com.gradleup.shadow",
version = Versions.shadow
)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,4 +127,4 @@ fun Project.registerCPythonDistClean(): TaskProvider<Exec> {
commandLine(windowsBuildScript.canonicalPath, "-t", "CleanAll")
}
}
}
}
2 changes: 1 addition & 1 deletion buildSrc/src/main/kotlin/usvmpython/tasks/JNIHeaderTask.kt
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ fun Project.generateJNIForCPythonAdapterTask() {
}
task.addClass(CPYTHON_ADAPTER_CLASS)
task.run()
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,4 @@ fun Project.registerBuildSamplesTask(): TaskProvider<JavaExec> {
environment("PYTHONHOME" to cpythonBuildPath)
mainClass.set(BUILD_SAMPLES_ENTRY_POINT)
}
}
}
Binary file modified gradle/wrapper/gradle-wrapper.jar
Binary file not shown.
2 changes: 1 addition & 1 deletion gradle/wrapper/gradle-wrapper.properties
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-8.6-bin.zip
distributionUrl=https\://services.gradle.org/distributions/gradle-8.11-all.zip
networkTimeout=10000
validateDistributionUrl=true
zipStoreBase=GRADLE_USER_HOME
Expand Down
7 changes: 5 additions & 2 deletions gradlew
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
#

##############################################################################
#
Expand Down Expand Up @@ -55,7 +57,7 @@
# Darwin, MinGW, and NonStop.
#
# (3) This script is generated from the Groovy template
# https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
# https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
# within the Gradle project.
#
# You can find Gradle at https://github.com/gradle/gradle/.
Expand Down Expand Up @@ -84,7 +86,8 @@ done
# shellcheck disable=SC2034
APP_BASE_NAME=${0##*/}
# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit
APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s\n' "$PWD" ) || exit

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD=maximum
Expand Down
2 changes: 2 additions & 0 deletions gradlew.bat
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem
@rem SPDX-License-Identifier: Apache-2.0
@rem

@if "%DEBUG%"=="" @echo off
@rem ##########################################################################
Expand Down
6 changes: 6 additions & 0 deletions settings.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ include("usvm-jvm-instrumentation")
include("usvm-sample-language")
include("usvm-dataflow")
include("usvm-jvm-dataflow")
include("usvm-dataflow-ts")

include("usvm-python")
include("usvm-python:cpythonadapter")
Expand All @@ -21,6 +22,11 @@ findProject(":usvm-python:usvm-python-runner")?.name = "usvm-python-runner"
include("usvm-python:usvm-python-commons")
findProject(":usvm-python:usvm-python-commons")?.name = "usvm-python-commons"

// Actually, `includeBuild("../jacodb")` is enough, but there is a bug in IDEA when path is a symlink.
// As a workaround, we convert it to a real absolute path.
// See IDEA bug: https://youtrack.jetbrains.com/issue/IDEA-329756
// includeBuild(file("../jacodb").toPath().toRealPath().toAbsolutePath())

pluginManagement {
resolutionStrategy {
eachPlugin {
Expand Down
1 change: 1 addition & 0 deletions usvm-dataflow-ts/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
logs/
167 changes: 167 additions & 0 deletions usvm-dataflow-ts/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
# USVM Dataflow TS

## Type Inference

In order to run type inference on an arbitrary TypeScript project, you need the following:
1. IR dumped into JSON files: either from TS sources or from
binary ABC/HAP files.
2. USVM with type inference CLI: `usvm-dataflow-ts-all.jar` "fat" JAR.

**NOTE:** the instructions below are given for Linux. If you are using Windows, you need to adjust the paths and commands accordingly, or use WSL. Overall, the process should be similar, and USVM should work on any platform that supports Java.

### ArkTS IR

- Below, we use the term "ArkIR" to refer to the representation of ArkTS inside ArkAnalyzer in the form of TypeScript classes and interfaces, such as `ArkMethod`, `ArkAssignStmt`, `ArkInstanceInvokeExpr`.

- In USVM, we also have a similar model of representing ArkTS, but in the form of Java/Kotlin classes. In order to differentiate between the two models, we use the prefix "Ets" for the classes in USVM, such as `EtsMethod`, `EtsAssignStmt`, `EtsInstanceCallExpr`.

### Setup ArkAnalyzer

First of all, you need to clone the ArkAnalyzer repo. Here, we use a fork of the repo and a specific branch (named `neo/<DATE>`) that is consistent with USVM internals. Note that this branch might change in the future.
```bash
cd ~
git clone -b neo/2024-10-31 https://gitee.com/Lipenx/arkanalyzer arkanalyzer-usvm
cd arkanalyzer-usvm
```

Then, you need to install the dependencies and build the project.
```bash
npm install
npm run build
```

**Note:** after building the ArkAnalyzer project, the script for serializing ArkIR will be located at `out/src/save/serializeArkIR.js` and can be run with Node.js.

**Note:** you can also run the TS script directly with `npx ts-node src/save/serializeArkIR.ts` instead of building the whole project.

### Serialize ArkIR to JSON

Now, you can run the `serializeArkIR` script on your TS project in order to construct its ArkIR representation and dump it into JSON files, which can later be used by USVM.
```bash
node ~/arkanalyzer-usvm/out/src/save/serializeArkIR.js --help
```
```text
Usage: serializeArkIR [options] <input> <output>
Serialize ArkIR for TypeScript files or projects to JSON
Arguments:
input Input file or directory
output Output file or directory
Options:
-p, --project Flag to indicate the input is a project directory (default: false)
-t, --infer-types [times] Infer types in the ArkIR
-v, --verbose Verbose output (default: false)
-h, --help display help for command
```

If you have a single TS file `sample.ts`, just run the following command:
```bash
node .../serializeArkIR.js sample.ts sample.json
```
The resulting `sample.json` file will contain the ArkIR in JSON format.

If you have a TS project in the `project` directory, use `-p` flag:
```bash
node .../serializeArkIR.js -p project etsir
```
The resulting `etsir` directory will contain the ArkIR in JSON format. The structure of the resulting directory (hierarchy of subfolders) will be the same as the structure of the input project, but all the files will be `*.ts.json`.

_Note:_ We call the result "EtsIR" since it is a modified version of the ArkIR model suitable for serialization. When we load IR from JSONs in USVM (Java/Kotlin), the resulting data model (structure of classes) is very similar to ArkIR in ArkAnalyzer (TypeScript), but has some minor differences. The term "EtsIR" is used to distinguish between the two.

If you have a TS project with multiple modules, run the serialization for each module separately:
```bash
node .../serializeArkIR.js -p project/entry etsir/entry
node .../serializeArkIR.js -p project/common etsir/common
node .../serializeArkIR.js -p project/feature etsir/feature
```

### Type Inference with USVM

In order to run USVM type inference, you need to obtain the `usvm-dataflow-ts-all.jar` "fat" JAR (download or build it yourself) and either use it directly or use the wrapper script `src/usvm/inferTypes.ts` from the ArkAnalyzer repo.

#### Build `usvm-type-inference` binary

In order to build the USVM binary, you need to clone the USVM repo (and also its dependency `jacodb` in the _sibling directory_) and build the project using Gradle.
```bash
cd ~
git clone -b lipen/usvm-type-inference https://github.com/UnitTestBot/jacodb
git clone -b lipen/type-inference https://github.com/UnitTestBot/usvm
cd usvm
./gradlew :usvm-dataflow-ts:installDist
```
The last command will build the project and create the binary at `usvm-dataflow-ts/build/install/usvm-dataflow-ts/bin/usvm-type-inference` (on Windows, the corresponding "binary" has the `.bat` extension).

#### Build "Fat" JAR

Alternatively, you can build the "fat" JAR (also known as "Uber JAR" or "shadow JAR") that contains all the dependencies.
```bash
./gradlew :usvm-dataflow-ts:shadowJar
```

#### Run Type Inference

You can run the type inference manually using USVM CLI:
```bash
usvm-dataflow-ts/build/install/usvm-dataflow-ts/bin/usvm-type-inference --help
# OR
java -jar usvm-dataflow-ts/build/libs/usvm-dataflow-ts-all.jar --help
```
```text
Usage: infer-types [<options>]
Options:
* -i, --input=<path> Input file or directory with IR (required)
* -o, --output=<path> Output file with inferred types in JSON format (required)
-h, --help Show this message and exit
```

_Note:_ the `-i` option can be supplied multiple times for multi-module projects. All input IR will be merged.

For example, if you have the `etsir/entry` and `etsir/common` directories with the dumped ArkIR (as produced by the serialization step above), you can run the following command:
```bash
java -jar usvm-dataflow-ts/build/libs/usvm-dataflow-ts-all.jar -i etsir/entry -i etsir/common -o inferred.json
```
```

### Type Inference with Wrapper Script

You can also use the wrapper script `src/usvm/inferTypes.ts` from the ArkAnalyzer repo. This script will run the serialization of ArkIR and type inference with USVM in a single command.

```bash
node ~/arkanalyzer-usvm/out/src/usvm/inferTypes.js --help
```
```text
Usage: inferTypes [options] <input>
Arguments:
input input directory with ETS project
Options:
-v, --verbose Verbose output (default: false)
-t, --aa-types Run type inference in ArkAnalyzer (default: false)
-s, --substitute Substitute inferred types (default: false)
-h, --help display help for command
```

For example:
```bash
node .../inferTypes.js myproject/entry
```
```text
Building scene...
Serializing Scene to '/tmp/2f8aa8b34548b808167a8f6b30121dcc/etsir'...
...
USVM command: ~/usvm/usvm-dataflow-ts/build/install/usvm-dataflow-ts/bin/usvm-type-inference --input=/tmp/2f8aa8b34548b808167a8f6b30121dcc/etsir --output=/tmp/2f8aa8b34548b808167a8f6b30121dcc/inference-result --no-skip-anonymous
...
=== Inferred Types Statistics ===
Total Classes: 10
Total Methods: 305
...
Deserialization successful.
...
Substituting inferred types...
...
Substituting type of local '$temp16' in method '@entry/model/Calculator.ts: _DEFAULT_ARK_CLASS.getFloatNum(unknown, unknown, unknown)' from unknown to number
...
```
Loading

0 comments on commit c599ea7

Please sign in to comment.