Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Chopping and Dicing #925

Draft
wants to merge 11 commits into
base: main
Choose a base branch
from
20 changes: 19 additions & 1 deletion src/core/steps/all/static-slicing/10-reconstruct.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,11 @@ function processor(results: { normalize?: NormalizedAst, slice?: SliceResult },
return reconstructToCode(results.normalize as NormalizedAst, (results.slice as SliceResult).result, input.autoSelectIf)
}

export const NAIVE_RECONSTRUCT = {
/**
 * Processor of the reconstruct step when it runs after the `dice` step:
 * hands the normalized AST and the ids selected by the dice to {@link reconstructToCode}.
 *
 * @param results - Results of previous steps; `normalize` and `dice` are read and must be present.
 * @param input   - Reconstruction input; only `autoSelectIf` is forwarded.
 */
function diceProcessor(results: { normalize?: NormalizedAst, dice?: SliceResult }, input: Partial<ReconstructRequiredInput>) {
	const ast = results.normalize as NormalizedAst
	const { result: selectedIds } = results.dice as SliceResult
	return reconstructToCode(ast, selectedIds, input.autoSelectIf)
}

export const NAIVE_RECONSTRUCT_SLICED = {
name: 'reconstruct',
humanReadableName: 'static code reconstruction',
description: 'Reconstruct R code from the static slice',
Expand All @@ -27,3 +31,17 @@ export const NAIVE_RECONSTRUCT = {
dependencies: [ 'slice' ],
requiredInput: undefined as unknown as ReconstructRequiredInput
} as const satisfies DeepReadonly<IPipelineStep<'reconstruct', typeof processor>>

/**
 * Reconstruction step for dicing pipelines.
 * It carries the same step name (`'reconstruct'`) as the slice-based variant, but depends on the
 * `dice` step and therefore uses {@link diceProcessor}, which reads `results.dice`.
 */
export const NAIVE_RECONSTRUCT_DICED = {
	name:              'reconstruct',
	humanReadableName: 'static code reconstruction',
	description:       'Reconstruct R code from the static dice',

	processor: diceProcessor,
	executed:  PipelineStepStage.OncePerRequest,
	printer:   {
		[StepOutputFormat.Internal]: internalPrinter
	},
	dependencies:  [ 'dice' ],
	requiredInput: undefined as unknown as ReconstructRequiredInput
	// validate against the processor that is actually registered above (not the slice-based `processor`)
} as const satisfies DeepReadonly<IPipelineStep<'reconstruct', typeof diceProcessor>>
35 changes: 35 additions & 0 deletions src/core/steps/all/static-slicing/20-dicing.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import type { DeepReadonly } from 'ts-essentials'
import type { DataflowInformation } from '../../../../dataflow/info'
import type { NormalizedAst } from '../../../../r-bridge/lang-4.x/ast/model/processing/decorate'
import type { SlicingCriteria } from '../../../../slicing/criterion/parse'
import { PipelineStepStage } from '../../pipeline-step'
import type { IPipelineStep } from '../../pipeline-step'
import { internalPrinter, StepOutputFormat } from '../../../print/print'
import { staticDicing } from '../../../../slicing/static/dicer'

/**
 * Input consumed by the `dice` pipeline step.
 * The step's processor forwards all three fields to `staticDicing`.
 */
export interface DiceRequiredInput {
/**
 * Criteria at which the dice starts; forwarded to `staticDicing` as its start criteria.
 * Only of interest if you actually want to dice the R code.
 */
readonly startingCriterion: SlicingCriteria,

/** Criteria at which the dice ends; forwarded to `staticDicing` as its end criteria. */
readonly endCriterion: SlicingCriteria,

/** How many re-visits of the same node are ok? If omitted, the default of `staticDicing` applies. */
readonly threshold?: number
}

/**
 * Processor of the `dice` step: unpacks the dataflow graph and the normalized AST from the
 * previous results and delegates to {@link staticDicing}.
 *
 * @param results - Results of previous steps; `dataflow` and `normalize` are read and must be present.
 * @param input   - Dicing input; `endCriterion`, `startingCriterion` and `threshold` are forwarded.
 */
function processor(results: { dataflow?: DataflowInformation, normalize?: NormalizedAst }, input: Partial<DiceRequiredInput>) {
	const { graph } = results.dataflow as DataflowInformation
	const ast = results.normalize as NormalizedAst
	const endCriteria = input.endCriterion as SlicingCriteria
	const startCriteria = input.startingCriterion as SlicingCriteria
	// note the argument order of staticDicing: end criteria first, then start criteria
	return staticDicing(graph, ast, endCriteria, startCriteria, input.threshold)
}

/**
 * Pipeline step descriptor for static dicing.
 * Registered under the name `'dice'`, executed once per request, and backed by the local `processor`.
 */
export const STATIC_DICE = {
name: 'dice',
humanReadableName: 'static dice',
description: 'Calculate the actual static dice from the dataflow graph and the given slicing criteria',
processor,
executed: PipelineStepStage.OncePerRequest,
printer: {
[StepOutputFormat.Internal]: internalPrinter
},
// NOTE(review): the processor also reads `results.normalize`; presumably that is covered transitively via 'dataflow' — confirm
dependencies: [ 'dataflow' ],
// placeholder value; only the declared type is of interest here
requiredInput: undefined as unknown as DiceRequiredInput
} as const satisfies DeepReadonly<IPipelineStep<'dice', typeof processor>>
7 changes: 5 additions & 2 deletions src/core/steps/pipeline/default-pipelines.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,14 @@ import { PARSE_WITH_R_SHELL_STEP } from '../all/core/00-parse'
import { NORMALIZE } from '../all/core/10-normalize'
import { STATIC_DATAFLOW } from '../all/core/20-dataflow'
import { STATIC_SLICE } from '../all/static-slicing/00-slice'
import { NAIVE_RECONSTRUCT } from '../all/static-slicing/10-reconstruct'
import { NAIVE_RECONSTRUCT_DICED, NAIVE_RECONSTRUCT_SLICED } from '../all/static-slicing/10-reconstruct'
import { STATIC_DICE } from '../all/static-slicing/20-dicing'

export const DEFAULT_SLICING_PIPELINE = createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, STATIC_DATAFLOW, STATIC_SLICE, NAIVE_RECONSTRUCT)
export const DEFAULT_SLICING_PIPELINE = createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, STATIC_DATAFLOW, STATIC_SLICE, NAIVE_RECONSTRUCT_SLICED)
export const DEFAULT_SLICE_AND_RECONSTRUCT_PIPELINE = DEFAULT_SLICING_PIPELINE

/** Pipeline built from the parse, normalize, and dataflow steps, followed by {@link STATIC_DICE} and {@link NAIVE_RECONSTRUCT_DICED}. */
export const DEFAULT_DICING_PIPELINE = createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, STATIC_DATAFLOW, STATIC_DICE, NAIVE_RECONSTRUCT_DICED)

export const DEFAULT_DATAFLOW_PIPELINE = createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, STATIC_DATAFLOW)

export const DEFAULT_NORMALIZE_PIPELINE = createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE)
Expand Down
116 changes: 116 additions & 0 deletions src/slicing/static/dicer.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import { initializeCleanEnvironments } from '../../dataflow/environments/environment'
import { edgeIncludesType, EdgeType, shouldTraverseEdge, TraverseEdge } from '../../dataflow/graph/edge'
import type { DataflowGraph } from '../../dataflow/graph/graph'
import { VertexType } from '../../dataflow/graph/vertex'
import type { NormalizedAst } from '../../r-bridge/lang-4.x/ast/model/processing/decorate'
import type { NodeId } from '../../r-bridge/lang-4.x/ast/model/processing/node-id'
import { guard } from '../../util/assert'
import { expensiveTrace } from '../../util/log'
import { convertAllSlicingCriteriaToIds, type SlicingCriteria } from '../criterion/parse'
import { envFingerprint } from './fingerprint'
import { handleReturns, sliceForCall } from './slice-call'
import type { SliceResult } from './slicer-types'
import { slicerLogger, staticSlicing } from './static-slicer'
import { VisitingQueue } from './visiting-queue'



/**
 * Computes a static dice: the ids contained in both the backward slice for `endCriteria`
 * (via {@link staticSlicing}) and the forward slice for `startCriteria` (via {@link forwardSlicing}).
 *
 * @param graph         - Dataflow graph to traverse.
 * @param ast           - Normalized AST belonging to the graph.
 * @param endCriteria   - Criteria handed to the backward slice.
 * @param startCriteria - Criteria handed to the forward slice.
 * @param threshold     - Forwarded unchanged to both slicers (defaults to 75).
 * @returns The intersection of both slices; `timesHitThreshold` is the sum of both runs and
 *          `decodedCriteria` lists the backward criteria followed by the forward criteria.
 */
export function staticDicing(graph: DataflowGraph, ast: NormalizedAst, endCriteria: SlicingCriteria, startCriteria: SlicingCriteria, threshold = 75): Readonly<SliceResult> {
	const backward = staticSlicing(graph, ast, endCriteria, threshold)
	const forward = forwardSlicing(graph, ast, startCriteria, threshold)

	// keep the iteration order of the backward slice
	const sharedIds = new Set<NodeId>()
	for(const id of backward.result) {
		if(forward.result.has(id)) {
			sharedIds.add(id)
		}
	}

	return {
		timesHitThreshold: backward.timesHitThreshold + forward.timesHitThreshold,
		result:            sharedIds,
		decodedCriteria:   backward.decodedCriteria.concat(forward.decodedCriteria)
	}
}

/**
 * Worklist traversal that starts at the vertices selected by `criteria` and repeatedly follows
 * the entries returned by `graph.ingoingEdges` for each visited vertex.
 *
 * @param graph     - Dataflow graph to traverse.
 * @param ast       - Normalized AST; its `idMap` is used to decode the criteria and to look up node depths.
 * @param criteria  - Seed criteria; must contain at least one element (enforced by a guard).
 * @param threshold - Passed to the {@link VisitingQueue} (defaults to 75).
 * @returns The status reported by the visiting queue together with the decoded criteria.
 */
function forwardSlicing(graph: DataflowGraph, ast: NormalizedAst, criteria: SlicingCriteria, threshold = 75): Readonly<SliceResult> {
	const idMap = ast.idMap

	guard(criteria.length > 0, 'must have at least one seed id to calculate slice')
	const decodedCriteria = convertAllSlicingCriteriaToIds(criteria, idMap)
	expensiveTrace(slicerLogger,
		() => `calculating slice for ${decodedCriteria.length} seed criteria: ${decodedCriteria.map(s => JSON.stringify(s)).join(', ')}`
	)

	const queue = new VisitingQueue(threshold)

	let minDepth = Number.MAX_SAFE_INTEGER
	const sliceSeedIds = new Set<NodeId>()
	// every node ships the call environment which registers the calling environment
	{
		const emptyEnv = initializeCleanEnvironments()
		const basePrint = envFingerprint(emptyEnv)
		for(const { id: startId } of decodedCriteria) {
			queue.add(startId, emptyEnv, basePrint, false)
			// retrieve the minimum depth of all nodes to only add control dependencies if they are "part" of the current execution
			minDepth = Math.min(minDepth, idMap.get(startId)?.info.depth ?? minDepth)
			sliceSeedIds.add(startId)
		}
	}

	while(queue.nonEmpty()) {
		const current = queue.next()
		const { baseEnvironment, id, onlyForSideEffects } = current
		const baseEnvFingerprint = envFingerprint(baseEnvironment)

		const currentInfo = graph.get(id, true)
		if(currentInfo === undefined) {
			slicerLogger.warn(`id: ${id} must be in graph but can not be found, keep in slice to be sure`)
			continue
		}

		const [currentVertex, currentEdges] = currentInfo
		const ingoingEdges = graph.ingoingEdges(id)
		// NOTE(review): a vertex without ingoing edges also skips the control-dependency and call handling below — confirm this is intended
		if(ingoingEdges === undefined) {
			continue
		}

		// we only add control dependencies iff 1) we are in different function call or 2) they have, at least, the same depth as the slicing seed
		if(currentVertex.controlDependencies && currentVertex.controlDependencies.length > 0) {
			const topLevel = graph.isRoot(id) || sliceSeedIds.has(id)
			for(const cd of currentVertex.controlDependencies.filter(({ id }) => !queue.hasId(id))) {
				if(!topLevel || (idMap.get(cd.id)?.info.depth ?? 0) <= minDepth) {
					queue.add(cd.id, baseEnvironment, baseEnvFingerprint, false)
				}
			}
		}

		if(!onlyForSideEffects) {
			if(currentVertex.tag === VertexType.FunctionCall && !currentVertex.onlyBuiltin) {
				sliceForCall(current, currentVertex, graph, queue)
			}

			// NOTE(review): handleReturns receives the edges obtained from graph.get, not the ingoing edges — confirm this is the desired direction here
			const ret = handleReturns(queue, currentEdges, baseEnvFingerprint, baseEnvironment)
			if(ret) {
				continue
			}
		}

		for(const [target, { types }] of ingoingEdges) {
			// edges that include non-standard evaluation are never followed
			if(edgeIncludesType(types, EdgeType.NonStandardEvaluation)) {
				continue
			}
			const t = shouldTraverseEdge(types)
			if(t === TraverseEdge.Always) {
				queue.add(target, baseEnvironment, baseEnvFingerprint, false)
			} else if(t === TraverseEdge.DefinedByOnCall) {
				// only followed if the target was registered as a potential argument; the entry is consumed afterwards
				const n = queue.potentialArguments.get(target)
				if(n) {
					queue.add(target, n.baseEnvironment, envFingerprint(n.baseEnvironment), n.onlyForSideEffects)
					queue.potentialArguments.delete(target)
				}
			} else if(t === TraverseEdge.SideEffect) {
				queue.add(target, baseEnvironment, baseEnvFingerprint, true)
			}
		}
	}

	return { ...queue.status(), decodedCriteria }
}
45 changes: 43 additions & 2 deletions test/functionality/_helper/shell.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import { assert } from 'chai'
import { testRequiresRVersion } from './version'
import type { MergeableRecord } from '../../../src/util/objects'
import { deepMergeObject } from '../../../src/util/objects'
import { NAIVE_RECONSTRUCT } from '../../../src/core/steps/all/static-slicing/10-reconstruct'
import { NAIVE_RECONSTRUCT_SLICED } from '../../../src/core/steps/all/static-slicing/10-reconstruct'
import { guard } from '../../../src/util/assert'
import { PipelineExecutor } from '../../../src/core/pipeline-executor'
import type { TestLabel } from './label'
Expand All @@ -23,6 +23,7 @@ import {
} from '../../../src/r-bridge/lang-4.x/ast/model/processing/decorate'
import {
DEFAULT_DATAFLOW_PIPELINE,
DEFAULT_DICING_PIPELINE,
DEFAULT_NORMALIZE_PIPELINE, DEFAULT_SLICE_AND_RECONSTRUCT_PIPELINE
} from '../../../src/core/steps/pipeline/default-pipelines'
import type { RExpressionList } from '../../../src/r-bridge/lang-4.x/ast/model/nodes/r-expression-list'
Expand Down Expand Up @@ -308,7 +309,7 @@ export function assertReconstructed(name: string | TestLabel, shell: RShell, inp
request: requestFromInput(input),
shell
}).allRemainingSteps()
const reconstructed = NAIVE_RECONSTRUCT.processor({
const reconstructed = NAIVE_RECONSTRUCT_SLICED.processor({
normalize: result.normalize,
slice: {
decodedCriteria: [],
Expand Down Expand Up @@ -360,3 +361,43 @@ export function assertSliced(
handleAssertOutput(name, shell, input, userConfig)
return t
}

export function assertDiced(
name: string | TestLabel,
shell: RShell,
input: string,
startCriteria: SlicingCriteria,
endCriteria: SlicingCriteria,
expected: string,
userConfig?: Partial<TestConfigurationWithOutput> & { autoSelectIf?: AutoSelectPredicate },
getId: IdGenerator<NoInfo> = deterministicCountingIdGenerator(0)
): Mocha.Test {
const fullname = decorateLabelContext(name, ['slice'])
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a new dicing category/context for the tests.


const t = it(`${JSON.stringify(startCriteria)} to ${JSON.stringify(endCriteria)} ${fullname}`, async function() {
await ensureConfig(shell, this, userConfig)

console.log('input: %s\ncriteria: %s, %s', requestFromInput(input), JSON.stringify(startCriteria), JSON.stringify(endCriteria))
const result = await new PipelineExecutor(DEFAULT_DICING_PIPELINE,{
getId,
request: requestFromInput(input),
shell,
startingCriterion: startCriteria,
endCriterion: endCriteria,
autoSelectIf: userConfig?.autoSelectIf
}).allRemainingSteps()

try {
assert.strictEqual(
result.reconstruct.code, expected,
`got: ${result.reconstruct.code}, vs. expected: ${expected}, for input ${input} (slice for ${JSON.stringify(startCriteria)} to ${JSON.stringify(endCriteria)}`//: ${printIdMapping(result.slice.decodedCriteria.map(({ id }) => id), result.normalize.idMap)}), url: ${graphToMermaidUrl(result.dataflow.graph, true, result.slice.result)}`
)
} catch(e) {
console.error(`got:\n${result.reconstruct.code}\nvs. expected:\n${expected}`)
console.error(normalizedAstToMermaidUrl(result.normalize.ast))
throw e
}
})
handleAssertOutput(name, shell, input, userConfig)
return t
}
6 changes: 3 additions & 3 deletions test/functionality/pipelines/create/create-tests.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { allPermutations } from '../../../../src/util/arrays'
import { NORMALIZE } from '../../../../src/core/steps/all/core/10-normalize'
import { STATIC_DATAFLOW } from '../../../../src/core/steps/all/core/20-dataflow'
import { STATIC_SLICE } from '../../../../src/core/steps/all/static-slicing/00-slice'
import { NAIVE_RECONSTRUCT } from '../../../../src/core/steps/all/static-slicing/10-reconstruct'
import { NAIVE_RECONSTRUCT_SLICED } from '../../../../src/core/steps/all/static-slicing/10-reconstruct'
import { createPipeline } from '../../../../src/core/steps/pipeline/pipeline'

describe('Create Pipeline (includes dependency checks)', () => {
Expand Down Expand Up @@ -92,7 +92,7 @@ describe('Create Pipeline (includes dependency checks)', () => {
NORMALIZE,
STATIC_DATAFLOW,
STATIC_SLICE,
NAIVE_RECONSTRUCT
NAIVE_RECONSTRUCT_SLICED
], ['parse', 'normalize', 'dataflow', 'slice', 'reconstruct'], 3)
})
describe('with decorators', () => {
Expand Down Expand Up @@ -157,7 +157,7 @@ describe('Create Pipeline (includes dependency checks)', () => {
decorates: 'dataflow'
},
STATIC_SLICE,
NAIVE_RECONSTRUCT
NAIVE_RECONSTRUCT_SLICED
], ['parse', 'normalize', 'dataflow', 'dataflow-decorator', 'slice', 'reconstruct'], 4)
})
})
Expand Down
57 changes: 57 additions & 0 deletions test/functionality/slicing/dicing/simple-tests.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import { assertDiced, withShell } from '../../_helper/shell'
import type { SlicingCriteria } from '../../../../src/slicing/criterion/parse'
import type { TestLabel } from '../../_helper/label'
import { label } from '../../_helper/label'

// Dicing tests: every case hands a start criterion and an end criterion to `assertDiced`
// and compares the reconstructed code against the expected string.
// Do not use `describe.only` here, it would restrict the complete mocha run to this suite.
describe('Simple', withShell(shell => {
	describe('Base Dicing Cases', () => {
		const testcases: { label: TestLabel, input: string, endCriterion: SlicingCriteria, startCriterion: SlicingCriteria, expected: string }[]
			= [
				{ label: label('Simple Example for a', ['assignment-functions', 'binary-operator']), input: 'a <- 3\nb <- 4\nc <- a + b', endCriterion: ['3@c'] as SlicingCriteria, startCriterion: ['1@a'] as SlicingCriteria, expected: 'a <- 3\nc <- a + b' },
				{ label: label('Simple Example for b', ['assignment-functions', 'binary-operator']), input: 'a <- 3\nb <- 4\nc <- a + b', endCriterion: ['3@c'] as SlicingCriteria, startCriterion: ['2@b'] as SlicingCriteria, expected: 'b <- 4\nc <- a + b' },
				{ label: label('Extended Example', ['assignment-functions', 'binary-operator']), input: 'a <- 3\nb <- 4\nc <- a + b\nd <- 5\ne <- d + c', endCriterion: ['5@e'] as SlicingCriteria, startCriterion: ['4@d'] as SlicingCriteria, expected: 'd <- 5\ne <- d + c' },
				{ label: label('Multiple Start Points', ['assignment-functions', 'binary-operator']), input: 'a <- 3\nb <- 4\nc <- a + b\nd <- 5\ne <- d + c', endCriterion: ['5@e'] as SlicingCriteria, startCriterion: ['4@d', '3@c'] as SlicingCriteria, expected: 'c <- a + b\nd <- 5\ne <- d + c' },
				{ label: label('Multiple End Points', ['assignment-functions', 'binary-operator']), input: 'a <- 3\nb <- 4\nc <- a + b\nd <- b + 5\ne <- d + c', endCriterion: ['4@d', '3@c'] as SlicingCriteria, startCriterion: ['2@b'] as SlicingCriteria, expected: 'b <- 4\nc <- a + b\nd <- b + 5' },
			]

		for(const testcase of testcases) {
			assertDiced(testcase.label, shell, testcase.input, testcase.startCriterion, testcase.endCriterion, testcase.expected)
		}
	})

	describe('Dicing for Loops', () => {
		// the criteria below address this snippet by line number, so its line layout must stay as is
		const fibWhile = `x <- 1
y <- 1
i <- 0
while (i < 10) {
h <- x
x <- x + y
y <- h
i <- i + 1
}
cat(x)`

		assertDiced(label('Simple while', ['assignment-functions', 'binary-operator', 'while-loop']), shell, fibWhile, ['2@y'], ['10@x'], 'y <- 1\nwhile(i < 10) x <- x + y\nx')
		assertDiced(label('Complex while', ['assignment-functions', 'binary-operator', 'while-loop']), shell, fibWhile, ['2@y', '1@x'], ['10@x'], 'x <- 1\ny <- 1\nwhile(i < 10) x <- x + y\nx')
		assertDiced(label('End in while', ['assignment-functions', 'binary-operator', 'while-loop']), shell, fibWhile, ['1@x'], ['7@y'], 'x <- 1\nwhile(i < 10) {\n h <- x\n x <- x + y\n y <- h\n}')
		assertDiced(label('Start in while', ['assignment-functions', 'binary-operator', 'while-loop']), shell, fibWhile, ['6@x'], ['10@x'], 'while(i < 10) x <- x + y\nx')
		assertDiced(label('Dice in while', ['assignment-functions', 'binary-operator', 'while-loop']), shell, fibWhile, ['5@x'], ['7@y'], 'while(i < 10) {\n h <- x\n y <- h\n}')

	})

	describe('Dicing with functions', () => {
		// the criteria below address this snippet by line number, so its line layout must stay as is
		const code = `x <- function(a, b) {
y <- 10
y <- y + a
y * b
}
c <- 2
d <- 10
z <- x(d, c)`

		assertDiced(label('Simple function', ['assignment-functions', 'binary-operator', 'function-calls', 'function-definitions']), shell, code, ['6@c'], ['8@z'], 'x <- { y * b }\nc <- 2\nz <- x(d,c)')
		assertDiced(label('Start in function paramenter', ['assignment-functions', 'binary-operator', 'function-calls', 'function-definitions']), shell, code, ['1@a'], ['8@z'], 'x <- function(a, b) {\n y <- a\n y * b\n }\nx(d, c)')
		assertDiced(label('Start in function', ['assignment-functions', 'binary-operator', 'function-calls', 'function-definitions']), shell, code, ['3@a'], ['8@z'], 'y <- a\ny * b')
		assertDiced(label('Cuts out function parameter', ['assignment-functions', 'binary-operator', 'function-calls', 'function-definitions']), shell, code, ['1@x'], ['8@z'], 'x <- { y * b }\nx(d, c)')
	})
}))
3 changes: 3 additions & 0 deletions test/functionality/slicing/slicing.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,7 @@ describe('Slicing', () => {
describe('Slicing-Criterion', () => {
requireAllTestsInFolder(path.join(__dirname, 'slicing-criterion'))
})
describe('Dicing', () => {
requireAllTestsInFolder(path.join(__dirname, 'dicing'))
})
})
Loading