flowr-analysis · EagleoutIce · Aug 29, 2024 · Aug 29, 2024 · Aug 30, 2024 · Aug 30, 2024
diff --git a/src/core/steps/all/static-slicing/10-reconstruct.ts b/src/core/steps/all/static-slicing/10-reconstruct.ts
@@ -15,7 +15,11 @@ function processor(results: { normalize?: NormalizedAst, slice?: SliceResult },
 	return reconstructToCode(results.normalize as NormalizedAst, (results.slice as SliceResult).result, input.autoSelectIf)
 }
 
-export const NAIVE_RECONSTRUCT = {
+/** Dicing counterpart of `processor`: reconstructs code from the `result` of the `dice` step. */
+function diceProcessor({ normalize, dice }: { normalize?: NormalizedAst, dice?: SliceResult }, { autoSelectIf }: Partial<ReconstructRequiredInput>) {
+	return reconstructToCode(normalize as NormalizedAst, (dice as SliceResult).result, autoSelectIf)
+}
+export const NAIVE_RECONSTRUCT_SLICED = {
 	name:              'reconstruct',
 	humanReadableName: 'static code reconstruction',
 	description:       'Reconstruct R code from the static slice',
@@ -27,3 +31,17 @@ export const NAIVE_RECONSTRUCT = {
 	dependencies:  [ 'slice' ],
 	requiredInput: undefined as unknown as ReconstructRequiredInput
 } as const satisfies DeepReadonly<IPipelineStep<'reconstruct', typeof processor>>
+
+/** Variant of the `reconstruct` step that consumes the result of the `dice` step instead of `slice`. */
+export const NAIVE_RECONSTRUCT_DICED = {
+	name:              'reconstruct',
+	humanReadableName: 'static code reconstruction',
+	description:       'Reconstruct R code from the static dice',
+	processor:         diceProcessor,
+	executed:          PipelineStepStage.OncePerRequest,
+	printer:           {
+		[StepOutputFormat.Internal]: internalPrinter
+	},
+	dependencies:  [ 'dice' ],
+	requiredInput: undefined as unknown as ReconstructRequiredInput
+} as const satisfies DeepReadonly<IPipelineStep<'reconstruct', typeof diceProcessor>>
diff --git a/src/core/steps/all/static-slicing/20-dicing.ts b/src/core/steps/all/static-slicing/20-dicing.ts
@@ -0,0 +1,35 @@
+import type { DeepReadonly } from 'ts-essentials'
+import type { DataflowInformation } from '../../../../dataflow/info'
+import type { NormalizedAst } from '../../../../r-bridge/lang-4.x/ast/model/processing/decorate'
+import type { SlicingCriteria } from '../../../../slicing/criterion/parse'
+import { PipelineStepStage } from '../../pipeline-step'
+import type { IPipelineStep } from '../../pipeline-step'
+import { internalPrinter, StepOutputFormat } from '../../../print/print'
+import { staticDicing } from '../../../../slicing/static/dicer'
+
+/** Input required by the `dice` step. */
+export interface DiceRequiredInput {
+	/** Where the dice starts: these criteria seed the forward slice */
+	readonly startingCriterion: SlicingCriteria,
+	/** Where the dice ends: these criteria seed the backward slice */
+	readonly endCriterion: SlicingCriteria,
+	/** How many re-visits of the same node are ok? */
+	readonly threshold?: number
+}
+
+/** Computes the dice via `staticDicing`; mind that it takes the end criteria before the starting criteria. */
+function processor({ dataflow, normalize }: { dataflow?: DataflowInformation, normalize?: NormalizedAst }, { endCriterion, startingCriterion, threshold }: Partial<DiceRequiredInput>) {
+	return staticDicing((dataflow as DataflowInformation).graph, normalize as NormalizedAst, endCriterion as SlicingCriteria, startingCriterion as SlicingCriteria, threshold)
+}
+export const STATIC_DICE = { // pipeline step that produces the `dice` result
+	name:              'dice',
+	humanReadableName: 'static dice',
+	description:       'Calculate the actual static dice from the dataflow graph and the given slicing criteria',
+	processor,
+	executed:          PipelineStepStage.OncePerRequest,
+	printer:           {
+		[StepOutputFormat.Internal]: internalPrinter // only the internal output format has a printer
+	},
+	dependencies:  [ 'dataflow' ], // NOTE(review): `processor` reads `normalize` as well; presumably covered transitively by 'dataflow' — confirm
+	requiredInput: undefined as unknown as DiceRequiredInput // placeholder that only carries the type; presumably never read as a value — confirm
+} as const satisfies DeepReadonly<IPipelineStep<'dice', typeof processor>>
diff --git a/src/core/steps/pipeline/default-pipelines.ts b/src/core/steps/pipeline/default-pipelines.ts
@@ -6,11 +6,14 @@ import { PARSE_WITH_R_SHELL_STEP } from '../all/core/00-parse'
 import { NORMALIZE } from '../all/core/10-normalize'
 import { STATIC_DATAFLOW } from '../all/core/20-dataflow'
 import { STATIC_SLICE } from '../all/static-slicing/00-slice'
-import { NAIVE_RECONSTRUCT } from '../all/static-slicing/10-reconstruct'
+import { NAIVE_RECONSTRUCT_DICED, NAIVE_RECONSTRUCT_SLICED } from '../all/static-slicing/10-reconstruct'
+import { STATIC_DICE } from '../all/static-slicing/20-dicing'
 
-export const DEFAULT_SLICING_PIPELINE = createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, STATIC_DATAFLOW, STATIC_SLICE, NAIVE_RECONSTRUCT)
+export const DEFAULT_SLICING_PIPELINE = createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, STATIC_DATAFLOW, STATIC_SLICE, NAIVE_RECONSTRUCT_SLICED)
 export const DEFAULT_SLICE_AND_RECONSTRUCT_PIPELINE = DEFAULT_SLICING_PIPELINE
 
+export const DEFAULT_DICING_PIPELINE = createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, STATIC_DATAFLOW, STATIC_DICE, NAIVE_RECONSTRUCT_DICED) // same steps as the slicing pipeline, but with the dice step and the reconstruction that consumes it
+
 export const DEFAULT_DATAFLOW_PIPELINE = createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, STATIC_DATAFLOW)
 
 export const DEFAULT_NORMALIZE_PIPELINE = createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE)

diff --git a/src/slicing/static/dicer.ts b/src/slicing/static/dicer.ts
@@ -0,0 +1,116 @@
+import { initializeCleanEnvironments } from '../../dataflow/environments/environment'
+import { edgeIncludesType, EdgeType, shouldTraverseEdge, TraverseEdge } from '../../dataflow/graph/edge'
+import type { DataflowGraph } from '../../dataflow/graph/graph'
+import { VertexType } from '../../dataflow/graph/vertex'
+import type { NormalizedAst } from '../../r-bridge/lang-4.x/ast/model/processing/decorate'
+import type { NodeId } from '../../r-bridge/lang-4.x/ast/model/processing/node-id'
+import { guard } from '../../util/assert'
+import { expensiveTrace } from '../../util/log'
+import { convertAllSlicingCriteriaToIds, type SlicingCriteria } from '../criterion/parse'
+import { envFingerprint } from './fingerprint'
+import { handleReturns, sliceForCall } from './slice-call'
+import type { SliceResult } from './slicer-types'
+import { slicerLogger, staticSlicing } from './static-slicer'
+import { VisitingQueue } from './visiting-queue'
+
+
+
+/** Static dice: the ids contained both in the backward slice for `endCriteria` and in the forward slice for `startCriteria`. */
+export function staticDicing(graph: DataflowGraph, ast: NormalizedAst, endCriteria: SlicingCriteria, startCriteria: SlicingCriteria, threshold = 75):  Readonly<SliceResult> {
+	const backward = staticSlicing(graph, ast, endCriteria, threshold)
+	const forward = forwardSlicing(graph, ast, startCriteria, threshold)
+	// the dice is the intersection of both slices, kept in the iteration order of the backward slice
+	const inBoth = new Set([...backward.result].filter(id => forward.result.has(id)))
+	const hits = backward.timesHitThreshold + forward.timesHitThreshold
+	return { timesHitThreshold: hits, result: inBoth, decodedCriteria: backward.decodedCriteria.concat(forward.decodedCriteria) }
+}
+
+/**
+ * Forward counterpart to the static slicer: seeded with the (non-empty) `criteria`, it walks the *ingoing*
+ * edges of every vertex taken from the {@link VisitingQueue} (created with `threshold`) and returns its status.
+ */
+function forwardSlicing(graph: DataflowGraph, ast: NormalizedAst, criteria: SlicingCriteria, threshold = 75): Readonly<SliceResult> {
+	const idMap = ast.idMap
+
+	guard(criteria.length > 0, 'must have at least one seed id to calculate slice')
+	const decodedCriteria = convertAllSlicingCriteriaToIds(criteria, idMap)
+	expensiveTrace(slicerLogger,
+		() => `calculating slice for ${decodedCriteria.length} seed criteria: ${decodedCriteria.map(s => JSON.stringify(s)).join(', ')}`
+	)
+
+	const queue = new VisitingQueue(threshold)
+
+	let minDepth = Number.MAX_SAFE_INTEGER
+	const sliceSeedIds = new Set<NodeId>()
+	// every node ships the call environment which registers the calling environment
+	{
+		const emptyEnv = initializeCleanEnvironments()
+		const basePrint = envFingerprint(emptyEnv)
+		for(const { id: startId } of decodedCriteria) {
+			queue.add(startId, emptyEnv, basePrint, false)
+			// retrieve the minimum depth of all nodes to only add control dependencies if they are "part" of the current execution
+			minDepth = Math.min(minDepth, idMap.get(startId)?.info.depth ?? minDepth)
+			sliceSeedIds.add(startId)
+		}
+	}
+
+	while(queue.nonEmpty()) {
+		const current = queue.next()
+		const { baseEnvironment, id, onlyForSideEffects } = current
+		const baseEnvFingerprint = envFingerprint(baseEnvironment)
+
+		const currentInfo = graph.get(id, true)
+		if(currentInfo === undefined) {
+			slicerLogger.warn(`id: ${id} must be in graph but can not be found, keep in slice to be sure`)
+			continue
+		}
+
+		const [currentVertex, currentEdges] = currentInfo
+		const ingoingEdges = graph.ingoingEdges(id)
+		if(ingoingEdges === undefined) {
+			continue
+		}
+
+		// we only add control dependencies iff 1) we are in different function call or 2) they have, at least, the same depth as the slicing seed
+		if(currentVertex.controlDependencies && currentVertex.controlDependencies.length > 0) {
+			const topLevel = graph.isRoot(id) || sliceSeedIds.has(id)
+			for(const cd of currentVertex.controlDependencies.filter(({ id }) => !queue.hasId(id))) {
+				if(!topLevel || (idMap.get(cd.id)?.info.depth ?? 0) <= minDepth) {
+					queue.add(cd.id, baseEnvironment, baseEnvFingerprint, false)
+				}
+			}
+		}
+
+		if(!onlyForSideEffects) {
+			if(currentVertex.tag === VertexType.FunctionCall && !currentVertex.onlyBuiltin) {
+				sliceForCall(current, currentVertex, graph, queue)
+			}
+
+			const ret = handleReturns(queue, currentEdges, baseEnvFingerprint, baseEnvironment)
+			if(ret) {
+				continue
+			}
+		}
+
+		// the keys of `ingoingEdges` are presumably the sources of the edges that point at the current vertex
+		for(const [source, { types }] of ingoingEdges) {
+			if(edgeIncludesType(types, EdgeType.NonStandardEvaluation)) {
+				continue
+			}
+			const t = shouldTraverseEdge(types)
+			if(t === TraverseEdge.Always) {
+				queue.add(source, baseEnvironment, baseEnvFingerprint, false)
+			} else if(t === TraverseEdge.DefinedByOnCall) {
+				const n = queue.potentialArguments.get(source)
+				if(n) {
+					queue.add(source, n.baseEnvironment, envFingerprint(n.baseEnvironment), n.onlyForSideEffects)
+					queue.potentialArguments.delete(source)
+				}
+			} else if(t === TraverseEdge.SideEffect) {
+				queue.add(source, baseEnvironment, baseEnvFingerprint, true)
+			}
+		}
+	}
+
+	return { ...queue.status(), decodedCriteria }
+}
diff --git a/test/functionality/_helper/shell.ts b/test/functionality/_helper/shell.ts
@@ -4,7 +4,7 @@ import { assert } from 'chai'
 import { testRequiresRVersion } from './version'
 import type { MergeableRecord } from '../../../src/util/objects'
 import { deepMergeObject } from '../../../src/util/objects'
-import { NAIVE_RECONSTRUCT } from '../../../src/core/steps/all/static-slicing/10-reconstruct'
+import { NAIVE_RECONSTRUCT_SLICED } from '../../../src/core/steps/all/static-slicing/10-reconstruct'
 import { guard } from '../../../src/util/assert'
 import { PipelineExecutor } from '../../../src/core/pipeline-executor'
 import type { TestLabel } from './label'
@@ -23,6 +23,7 @@ import {
 } from '../../../src/r-bridge/lang-4.x/ast/model/processing/decorate'
 import {
 	DEFAULT_DATAFLOW_PIPELINE,
+	DEFAULT_DICING_PIPELINE,
 	DEFAULT_NORMALIZE_PIPELINE, DEFAULT_SLICE_AND_RECONSTRUCT_PIPELINE
 } from '../../../src/core/steps/pipeline/default-pipelines'
 import type { RExpressionList } from '../../../src/r-bridge/lang-4.x/ast/model/nodes/r-expression-list'
@@ -308,7 +309,7 @@ export function assertReconstructed(name: string | TestLabel, shell: RShell, inp
 			request: requestFromInput(input),
 			shell
 		}).allRemainingSteps()
-		const reconstructed = NAIVE_RECONSTRUCT.processor({
+		const reconstructed = NAIVE_RECONSTRUCT_SLICED.processor({
 			normalize: result.normalize,
 			slice:     {
 				decodedCriteria:   [],
@@ -360,3 +361,43 @@ export function assertSliced(
 	handleAssertOutput(name, shell, input, userConfig)
 	return t
 }
+
+/** Runs the default dicing pipeline on `input` and asserts that the code reconstructed for the dice from `startCriteria` to `endCriteria` equals `expected`. */
+export function assertDiced(
+	name: string | TestLabel,
+	shell: RShell,
+	input: string,
+	startCriteria: SlicingCriteria,
+	endCriteria: SlicingCriteria,
+	expected: string,
+	userConfig?: Partial<TestConfigurationWithOutput> & { autoSelectIf?: AutoSelectPredicate },
+	getId: IdGenerator<NoInfo> = deterministicCountingIdGenerator(0)
+): Mocha.Test {
+	const fullname = decorateLabelContext(name, ['slice'])
+
+	const t = it(`${JSON.stringify(startCriteria)} to ${JSON.stringify(endCriteria)} ${fullname}`, async function() {
+		await ensureConfig(shell, this, userConfig)
+
+		const result = await new PipelineExecutor(DEFAULT_DICING_PIPELINE, {
+			getId,
+			request:           requestFromInput(input),
+			shell,
+			startingCriterion: startCriteria,
+			endCriterion:      endCriteria,
+			autoSelectIf:      userConfig?.autoSelectIf
+		}).allRemainingSteps()
+
+		try {
+			assert.strictEqual(
+				result.reconstruct.code, expected,
+				`got: ${result.reconstruct.code}, vs. expected: ${expected}, for input ${input} (slice for ${JSON.stringify(startCriteria)} to ${JSON.stringify(endCriteria)})`
+			)
+		} catch(e) {
+			console.error(`got:\n${result.reconstruct.code}\nvs. expected:\n${expected}`)
+			console.error(normalizedAstToMermaidUrl(result.normalize.ast))
+			throw e
+		}
+	})
+	handleAssertOutput(name, shell, input, userConfig)
+	return t
+}
diff --git a/test/functionality/pipelines/create/create-tests.ts b/test/functionality/pipelines/create/create-tests.ts
@@ -5,7 +5,7 @@ import { allPermutations } from '../../../../src/util/arrays'
 import { NORMALIZE } from '../../../../src/core/steps/all/core/10-normalize'
 import { STATIC_DATAFLOW } from '../../../../src/core/steps/all/core/20-dataflow'
 import { STATIC_SLICE } from '../../../../src/core/steps/all/static-slicing/00-slice'
-import { NAIVE_RECONSTRUCT } from '../../../../src/core/steps/all/static-slicing/10-reconstruct'
+import { NAIVE_RECONSTRUCT_SLICED } from '../../../../src/core/steps/all/static-slicing/10-reconstruct'
 import { createPipeline } from '../../../../src/core/steps/pipeline/pipeline'
 
 describe('Create Pipeline (includes dependency checks)', () => {
@@ -92,7 +92,7 @@ describe('Create Pipeline (includes dependency checks)', () => {
 				NORMALIZE,
 				STATIC_DATAFLOW,
 				STATIC_SLICE,
-				NAIVE_RECONSTRUCT
+				NAIVE_RECONSTRUCT_SLICED
 			], ['parse', 'normalize', 'dataflow', 'slice', 'reconstruct'], 3)
 		})
 		describe('with decorators', () => {
@@ -157,7 +157,7 @@ describe('Create Pipeline (includes dependency checks)', () => {
 					decorates: 'dataflow'
 				},
 				STATIC_SLICE,
-				NAIVE_RECONSTRUCT
+				NAIVE_RECONSTRUCT_SLICED
 			], ['parse', 'normalize', 'dataflow', 'dataflow-decorator',  'slice', 'reconstruct'], 4)
 		})
 	})

diff --git a/test/functionality/slicing/dicing/simple-tests.ts b/test/functionality/slicing/dicing/simple-tests.ts
@@ -0,0 +1,57 @@
+import { assertDiced, withShell } from '../../_helper/shell'
+import type { SlicingCriteria } from '../../../../src/slicing/criterion/parse'
+import type { TestLabel } from '../../_helper/label'
+import { label } from '../../_helper/label'
+
+// criteria are written as <line>@<name>; note that assertDiced takes the start criteria before the end criteria
+describe('Simple', withShell(shell => {
+	describe('Base Dicing Cases', () => {
+		const testcases: { label: TestLabel, input: string, endCriterion: SlicingCriteria, startCriterion: SlicingCriteria, expected: string }[]
+		= [
+			{ label: label('Simple Example for a', ['assignment-functions', 'binary-operator']), input: 'a <- 3\nb <- 4\nc <- a + b', endCriterion: ['3@c'] as SlicingCriteria, startCriterion: ['1@a'] as SlicingCriteria, expected: 'a <- 3\nc <- a + b' },
+			{ label: label('Simple Example for b', ['assignment-functions', 'binary-operator']), input: 'a <- 3\nb <- 4\nc <- a + b', endCriterion: ['3@c'] as SlicingCriteria, startCriterion: ['2@b'] as SlicingCriteria, expected: 'b <- 4\nc <- a + b' },
+			{ label: label('Extended Example', ['assignment-functions', 'binary-operator']), input: 'a <- 3\nb <- 4\nc <- a + b\nd <- 5\ne <- d + c', endCriterion: ['5@e'] as SlicingCriteria, startCriterion: ['4@d'] as SlicingCriteria, expected: 'd <- 5\ne <- d + c' },
+			{ label: label('Multiple Start Points', ['assignment-functions', 'binary-operator']), input: 'a <- 3\nb <- 4\nc <- a + b\nd <- 5\ne <- d + c', endCriterion: ['5@e'] as SlicingCriteria, startCriterion: ['4@d', '3@c'] as SlicingCriteria, expected: 'c <- a + b\nd <- 5\ne <- d + c' },
+			{ label: label('Multiple End Points', ['assignment-functions', 'binary-operator']), input: 'a <- 3\nb <- 4\nc <- a + b\nd <- b + 5\ne <- d + c', endCriterion: ['4@d', '3@c'] as SlicingCriteria, startCriterion: ['2@b'] as SlicingCriteria, expected: 'b <- 4\nc <- a + b\nd <- b + 5' },
+		]
+
+		for(const testcase of testcases) {
+			assertDiced(testcase.label, shell, testcase.input, testcase.startCriterion, testcase.endCriterion, testcase.expected)
+		}
+	})
+
+	describe('Dicing for Loops', () => {
+		const fibWhile = `x <- 1
+y <- 1
+i <- 0
+while (i < 10) {
+  h <- x
+  x <- x + y
+  y <- h
+  i <- i + 1
+}
+cat(x)`
+
+		assertDiced(label('Simple while', ['assignment-functions', 'binary-operator', 'while-loop']), shell, fibWhile, ['2@y'], ['10@x'], 'y <- 1\nwhile(i < 10) x <- x + y\nx')
+		assertDiced(label('Complex while', ['assignment-functions', 'binary-operator', 'while-loop']), shell, fibWhile, ['2@y', '1@x'], ['10@x'], 'x <- 1\ny <- 1\nwhile(i < 10) x <- x + y\nx')
+		assertDiced(label('End in while', ['assignment-functions', 'binary-operator', 'while-loop']), shell, fibWhile, ['1@x'], ['7@y'], 'x <- 1\nwhile(i < 10) {\n    h <- x\n    x <- x + y\n    y <- h\n}')
+		assertDiced(label('Start in while', ['assignment-functions', 'binary-operator', 'while-loop']), shell, fibWhile, ['6@x'], ['10@x'], 'while(i < 10) x <- x + y\nx')
+		assertDiced(label('Dice in while', ['assignment-functions', 'binary-operator', 'while-loop']), shell, fibWhile, ['5@x'], ['7@y'], 'while(i < 10) {\n    h <- x\n    y <- h\n}')
+	})
+
+	describe('Dicing with functions', () => {
+		const code = `x <- function(a, b) {
+  y <- 10
+  y <- y + a
+  y * b
+}
+c <- 2
+d <- 10
+z <- x(d, c)`
+
+		assertDiced(label('Simple function', ['assignment-functions', 'binary-operator', 'function-calls', 'function-definitions']), shell, code, ['6@c'], ['8@z'], 'x <- { y * b }\nc <- 2\nz <- x(d,c)')
+		assertDiced(label('Start in function parameter', ['assignment-functions', 'binary-operator', 'function-calls', 'function-definitions']), shell, code, ['1@a'], ['8@z'], 'x <- function(a, b) {\n        y <- a\n        y * b\n    }\nx(d, c)')
+		assertDiced(label('Start in function', ['assignment-functions', 'binary-operator', 'function-calls', 'function-definitions']), shell, code, ['3@a'], ['8@z'], 'y <- a\ny * b')
+		assertDiced(label('Cuts out function parameter', ['assignment-functions', 'binary-operator', 'function-calls', 'function-definitions']), shell, code, ['1@x'], ['8@z'], 'x <- { y * b }\nx(d, c)')
+	})
+}))
diff --git a/test/functionality/slicing/slicing.spec.ts b/test/functionality/slicing/slicing.spec.ts
@@ -11,4 +11,7 @@ describe('Slicing', () => {
 	describe('Slicing-Criterion', () => {
 		requireAllTestsInFolder(path.join(__dirname, 'slicing-criterion'))
 	})
+	describe('Dicing', () => {
+		requireAllTestsInFolder(path.join(__dirname, 'dicing'))
+	})
 })