-
Notifications
You must be signed in to change notification settings - Fork 1.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
implementation of scale using jumps #1452
Draft
mstoykov
wants to merge
1
commit into
master
Choose a base branch
from
experimentalSegmentedJumps
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Draft
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -494,9 +494,9 @@ type ExecutionSegmentSequenceWrapper struct { | |||||
ExecutionSegmentSequence // a filled-out segment sequence | ||||||
lcd int64 // pre-calculated least common denominator | ||||||
|
||||||
// The striped offsets, i.e. the repeating indexes that "belong" to each | ||||||
// The striped jumps, i.e. the repeating indexes that "belong" to each | ||||||
// execution segment in the sequence. | ||||||
offsets [][]int64 | ||||||
jumps [][]int64 | ||||||
} | ||||||
|
||||||
// NewExecutionSegmentSequenceWrapper expects a filled-out execution segment | ||||||
|
@@ -508,7 +508,7 @@ func NewExecutionSegmentSequenceWrapper(ess ExecutionSegmentSequence) *Execution | |||||
} | ||||||
|
||||||
sequenceLength := len(ess) | ||||||
offsets := make([][]int64, sequenceLength) | ||||||
jumps := make([][]int64, sequenceLength) | ||||||
lcd := ess.LCD() | ||||||
|
||||||
// This will contain the normalized numerator values (i.e. what they would have | ||||||
|
@@ -524,7 +524,7 @@ func NewExecutionSegmentSequenceWrapper(ess ExecutionSegmentSequence) *Execution | |||||
normalizedNumerator := ess[i].length.Num().Int64() * (lcd / ess[i].length.Denom().Int64()) | ||||||
sortedNormalizedIndexes[i].normNumerator = normalizedNumerator | ||||||
sortedNormalizedIndexes[i].originalIndex = i | ||||||
offsets[i] = make([]int64, 0, normalizedNumerator+1) | ||||||
jumps[i] = make([]int64, 0, normalizedNumerator) | ||||||
} | ||||||
|
||||||
sort.SliceStable(sortedNormalizedIndexes, func(i, j int) bool { | ||||||
|
@@ -561,28 +561,21 @@ func NewExecutionSegmentSequenceWrapper(ess ExecutionSegmentSequence) *Execution | |||||
// sorting of the segments from biggest to smallest helps with the fact that | ||||||
// the biggest elements will need to take the most elements, and for them it | ||||||
// will be the hardest to not get sequential elements. | ||||||
prev := make([]int64, sequenceLength) | ||||||
chosenCounts := make([]int64, sequenceLength) | ||||||
saveIndex := func(iteration int64, index int, numerator int64) { | ||||||
offsets[index] = append(offsets[index], iteration-prev[index]) | ||||||
prev[index] = iteration | ||||||
if int64(len(offsets[index])) == numerator { | ||||||
offsets[index] = append(offsets[index], offsets[index][0]+lcd-iteration) | ||||||
} | ||||||
} | ||||||
for i := int64(0); i < lcd; i++ { | ||||||
for sortedIndex, chosenCount := range chosenCounts { | ||||||
num := chosenCount * lcd | ||||||
denom := sortedNormalizedIndexes[sortedIndex].normNumerator | ||||||
if i > num/denom || (i == num/denom && num%denom == 0) { | ||||||
chosenCounts[sortedIndex]++ | ||||||
saveIndex(i, sortedNormalizedIndexes[sortedIndex].originalIndex, denom) | ||||||
index := sortedNormalizedIndexes[sortedIndex].originalIndex | ||||||
jumps[index] = append(jumps[index], i) | ||||||
break | ||||||
} | ||||||
} | ||||||
} | ||||||
|
||||||
return &ExecutionSegmentSequenceWrapper{ExecutionSegmentSequence: ess, lcd: lcd, offsets: offsets} | ||||||
return &ExecutionSegmentSequenceWrapper{ExecutionSegmentSequence: ess, lcd: lcd, jumps: jumps} | ||||||
} | ||||||
|
||||||
// LCD returns the (cached) least common denominator of the sequence - no need | ||||||
|
@@ -593,13 +586,23 @@ func (essw *ExecutionSegmentSequenceWrapper) LCD() int64 { | |||||
|
||||||
// ScaleInt64 scales the provided value for the given segment. | ||||||
func (essw *ExecutionSegmentSequenceWrapper) ScaleInt64(segmentIndex int, value int64) int64 { | ||||||
start := essw.offsets[segmentIndex][0] | ||||||
offsets := essw.offsets[segmentIndex][1:] | ||||||
result := (value / essw.lcd) * int64(len(offsets)) | ||||||
for gi, i := 0, start; i < value%essw.lcd; gi, i = gi+1, i+offsets[gi] { | ||||||
result++ | ||||||
jumps := essw.jumps[segmentIndex] | ||||||
endValue := (value / essw.lcd) * int64(len(jumps)) | ||||||
remaining := value % essw.lcd | ||||||
if jumps[0] <= remaining { | ||||||
i, j := 0, len(jumps) | ||||||
for i < j { | ||||||
h := int(uint(i+j) >> 1) // avoid overflow when computing h | ||||||
// i ≤ h < j | ||||||
if jumps[h] < remaining { | ||||||
i = h + 1 // preserves f(i-1) == false | ||||||
} else { | ||||||
j = h // preserves f(j) == true | ||||||
} | ||||||
} | ||||||
endValue += int64(i) | ||||||
} | ||||||
return result | ||||||
return endValue | ||||||
} | ||||||
|
||||||
// GetStripedOffsets returns the striped offsets for the given segment | ||||||
|
@@ -611,8 +614,24 @@ func (essw *ExecutionSegmentSequenceWrapper) ScaleInt64(segmentIndex int, value | |||||
// - lcd: the LCD of the lengths of all segments in the sequence. This is also the number of | ||||||
// elements after which the algorithm starts to loop and give the same values | ||||||
func (essw *ExecutionSegmentSequenceWrapper) GetStripedOffsets(segmentIndex int) (int64, []int64, int64) { | ||||||
offsets := essw.offsets[segmentIndex] | ||||||
return offsets[0], offsets[1:], essw.lcd | ||||||
jumps := essw.jumps[segmentIndex] | ||||||
offsets := make([]int64, len(jumps)) | ||||||
for i := 1; i < len(jumps); i++ { | ||||||
offsets[i-1] = jumps[i] - jumps[i-1] | ||||||
} | ||||||
offsets[len(offsets)-1] = essw.lcd - jumps[len(jumps)-1] + jumps[0] | ||||||
return jumps[0], offsets, essw.lcd | ||||||
} | ||||||
|
||||||
// GetStripedJumps returns the striped jumps for the given segment | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
(Copied from |
||||||
// the returned values are as follows in order: | ||||||
// - jumps: a list of jumps from the beginning value for the segment. These are only the jumps | ||||||
// from the start to the next start if we chunk the elements we are going to strip | ||||||
// into lcd sized chunks | ||||||
// - lcd: the LCD of the lengths of all segments in the sequence. This is also the number of | ||||||
// elements after which the algorithm starts to loop and give the same values | ||||||
func (essw *ExecutionSegmentSequenceWrapper) GetStripedJumps(segmentIndex int) ([]int64, int64) { | ||||||
return essw.jumps[segmentIndex], essw.lcd | ||||||
} | ||||||
|
||||||
// GetTuple returns an ExecutionTuple for the specified segment index. | ||||||
|
@@ -758,6 +777,11 @@ func (et *ExecutionTuple) GetStripedOffsets() (int64, []int64, int64) { | |||||
return et.Sequence.GetStripedOffsets(et.SegmentIndex) | ||||||
} | ||||||
|
||||||
// GetStripedJumps returns the striped jumps for our execution segment. | ||||||
func (et *ExecutionTuple) GetStripedJumps() ([]int64, int64) { | ||||||
return et.Sequence.GetStripedJumps(et.SegmentIndex) | ||||||
} | ||||||
|
||||||
// GetNewExecutionTupleFromValue re-segments the sequence, based on the given | ||||||
// value (see GetNewExecutionSegmentSequenceFromValue() above), and either | ||||||
// returns the new tuple, or an error if the current segment isn't present in | ||||||
|
@@ -783,14 +807,16 @@ func (et *ExecutionTuple) GetNewExecutionTupleFromValue(value int64) (*Execution | |||||
type SegmentedIndex struct { | ||||||
start, lcd int64 | ||||||
offsets []int64 | ||||||
jumps []int64 | ||||||
scaled, unscaled int64 // for both the first element(vu) is 1 not 0 | ||||||
} | ||||||
|
||||||
// NewSegmentedIndex returns a pointer to a new SegmentedIndex instance, | ||||||
// given an ExecutionTuple. | ||||||
func NewSegmentedIndex(et *ExecutionTuple) *SegmentedIndex { | ||||||
start, offsets, lcd := et.GetStripedOffsets() | ||||||
return &SegmentedIndex{start: start, lcd: lcd, offsets: offsets} | ||||||
jumps, _ := et.GetStripedJumps() | ||||||
return &SegmentedIndex{start: start, lcd: lcd, offsets: offsets, jumps: jumps} | ||||||
} | ||||||
|
||||||
// Next goes to the next scaled index and moves the unscaled one accordingly. | ||||||
|
@@ -821,36 +847,41 @@ func (s *SegmentedIndex) Prev() (int64, int64) { | |||||
// GoTo sets the scaled index to its biggest value for which the corresponding | ||||||
// unscaled index is smaller or equal to value. | ||||||
func (s *SegmentedIndex) GoTo(value int64) (int64, int64) { // TODO optimize | ||||||
var gi int64 | ||||||
// Because of the cyclical nature of the striping algorithm (with a cycle | ||||||
// length of LCD, the least common denominator), when scaling large values | ||||||
// (i.e. many multiples of the LCD), we can quickly calculate how many times | ||||||
// the cycle repeats. | ||||||
wholeCycles := (value / s.lcd) | ||||||
// So we can set some approximate initial values quickly, since we also know | ||||||
// precisely how many scaled values there are per cycle length. | ||||||
s.scaled = wholeCycles * int64(len(s.offsets)) | ||||||
s.unscaled = wholeCycles*s.lcd + s.start + 1 // our indexes are from 1 the start is from 0 | ||||||
s.scaled = wholeCycles * int64(len(s.jumps)) | ||||||
s.unscaled = wholeCycles * s.lcd // our indexes are from 1 the start is from 0 | ||||||
// Approach the final value using the slow algorithm with the step by step loop | ||||||
// TODO: this can be optimized by another array with size offsets that instead of the offsets | ||||||
// from the previous is the offset from either 0 or start | ||||||
i := s.start | ||||||
for ; i < value%s.lcd; gi, i = gi+1, i+s.offsets[gi] { | ||||||
s.scaled++ | ||||||
s.unscaled += s.offsets[gi] | ||||||
} | ||||||
|
||||||
if gi > 0 { // there were more values after the wholecycles | ||||||
// the last offset actually shouldn't have been added | ||||||
s.unscaled -= s.offsets[gi-1] | ||||||
} else if s.scaled > 0 { // we didn't actually have more values after the wholecycles but we still had some | ||||||
remaining := value % s.lcd | ||||||
switch { | ||||||
case s.jumps[0]+1 > remaining: | ||||||
// we didn't actually have more values after the wholecycles but we still had some | ||||||
// in this case the unscaled value needs to move back by the last offset as it would've been | ||||||
// the one to get it from the value it needs to be to its current one | ||||||
s.unscaled -= s.offsets[len(s.offsets)-1] | ||||||
} | ||||||
|
||||||
if s.scaled == 0 { | ||||||
s.unscaled = 0 // we would've added the start and 1 | ||||||
if wholeCycles > 0 { | ||||||
s.unscaled -= s.lcd - s.jumps[len(s.jumps)-1] - 1 | ||||||
} | ||||||
default: | ||||||
i, j := 0, len(s.jumps) | ||||||
for i < j { | ||||||
h := int(uint(i+j) >> 1) // avoid overflow when computing h | ||||||
// i ≤ h < j | ||||||
if s.jumps[h] < remaining { | ||||||
i = h + 1 // preserves f(i-1) == false | ||||||
} else { | ||||||
j = h // preserves f(j) == true | ||||||
} | ||||||
} | ||||||
s.scaled += int64(i) | ||||||
s.unscaled += s.jumps[i-1] + 1 | ||||||
} | ||||||
|
||||||
return s.scaled, s.unscaled | ||||||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is impossible to follow by reading for me, will have to step through it with an example. The comments don't help much either... :-/
Probably no action needed from your part, just pointing it out. :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I haven't (really) documented anything ... I wanted to see if it's worth it ... also to rebase it (as I previously did a month ago or so).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
as an explanation/help for you to understand it.
The jumps instead of the difference between indexes in each cycle are the actual indexes
Example: the LCD (cycle size) is 7 and the jumps = {2, 4}. Previously that would've been start=2, offsets={2, 5} (the first 2 is the difference between the two indexes, and the 5 is how much you need to add to 4 to loop back to the start (2)) - (4 + 5) % 7 == 2.
Previously in order to find how many actual elements you have in NOT full cycle, we iterated over the offsets and added 1 until we pass the mark, which more or less is linearly searching through the offsets.
This new approach searches for the "jump" up to which we will go to (jump to :P). So the first three lines above are just calculating the full cycles and finding the
remaining
which is to search in the list of jumps ... which are sorted in increasing order ... because they go from smallest to biggest .. by definition :D. There is a small tricky thing that .. the index actually needs to be 1 bigger (as the start is indexed as 0 ;) ), which is why I just search for the jump that is bigger than the remaining, not something else. This is more tricky in the VLV unfortunately, but I would argue it might've gotten more readable :rofl:
For the record literally the whole part between
i, j :=...
and the end of the if is the copy of thesort.Search
... as I mentioned in the commit message I saw 30-70% better performance this way ... which IMO is significant enough for having this 6 lines (we can probably comment around them)Now as you can imagine the one has O(n) and the other is O(log(n)) which while great is not awesome for small
n
as I now need to do a whole search instead of just iterating over ... 1, 2, 3, 4 offsets :). Luckily, apparently even for small inputs the difference is negligible, but for inputs where there are a lot of offsets the difference is 99%+, which given that this is not entirely out of the question is probably a good idea. I was debating whether to add an if to go back to linear search if the jumps are fewer than ... choose a number .. but decided against it as that would make it even longer ... and now there is still a chance this will be inlined (I hope, I probably need to check).