Skip to content

Commit a7263a5

Browse files
authored
Add waitForProcessComplete (#126)
1 parent a33e9ff commit a7263a5

16 files changed

+518
-40
lines changed

engine/const.go

+4
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,7 @@ var defaultWorkerTaskBackoffRetryPolicy = xcapi.RetryPolicy{
1616
MaximumAttempts: ptr.Any(int32(0)),
1717
MaximumAttemptsDurationSeconds: ptr.Any(int32(0)),
1818
}
19+
20+
// DEFAULT_WAIT_FOR_TIMEOUT_MAX is the maximum age of a wait-for-process-completion
// request. TerminateWaiting adds it to a request's Unix-seconds creation time to
// decide whether that request has expired, so the unit is seconds.
const DEFAULT_WAIT_FOR_TIMEOUT_MAX int32 = 30
21+
22+
// WaitForProcessCompletionResultStop is the result value that the per-shard
// WaitForProcessCompletionChannels implementation signals to pending waiters
// when its Stop method is called.
const WaitForProcessCompletionResultStop string = "STOP"

engine/immediate_task_concurrent_processor.go

+61-19
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"fmt"
99
"io/ioutil"
1010
"net/http"
11+
"sync"
1112
"time"
1213

1314
"github.com/xcherryio/apis/goapi/xcapi"
@@ -27,14 +28,15 @@ type immediateTaskConcurrentProcessor struct {
2728
rootCtx context.Context
2829
cfg config.Config
2930
taskToProcessChan chan data_models.ImmediateTask
30-
// for quickly checking if the shardId is being processed
31-
currentShards map[int32]bool
32-
// shardId to the channel
31+
// shardId: channel
3332
taskToCommitChans map[int32]chan<- data_models.ImmediateTask
34-
taskNotifier TaskNotifier
35-
processStore persistence.ProcessStore
36-
visibilityStore persistence.VisibilityStore
37-
logger log.Logger
33+
// shardId: WaitForProcessCompletionChannels
34+
waitForProcessCompletionChannelsPerShardMap map[int32]WaitForProcessCompletionChannels
35+
taskNotifier TaskNotifier
36+
processStore persistence.ProcessStore
37+
visibilityStore persistence.VisibilityStore
38+
logger log.Logger
39+
lock sync.RWMutex
3840
}
3941

4042
func NewImmediateTaskConcurrentProcessor(
@@ -47,12 +49,13 @@ func NewImmediateTaskConcurrentProcessor(
4749
rootCtx: ctx,
4850
cfg: cfg,
4951
taskToProcessChan: make(chan data_models.ImmediateTask, bufferSize),
50-
currentShards: map[int32]bool{},
5152
taskToCommitChans: make(map[int32]chan<- data_models.ImmediateTask),
52-
taskNotifier: notifier,
53-
processStore: processStore,
54-
visibilityStore: visibilityStore,
55-
logger: logger,
53+
waitForProcessCompletionChannelsPerShardMap: make(map[int32]WaitForProcessCompletionChannels),
54+
taskNotifier: notifier,
55+
processStore: processStore,
56+
visibilityStore: visibilityStore,
57+
logger: logger,
58+
lock: sync.RWMutex{},
5659
}
5760
}
5861

@@ -66,17 +69,44 @@ func (w *immediateTaskConcurrentProcessor) GetTasksToProcessChan() chan<- data_m
6669
func (w *immediateTaskConcurrentProcessor) AddImmediateTaskQueue(
6770
shardId int32, tasksToCommitChan chan<- data_models.ImmediateTask,
6871
) (alreadyExisted bool) {
69-
exists := w.currentShards[shardId]
70-
w.currentShards[shardId] = true
71-
w.taskToCommitChans[shardId] = tasksToCommitChan
72+
w.lock.Lock()
73+
defer w.lock.Unlock()
74+
75+
_, exists := w.taskToCommitChans[shardId]
76+
if !exists {
77+
w.taskToCommitChans[shardId] = tasksToCommitChan
78+
}
79+
7280
return exists
7381
}
7482

7583
func (w *immediateTaskConcurrentProcessor) RemoveImmediateTaskQueue(shardId int32) {
76-
delete(w.currentShards, shardId)
84+
w.lock.Lock()
85+
defer w.lock.Unlock()
86+
7787
delete(w.taskToCommitChans, shardId)
7888
}
7989

90+
func (w *immediateTaskConcurrentProcessor) AddWaitForProcessCompletionChannels(shardId int32,
91+
waitForProcessCompletionChannelsPerShard WaitForProcessCompletionChannels) (alreadyExisted bool) {
92+
w.lock.Lock()
93+
defer w.lock.Unlock()
94+
95+
_, exists := w.waitForProcessCompletionChannelsPerShardMap[shardId]
96+
if !exists {
97+
w.waitForProcessCompletionChannelsPerShardMap[shardId] = waitForProcessCompletionChannelsPerShard
98+
}
99+
100+
return exists
101+
}
102+
103+
func (w *immediateTaskConcurrentProcessor) RemoveWaitForProcessCompletionChannels(shardId int32) {
104+
w.lock.Lock()
105+
defer w.lock.Unlock()
106+
107+
delete(w.waitForProcessCompletionChannelsPerShardMap, shardId)
108+
}
109+
80110
func (w *immediateTaskConcurrentProcessor) Start() error {
81111
concurrency := w.cfg.AsyncService.ImmediateTaskQueue.ProcessorConcurrency
82112

@@ -90,15 +120,18 @@ func (w *immediateTaskConcurrentProcessor) Start() error {
90120
if !ok {
91121
return
92122
}
93-
if !w.currentShards[task.ShardId] {
123+
124+
_, exists := w.taskToCommitChans[task.ShardId]
125+
if !exists {
94126
w.logger.Info("skip the stale task that is due to shard movement", tag.Shard(task.ShardId), tag.ID(task.GetTaskId()))
95127
continue
96128
}
97129

98130
err := w.processImmediateTask(w.rootCtx, task)
99131

100-
if w.currentShards[task.ShardId] { // check again
101-
commitChan := w.taskToCommitChans[task.ShardId]
132+
commitChan, exists := w.taskToCommitChans[task.ShardId]
133+
134+
if exists { // check again
102135
if err != nil {
103136
// put it back to the queue for immediate retry
104137
// Note that if the error is because of invoking worker APIs, it will be sent to
@@ -496,6 +529,15 @@ func (w *immediateTaskConcurrentProcessor) processExecuteTask(
496529
if compResp.HasNewImmediateTask {
497530
w.notifyNewImmediateTask(task.ShardId, prep, task)
498531
}
532+
533+
// signal to the process completion waiting channel
534+
waitForProcessCompletionChannelsPerShard, ok := w.waitForProcessCompletionChannelsPerShardMap[task.ShardId]
535+
if ok && compResp.ProcessStatus != data_models.ProcessExecutionStatusUndefined &&
536+
compResp.ProcessStatus != data_models.ProcessExecutionStatusRunning {
537+
538+
waitForProcessCompletionChannelsPerShard.Signal(task.ProcessExecutionId.String(), compResp.ProcessStatus.String())
539+
}
540+
499541
return nil
500542
}
501543

engine/interfaces.go

+13
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,10 @@ type ImmediateTaskProcessor interface {
5454
shardId int32, tasksToCommitChan chan<- data_models.ImmediateTask,
5555
) (alreadyExisted bool)
5656
RemoveImmediateTaskQueue(shardId int32)
57+
58+
AddWaitForProcessCompletionChannels(shardId int32,
59+
waitForProcessCompletionChannelsPerShard WaitForProcessCompletionChannels) (alreadyExisted bool)
60+
RemoveWaitForProcessCompletionChannels(shardId int32)
5761
}
5862

5963
type TimerTaskProcessor interface {
@@ -68,3 +72,12 @@ type TimerTaskProcessor interface {
6872
) (alreadyExisted bool)
6973
RemoveTimerTaskQueue(shardId int32)
7074
}
75+
76+
// WaitForProcessCompletionChannels manages the channels on which callers wait
// for the completion result of process executions. The immediate task processor
// keeps one instance per shard (see AddWaitForProcessCompletionChannels above)
// and calls Signal once an execute task leaves a process in a status other than
// undefined or running.
type WaitForProcessCompletionChannels interface {
77+
// Start activates the instance. NOTE(review): the per-shard implementation
// registers itself with the ImmediateTaskProcessor here — confirm for others.
Start()
78+
// Stop deactivates the instance. NOTE(review): the per-shard implementation
// unregisters itself and signals WaitForProcessCompletionResultStop to waiters.
Stop()
79+
80+
// Add records a new waiting request for processExecutionId and returns the
// channel on which the result string will be delivered.
Add(processExecutionId string) chan string
81+
// Signal publishes result to the requests waiting on processExecutionId.
Signal(processExecutionId string, result string)
82+
// TerminateWaiting is called to give up waiting on processExecutionId.
// NOTE(review): presumably invoked by the API handler when a wait times out
// — the caller is not visible here, confirm.
TerminateWaiting(processExecutionId string)
83+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
// Copyright 2023 xCherryIO organization
2+
3+
// Copyright (c) 2023 xCherryIO Organization
4+
// SPDX-License-Identifier: Apache-2.0
5+
6+
package engine
7+
8+
import (
9+
"fmt"
10+
"github.com/xcherryio/xcherry/common/log"
11+
"sync"
12+
"time"
13+
)
14+
15+
// WaitForProcessCompletionChannelsPerShardImpl is the per-shard implementation
// of WaitForProcessCompletionChannels. For each process execution it keeps one
// shared channel plus the creation times of the requests waiting on it.
type WaitForProcessCompletionChannelsPerShardImpl struct {
16+
// shardId is the shard this instance is registered under; also used in log messages.
shardId int32
17+
logger log.Logger
18+
19+
// processor is the ImmediateTaskProcessor this instance registers with in
// Start and unregisters from in Stop.
processor ImmediateTaskProcessor
20+
21+
// processExecutionId : channel
// The channel is created by Add (unbuffered) and closed by cleanup.
22+
channelMap map[string]chan string
23+
// processExecutionId : a list of timestamps of when the waiting requests were created
// Timestamps are Unix seconds, as returned by now().
24+
waitingRequestCreatedAt map[string][]int64
25+
// lock is acquired by Add, Signal, TerminateWaiting and cleanup around their
// accesses to the two maps above.
lock sync.RWMutex
26+
}
27+
28+
func NewWaitForProcessCompletionChannelsPerShardImplImpl(
29+
shardId int32, logger log.Logger, processor ImmediateTaskProcessor) WaitForProcessCompletionChannels {
30+
return &WaitForProcessCompletionChannelsPerShardImpl{
31+
shardId: shardId,
32+
logger: logger,
33+
34+
processor: processor,
35+
36+
channelMap: map[string]chan string{},
37+
waitingRequestCreatedAt: map[string][]int64{},
38+
lock: sync.RWMutex{},
39+
}
40+
}
41+
42+
func (w *WaitForProcessCompletionChannelsPerShardImpl) Start() {
43+
w.processor.AddWaitForProcessCompletionChannels(w.shardId, w)
44+
}
45+
46+
func (w *WaitForProcessCompletionChannelsPerShardImpl) Stop() {
47+
w.processor.RemoveWaitForProcessCompletionChannels(w.shardId)
48+
49+
var procIds []string
50+
51+
for procId := range w.channelMap {
52+
procIds = append(procIds, procId)
53+
}
54+
55+
for _, procId := range procIds {
56+
w.Signal(procId, WaitForProcessCompletionResultStop)
57+
}
58+
}
59+
60+
func (w *WaitForProcessCompletionChannelsPerShardImpl) Add(processExecutionId string) chan string {
61+
w.logger.Info(fmt.Sprintf("Add process execution completion waiting request for %s in shard %d",
62+
processExecutionId, w.shardId))
63+
64+
w.lock.Lock()
65+
defer w.lock.Unlock()
66+
67+
channel, ok := w.channelMap[processExecutionId]
68+
if !ok {
69+
channel = make(chan string)
70+
w.channelMap[processExecutionId] = channel
71+
}
72+
73+
w.waitingRequestCreatedAt[processExecutionId] = append(w.waitingRequestCreatedAt[processExecutionId], w.now())
74+
75+
return channel
76+
}
77+
78+
func (w *WaitForProcessCompletionChannelsPerShardImpl) Signal(processExecutionId string, result string) {
79+
channel, ok := w.channelMap[processExecutionId]
80+
if !ok {
81+
return
82+
}
83+
84+
w.lock.Lock()
85+
defer w.lock.Unlock()
86+
87+
count := len(w.waitingRequestCreatedAt[processExecutionId])
88+
89+
for i := 0; i < count; i++ {
90+
select {
91+
case channel <- result:
92+
w.logger.Info(fmt.Sprintf("Signal process execution completion waiting result %d for %s: %s",
93+
i, processExecutionId, result))
94+
default:
95+
w.logger.Info(fmt.Sprintf("Not signal process execution completion waiting result %d for %s: %s",
96+
i, processExecutionId, result))
97+
}
98+
}
99+
100+
w.waitingRequestCreatedAt[processExecutionId] = []int64{}
101+
102+
go func() {
103+
// sleep 3 seconds before close the channel
104+
time.Sleep(time.Second * 3)
105+
106+
w.cleanup(processExecutionId)
107+
}()
108+
}
109+
110+
func (w *WaitForProcessCompletionChannelsPerShardImpl) TerminateWaiting(processExecutionId string) {
111+
w.logger.Info(fmt.Sprintf("Terminate process execution completion waiting for %s in shard %d",
112+
processExecutionId, w.shardId))
113+
114+
w.lock.Lock()
115+
defer w.lock.Unlock()
116+
117+
var validWaitingRequestCreatedAt []int64
118+
119+
now := w.now()
120+
for _, createdAt := range w.waitingRequestCreatedAt[processExecutionId] {
121+
if createdAt+int64(DEFAULT_WAIT_FOR_TIMEOUT_MAX) < now {
122+
w.logger.Info(fmt.Sprintf(
123+
"Remove process execution completion waiting request created at %d for %s in shard %d",
124+
createdAt, processExecutionId, w.shardId))
125+
continue
126+
}
127+
128+
validWaitingRequestCreatedAt = append(validWaitingRequestCreatedAt, createdAt)
129+
}
130+
131+
w.waitingRequestCreatedAt[processExecutionId] = validWaitingRequestCreatedAt
132+
133+
if len(w.waitingRequestCreatedAt) == 0 {
134+
w.cleanup(processExecutionId)
135+
}
136+
}
137+
138+
func (w *WaitForProcessCompletionChannelsPerShardImpl) cleanup(processExecutionId string) {
139+
w.lock.Lock()
140+
defer w.lock.Unlock()
141+
142+
delete(w.waitingRequestCreatedAt, processExecutionId)
143+
144+
channel, ok := w.channelMap[processExecutionId]
145+
if !ok {
146+
return
147+
}
148+
149+
delete(w.channelMap, processExecutionId)
150+
close(channel)
151+
152+
w.logger.Info(fmt.Sprintf("Close process execution completion waiting channel for %s in shard %d",
153+
processExecutionId, w.shardId))
154+
}
155+
156+
func (w *WaitForProcessCompletionChannelsPerShardImpl) now() int64 {
157+
return time.Now().Unix()
158+
}

go.mod

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ require (
1313
github.com/serialx/hashring v0.0.0-20200727003509-22c0c7ab6b1b
1414
github.com/stretchr/testify v1.8.4
1515
github.com/urfave/cli/v2 v2.25.7
16-
github.com/xcherryio/apis v0.0.3-0.20240313171434-ae652fc3c70f
16+
github.com/xcherryio/apis v0.0.3-0.20240422013829-2c1c8bfbaa4b
1717
github.com/xcherryio/sdk-go v0.0.0-20240115163029-e21cc0710e61
1818
go.uber.org/multierr v1.10.0
1919
go.uber.org/zap v1.26.0

go.sum

+2-2
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,8 @@ github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4d
116116
github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
117117
github.com/urfave/cli/v2 v2.25.7 h1:VAzn5oq403l5pHjc4OhD54+XGO9cdKVL/7lDjF+iKUs=
118118
github.com/urfave/cli/v2 v2.25.7/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ=
119-
github.com/xcherryio/apis v0.0.3-0.20240313171434-ae652fc3c70f h1:csBDKtifwAIRXaHpw3xiUqNDdS0As8OSrflQPr0bTm8=
120-
github.com/xcherryio/apis v0.0.3-0.20240313171434-ae652fc3c70f/go.mod h1:7peiYpRUjmq0rl/8F0MmvFH8Vp7Y8Dq5OpRgpH0cMJU=
119+
github.com/xcherryio/apis v0.0.3-0.20240422013829-2c1c8bfbaa4b h1:ea05r21EcjiedKXoADNLNAeOdm1W1mS/EMDWYfgSlnQ=
120+
github.com/xcherryio/apis v0.0.3-0.20240422013829-2c1c8bfbaa4b/go.mod h1:7peiYpRUjmq0rl/8F0MmvFH8Vp7Y8Dq5OpRgpH0cMJU=
121121
github.com/xcherryio/sdk-go v0.0.0-20240115163029-e21cc0710e61 h1:6Xr3S342Di2QuvagFb4uG1AkA8lQLWfED1ynZvnu3V0=
122122
github.com/xcherryio/sdk-go v0.0.0-20240115163029-e21cc0710e61/go.mod h1:Ouc00E061VNVYemKbVQCxB3LSOgIkxV81h//1O1ODws=
123123
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU=

persistence/data_models/complete_execute_execution_req_resp.go

+1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ type (
2828

2929
CompleteExecuteExecutionResponse struct {
3030
HasNewImmediateTask bool
31+
ProcessStatus ProcessExecutionStatus
3132
FailedAtWritingAppDatabase bool
3233
AppDatabaseWritingError error
3334
}

persistence/process/complete_execute.go

+1
Original file line numberDiff line numberDiff line change
@@ -187,5 +187,6 @@ func (p sqlProcessStoreImpl) doCompleteExecuteExecutionTx(
187187

188188
return &data_models.CompleteExecuteExecutionResponse{
189189
HasNewImmediateTask: hasNewImmediateTask,
190+
ProcessStatus: prcRow.Status,
190191
}, nil
191192
}

service/api/default_server.go

+2
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ const PathStopProcessExecution = "/api/v1/xcherry/service/process-execution/stop
2020
const PathPublishToLocalQueue = "/api/v1/xcherry/service/process-execution/publish-to-local-queue"
2121
const PathProcessExecutionRpc = "/api/v1/xcherry/service/process-execution/rpc"
2222
const PathListProcessExecutions = "/api/v1/xcherry/service/process-execution/list"
23+
const PathWaitForProcessCompletion = "/api/v1/xcherry/service/process-execution/wait-for-process-completion"
2324

2425
type defaultSever struct {
2526
rootCtx context.Context
@@ -50,6 +51,7 @@ func NewDefaultAPIServerWithGin(
5051
engine.POST(PathPublishToLocalQueue, handler.PublishToLocalQueue)
5152
engine.POST(PathProcessExecutionRpc, handler.Rpc)
5253
engine.POST(PathListProcessExecutions, handler.ListProcessExecutions)
54+
engine.POST(PathWaitForProcessCompletion, handler.WaitForProcessCompletion)
5355

5456
svrCfg := cfg.ApiService.HttpServer
5557
httpServer := &http.Server{

0 commit comments

Comments
 (0)