Skip to content
This repository was archived by the owner on Apr 24, 2023. It is now read-only.

Commit 4ca4e09

Browse files
authored
Revert "Parallel watch processing. (#2169)" (#2174)
This reverts commit 637b734.
1 parent 878911f commit 4ca4e09

File tree

4 files changed

+13
-164
lines changed

4 files changed

+13
-164
lines changed

scheduler/java/com/twosigma/cook/kubernetes/ParallelWatchQueue.java

Lines changed: 0 additions & 131 deletions
This file was deleted.

scheduler/src/cook/kubernetes/compute_cluster.clj

Lines changed: 9 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@
2727
[metrics.timers :as timers]
2828
[opentracing-clj.core :as tracing])
2929
(:import (com.google.auth.oauth2 GoogleCredentials)
30-
(com.twosigma.cook.kubernetes TokenRefreshingAuthenticator ParallelWatchQueue)
30+
(com.twosigma.cook.kubernetes TokenRefreshingAuthenticator)
3131
(io.kubernetes.client.openapi ApiClient)
3232
(io.kubernetes.client.openapi.models V1Node V1Pod)
3333
(io.kubernetes.client.util ClientBuilder KubeConfig)
@@ -214,23 +214,15 @@
214214

215215
(defn make-cook-pod-watch-callback
216216
"Make a callback function that is passed to the pod-watch callback. This callback forwards changes to the cook.kubernetes.controller."
217-
[{:keys [^ParallelWatchQueue parallel-watch-queue] :as kcc}]
217+
[kcc]
218218
(fn pod-watch-callback
219-
[_ ^V1Pod prev-pod ^V1Pod pod]
219+
[_ prev-pod pod]
220220
(try
221-
(let [name (or (some-> prev-pod .getMetadata .getName)
222-
(some-> pod .getMetadata .getName))
223-
shardNum (mod (.hashCode name) (.getShardCount parallel-watch-queue))
224-
^Runnable event (fn []
225-
(try
226-
(if (nil? pod)
227-
(controller/pod-deleted kcc prev-pod)
228-
(controller/pod-update kcc pod))
229-
(catch Exception e
230-
(log/error e "Error processing status update on" name))))]
231-
(.submitEvent parallel-watch-queue event shardNum))
221+
(if (nil? pod)
222+
(controller/pod-deleted kcc prev-pod)
223+
(controller/pod-update kcc pod))
232224
(catch Exception e
233-
(log/error e "Error submitting pod status update")))))
225+
(log/error e "Error processing status update")))))
234226

235227
(defn task-ents->map-by-task-id
236228
"Given seq of task entities from datomic, generate a map of task-id -> entity."
@@ -416,8 +408,7 @@
416408
compute-cluster-launch-rate-limiter cook-pool-taint-name cook-pool-taint-prefix
417409
cook-pool-taint2-name cook-pool-taint2-value
418410
cook-pool-label-name cook-pool-label-prefix
419-
controller-lock-objects kill-lock-object
420-
parallel-watch-queue]
411+
controller-lock-objects kill-lock-object]
421412
cc/ComputeCluster
422413
(launch-tasks [this pool-name matches process-task-post-launch-fn]
423414
(let [task-metadata-seq (mapcat :task-metadata-seq matches)]
@@ -895,8 +886,6 @@
895886
name
896887
namespace
897888
node-blocklist-labels
898-
parallel-watch-max-outstanding
899-
parallel-watch-shards
900889
read-timeout-seconds
901890
scan-frequency-seconds
902891
state
@@ -917,8 +906,6 @@
917906
state :running
918907
state-locked? false
919908
use-google-service-account? true
920-
parallel-watch-max-outstanding 1000
921-
parallel-watch-shards 200
922909
cook-pool-taint-prefix ""
923910
cook-pool-label-prefix ""
924911
use-token-refreshing-authenticator? false}
@@ -976,7 +963,6 @@
976963
(with-meta (vec (repeatedly lock-shard-count #(ReentrantLock.)))
977964
{:json-value (str "<count of " lock-shard-count " ReentrantLocks>")})
978965
; cluster-level kill-lock. See cc/kill-lock-object
979-
(ReentrantReadWriteLock. true)
980-
(ParallelWatchQueue. controller-executor-service parallel-watch-max-outstanding parallel-watch-shards))]
966+
(ReentrantReadWriteLock. true))]
981967
(cc/register-compute-cluster! compute-cluster)
982968
compute-cluster))

scheduler/src/cook/test/testutil.clj

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,6 @@
4343
[ring.middleware.params :refer [wrap-params]])
4444
(:import (com.google.common.cache CacheBuilder)
4545
(com.netflix.fenzo SimpleAssignmentResult)
46-
(com.twosigma.cook.kubernetes ParallelWatchQueue)
4746
(io.kubernetes.client.custom Quantity Quantity$Format)
4847
(io.kubernetes.client.openapi.models V1Container V1Node V1NodeSpec V1NodeStatus V1ObjectMeta V1Pod V1PodSpec V1ResourceRequirements V1Taint)
4948
(java.util.concurrent Executors TimeUnit)
@@ -662,5 +661,4 @@
662661
"some-random-label-A"
663662
"some-random-label-val-B"
664663
(repeatedly 16 #(ReentrantLock.))
665-
(ReentrantReadWriteLock. true)
666-
(ParallelWatchQueue. (Executors/newSingleThreadExecutor) 1000 100))))
664+
(ReentrantReadWriteLock. true))))

scheduler/test/cook/test/kubernetes/compute_cluster.clj

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,6 @@
1212
[cook.test.testutil :as tu]
1313
[datomic.api :as d])
1414
(:import (clojure.lang ExceptionInfo)
15-
(com.twosigma.cook.kubernetes ParallelWatchQueue)
1615
(io.kubernetes.client.openapi.models V1NodeSelectorRequirement V1Pod V1PodSecurityContext)
1716
(java.util.concurrent Executors)
1817
(java.util.concurrent.locks ReentrantLock ReentrantReadWriteLock)
@@ -81,8 +80,7 @@
8180
{} (atom :running) (atom false) false
8281
cook.rate-limit/AllowAllRateLimiter "t-a" "p-a" "t2-a" "p2-a" "l-p" "l-v1"
8382
(repeatedly 16 #(ReentrantLock.))
84-
(ReentrantReadWriteLock. true)
85-
(ParallelWatchQueue. (Executors/newSingleThreadExecutor) 1000 10))
83+
(ReentrantReadWriteLock. true))
8684
task-metadata (task/TaskAssignmentResult->task-metadata (d/db conn)
8785
nil
8886
compute-cluster
@@ -105,8 +103,7 @@
105103
{} (atom :running) (atom false) false
106104
cook.rate-limit/AllowAllRateLimiter "t-b" "p-b" "t2-a" "p2-a" "l-p" "l-v2"
107105
(repeatedly 16 #(ReentrantLock.))
108-
(ReentrantReadWriteLock. true)
109-
(ParallelWatchQueue. (Executors/newSingleThreadExecutor) 1000 10))
106+
(ReentrantReadWriteLock. true))
110107
task-metadata (task/TaskAssignmentResult->task-metadata (d/db conn)
111108
nil
112109
compute-cluster
@@ -137,8 +134,7 @@
137134
{} (atom :running) (atom false) false
138135
cook.rate-limit/AllowAllRateLimiter "t-c" "p-c" "t2-a" "p2-a" "l-p" "l-c2"
139136
(repeatedly 16 #(ReentrantLock.))
140-
(ReentrantReadWriteLock. true)
141-
(ParallelWatchQueue. (Executors/newSingleThreadExecutor) 1000 10))
137+
(ReentrantReadWriteLock. true))
142138
node-name->node {"nodeA" (tu/node-helper "nodeA" 1.0 1000.0 10 "nvidia-tesla-p100" nil nil)
143139
"nodeB" (tu/node-helper "nodeB" 1.0 1000.0 25 "nvidia-tesla-p100" nil nil)
144140
"nodeC" (tu/node-helper "nodeC" 1.0 1000.0 nil nil nil nil)

0 commit comments

Comments
 (0)