Skip to content

Commit 92ddde6

Browse files
authored
Subrange incremental repair (#1509)
* Add support for subrange incremental repairs
1 parent f2854d7 commit 92ddde6

File tree

72 files changed

+1466
-599
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

72 files changed

+1466
-599
lines changed

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,7 @@
309309
<dependency>
310310
<groupId>com.puppycrawl.tools</groupId>
311311
<artifactId>checkstyle</artifactId>
312-
<version>8.19</version>
312+
<version>10.17.0</version>
313313
</dependency>
314314
</dependencies>
315315
</plugin>

src/docs/content/docs/api.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,8 @@ Returns OK if all goes well NOT_MODIFIED if new state is the same as the old one
196196
* *segmentCountPerNode*: Defines the amount of segments per node to create for scheduled repair runs. (Optional)
197197
* *repairParallelism*: Defines the used repair parallelism for scheduled repair runs. (Optional)
198198
* *intensity*: Defines the repair intensity for scheduled repair runs. (Optional)
199-
* *incrementalRepair*: Defines if incremental repair should be done. [true/false] (Optional)
199+
* *incrementalRepair*: Defines if incremental repair should be done on all tokens of each node at once. [true/false] (Optional)
200+
* *subrangeIncrementalRepair*: Defines if incremental repair should be done in subrange mode, against discrete token ranges. [true/false] (Optional)
200201
* *scheduleDaysBetween*: Defines the amount of days to wait between scheduling new repairs.
201202
For example, use value 7 for weekly schedule, and 0 for continuous.
202203
* *scheduleTriggerTime*: Defines the time for first scheduled trigger for the run.

src/docs/content/docs/configuration/docker_vars.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ The Docker environment variables listed in this section map directly to Reaper s
2727
<code class="codeLarge">REAPER_ENABLE_DYNAMIC_SEED_LIST</code> | [enableDynamicSeedList]({{< relref "reaper_specific.md#enabledynamicseedlist" >}}) | true
2828
<code class="codeLarge">REAPER_HANGING_REPAIR_TIMEOUT_MINS</code> | [hangingRepairTimeoutMins]({{< relref "reaper_specific.md#hangingrepairtimeoutmins" >}}) | 30
2929
<code class="codeLarge">REAPER_INCREMENTAL_REPAIR</code> | [incrementalRepair]({{< relref "reaper_specific.md#incrementalrepair" >}}) | false
30+
<code class="codeLarge">REAPER_SUBRANGE_INCREMENTAL</code> | [subrangeIncrementalRepair]({{< relref "reaper_specific.md#subrangeincrementalrepair" >}}) | false
3031
<code class="codeLarge">REAPER_JMX_AUTH_PASSWORD</code> | [password]({{< relref "reaper_specific.md#password" >}}) |
3132
<code class="codeLarge">REAPER_JMX_AUTH_USERNAME</code> | [username]({{< relref "reaper_specific.md#username" >}}) |
3233
<code class="codeLarge">REAPER_JMX_CREDENTIALS</code> | [jmxCredentials]({{< relref "reaper_specific.md#jmxcredentials" >}}) |

src/docs/content/docs/configuration/reaper_specific.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,21 @@ Sets the default repair type unless specifically defined for each run. Note that
154154

155155
<br/>
156156

157+
### `subrangeIncrementalRepair`
158+
159+
Type: *Boolean*
160+
161+
Default: *false*
162+
163+
Sets the default repair type unless specifically defined for each run. Note that this is only supported with the PARALLEL repairParallelism setting. For more details on incremental repair, please refer to the following article: http://www.datastax.com/dev/blog/more-efficient-repairs
164+
This mode will split the repair jobs into sets of token ranges using the incremental mode.
165+
This will prevail over the `incrementalRepair` setting.
166+
167+
168+
*Note*: Subrange incremental repair is only available in Cassandra 4.0 and later.
169+
170+
<br/>
171+
157172
### `blacklistTwcsTables`
158173

159174
Type: *Boolean*

src/packaging/bin/spreaper

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,9 @@ def _arguments_for_repair_and_schedule(parser):
233233
parser.add_argument("--incremental", default="false",
234234
help=("Incremental repair (true or false), "
235235
"or use the configured default if not given (false)"))
236+
parser.add_argument("--subrange-incremental", default="false",
237+
help=("Subrange incremental repair (true or false), "
238+
"or use the configured default if not given (false)"))
236239
parser.add_argument("--datacenters", default=None,
237240
help=("a comma separated list of datacenters to repair (do not use spaces after commas). "
238241
"Cannot be used in conjunction with --nodes."))
@@ -734,6 +737,7 @@ class ReaperCLI(object):
734737
repairParallelism=args.repair_parallelism,
735738
intensity=args.intensity,
736739
incrementalRepair=args.incremental,
740+
subrangeIncrementalRepair=args.subrange_incremental,
737741
nodes=args.nodes,
738742
datacenters=args.datacenters,
739743
blacklistedTables=args.blacklisted_tables,
@@ -755,6 +759,7 @@ class ReaperCLI(object):
755759
repairParallelism=args.repair_parallelism,
756760
intensity=args.intensity,
757761
incrementalRepair=args.incremental,
762+
subrangeIncrementalRepair=args.subrange_incremental,
758763
nodes=args.nodes,
759764
datacenters=args.datacenters,
760765
blacklistedTables=args.blacklisted_tables,
@@ -797,6 +802,7 @@ class ReaperCLI(object):
797802
scheduleDaysBetween=args.schedule_days_between,
798803
scheduleTriggerTime=args.schedule_trigger_time,
799804
incrementalRepair=args.incremental,
805+
subrangeIncrementalRepair=args.subrange_incremental,
800806
nodes=args.nodes,
801807
datacenters=args.datacenters,
802808
blacklistedTables=args.blacklisted_tables,
@@ -818,6 +824,7 @@ class ReaperCLI(object):
818824
scheduleDaysBetween=args.schedule_days_between,
819825
scheduleTriggerTime=args.schedule_trigger_time,
820826
incrementalRepair=args.incremental,
827+
subrangeIncrementalRepair=args.subrange_incremental,
821828
nodes=args.nodes,
822829
datacenters=args.datacenters,
823830
blacklistedTables=args.blacklisted_tables,

src/packaging/docker-services/reaper/reaper.env

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ REAPER_REPAIR_RUN_THREADS=32
3535
REAPER_HANGING_REPAIR_TIMEOUT_MINS=90
3636
REAPER_ENABLE_CROSS_ORIGIN=true
3737
REAPER_INCREMENTAL_REPAIR=true
38+
REAPER_SUBRANGE_INCREMENTAL=false
3839
REAPER_BLACKLIST_TWCS=true
3940
REAPER_ENABLE_DYNAMIC_SEED_LIST=false
4041
REAPER_REPAIR_MANAGER_SCHEDULING_INTERVAL_SECONDS=10

src/server/checkstyle.xml

Lines changed: 9 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -14,30 +14,11 @@
1414
limitations under the License.
1515
-->
1616
<!DOCTYPE module PUBLIC
17-
"-//Puppy Crawl//DTD Check Configuration 1.3//EN"
18-
"http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
19-
20-
<!--
21-
22-
- Modified from Google Java Style
23-
https://google-styleguide.googlecode.com/svn-history/r130/trunk/javaguide.html
24-
25-
Checkstyle is very configurable. Be sure to read the documentation at
26-
http://checkstyle.sf.net (or in your downloaded distribution).
27-
28-
Most Checks are configurable, be sure to consult the documentation.
29-
30-
To completely disable a check, just comment it out or delete it from the file.
31-
32-
-->
33-
34-
<module name = "Checker">
35-
<module name="SuppressWarningsFilter"/>
17+
"-//Checkstyle//DTD Checkstyle Configuration 1.3//EN"
18+
"https://checkstyle.org/dtds/configuration_1_3.dtd">
19+
<module name = "RootModule">
3620
<property name="charset" value="UTF-8"/>
37-
3821
<property name="fileExtensions" value="java, properties, xml"/>
39-
<!-- Checks for whitespace -->
40-
<!-- See http://checkstyle.sf.net/config_whitespace.html -->
4122
<module name="FileTabCharacter">
4223
<property name="eachLine" value="true"/>
4324
</module>
@@ -48,9 +29,12 @@
4829
<property name="multiLines" value="2"/>
4930
</module>
5031

51-
<module name="TreeWalker">
52-
<module name="SuppressWarningsHolder"/>
32+
<module name="LineLength">
33+
<property name="max" value="120"/>
34+
<property name="ignorePattern" value="^package.*|^import.*|a href|href|http://|https://|ftp://"/>
35+
</module>
5336

37+
<module name="TreeWalker">
5438
<module name="SuppressWarnings">
5539
<property name="id" value="checkstyle:suppresswarnings"/>
5640
</module>
@@ -70,10 +54,6 @@
7054
<property name="allowByTailComment" value="true"/>
7155
<property name="allowNonPrintableEscapes" value="true"/>
7256
</module>
73-
<module name="LineLength">
74-
<property name="max" value="120"/>
75-
<property name="ignorePattern" value="^package.*|^import.*|a href|href|http://|https://|ftp://"/>
76-
</module>
7757
<module name="Regexp">
7858
<property name="format" value="[ \t]+$"/>
7959
<property name="illegalPattern" value="true"/>
@@ -234,6 +214,6 @@
234214
<module name="FinalClass"/>
235215
</module>
236216
<module name="SuppressionFilter">
237-
<property name="file" value="${checkstyle.suppressions.file}"/>
217+
<property name="file" value="${checkstyle.suppressions.file}"/>
238218
</module>
239219
</module>

src/server/src/main/docker/Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ ENV REAPER_SEGMENT_COUNT_PER_NODE=64 \
2727
REAPER_STORAGE_TYPE=memory \
2828
REAPER_ENABLE_CROSS_ORIGIN=true \
2929
REAPER_INCREMENTAL_REPAIR=false \
30+
REAPER_SUBRANGE_INCREMENTAL=false \
3031
REAPER_BLACKLIST_TWCS=false \
3132
REAPER_ENABLE_DYNAMIC_SEED_LIST=true \
3233
REAPER_REPAIR_MANAGER_SCHEDULING_INTERVAL_SECONDS=30 \

src/server/src/main/docker/cassandra-reaper.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ hangingRepairTimeoutMins: ${REAPER_HANGING_REPAIR_TIMEOUT_MINS}
2323
storageType: ${REAPER_STORAGE_TYPE}
2424
enableCrossOrigin: ${REAPER_ENABLE_CROSS_ORIGIN}
2525
incrementalRepair: ${REAPER_INCREMENTAL_REPAIR}
26+
subrangeIncrementalRepair: ${REAPER_SUBRANGE_INCREMENTAL}
2627
blacklistTwcsTables: ${REAPER_BLACKLIST_TWCS}
2728
enableDynamicSeedList: ${REAPER_ENABLE_DYNAMIC_SEED_LIST}
2829
repairManagerSchedulingIntervalSeconds: ${REAPER_REPAIR_MANAGER_SCHEDULING_INTERVAL_SECONDS}

src/server/src/main/java/io/cassandrareaper/AppContext.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ public String getLocalNodeAddress() {
5656
return localNodeAddress;
5757
}
5858

59-
private static class Private {
59+
private static final class Private {
6060
private static final Logger LOG = LoggerFactory.getLogger(AppContext.class);
6161
private static final String DEFAULT_INSTANCE_ADDRESS = "127.0.0.1";
6262

src/server/src/main/java/io/cassandrareaper/ReaperApplication.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,7 @@ private void schedulePurge(ScheduledExecutorService scheduler) {
421421
private void checkConfiguration(ReaperApplicationConfiguration config) {
422422
LOG.debug("repairIntensity: {}", config.getRepairIntensity());
423423
LOG.debug("incrementalRepair: {}", config.getIncrementalRepair());
424+
LOG.debug("subrangeIncrementalRepair: {}", config.getSubrangeIncrementalRepair());
424425
LOG.debug("repairRunThreadCount: {}", config.getRepairRunThreadCount());
425426
LOG.debug("segmentCount: {}", config.getSegmentCount());
426427
LOG.debug("repairParallelism: {}", config.getRepairParallelism());

src/server/src/main/java/io/cassandrareaper/ReaperApplicationConfiguration.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,10 @@ public final class ReaperApplicationConfiguration extends Configuration {
7474
@DefaultValue("false")
7575
private Boolean incrementalRepair;
7676

77+
@JsonProperty
78+
@DefaultValue("false")
79+
private Boolean subrangeIncrementalRepair;
80+
7781
@JsonProperty
7882
private Boolean blacklistTwcsTables;
7983

@@ -237,6 +241,14 @@ public void setIncrementalRepair(boolean incrementalRepair) {
237241
this.incrementalRepair = incrementalRepair;
238242
}
239243

244+
public boolean getSubrangeIncrementalRepair() {
245+
return subrangeIncrementalRepair != null ? subrangeIncrementalRepair : false;
246+
}
247+
248+
public void setSubrangeIncrementalRepair(boolean subrangeIncrementalRepair) {
249+
this.subrangeIncrementalRepair = subrangeIncrementalRepair;
250+
}
251+
240252
public boolean getBlacklistTwcsTables() {
241253
return blacklistTwcsTables != null ? blacklistTwcsTables : false;
242254
}
@@ -578,6 +590,9 @@ public static final class AutoSchedulingConfiguration {
578590
@JsonProperty
579591
private Boolean incremental;
580592

593+
@JsonProperty
594+
private Boolean subrangeIncrementalRepair;
595+
581596
@JsonProperty
582597
private Integer percentUnrepairedThreshold;
583598

@@ -658,6 +673,14 @@ public void setIncremental(Boolean incremental) {
658673
this.incremental = incremental;
659674
}
660675

676+
public Boolean subrangeIncrementalRepair() {
677+
return subrangeIncrementalRepair == null ? false : subrangeIncrementalRepair;
678+
}
679+
680+
public void setSubrangeIncrementalRepair(Boolean subrangeIncrementalRepair) {
681+
this.subrangeIncrementalRepair = subrangeIncrementalRepair;
682+
}
683+
661684
public Integer getPercentUnrepairedThreshold() {
662685
return percentUnrepairedThreshold == null ? -1 : percentUnrepairedThreshold;
663686
}
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/*
2+
* Copyright 2024-2024 DataStax, Inc.
3+
*
4+
*
5+
* Licensed under the Apache License, Version 2.0 (the "License");
6+
* you may not use this file except in compliance with the License.
7+
* You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package io.cassandrareaper.core;
19+
20+
/**
 * The kinds of repair that can be requested.
 *
 * <p>Each constant is either incremental or full (see {@link #isIncremental()} and
 * {@link #isFull()}), and may additionally be a subrange repair (see {@link #isSubrange()}).
 *
 * <p>String conversions use {@link java.util.Locale#ROOT} so that the textual form does not
 * depend on the JVM default locale (e.g. the Turkish dotted/dotless "i" case mappings).
 */
public enum RepairType {
  SUBRANGE_FULL,
  INCREMENTAL,
  SUBRANGE_INCREMENTAL;

  /**
   * Returns the lower-case textual form of this repair type.
   *
   * @return the constant name in lower case, e.g. {@code "subrange_incremental"}
   */
  public String getRepairType() {
    // Locale.ROOT keeps the result stable regardless of the default locale.
    return name().toLowerCase(java.util.Locale.ROOT);
  }

  /**
   * Resolves a repair type from its name, ignoring case.
   *
   * @param name the constant name in any case, e.g. {@code "incremental"}
   * @return the matching constant
   * @throws IllegalArgumentException if no constant matches {@code name}
   * @throws NullPointerException if {@code name} is null
   */
  public static RepairType fromName(String name) {
    // Locale.ROOT avoids locale-specific upper-casing that would never match a constant name.
    return valueOf(name.toUpperCase(java.util.Locale.ROOT));
  }

  /**
   * Tells whether this is one of the incremental repair types.
   *
   * @return {@code true} for {@link #INCREMENTAL} and {@link #SUBRANGE_INCREMENTAL}
   */
  public boolean isIncremental() {
    return this == INCREMENTAL || this == SUBRANGE_INCREMENTAL;
  }

  /**
   * Tells whether this is the full (non-incremental) repair type.
   *
   * @return {@code true} only for {@link #SUBRANGE_FULL}
   */
  public boolean isFull() {
    return this == SUBRANGE_FULL;
  }

  /**
   * Tells whether this is one of the subrange repair types.
   *
   * @return {@code true} for {@link #SUBRANGE_FULL} and {@link #SUBRANGE_INCREMENTAL}
   */
  public boolean isSubrange() {
    return this == SUBRANGE_FULL || this == SUBRANGE_INCREMENTAL;
  }
}

src/server/src/main/java/io/cassandrareaper/core/RepairUnit.java

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ public final class RepairUnit {
3131
private final String keyspaceName;
3232
private final Set<String> columnFamilies;
3333
private final boolean incrementalRepair;
34+
private final boolean subrangeIncrementalRepair;
3435
private final Set<String> nodes;
3536
private final Set<String> datacenters;
3637
private final Set<String> blacklistedTables;
@@ -42,7 +43,9 @@ private RepairUnit(Builder builder, UUID id) {
4243
this.clusterName = builder.clusterName;
4344
this.keyspaceName = builder.keyspaceName;
4445
this.columnFamilies = builder.columnFamilies;
45-
this.incrementalRepair = builder.incrementalRepair;
46+
// If subrange incremental repair is true, we set incremental repair to true as well
47+
this.incrementalRepair = builder.incrementalRepair || builder.subrangeIncrementalRepair;
48+
this.subrangeIncrementalRepair = builder.subrangeIncrementalRepair;
4649
this.nodes = builder.nodes;
4750
this.datacenters = builder.datacenters;
4851
this.blacklistedTables = builder.blacklistedTables;
@@ -74,6 +77,10 @@ public boolean getIncrementalRepair() {
7477
return incrementalRepair;
7578
}
7679

80+
public boolean getSubrangeIncrementalRepair() {
81+
return subrangeIncrementalRepair;
82+
}
83+
7784
public Set<String> getNodes() {
7885
return nodes;
7986
}
@@ -104,6 +111,7 @@ public static final class Builder {
104111
public String keyspaceName;
105112
public Set<String> columnFamilies = Collections.emptySet();
106113
public Boolean incrementalRepair;
114+
public Boolean subrangeIncrementalRepair;
107115
public Set<String> nodes = Collections.emptySet();
108116
public Set<String> datacenters = Collections.emptySet();
109117
public Set<String> blacklistedTables = Collections.emptySet();
@@ -117,6 +125,7 @@ private Builder(RepairUnit original) {
117125
keyspaceName = original.keyspaceName;
118126
columnFamilies = original.columnFamilies;
119127
incrementalRepair = original.incrementalRepair;
128+
subrangeIncrementalRepair = original.subrangeIncrementalRepair;
120129
nodes = original.nodes;
121130
datacenters = original.datacenters;
122131
blacklistedTables = original.blacklistedTables;
@@ -144,6 +153,11 @@ public Builder incrementalRepair(boolean incrementalRepair) {
144153
return this;
145154
}
146155

156+
public Builder subrangeIncrementalRepair(boolean subrangeIncrementalRepair) {
157+
this.subrangeIncrementalRepair = subrangeIncrementalRepair;
158+
return this;
159+
}
160+
147161
public Builder nodes(Set<String> nodes) {
148162
this.nodes = Collections.unmodifiableSet(nodes);
149163
return this;
@@ -173,6 +187,8 @@ public RepairUnit build(UUID id) {
173187
Preconditions.checkState(null != clusterName, "clusterName(..) must be called before build(..)");
174188
Preconditions.checkState(null != keyspaceName, "keyspaceName(..) must be called before build(..)");
175189
Preconditions.checkState(null != incrementalRepair, "incrementalRepair(..) must be called before build(..)");
190+
Preconditions.checkState(null != subrangeIncrementalRepair,
191+
"subrangeIncrementalRepair(..) must be called before build(..)");
176192
Preconditions.checkState(null != repairThreadCount, "repairThreadCount(..) must be called before build(..)");
177193
Preconditions.checkState(null != timeout, "timeout(..) must be called before build(..)");
178194
return new RepairUnit(this, id);
@@ -190,6 +206,8 @@ public int hashCode() {
190206
hash *= 59;
191207
hash += (this.incrementalRepair ? 2 : 1);
192208
hash *= 59;
209+
hash += (this.subrangeIncrementalRepair ? 8 : 4);
210+
hash *= 59;
193211
hash += Objects.hashCode(this.nodes);
194212
hash *= 59;
195213
hash += Objects.hashCode(this.datacenters);
@@ -212,6 +230,7 @@ public boolean equals(Object obj) {
212230
}
213231

214232
return Objects.equals(this.incrementalRepair, ((Builder) obj).incrementalRepair)
233+
&& Objects.equals(this.subrangeIncrementalRepair, ((Builder) obj).subrangeIncrementalRepair)
215234
&& Objects.equals(this.clusterName, ((Builder) obj).clusterName)
216235
&& Objects.equals(this.keyspaceName, ((Builder) obj).keyspaceName)
217236
&& Objects.equals(this.columnFamilies, ((Builder) obj).columnFamilies)

src/server/src/main/java/io/cassandrareaper/management/ClusterFacade.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -924,7 +924,7 @@ private Collection<String> enforceLocalNodeForSidecar(Collection<String> endpoin
924924
: endpoints;
925925
}
926926

927-
private static class Async {
927+
private static final class Async {
928928
private static final ExecutorService ASYNC = Executors.newSingleThreadExecutor();
929929

930930
private static boolean markClusterActive(Cluster cluster, AppContext context) {

0 commit comments

Comments
 (0)