diff --git a/charts/milvus/Chart.yaml b/charts/milvus/Chart.yaml
index 92e1c8ab..71f9ad91 100644
--- a/charts/milvus/Chart.yaml
+++ b/charts/milvus/Chart.yaml
@@ -1,9 +1,9 @@
 apiVersion: v1
 name: milvus
-appVersion: "2.2.0"
+appVersion: "2.2.1"
 kubeVersion: "^1.10.0-0"
 description: Milvus is an open-source vector database built to power AI applications and vector similarity search.
-version: 3.3.6
+version: 4.0.0
 keywords:
 - milvus
 - elastic
diff --git a/charts/milvus/README.md b/charts/milvus/README.md
index a0f9a35c..3a2bc5e6 100644
--- a/charts/milvus/README.md
+++ b/charts/milvus/README.md
@@ -35,6 +35,7 @@ Assume the release name is `my-release`:
 $ helm upgrade --install my-release --set cluster.enabled=false --set etcd.replicaCount=1 --set pulsar.enabled=false --set minio.mode=standalone milvus/milvus
 ```
 By default, milvus standalone uses `rocksmq` as message queue. You can also use `pulsar` or `kafka` as message queue:
+
 ```bash
 # Helm v3.x
 # Milvus Standalone with pulsar as message queue
@@ -67,11 +68,24 @@ $ helm upgrade --install my-release milvus/milvus --set pulsar.enabled=false --s
 > **IMPORTANT** If you have installed a milvus cluster with version below v2.1.x, you need follow the instructions at here: https://github.com/milvus-io/milvus/blob/master/deployments/migrate-meta/README.md. After meta migration, you use `helm upgrade` to update your cluster again.
 
 E.g. to scale out query node from 1(default) to 2:
+
 ```bash
 # Helm v3.x
 $ helm upgrade --install --set queryNode.replicas=2 my-release milvus/milvus
 ```
 
+### Breaking Changes
+> **IMPORTANT** Milvus helm chart 4.0.0 introduces breaking changes to the Milvus configuration. Previously, you could set the segment size like this: `--set dataCoordinator.segment.maxSize=1024`. All of these shortcut config options have now been removed. Instead, set the values under `extraConfigFiles` like this:
+```yaml
+extraConfigFiles:
+  user.yaml: |+
+    dataCoord:
+      segment:
+        maxSize: 1024
+```
+
+So if you deployed a cluster with a helm chart version below 4.0.0 and also specified extra config, you need to set those configs under `extraConfigFiles` when running `helm upgrade`.
+
 ## Uninstall the Chart
 
 ```bash
@@ -125,8 +139,6 @@ The following table lists the configurable parameters of the Milvus Service and
 | `metrics.enabled` | Export Prometheus monitoring metrics | `true` |
 | `metrics.serviceMonitor.enabled` | Create ServiceMonitor for Prometheus operator | `false` |
 | `metrics.serviceMonitor.additionalLabels` | Additional labels that can be used so ServiceMonitor will be discovered by Prometheus | `unset` |
-| `metadata.rootPath` | Root of key prefix to etcd | `by-dev` |
-| `authorization.enabled` | Enable milvus authorization | `false` |
 | `log.level` | Logging level to be used. Valid levels are `debug`, `info`, `warn`, `error`, `fatal` | `info` |
 | `log.file.maxSize` | The size limit of the log file (MB) | `300` |
 | `log.file.maxAge` | The maximum number of days that the log is retained.
(day) | `10` | @@ -140,38 +152,6 @@ The following table lists the configurable parameters of the Milvus Service and | `log.persistence.persistentVolumeClaim.accessModes` | The Milvus logs data Persistence access modes | `ReadWriteOnce` | | `log.persistence.persistentVolumeClaim.size` | The size of Milvus logs data Persistent Volume Storage Class | `5Gi` | | `log.persistence.persistentVolumeClaim.subPath` | SubPath for Milvus logs data mount | `unset` | -| `msgChannel.chanNamePrefix.cluster` | Pulsar topic name prefix | `by-dev` | -| `quotaAndLimits.enabled` | Enable milvus quota and limits | `false` | -| `quotaAndLimits.quotaCenterCollectInterval` | Collect metrics interval | `3` | -| `quotaAndLimits.ddl.eabled` | Enable milvus ddl limit | `false` | -| `quotaAndLimits.ddl.collectionRate` | Milvus ddl collection rate qps | `unset` | -| `quotaAndLimits.ddl.partitionRate` | Milvus ddl partition rate qps | `unset` | -| `quotaAndLimits.indexRate.eabled` | Enable milvus index rate limit | `false` | -| `quotaAndLimits.indexRate.max` | Milvus max index rate qps | `unset` | -| `quotaAndLimits.flushRate.eabled` | Enable milvus flush rate limit | `false` | -| `quotaAndLimits.flushRate.max` | Milvus max flush rate qps | `unset` | -| `quotaAndLimits.compactionRate.eabled` | Enable milvus compaction rate limit | `false` | -| `quotaAndLimits.compactionRate.max` | Milvus max compaction rate qps | `unset` | -| `quotaAndLimits.dml.eabled` | Enable milvus dml limit | `false` | -| `quotaAndLimits.dml.insertRate.max` | Milvus dml max insert rate MB/s | `unset` | -| `quotaAndLimits.dml.deleteRate.max` | Milvus dml max delete rate MB/s | `unset` | -| `quotaAndLimits.dml.bulkLoadRate.max` | Milvus dml max bulk load rate MB/s | `unset` | -| `quotaAndLimits.dql.eabled` | Enable milvus dql limit | `false` | -| `quotaAndLimits.dql.searchRate.max` | Milvus dml max search vps | `unset` | -| `quotaAndLimits.dql.queryRate.max` | Milvus dml max query qps | `unset` | -| `quotaAndLimits.limitWriting.forceDeny` | Deny write requests if quota exceeded | `false` | -| `quotaAndLimits.limitWriting.ttProtection.enabled` | Enable milvus time tick protection | `true` | -| `quotaAndLimits.limitWriting.ttProtection.maxTimeTickDelay` | Max time tick delay in seconds | `30` | -| `quotaAndLimits.limitWriting.memProtection.enabled` | Enable milvus memory protection | `true` | -| `quotaAndLimits.limitWriting.memProtection.dataNodeMemoryLowWaterLevel` | Low water level for data node | `0.85` | -| `quotaAndLimits.limitWriting.memProtection.dataNodeMemoryHighWaterLevel` | High water level for data node | `0.95` | -| `quotaAndLimits.limitWriting.memProtection.queryNodeMemoryLowWaterLevel` | Low water level for query node | `0.85` | -| `quotaAndLimits.limitWriting.memProtection.queryNodeMemoryHighWaterLevel` | High water level for query node | `0.95` | -| `quotaAndLimits.limitReading.forceDeny` | Deny read requests if quota exceeded | `false` | -| `quotaAndLimits.limitReading.queueProtection.enabled` | Enable queue protection | `false` | -| `quotaAndLimits.limitReading.queueProtection.nqInQueueThreshold` | NQ in queue threshold | `unset` | -| `quotaAndLimits.limitReading.queueProtection.queueLatencyThreshold` | Queue latency threshold | `unset` | -| `quotaAndLimits.limitReading.queueProtection.coolOffSpeed` | Cooloff speed | `0.9` | | `externalS3.enabled` | Enable or disable external S3 | `false` | | `externalS3.host` | The host of the external S3 | `unset` | | `externalS3.port` | The port of the external S3 | `unset` | @@ -221,8 +201,6 @@ The 
following table lists the configurable parameters of the Milvus Standalone c | `standalone.profiling.enabled` | Whether to enable live profiling | `false` | | `standalone.extraEnv` | Additional Milvus Standalone container environment variables | `[]` | | `standalone.messageQueue` | Message queue for Milvus Standalone: rocksmq, pulsar, kafka | `rocksmq` | -| `standalone.rocksmq.retentionTimeInMinutes` | Set the retention time of rocksmq | `10080` | -| `standalone.rocksmq.retentionSizeInMB` | Set the retention size of rocksmq | `0` | | `standalone.persistence.enabled` | Use persistent volume to store Milvus standalone data | `true` | | `standalone.persistence.mountPath` | Milvus standalone data persistence volume mount path | `/var/lib/milvus` | | `standalone.persistence.annotations` | PersistentVolumeClaim annotations | `{}` | @@ -314,9 +292,6 @@ The following table lists the configurable parameters of the Milvus Query Node c | `queryNode.disk.enabled` | Whether to enable disk for query | `true` | | `queryNode.profiling.enabled` | Whether to enable live profiling | `false` | | `queryNode.extraEnv` | Additional Milvus Query Node container environment variables | `[]` | -| `queryNode.grouping.enabled` | Enable grouping small nq search | `true` | -| `queryNode.grouping.maxNQ` | Grouping small nq search max threshold | `1000` | -| `queryNode.scheduler.maxReadConcurrentRatio` | Concurrency ratio of read tasks | `2.0` | ### Milvus Index Coordinator Deployment Configuration @@ -341,7 +316,6 @@ The following table lists the configurable parameters of the Milvus Index Coordi | `indexCoordinator.service.loadBalancerIP` | IP address to assign to load balancer (if supported) | `unset` | | `indexCoordinator.service.loadBalancerSourceRanges` | List of IP CIDRs allowed access to lb (if supported) | `[]` | | `indexCoordinator.service.externalIPs` | Service external IP addresses | `[]` | -| `indexCoordinator.gc.interval` | GC interval in seconds | `600` | ### Milvus Index Node Deployment Configuration @@ -359,7 +333,6 @@ The following table lists the configurable parameters of the Milvus Index Node c | `indexNode.disk.enabled` | Whether to enable disk for index node | `true` | | `indexNode.profiling.enabled` | Whether to enable live profiling | `false` | | `indexNode.extraEnv` | Additional Milvus Index Node container environment variables | `[]` | -| `indexNode.scheduler.buildParallel` | Index task build paralellism | `1` | ### Milvus Data Coordinator Deployment Configuration @@ -374,13 +347,6 @@ The following table lists the configurable parameters of the Milvus Data Coordin | `dataCoordinator.tolerations` | Toleration labels for Milvus Data Coordinator pods assignment | `[]` | | `dataCoordinator.heaptrack.enabled` | Whether to enable heaptrack | `false` | | `dataCoordinator.profiling.enabled` | Whether to enable live profiling | `false` | -| `dataCoordinator.segment.maxSize` | Maximum size of a segment in MB | `512` | -| `dataCoordinator.segment.diskSegmentMaxSize` | Maximum size of a segment in MB for disk index collection | `2048` | -| `dataCoordinator.segment.sealProportion` | Minimum proportion for a segment which can be sealed | `0.25` | -| `dataCoordinator.segment.maxLife` | Maximum lifetime of a segment in seconds | `3600` | -| `dataCoordinator.segment.maxIdleTime` | Maximum idle time for growing segment in seconds | `300` | -| `dataCoordinator.segment.minSizeFromIdleToSealed` | The minimum size in MB of segment which can be idle from sealed | `16` | -| `dataCoordinator.segment.smallProportion` | 
The proportion for a sealed segment, which would not be compacted | `0.9` | | `dataCoordinator.extraEnv` | Additional Milvus Data Coordinator container environment variables | `[]` | | `dataCoordinator.service.type` | Service type | `ClusterIP` | | `dataCoordinator.service.port` | Port where service is exposed | `19530` | diff --git a/charts/milvus/templates/config.tpl b/charts/milvus/templates/config.tpl index d2d2b913..4384075b 100644 --- a/charts/milvus/templates/config.tpl +++ b/charts/milvus/templates/config.tpl @@ -24,9 +24,6 @@ etcd: - {{ .Release.Name }}-{{ .Values.etcd.name }}:{{ .Values.etcd.service.port }} {{- end }} {{- end }} - rootPath: {{ .Values.metadata.rootPath }} - metaSubPath: meta # metaRootPath = rootPath + '/' + metaSubPath - kvSubPath: kv # kvRootPath = rootPath + '/' + kvSubPath metastore: {{- if or .Values.mysql.enabled .Values.externalMysql.enabled }} @@ -157,13 +154,6 @@ kafka: messageQueue: rocksmq -rocksmq: - path: "{{ .Values.standalone.persistence.mountPath }}/rdb_data" - rocksmqPageSize: "{{ .Values.standalone.rocksmq.rocksmqPageSize }}" # 2 GB - retentionTimeInMinutes: {{ .Values.standalone.rocksmq.retentionTimeInMinutes }} - retentionSizeInMB: {{ .Values.standalone.rocksmq.retentionSizeInMB }} - lrucacheratio: {{ .Values.standalone.rocksmq.lrucacheratio }} - {{- end }} rootCoord: @@ -174,26 +164,9 @@ rootCoord: {{- end }} port: {{ .Values.rootCoordinator.service.port }} - dmlChannelNum: "{{ .Values.rootCoordinator.dmlChannelNum }}" # The number of dml channels created at system startup - maxPartitionNum: "{{ .Values.rootCoordinator.maxPartitionNum }}" # Maximum number of partitions in a collection - minSegmentSizeToEnableIndex: "{{ .Values.rootCoordinator.minSegmentSizeToEnableIndex }}" # It's a threshold. When the segment size is less than this value, the segment will not be indexed - proxy: port: 19530 internalPort: 19529 - http: - enabled: {{ .Values.proxy.http.enabled }} # Whether to enable the http server - debug_mode: {{ .Values.proxy.http.debugMode.enabled }} # Whether to enable http server debug mode - - timeTickInterval: "{{ .Values.proxy.timeTickInterval }}" # ms, the interval that proxy synchronize the time tick - msgStream: - timeTick: - bufSize: 512 - maxNameLength: 255 # Maximum length of name for a collection or alias - maxFieldNum: "{{ .Values.proxy.maxFieldNum }}" # max field number of a collection - maxDimension: 32768 # Maximum dimension of a vector - maxShardNum: "{{ .Values.proxy.maxShardNum }}" # Maximum number of shards in a collection - maxTaskNum: "{{ .Values.proxy.maxTaskNum }}" # max task number of proxy task queue queryCoord: {{- if .Values.cluster.enabled }} @@ -202,55 +175,15 @@ queryCoord: address: localhost {{- end }} port: {{ .Values.queryCoordinator.service.port }} - autoHandoff: {{ .Values.queryCoordinator.autoHandoff }} - autoBalance: {{ .Values.queryCoordinator.autoBalance }} - overloadedMemoryThresholdPercentage: 90 - balanceIntervalSeconds: 60 - memoryUsageMaxDifferencePercentage: 30 - checkInterval: "{{ .Values.queryCoordinator.checkInterval }}" - channelTaskTimeout: "{{ .Values.queryCoordinator.channelTaskTimeout }}" - segmentTaskTimeout: "{{ .Values.queryCoordinator.segmentTaskTimeout }}" - distPullInterval: "{{ .Values.queryCoordinator.distPullInterval }}" - loadTimeoutSeconds: "{{ .Values.queryCoordinator.loadTimeoutSeconds }}" - checkHandoffInterval: "{{ .Values.queryCoordinator.checkHandoffInterval }}" - taskMergeCap: "{{ .Values.queryCoordinator.taskMergeCap }}" queryNode: port: 21123 - loadMemoryUsageFactor: 
3 # The multiply factor of calculating the memory usage while loading segments {{- if .Values.cluster.enabled }} enableDisk: {{ .Values.queryNode.disk.enabled }} # Enable querynode load disk index, and search on disk index {{- else }} enableDisk: {{ .Values.standalone.disk.enabled }} # Enable querynode load disk index, and search on disk index {{- end }} - stats: - publishInterval: 1000 # Interval for querynode to report node information (milliseconds) - dataSync: - flowGraph: - maxQueueLength: 1024 # Maximum length of task queue in flowgraph - maxParallelism: 1024 # Maximum number of tasks executed in parallel in the flowgraph - segcore: - chunkRows: {{ .Values.queryNode.segcore.chunkRows }} # The number of vectors in a chunk. - smallIndex: - nlist: 128 # small index nlist, recommend to set sqrt(chunkRows), must smaller than chunkRows/8 - nprobe: 16 # nprobe to search small index, based on your accuracy requirement, must smaller than nlist - cache: - enabled: true - memoryLimit: 2147483648 # 2 GB, 2 * 1024 *1024 *1024 - - scheduler: - receiveChanSize: 10240 - unsolvedQueueSize: 10240 - maxReadConcurrentRatio: "{{ .Values.queryNode.scheduler.maxReadConcurrentRatio }}" - cpuRatio: 10.0 # ratio used to estimate read task cpu usage. - - grouping: - enabled: {{ .Values.queryNode.grouping.enabled }} - maxNQ: "{{ .Values.queryNode.grouping.maxNQ }}" - topKMergeRatio: 10.0 - - indexCoord: {{- if .Values.cluster.enabled }} address: {{ template "milvus.indexcoord.fullname" . }} @@ -259,9 +192,6 @@ indexCoord: {{- end }} port: {{ .Values.indexCoordinator.service.port }} - gc: - interval: {{ .Values.indexCoordinator.gc.interval }} # gc interval in seconds - indexNode: port: 21121 @@ -271,10 +201,6 @@ indexNode: enableDisk: {{ .Values.standalone.disk.enabled }} # Enable index node build disk vector index {{- end }} - scheduler: - buildParallel: {{ .Values.indexNode.scheduler.buildParallel }} # one index node can run how many index tasks in parallel - - dataCoord: {{- if .Values.cluster.enabled }} address: {{ template "milvus.datacoord.fullname" . 
}} @@ -283,39 +209,9 @@ dataCoord: {{- end }} port: {{ .Values.dataCoordinator.service.port }} - enableCompaction: {{ .Values.dataCoordinator.enableCompaction }} - enableGarbageCollection: {{ .Values.dataCoordinator.enableGarbageCollection }} - - segment: - maxSize: "{{ .Values.dataCoordinator.segment.maxSize }}" # Maximum size of a segment in MB - diskSegmentMaxSize: "{{ .Values.dataCoordinator.segment.diskSegmentMaxSize }}" # Maximum size of segment in MB for disk index collection - sealProportion: "{{ .Values.dataCoordinator.segment.sealProportion }}" # It's the minimum proportion for a segment which can be sealed - assignmentExpiration: 2000 # The time of the assignment expiration in ms - maxLife: "{{ .Values.dataCoordinator.segment.maxLife }}" # The max lifetime of segment in seconds, 60*60 - maxIdleTime: "{{ .Values.dataCoordinator.segment.maxIdleTime }}" # The maximum idle time of a growing segment in seconds, 5*60 - minSizeFromIdleToSealed: "{{ .Values.dataCoordinator.segment.minSizeFromIdleToSealed }}" # The minimum size in MB of segment which can be idle from sealed - smallProportion: "{{ .Values.dataCoordinator.segment.smallProportion }}" # The proportion for a sealed segment, which would not be compacted - - compaction: - enableAutoCompaction: {{ .Values.dataCoordinator.compaction.enableAutoCompaction }} - - gc: - interval: {{ .Values.dataCoordinator.gc.interval }} # gc interval in seconds - missingTolerance: {{ .Values.dataCoordinator.gc.missingTolerance }} # file meta missing tolerance duration in seconds, 1 day - dropTolerance: {{ .Values.dataCoordinator.gc.dropTolerance }} # file belongs to dropped entity tolerance duration in seconds, 1 day - dataNode: port: 21124 - dataSync: - flowGraph: - maxQueueLength: 1024 # Maximum length of task queue in flowgraph - maxParallelism: 1024 # Maximum number of tasks executed in parallel in the flowgraph - segment: - insertBufSize: "{{ .Values.dataNode.segment.insertBufSize }}" # Bytes, 16 MB - deleteBufBytes: "{{ .Values.dataNode.segment.deleteBufBytes }}" # Bytes, 64 MB - syncPeriod: "{{ .Values.dataNode.segment.syncPeriod }}" # Seconds, 10min - log: level: {{ .Values.log.level }} file: @@ -329,129 +225,4 @@ log: maxBackups: {{ .Values.log.file.maxBackups }} format: {{ .Values.log.format }} -grpc: - log: - level: WARNING - - serverMaxRecvSize: 2147483647 # math.MaxInt32 - serverMaxSendSize: 2147483647 # math.MaxInt32 - clientMaxRecvSize: 104857600 # 100 MB, 100 * 1024 * 1024 - clientMaxSendSize: 104857600 # 100 MB, 100 * 1024 * 1024 - - client: - dialTimeout: 5000 - keepAliveTime: 10000 - keepAliveTimeout: 20000 - maxMaxAttempts: 5 - initialBackOff: 1.0 - maxBackoff: 60.0 - backoffMultiplier: 2.0 - -common: - # Channel name generation rule: ${namePrefix}-${ChannelIdx} - chanNamePrefix: - cluster: {{ .Values.msgChannel.chanNamePrefix.cluster }} - rootCoordTimeTick: "rootcoord-timetick" - rootCoordStatistics: "rootcoord-statistics" - rootCoordDml: "rootcoord-dml" - rootCoordDelta: "rootcoord-delta" - search: "search" - searchResult: "searchResult" - queryTimeTick: "queryTimeTick" - queryNodeStats: "query-node-stats" - # Cmd for loadIndex, flush, etc... 
- cmd: "cmd" - dataCoordStatistic: "datacoord-statistics-channel" - dataCoordTimeTick: "datacoord-timetick-channel" - dataCoordSegmentInfo: "segment-info-channel" - - # Sub name generation rule: ${subNamePrefix}-${NodeID} - subNamePrefix: - rootCoordSubNamePrefix: "rootCoord" - proxySubNamePrefix: "proxy" - queryNodeSubNamePrefix: "queryNode" - dataNodeSubNamePrefix: "dataNode" - dataCoordSubNamePrefix: "dataCoord" - - defaultPartitionName: "_default" # default partition name for a collection - defaultIndexName: "_default_idx" # default index name - retentionDuration: {{ .Values.common.compaction.retentionDuration }} - entityExpiration: -1 # Entity expiration in seconds, CAUTION make sure entityExpiration >= retentionDuration and -1 means never expire - - gracefulTime: 5000 # milliseconds. it represents the interval (in ms) by which the request arrival time needs to be subtracted in the case of Bounded Consistency. - security: - authorizationEnabled: {{ .Values.authorization.enabled }} - simdType: {{ .Values.common.simdType }} # default to auto - indexSliceSize: 16 # MB - threadCoreCoefficient: {{ .Values.common.threadCoreCoefficient }} - - storageType: minio - mem_purge_ratio: 0.2 # in Linux os, if memory-fragmentation-size >= used-memory * ${mem_purge_ratio}, then do `malloc_trim` - -quotaAndLimits: - enabled: {{ .Values.quotaAndLimits.enabled }} - quotaCenterCollectInterval: {{ .Values.quotaAndLimits.quotaCenterCollectInterval }} # seconds - - ddl: - enabled: {{ .Values.quotaAndLimits.ddl.enabled }} - collectionRate: {{ .Values.quotaAndLimits.ddl.collectionRate }} - partitionRate: {{ .Values.quotaAndLimits.ddl.partitionRate }} - - indexRate: - enabled: {{ .Values.quotaAndLimits.indexRate.enabled }} - max: {{ .Values.quotaAndLimits.indexRate.max }} - flushRate: - enabled: {{ .Values.quotaAndLimits.flushRate.enabled }} - max: {{ .Values.quotaAndLimits.flushRate.max }} - compactionRate: - enabled: {{ .Values.quotaAndLimits.compactionRate.enabled }} - max: {{ .Values.quotaAndLimits.compactionRate.max }} - - dml: - enabled: {{ .Values.quotaAndLimits.dml.enabled }} - insertRate: - max: {{ .Values.quotaAndLimits.dml.insertRate.max }} - deleteRate: - max: {{ .Values.quotaAndLimits.dml.deleteRate.max }} - bulkLoadRate: - max: {{ .Values.quotaAndLimits.dml.bulkLoadRate.max }} - - dql: - enabled: {{ .Values.quotaAndLimits.dql.enabled }} - searchRate: - max: {{ .Values.quotaAndLimits.dql.searchRate.max }} - queryRate: - max: {{ .Values.quotaAndLimits.dql.queryRate.max }} - - limitWriting: - forceDeny: {{ .Values.quotaAndLimits.limitWriting.forceDeny }} - - ttProtection: - enabled: {{ .Values.quotaAndLimits.limitWriting.ttProtection.enabled }} - maxTimeTickDelay: {{ .Values.quotaAndLimits.limitWriting.ttProtection.maxTimeTickDelay }} # seconds - - memProtection: - enabled: {{ .Values.quotaAndLimits.limitWriting.memProtection.enabled }} - dataNodeMemoryLowWaterLevel: {{ .Values.quotaAndLimits.limitWriting.memProtection.dataNodeMemoryLowWaterLevel }} - dataNodeMemoryHighWaterLevel: {{ .Values.quotaAndLimits.limitWriting.memProtection.dataNodeMemoryHighWaterLevel }} - queryNodeMemoryLowWaterLevel: {{ .Values.quotaAndLimits.limitWriting.memProtection.queryNodeMemoryLowWaterLevel }} - queryNodeMemoryHighWaterLevel: {{ .Values.quotaAndLimits.limitWriting.memProtection.queryNodeMemoryHighWaterLevel }} - diskProtection: - enabled: {{ .Values.quotaAndLimits.limitWriting.diskProtection.enabled }} - diskQuota: {{ .Values.quotaAndLimits.limitWriting.diskProtection.diskQuota }} - - limitReading: - forceDeny: {{ 
.Values.quotaAndLimits.limitReading.forceDeny }} - - queueProtection: - enabled: {{ .Values.quotaAndLimits.limitReading.queueProtection.enabled }} - nqInQueueThreshold: {{ .Values.quotaAndLimits.limitReading.queueProtection.nqInQueueThreshold }} - queueLatencyThreshold: {{ .Values.quotaAndLimits.limitReading.queueProtection.queueLatencyThreshold }} - - resultProtection: - enabled: {{ .Values.quotaAndLimits.limitReading.resultProtection.enabled }} - maxReadResultRate: {{ .Values.quotaAndLimits.limitReading.resultProtection.maxReadResultRate }} - - coolOffSpeed: {{ .Values.quotaAndLimits.limitReading.coolOffSpeed }} - {{- end }} diff --git a/charts/milvus/values.yaml b/charts/milvus/values.yaml index d17ccb2c..a68ec070 100644 --- a/charts/milvus/values.yaml +++ b/charts/milvus/values.yaml @@ -5,7 +5,7 @@ cluster: image: all: repository: milvusdb/milvus - tag: v2.2.0 + tag: v2.2.1 pullPolicy: IfNotPresent ## Optionally specify an array of imagePullSecrets. ## Secrets must be manually created in the namespace. @@ -155,20 +155,6 @@ log: size: 10Gi subPath: "" -metadata: - rootPath: "by-dev" - -## pulsar message channel -## -msgChannel: - chanNamePrefix: - cluster: "by-dev" - -## milvus authorization -## -authorization: - enabled: false - ## Heaptrack traces all memory allocations and annotates these events with stack traces. ## See more: https://github.com/KDE/heaptrack ## Enable heaptrack in production is not recommended. @@ -178,114 +164,6 @@ heaptrack: tag: v0.1.0 pullPolicy: IfNotPresent -## Configuration for Milvus quota and limits. -# By default, we enable: -# 1. TT protection; -# 2. Memory protection; -# 3. Disk quota protection; -# You can enable: -# 1. DML throughput limitation; -# 2. DDL, DQL qps/rps limitation; -# 3. DQL Queue length/latency protection; -# 4. DQL result rate protection; -# If necessary, you can also manually force to deny RW requests. -quotaAndLimits: - enabled: true # `true` to enable quota and limits, `false` to disable. - - # quotaCenterCollectInterval is the time interval that quotaCenter - # collects metrics from Proxies, Query cluster and Data cluster. - quotaCenterCollectInterval: 3 # seconds, (0 ~ 65536) - - ddl: # ddl limit rates, default no limit. - enabled: false - collectionRate: -1 # qps, default no limit, rate for CreateCollection, DropCollection, LoadCollection, ReleaseCollection - partitionRate: -1 # qps, default no limit, rate for CreatePartition, DropPartition, LoadPartition, ReleasePartition - - indexRate: - enabled: false - max: -1 # qps, default no limit, rate for CreateIndex, DropIndex - flushRate: - enabled: false - max: -1 # qps, default no limit, rate for flush - compactionRate: - enabled: false - max: -1 # qps, default no limit, rate for manualCompaction - - # dml limit rates, default no limit. - # The maximum rate will not be greater than `max`. - dml: - enabled: false - insertRate: - max: -1 # MB/s, default no limit - deleteRate: - max: -1 # MB/s, default no limit - bulkLoadRate: # not support yet. TODO: limit bulkLoad rate - max: -1 # MB/s, default no limit - - # dql limit rates, default no limit. - # The maximum rate will not be greater than `max`. - dql: - enabled: false - searchRate: - max: -1 # vps (vectors per second), default no limit - queryRate: - max: -1 # qps, default no limit - - # limitWriting decides whether dml requests are allowed. 
- limitWriting: - # forceDeny `false` means dml requests are allowed (except for some - # specific conditions, such as memory of nodes to water marker), `true` means always reject all dml requests. - forceDeny: false - ttProtection: - enabled: false - # maxTimeTickDelay indicates the backpressure for DML Operations. - # DML rates would be reduced according to the ratio of time tick delay to maxTimeTickDelay, - # if time tick delay is greater than maxTimeTickDelay, all DML requests would be rejected. - maxTimeTickDelay: 300 # in seconds - memProtection: - enabled: true - # When memory usage > memoryHighWaterLevel, all dml requests would be rejected; - # When memoryLowWaterLevel < memory usage < memoryHighWaterLevel, reduce the dml rate; - # When memory usage < memoryLowWaterLevel, no action. - # memoryLowWaterLevel should be less than memoryHighWaterLevel. - dataNodeMemoryLowWaterLevel: 0.85 # (0, 1], memoryLowWaterLevel in DataNodes - dataNodeMemoryHighWaterLevel: 0.95 # (0, 1], memoryHighWaterLevel in DataNodes - queryNodeMemoryLowWaterLevel: 0.85 # (0, 1], memoryLowWaterLevel in QueryNodes - queryNodeMemoryHighWaterLevel: 0.95 # (0, 1], memoryHighWaterLevel in QueryNodes - diskProtection: - # When the total file size of object storage is greater than `diskQuota`, all dml requests would be rejected; - enabled: true - diskQuota: -1 # MB, (0, +inf), default no limit - - # limitReading decides whether dql requests are allowed. - limitReading: - # forceDeny `false` means dql requests are allowed (except for some - # specific conditions, such as collection has been dropped), `true` means always reject all dql requests. - forceDeny: false - - queueProtection: - enabled: false - # nqInQueueThreshold indicated that the system was under backpressure for Search/Query path. - # If NQ in any QueryNode's queue is greater than nqInQueueThreshold, search&query rates would gradually cool off - # until the NQ in queue no longer exceeds nqInQueueThreshold. We think of the NQ of query request as 1. - nqInQueueThreshold: -1 # int, default no limit - - # queueLatencyThreshold indicated that the system was under backpressure for Search/Query path. - # If dql latency of queuing is greater than queueLatencyThreshold, search&query rates would gradually cool off - # until the latency of queuing no longer exceeds queueLatencyThreshold. - # The latency here refers to the averaged latency over a period of time. - queueLatencyThreshold: -1 # milliseconds, default no limit - - resultProtection: - enabled: false - # maxReadResultRate indicated that the system was under backpressure for Search/Query path. - # If dql result rate is greater than maxReadResultRate, search&query rates would gradually cool off - # until the read result rate no longer exceeds maxReadResultRate. - maxReadResultRate: -1 # MB/s, default no limit - - # coolOffSpeed is the speed of search&query rates cool off. 
- coolOffSpeed: 0.9 # (0, 1] - standalone: replicas: 1 # Run standalone mode with replication disabled resources: {} @@ -295,9 +173,7 @@ standalone: nodeSelector: {} affinity: {} tolerations: [] - extraEnv: - - name: GODEBUG - value: "madvdontneed=1" + extraEnv: [] heaptrack: enabled: false disk: @@ -310,12 +186,6 @@ standalone: ## Default message queue for milvus standalone ## Supported value: rocksmq, pulsar and kafka messageQueue: rocksmq - rocksmq: - retentionTimeInMinutes: 10080 ## 7 days - retentionSizeInMB: 8192 ## 8 GB - rocksmqPageSize: "2147483648" ## 2 GB - lrucacheratio: 0.06 ## rocksdb cache memory ratio - persistence: mountPath: "/var/lib/milvus" ## If true, alertmanager will create/use a Persistent Volume Claim @@ -344,9 +214,7 @@ proxy: nodeSelector: {} affinity: {} tolerations: [] - extraEnv: - - name: GODEBUG - value: "madvdontneed=1" + extraEnv: [] heaptrack: enabled: false profiling: @@ -356,11 +224,6 @@ proxy: debugMode: enabled: false - timeTickInterval: 200 # ms, the interval that proxy synchronize the time tick - maxFieldNum: 256 # max field number of a collection - maxShardNum: 256 # Maximum number of shards in a collection - maxTaskNum: 1024 # max task number of proxy task queue - rootCoordinator: enabled: true replicas: 1 # Run Root Coordinator mode with replication disabled @@ -368,18 +231,12 @@ rootCoordinator: nodeSelector: {} affinity: {} tolerations: [] - extraEnv: - - name: GODEBUG - value: "madvdontneed=1" + extraEnv: [] heaptrack: enabled: false profiling: enabled: false # Enable live profiling - dmlChannelNum: 256 # The number of dml channels created at system startup - maxPartitionNum: 4096 # Maximum number of partitions in a collection - minSegmentSizeToEnableIndex: 1024 # It's a threshold. When the segment size is less than this value, the segment will not be indexed - service: port: 53100 annotations: {} @@ -393,9 +250,7 @@ queryCoordinator: nodeSelector: {} affinity: {} tolerations: [] - extraEnv: - - name: GODEBUG - value: "madvdontneed=1" + extraEnv: [] heaptrack: enabled: false profiling: @@ -407,16 +262,6 @@ queryCoordinator: labels: {} clusterIP: "" - autoHandoff: true # Enable auto handoff - autoBalance: true # Disable auto balance - checkInterval: 1000 # 1000ms - channelTaskTimeout: 60000 # 1 minute - segmentTaskTimeout: 120000 # 2 minute - distPullInterval: 500 # 500ms - loadTimeoutSeconds: 600 - checkHandoffInterval: 5000 # 5000ms - taskMergeCap: 16 - queryNode: enabled: true replicas: 1 @@ -427,9 +272,7 @@ queryNode: nodeSelector: {} affinity: {} tolerations: [] - extraEnv: - - name: GODEBUG - value: "madvdontneed=1" + extraEnv: [] heaptrack: enabled: false disk: @@ -439,16 +282,6 @@ queryNode: profiling: enabled: false # Enable live profiling - segcore: - chunkRows: 1024 # The number of vectors in a chunk. 
- - grouping: - enabled: true # Grouping small nq search - maxNQ: 1000 - - scheduler: - maxReadConcurrentRatio: "2.0" - indexCoordinator: enabled: true replicas: 1 # Run Index Coordinator mode with replication disabled @@ -456,17 +289,12 @@ indexCoordinator: nodeSelector: {} affinity: {} tolerations: [] - extraEnv: - - name: GODEBUG - value: "madvdontneed=1" + extraEnv: [] heaptrack: enabled: false profiling: enabled: false # Enable live profiling - gc: - interval: 600 # gc interval in seconds - service: port: 31000 annotations: {} @@ -483,9 +311,7 @@ indexNode: nodeSelector: {} affinity: {} tolerations: [] - extraEnv: - - name: GODEBUG - value: "madvdontneed=1" + extraEnv: [] heaptrack: enabled: false profiling: @@ -495,10 +321,6 @@ indexNode: size: enabled: false # Enable local storage size limit - ## Specify how many index tasks can parallelly run in the same index node - scheduler: - buildParallel: 1 - dataCoordinator: enabled: true replicas: 1 # Run Data Coordinator mode with replication disabled @@ -506,35 +328,12 @@ dataCoordinator: nodeSelector: {} affinity: {} tolerations: [] - extraEnv: - - name: GODEBUG - value: "madvdontneed=1" + extraEnv: [] heaptrack: enabled: false profiling: enabled: true # Enable live profiling - enableCompaction: true - enableGarbageCollection: true - - segment: - maxSize: 512 # Maximum size of a segment in MB - diskSegmentMaxSize: 2048 # Maximum segment size in MB for disk index collection - sealProportion: 0.25 # Minimum proportion for a segment which can be sealed - maxLife: 3600 # The max lifetime of segment in seconds, 60*60 - maxIdleTime: 300 # The maximum idle time of a growing segment in seconds, 5*60 - minSizeFromIdleToSealed: 16 # The minimum size in MB of segment which can be idle from sealed - smallProportion: 0.9 # The proportion for a sealed segment, which would not be compacted - - compaction: - enableAutoCompaction: true - - gc: - interval: 3600 # gc interval in seconds - missingTolerance: 86400 # file meta missing tolerance duration in seconds, 1 day - dropTolerance: 86400 # file belongs to dropped entity tolerance duration in seconds, 1 day - - service: port: 13333 annotations: {} @@ -548,31 +347,12 @@ dataNode: nodeSelector: {} affinity: {} tolerations: [] - extraEnv: - - name: GODEBUG - value: "madvdontneed=1" + extraEnv: [] heaptrack: enabled: false profiling: enabled: false # Enable live profiling - segment: - # Max buffer size to flush for a single segment. - insertBufSize: "16777216" # Bytes, 16 MB - # Max buffer size to flush del for a single channel - deleteBufBytes: "67108864" # Bytes, 64MB - # The period to sync segments if buffer is not empty. - syncPeriod: 600 # Seconds, 10min - -common: - compaction: - retentionDuration: 0 # time travel reserved time, insert/delete will not be cleaned in this period. - # Default value: auto - # Valid values: [auto, avx512, avx2, avx, sse4_2] - simdType: auto - # Specify how many times the number of threads in pool is the number of cores - threadCoreCoefficient: "10" - attu: enabled: false name: attu @@ -861,6 +641,7 @@ pulsar: -XX:+ExitOnOutOfMemoryError maxMessageSize: "104857600" defaultRetentionTimeInMinutes: "10080" + defaultRetentionSizeInMB: "-1" backlogQuotaDefaultLimitGB: "8" ttlDurationDefaultInSeconds: "259200" subscriptionExpirationTimeMinutes: "30"
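
For users upgrading from a pre-4.0.0 chart, a minimal migration sketch for the `extraConfigFiles` change described in the README above: move the removed shortcut options into a values file and pass it to `helm upgrade`. The file name `milvus-values.yaml`, and the choice of `dataCoord.segment.maxSize` and `quotaAndLimits.enabled` with the values shown, are illustrative only, not part of this chart.

```bash
# Write the former shortcut options as raw milvus.yaml keys under extraConfigFiles.
# File name and example values below are placeholders; adjust to your previous --set overrides.
cat > milvus-values.yaml <<'EOF'
extraConfigFiles:
  user.yaml: |+
    dataCoord:
      segment:
        maxSize: 1024
    quotaAndLimits:
      enabled: true
EOF

# Helm v3.x: upgrade the existing release with the migrated configuration.
helm upgrade --install my-release milvus/milvus -f milvus-values.yaml
```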