Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Transparent* database sharding #46639

Merged
merged 18 commits into from
Aug 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
c09ec95
feat: track expected output columns in query builder
icewind1991 Jul 31, 2024
114db05
fix: don't make ICacheFactory depend on database
icewind1991 Aug 8, 2024
c58bdbf
fix: delay calculating global cache prefix untill a cache is created
icewind1991 Aug 20, 2024
f5b3486
feat: add option to automatically partition queries by specific tables
icewind1991 Jun 13, 2024
62f8b65
feat: implement distributing partitioned queries over multiple shards
icewind1991 Jul 31, 2024
ddbeb4c
test: mark share test cleanup as running across all shards
icewind1991 Jul 16, 2024
fc05a67
fix: only allow pre-defined shards
icewind1991 Jul 18, 2024
4d9b563
test: run sharding tests in ci
icewind1991 Jul 18, 2024
390f6a7
fix: hint storage id in more places
icewind1991 Jul 19, 2024
2eaeeee
fix: run mimetype repair query across all shards
icewind1991 Jul 19, 2024
382d102
test: fix share provider tests for sharding
icewind1991 Jul 19, 2024
80a2553
fix: make background scan job compatible with sharding
icewind1991 Jul 25, 2024
e538f46
fix: adjust systemtag orphan cleanup query to work with sharding
icewind1991 Jul 31, 2024
cc091b1
fix: fix share cleanup for deleted groups with sharding
icewind1991 Aug 6, 2024
b21a399
fix: implement sharding compatible cleanup for various bits
icewind1991 Aug 15, 2024
1363e14
fix: make preload custom proterties sharding compatible
icewind1991 Aug 21, 2024
9d02485
fix: mark systemconfig value as not being tainted because they are im…
icewind1991 Aug 22, 2024
2574cbf
chore: Apply php:cs recommendations
artonge Aug 28, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
184 changes: 184 additions & 0 deletions .github/workflows/phpunit-mysql-sharding.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
# This workflow is provided via the organization template repository
#
# https://github.com/nextcloud/.github
# https://docs.github.com/en/actions/learn-github-actions/sharing-workflows-with-your-organization
#
# SPDX-FileCopyrightText: 2022-2024 Nextcloud GmbH and Nextcloud contributors
# SPDX-License-Identifier: MIT

name: PHPUnit sharding

on:
pull_request:
schedule:
- cron: "5 2 * * *"

permissions:
contents: read

concurrency:
group: phpunit-mysql-sharding-${{ github.head_ref || github.run_id }}
cancel-in-progress: true

jobs:
changes:
runs-on: ubuntu-latest-low

outputs:
src: ${{ steps.changes.outputs.src }}

steps:
- uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # v3.0.2
id: changes
continue-on-error: true
with:
filters: |
src:
- '.github/workflows/**'
- '3rdparty/**'
- '**/appinfo/**'
- '**/lib/**'
- '**/templates/**'
- '**/tests/**'
- 'vendor/**'
- 'vendor-bin/**'
- '.php-cs-fixer.dist.php'
- 'composer.json'
- 'composer.lock'
- '**.php'

phpunit-mysql:
runs-on: ubuntu-latest

needs: changes
if: needs.changes.outputs.src != 'false'

strategy:
matrix:
php-versions: ['8.1']
mysql-versions: ['8.4']

name: Sharding - MySQL ${{ matrix.mysql-versions }} (PHP ${{ matrix.php-versions }}) - database tests

services:
cache:
image: ghcr.io/nextcloud/continuous-integration-redis:latest
ports:
- 6379:6379/tcp
options: --health-cmd="redis-cli ping" --health-interval=10s --health-timeout=5s --health-retries=3

mysql:
image: ghcr.io/nextcloud/continuous-integration-mysql-${{ matrix.mysql-versions }}:latest
ports:
- 4444:3306/tcp
env:
MYSQL_ROOT_PASSWORD: rootpassword
MYSQL_USER: oc_autotest
MYSQL_PASSWORD: nextcloud
MYSQL_DATABASE: oc_autotest
options: --health-cmd="mysqladmin ping" --health-interval 5s --health-timeout 2s --health-retries 10
shard1:
image: ghcr.io/nextcloud/continuous-integration-mysql-${{ matrix.mysql-versions }}:latest
ports:
- 5001:3306/tcp
env:
MYSQL_ROOT_PASSWORD: rootpassword
MYSQL_USER: oc_autotest
MYSQL_PASSWORD: nextcloud
MYSQL_DATABASE: nextcloud
options: --health-cmd="mysqladmin ping" --health-interval 5s --health-timeout 2s --health-retries 10
shard2:
image: ghcr.io/nextcloud/continuous-integration-mysql-${{ matrix.mysql-versions }}:latest
ports:
- 5002:3306/tcp
env:
MYSQL_ROOT_PASSWORD: rootpassword
MYSQL_USER: oc_autotest
MYSQL_PASSWORD: nextcloud
MYSQL_DATABASE: nextcloud
options: --health-cmd="mysqladmin ping" --health-interval 5s --health-timeout 2s --health-retries 10
shard3:
image: ghcr.io/nextcloud/continuous-integration-mysql-${{ matrix.mysql-versions }}:latest
ports:
- 5003:3306/tcp
env:
MYSQL_ROOT_PASSWORD: rootpassword
MYSQL_USER: oc_autotest
MYSQL_PASSWORD: nextcloud
MYSQL_DATABASE: nextcloud
options: --health-cmd="mysqladmin ping" --health-interval 5s --health-timeout 2s --health-retries 10
shard4:
image: ghcr.io/nextcloud/continuous-integration-mysql-${{ matrix.mysql-versions }}:latest
ports:
- 5004:3306/tcp
env:
MYSQL_ROOT_PASSWORD: rootpassword
MYSQL_USER: oc_autotest
MYSQL_PASSWORD: nextcloud
MYSQL_DATABASE: nextcloud
options: --health-cmd="mysqladmin ping" --health-interval 5s --health-timeout 2s --health-retries 10

steps:
- name: Checkout server
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
with:
submodules: true

- name: Set up php ${{ matrix.php-versions }}
uses: shivammathur/setup-php@2e947f1f6932d141d076ca441d0e1e881775e95b #v2.31.0
with:
php-version: ${{ matrix.php-versions }}
# https://docs.nextcloud.com/server/stable/admin_manual/installation/source_installation.html#prerequisites-for-manual-installation
extensions: bz2, ctype, curl, dom, fileinfo, gd, iconv, intl, json, libxml, mbstring, openssl, pcntl, posix, redis, session, simplexml, xmlreader, xmlwriter, zip, zlib, mysql, pdo_mysql
coverage: ${{ matrix.coverage && 'xdebug' || 'none' }}
ini-file: development
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- name: Set up dependencies
run: composer i

- name: Enable ONLY_FULL_GROUP_BY MySQL option
run: |
echo "SET GLOBAL sql_mode=(SELECT CONCAT(@@sql_mode,',ONLY_FULL_GROUP_BY'));" | mysql -h 127.0.0.1 -P 4444 -u root -prootpassword
echo "SELECT @@sql_mode;" | mysql -h 127.0.0.1 -P 4444 -u root -prootpassword

- name: Set up Nextcloud
env:
DB_PORT: 4444
SHARDING: 1
run: |
mkdir data
cp tests/redis.config.php config/
cp tests/preseed-config.php config/config.php
./occ maintenance:install --verbose --database=mysql --database-name=nextcloud --database-host=127.0.0.1 --database-port=$DB_PORT --database-user=root --database-pass=rootpassword --admin-user admin --admin-pass admin
php -f tests/enable_all.php | grep -i -C9999 error && echo "Error during app setup" && exit 1 || exit 0

- name: PHPUnit
run: composer run test:db ${{ matrix.coverage && ' -- --coverage-clover ./clover.db.xml' || '' }}

- name: Upload db code coverage
if: ${{ !cancelled() && matrix.coverage }}
uses: codecov/codecov-action@v4.1.1
with:
files: ./clover.db.xml
flags: phpunit-mysql

- name: Print logs
if: always()
run: |
cat data/nextcloud.log

summary:
permissions:
contents: none
runs-on: ubuntu-latest-low
needs: [changes, phpunit-mysql]

if: always()

name: phpunit-mysql-summary

steps:
- name: Summary status
run: if ${{ needs.changes.outputs.src != 'false' && needs.phpunit-mysql.result != 'success' }}; then exit 1; fi
18 changes: 9 additions & 9 deletions apps/dav/lib/DAV/CustomPropertiesBackend.php
Original file line number Diff line number Diff line change
Expand Up @@ -364,16 +364,16 @@ private function getPublishedProperties(string $path, array $requestedProperties
private function cacheDirectory(string $path, Directory $node): void {
$prefix = ltrim($path . '/', '/');
$query = $this->connection->getQueryBuilder();
$query->select('name', 'propertypath', 'propertyname', 'propertyvalue', 'valuetype')
$query->select('name', 'p.propertypath', 'p.propertyname', 'p.propertyvalue', 'p.valuetype')
->from('filecache', 'f')
->leftJoin('f', 'properties', 'p', $query->expr()->andX(
$query->expr()->eq('propertypath', $query->func()->concat(
$query->createNamedParameter($prefix),
'name'
)),
$query->expr()->eq('userid', $query->createNamedParameter($this->user->getUID()))
))
->where($query->expr()->eq('parent', $query->createNamedParameter($node->getInternalFileId(), IQueryBuilder::PARAM_INT)));
->hintShardKey('storage', $node->getNode()->getMountPoint()->getNumericStorageId())
->leftJoin('f', 'properties', 'p', $query->expr()->eq('p.propertypath', $query->func()->concat(
$query->createNamedParameter($prefix),
'f.name'
)),
)
->where($query->expr()->eq('parent', $query->createNamedParameter($node->getInternalFileId(), IQueryBuilder::PARAM_INT)))
->andWhere($query->expr()->eq('p.userid', $query->createNamedParameter($this->user->getUID())));
$result = $query->executeQuery();

$propsByPath = [];
Expand Down
86 changes: 69 additions & 17 deletions apps/files/lib/BackgroundJob/DeleteOrphanedItems.php
Original file line number Diff line number Diff line change
Expand Up @@ -52,34 +52,86 @@ public function run($argument) {
* @param string $typeCol
* @return int Number of deleted entries
*/
protected function cleanUp($table, $idCol, $typeCol) {
protected function cleanUp(string $table, string $idCol, string $typeCol): int {
$deletedEntries = 0;

$query = $this->connection->getQueryBuilder();
$query->select('t1.' . $idCol)
->from($table, 't1')
->where($query->expr()->eq($typeCol, $query->expr()->literal('files')))
->andWhere($query->expr()->isNull('t2.fileid'))
->leftJoin('t1', 'filecache', 't2', $query->expr()->eq($query->expr()->castColumn('t1.' . $idCol, IQueryBuilder::PARAM_INT), 't2.fileid'))
->groupBy('t1.' . $idCol)
->setMaxResults(self::CHUNK_SIZE);

$deleteQuery = $this->connection->getQueryBuilder();
$deleteQuery->delete($table)
->where($deleteQuery->expr()->in($idCol, $deleteQuery->createParameter('objectid')));
->where($deleteQuery->expr()->eq($idCol, $deleteQuery->createParameter('objectid')));

if ($this->connection->getShardDefinition('filecache')) {
$sourceIdChunks = $this->getItemIds($table, $idCol, $typeCol, 1000);
foreach ($sourceIdChunks as $sourceIdChunk) {
$deletedSources = $this->findMissingSources($sourceIdChunk);
$deleteQuery->setParameter('objectid', $deletedSources, IQueryBuilder::PARAM_INT_ARRAY);
$deletedEntries += $deleteQuery->executeStatement();
}
} else {
$query = $this->connection->getQueryBuilder();
$query->select('t1.' . $idCol)
->from($table, 't1')
->where($query->expr()->eq($typeCol, $query->expr()->literal('files')))
->leftJoin('t1', 'filecache', 't2', $query->expr()->eq($query->expr()->castColumn('t1.' . $idCol, IQueryBuilder::PARAM_INT), 't2.fileid'))
->andWhere($query->expr()->isNull('t2.fileid'))
->groupBy('t1.' . $idCol)
->setMaxResults(self::CHUNK_SIZE);

$deleteQuery = $this->connection->getQueryBuilder();
$deleteQuery->delete($table)
->where($deleteQuery->expr()->in($idCol, $deleteQuery->createParameter('objectid')));

$deletedInLastChunk = self::CHUNK_SIZE;
while ($deletedInLastChunk === self::CHUNK_SIZE) {
$chunk = $query->executeQuery()->fetchAll(\PDO::FETCH_COLUMN);
$deletedInLastChunk = count($chunk);
$deletedInLastChunk = self::CHUNK_SIZE;
while ($deletedInLastChunk === self::CHUNK_SIZE) {
$chunk = $query->executeQuery()->fetchAll(\PDO::FETCH_COLUMN);
$deletedInLastChunk = count($chunk);

$deleteQuery->setParameter('objectid', $chunk, IQueryBuilder::PARAM_INT_ARRAY);
$deletedEntries += $deleteQuery->executeStatement();
$deleteQuery->setParameter('objectid', $chunk, IQueryBuilder::PARAM_INT_ARRAY);
$deletedEntries += $deleteQuery->executeStatement();
}
}

return $deletedEntries;
}

/**
* @param string $table
* @param string $idCol
* @param string $typeCol
* @param int $chunkSize
* @return \Iterator<int[]>
* @throws \OCP\DB\Exception
*/
private function getItemIds(string $table, string $idCol, string $typeCol, int $chunkSize): \Iterator {
$query = $this->connection->getQueryBuilder();
$query->select($idCol)
->from($table)
->where($query->expr()->eq($typeCol, $query->expr()->literal('files')))
->groupBy($idCol)
->andWhere($query->expr()->gt($idCol, $query->createParameter('min_id')))
->setMaxResults($chunkSize);

$minId = 0;
while (true) {
$query->setParameter('min_id', $minId);
$rows = $query->executeQuery()->fetchAll(\PDO::FETCH_COLUMN);
if (count($rows) > 0) {
$minId = $rows[count($rows) - 1];
yield $rows;
} else {
break;
}
}
}

private function findMissingSources(array $ids): array {
$qb = $this->connection->getQueryBuilder();
$qb->select('fileid')
->from('filecache')
->where($qb->expr()->in('fileid', $qb->createNamedParameter($ids, IQueryBuilder::PARAM_INT_ARRAY)));
$found = $qb->executeQuery()->fetchAll(\PDO::FETCH_COLUMN);
return array_diff($ids, $found);
}

/**
* Deleting orphaned system tag mappings
*
Expand Down
Loading
Loading