Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
a9577b7
feat(import): add command to import markdown files into a collective
mejo- Dec 18, 2025
25fca67
feat(ImportService): provide immediate progress feedback via callback
mejo- Dec 22, 2025
cb447c8
feat(ImportService): Regard readme as index page
mejo- Dec 22, 2025
9939955
feat(import): try to rewrite internal links
mejo- Dec 22, 2025
c9dd20d
fix(PageService): Add more lightweight `createBase()` function
mejo- Dec 23, 2025
f9910b8
chore: fix some typos
mejo- Jan 12, 2026
15bffdf
feat(markdown): add `getImagesfromContent()` function
mejo- Jan 12, 2026
3d084c9
feat(ImportMarkdownDirectory): rewrite internal attachment references
mejo- Jan 12, 2026
af8354f
fix(PageService): Allow to pass page content to `newPage()`
mejo- Jan 15, 2026
e42cd7f
fix(ImportService): don't keep full PageInfo to lower memory footprint
mejo- Jan 16, 2026
a41c1dd
fix(AttachmentService): cache attachmentDirectory, pass content to ne…
mejo- Jan 16, 2026
8486fb0
fix(PageService): cache allPageInfos
mejo- Jan 16, 2026
7829c98
chore(ImportService): refactor usage of progressCallback
mejo- Jan 16, 2026
2d3cc0c
fix(PageService): pass content directly to `newFile()`
mejo- Jan 16, 2026
15d3678
docs: add basic documentation on how to import markdown files
mejo- Jan 16, 2026
5cb3d98
fix(ImportService): get parentPageInfo when necessary
mejo- Jan 16, 2026
19ac0f7
fix(ImportService): no duplicates if page and directory have same name
mejo- Feb 9, 2026
00b66ff
fix(ImportService): fix processing dokuwiki links with leading colon
mejo- Feb 17, 2026
d833579
fix(ImportService): Migrate warning boxes to our syntax
mejo- Feb 17, 2026
5b3d6de
fix(ImportService): try to resolve internal links at subdirectory import
mejo- Feb 17, 2026
4483c1d
test(playwright): add basic test for importing markdown
mejo- Feb 19, 2026
cf7c829
test(playwright): reuse apiUrl and ocsHeaders
mejo- Feb 21, 2026
d93816a
chore(ImportService): move progress callback into a ProgressReporter …
mejo- Feb 25, 2026
18fcbc6
chore(ImportService): use more instance variables
mejo- Feb 25, 2026
d9de203
chore(ImportService): remove superfluous temporary message variables
mejo- Feb 25, 2026
2731bf2
chore(ImportMarkdownDirectory): use `$output->isVerbose()`
mejo- Feb 26, 2026
75eba20
chore(psalm): remove superfluous Symfony stubs
mejo- Feb 26, 2026
3c53ae1
fix(fts): continue with indexing collectives after one failed
mejo- Feb 26, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions REUSE.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,21 @@ SPDX-FileCopyrightText = "none"
SPDX-License-Identifier = "CC0-1.0"

[[annotations]]
path = ["cypress/fixtures/**", "openapi.json", "tests/phpunit.xml", "tests/psalm-baseline.xml", "tests/Integration/features/fixtures/**", "vendor-bin/**"]
path = ["cypress/fixtures/**", "openapi.json", "playwright/support/fixtures/files/**", "tests/phpunit.xml", "tests/psalm-baseline.xml", "tests/Integration/features/fixtures/**", "vendor-bin/**"]
precedence = "aggregate"
SPDX-FileCopyrightText = "2020-2024 Nextcloud GmbH and Nextcloud contributors"
SPDX-FileCopyrightText = "2020-2026 Nextcloud GmbH and Nextcloud contributors"
SPDX-License-Identifier = "AGPL-3.0-or-later"

[[annotations]]
path = ["docs/archetypes/default.md", "docs/content/**", "docs/static/images/**"]
precedence = "aggregate"
SPDX-FileCopyrightText = "2020-2024 Nextcloud GmbH and Nextcloud contributors"
SPDX-FileCopyrightText = "2020-2026 Nextcloud GmbH and Nextcloud contributors"
SPDX-License-Identifier = "AGPL-3.0-or-later"

[[annotations]]
path = ["l10n/**.js", "l10n/**.json", "skeleton/**.md"]
precedence = "aggregate"
SPDX-FileCopyrightText = "2020-2024 Nextcloud GmbH and Nextcloud contributors"
SPDX-FileCopyrightText = "2020-2026 Nextcloud GmbH and Nextcloud contributors"
SPDX-License-Identifier = "AGPL-3.0-or-later"

[[annotations]]
Expand Down
1 change: 1 addition & 0 deletions appinfo/info.xml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ In your Nextcloud instance, simply navigate to **»Apps«**, find the
<command>OCA\Collectives\Command\CreateCollective</command>
<command>OCA\Collectives\Command\ExpirePageVersions</command>
<command>OCA\Collectives\Command\GenerateSlugs</command>
<command>OCA\Collectives\Command\ImportMarkdownDirectory</command>
<command>OCA\Collectives\Command\IndexCollectives</command>
<command>OCA\Collectives\Command\PageTrashCleanup</command>
<command>OCA\Collectives\Command\PurgeObsoletePages</command>
Expand Down
34 changes: 34 additions & 0 deletions docs/content/administration/_index.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,37 @@ this also allows adding entire groups to collectives.

Keep in mind, though, that in contrast to teams, groups can only be
managed by server admins.

## Importing existing data

It's possible to import existing Markdown files with `occ collectives:import:markdown`.

The command imports Markdown files from a directory as new pages into a collective. After
importing all files, it processes relative links and referenced local attachments in the
Markdown files. It tries to fix links to other pages and uploads referenced attachments when
the source file is found in the import directory.

Please be aware that the command is memory-intensive. When importing a directory with many
Markdown files, make sure to increase the PHP memory limit accordingly:

```shell
php -d memory_limit=<X>G ./occ collectives:import:markdown -c <collectiveId> -u <userId> /path/to/markdown/files
```

Tests show that importing 500 Markdown files without attachments needs around 1.5GB of memory.

### Importing from Dokuwiki

The Markdown directory import command (see above) supports importing Markdown files
generated from a Dokuwiki instance and tries to fix relative links to other pages and
upload referenced attachments.

Importing is tested with Markdown files generated with the [Dokuwiki2Markdown](https://github.com/mm503/Dokuwiki2Markdown)
tool.

Here's an example of how to import from a Dokuwiki instance:

```shell
/path/to/doku2md.py -d /path/to/dokuwiki/data/pages -T
php -d memory_limit=2G ./occ collectives:import:markdown -c 123 -u alice /path/to/dokuwiki/data/pages
```
107 changes: 107 additions & 0 deletions lib/Command/ImportMarkdownDirectory.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
<?php

declare(strict_types=1);

/**
* SPDX-FileCopyrightText: 2025 Nextcloud GmbH and Nextcloud contributors
* SPDX-License-Identifier: AGPL-3.0-or-later
*/

namespace OCA\Collectives\Command;

use OCA\Collectives\Service\AttachmentService;
use OCA\Collectives\Service\CollectiveService;
use OCA\Collectives\Service\ImportService;
use OCA\Collectives\Service\NotFoundException;
use OCA\Collectives\Service\PageService;
use OCA\Collectives\Service\ProgressReporter;
use OCP\Files\IMimeTypeDetector;
use OCP\IUserManager;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;

/**
 * occ command `collectives:import:markdown`.
 *
 * Validates the given collective and user, then hands the directory over to
 * an ImportService instance and prints a summary line when the import is done.
 */
class ImportMarkdownDirectory extends Command {
	public function __construct(
		private readonly CollectiveService $collectiveService,
		private readonly IUserManager $userManager,
		private readonly PageService $pageService,
		private readonly AttachmentService $attachmentService,
		private readonly IMimeTypeDetector $mimeTypeDetector,
	) {
		parent::__construct();
	}

	/**
	 * Declares name, description, help text, the `directory` argument and the
	 * `collective-id`, `user-id` and `parent-id` options.
	 */
	protected function configure(): void {
		$this->setName('collectives:import:markdown');
		$this->setDescription('Import markdown files from a directory to a collective');
		$this->setHelp('<info>Memory-intensive operation if importing many files. Consider to raise memory limit with `php -d memory_limit=<X>G occ ...`</info>');

		$this->addArgument('directory', InputArgument::REQUIRED, 'Directory containing markdown files to import');
		$this->addOption('collective-id', 'c', InputOption::VALUE_REQUIRED, 'Collective ID to import into');
		$this->addOption('user-id', 'u', InputOption::VALUE_REQUIRED, 'UserId of collective member performing the import');
		$this->addOption('parent-id', 'p', InputOption::VALUE_REQUIRED, 'Parent page ID for the import (0 for root)', '0');

		parent::configure();
	}

	/**
	 * Runs the import.
	 *
	 * @return int 0 after a completed import, 1 when an option is missing, the
	 *             user or collective cannot be resolved, or the import throws
	 *             a NotFoundException
	 */
	protected function execute(InputInterface $input, OutputInterface $output): int {
		$collectiveId = (int)$input->getOption('collective-id');
		$sourceDirectory = $input->getArgument('directory');
		$memberId = $input->getOption('user-id');
		$parentPageId = (int)$input->getOption('parent-id');

		$reporter = new ProgressReporter($output, $output->isVerbose());

		// The options are checked by hand: a missing or non-numeric
		// collective ID casts to 0, a missing user ID is null.
		if ($collectiveId === 0) {
			return $this->fail($reporter, 'Required option missing: --collective-id=COLLECTIVE_ID');
		}
		if ($memberId === null) {
			return $this->fail($reporter, 'Required option missing: --user-id=USER_ID');
		}

		// The user has to exist ...
		$member = $this->userManager->get($memberId);
		if (!$member) {
			return $this->fail($reporter, 'User ' . $memberId . ' not found');
		}

		// ... and the collective has to be retrievable for that user.
		try {
			$collective = $this->collectiveService->getCollective($collectiveId, $memberId);
		} catch (NotFoundException $e) {
			$message = str_starts_with($e->getMessage(), 'Circle not found')
				? 'Collective with ID ' . $collectiveId . ' not accessible for user ' . $memberId
				: 'Collective with ID ' . $collectiveId . ' not found';
			return $this->fail($reporter, $message);
		}

		$importService = new ImportService(
			$this->pageService,
			$this->attachmentService,
			$this->mimeTypeDetector,
			$reporter,
			$collective,
			$member,
		);

		try {
			$importService->importDirectory($sourceDirectory, $parentPageId);
		} catch (NotFoundException $e) {
			return $this->fail($reporter, $e->getMessage());
		}

		$reporter->writeInfo('');
		$reporter->writeInfo('Processed ' . $importService->getCount() . ' file(s) for collective "' . $collective->getName() . '" (ID: ' . $collectiveId . ').');

		return 0;
	}

	/**
	 * Writes an error message through the reporter and returns exit code 1.
	 */
	private function fail(ProgressReporter $reporter, string $message): int {
		$reporter->writeError($message);
		return 1;
	}
}
4 changes: 2 additions & 2 deletions lib/Command/IndexCollectives.php
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,10 @@ protected function execute(InputInterface $input, OutputInterface $output): int
$output->writeln('<info>done</info>');
} catch (MissingDependencyException|NotFoundException|NotPermittedException) {
$output->writeln("<error>Failed to find team associated with collective with ID={$collective->getId()}</error>");
return 1;
continue;
} catch (FileSearchException) {
$output->writeln('<error>Failed to save the indices to the collectives folder.</error>');
return 1;
continue;
}
}

Expand Down
73 changes: 67 additions & 6 deletions lib/Fs/MarkdownHelper.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@
use League\CommonMark\Environment\Environment;
use League\CommonMark\Exception\CommonMarkException;
use League\CommonMark\Extension\CommonMark\CommonMarkCoreExtension;
use League\CommonMark\Extension\CommonMark\Node\Inline\Image;
use League\CommonMark\Extension\CommonMark\Node\Inline\Link;
use League\CommonMark\Node\Inline\Text;
use League\CommonMark\Node\Node;
use League\CommonMark\Node\NodeWalker;
use League\CommonMark\Parser\MarkdownParser;
use OC;
use OCA\Collectives\Db\Collective;
Expand All @@ -36,17 +38,21 @@ private static function collectText(Node $node): string {
return $out;
}

/**
 * Parses the content with a CommonMark core environment and returns the
 * node walker of the resulting document.
 */
private static function getDocumentWalker(string $content): NodeWalker {
	$commonMarkEnvironment = new Environment();
	$commonMarkEnvironment->addExtension(new CommonMarkCoreExtension());

	return (new MarkdownParser($commonMarkEnvironment))
		->parse($content)
		->walker();
}

/**
* Extracts markdown links and returns them with link text, href and title
* Extracts Markdown links and returns them with link text, href and title
*
* @throws CommonMarkException
*/
public static function getLinksFromContent(string $content): array {
$environment = new Environment();
$environment->addExtension(new CommonMarkCoreExtension());
$parser = new MarkdownParser($environment);
$document = $parser->parse($content);
$walker = $document->walker();
$walker = self::getDocumentWalker($content);

$links = [];
while ($event = $walker->next()) {
Expand Down Expand Up @@ -74,6 +80,40 @@ public static function getLinksFromContent(string $content): array {
return $links;
}

/**
 * Extracts Markdown images and returns them with alt text, url and title
 *
 * Walks the parsed document and creates one entry per image node. The alt
 * text is the concatenated, trimmed text collected from the node's children.
 *
 * @return array list of entries with the keys `alt`, `url` and `title`
 *               (`title` is an empty string when the node has no title)
 * @throws CommonMarkException
 */
public static function getImageLinksFromContent(string $content): array {
	$walker = self::getDocumentWalker($content);

	$images = [];
	while ($event = $walker->next()) {
		$node = $event->getNode();
		// Each node is visited on entering and leaving; only handle images once, on entering.
		if (!$event->isEntering() || !($node instanceof Image)) {
			continue;
		}

		// Concatenate the child texts as they are. Deliberately no array_filter():
		// without a callback it removes every falsy string, so an alt text part
		// of "0" would silently get lost.
		$altText = '';
		foreach ($node->children() as $child) {
			$altText .= self::collectText($child);
		}

		$images[] = [
			'alt' => trim($altText),
			'url' => $node->getUrl(),
			'title' => $node->getTitle() ?? '',
		];
	}

	return $images;
}

/**
* Returns hrefs that point to the given collective or are relative links (e.g. `../Page-21`)
*/
Expand Down Expand Up @@ -160,4 +200,25 @@ public static function getLinkedPageIds(Collective $collective, string $content,

return array_unique($pageIds, SORT_NUMERIC);
}

/**
 * Replace callout syntax `:!: <callout text>` with ours (`::: warn\n<callout text>\n\n:::`)
 *
 * The content is split on "\n". Every line that starts with `:!:` followed by
 * whitespace and some text is replaced by four lines: `::: warn`, the trimmed
 * callout text, an empty line and a closing `:::`. All other lines are kept
 * unchanged.
 *
 * @param string $content Markdown content to process
 * @return string the processed content, joined with "\n" again
 */
public static function processCallouts(string $content): string {
	$result = [];

	// Iterate by value: the lines are never modified in place, and a by-reference
	// loop variable would stay bound to the last element after the loop.
	foreach (explode("\n", $content) as $line) {
		if (preg_match('/^:!:\s+(.+)$/', $line, $matches) === 1) {
			array_push($result, '::: warn', trim($matches[1]), '', ':::');
			continue;
		}
		$result[] = $line;
	}

	return implode("\n", $result);
}
}
45 changes: 33 additions & 12 deletions lib/Service/AttachmentService.php
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

class AttachmentService {
private ?PageTrashBackend $trashBackend = null;
private array $attachmentDirectory = [];

public function __construct(
private readonly IAppManager $appManager,
Expand Down Expand Up @@ -63,21 +64,29 @@ private function fileToInfo(File $file, folder $folder, string $type = 'text'):
* @throws NotFoundException
*/
private function getAttachmentDirectory(File $pageFile, bool $create = false): Folder {
try {
$parentFolder = $pageFile->getParent();
$attachmentFolderName = '.attachments.' . $pageFile->getId();
if ($parentFolder->nodeExists($attachmentFolderName)) {
$attachmentFolder = $parentFolder->get($attachmentFolderName);
if ($attachmentFolder instanceof Folder) {
return $attachmentFolder;
$id = $pageFile->getId();
if (!isset($this->attachmentDirectory[$id])) {
try {
$parentFolder = $pageFile->getParent();
$attachmentFolderName = '.attachments.' . $id;
if ($parentFolder->nodeExists($attachmentFolderName)) {
$attachmentFolder = $parentFolder->get($attachmentFolderName);
if ($attachmentFolder instanceof Folder) {
$this->attachmentDirectory[$id] = $attachmentFolder;
}
} elseif ($create) {
$this->attachmentDirectory[$id] = $parentFolder->newFolder($attachmentFolderName);
}
} elseif ($create) {
return $parentFolder->newFolder($attachmentFolderName);
} catch (FilesNotFoundException|InvalidPathException) {
throw new NotFoundException('Failed to get attachment directory for page ' . $id . '.');
}

if (!isset($this->attachmentDirectory[$id])) {
throw new NotFoundException('Attachment directory for page ' . $id . ' does not exist.');
}
} catch (FilesNotFoundException|InvalidPathException) {
throw new NotFoundException('Failed to get attachment directory for page ' . $pageFile->getId() . '.');
}
throw new NotFoundException('Failed to get attachment directory for page ' . $pageFile->getId() . '.');

return $this->attachmentDirectory[$id];
}

private function getTextAttachments(File $pageFile, Folder $folder): array {
Expand Down Expand Up @@ -208,4 +217,16 @@ public function restoreAttachment(int $collectiveId, File $pageFile, int $attach

return $this->getAttachmentDirectory($pageFile)->getById($attachmentId);
}

/**
 * Stores the given content as a new file in the attachment directory of the
 * page file (the directory is requested with `$create = true`).
 *
 * @return string `.attachments.<page file ID>` and the generated filename,
 *                joined with DIRECTORY_SEPARATOR
 * @throws NotFoundException
 * @throws NotPermittedException
 */
public function putAttachment(File $pageFile, string $attachmentName, string $content): string {
	$targetFolder = $this->getAttachmentDirectory($pageFile, true);
	$storedName = NodeHelper::generateFilename($targetFolder, $attachmentName);
	$targetFolder->newFile($storedName, $content);

	return implode(DIRECTORY_SEPARATOR, ['.attachments.' . $pageFile->getId(), $storedName]);
}
}
20 changes: 20 additions & 0 deletions lib/Service/IProgressReporter.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<?php

declare(strict_types=1);

/**
* SPDX-FileCopyrightText: 2025 Nextcloud GmbH and Nextcloud contributors
* SPDX-License-Identifier: AGPL-3.0-or-later
*/

namespace OCA\Collectives\Service;

/**
 * Sink for progress and error messages.
 *
 * NOTE(review): no implementation is visible here. The "Verbose" variants are
 * presumably only emitted when verbose output is enabled - confirm against the
 * implementing class.
 */
interface IProgressReporter {
/** Report an informational message. */
public function writeInfo(string $message): void;

/** Report an informational message (presumably only in verbose mode - TODO confirm). */
public function writeInfoVerbose(string $message): void;

/** Report an error message. */
public function writeError(string $message): void;

/** Report an error message (presumably only in verbose mode - TODO confirm). */
public function writeErrorVerbose(string $message): void;
}
Loading
Loading