Skip to content

Commit

Permalink
Merge pull request #3273 from OpenNeuroOrg/cli-feat-annex-remote
Browse files Browse the repository at this point in the history
Deno CLI special remote implementation
  • Loading branch information
nellh authored Jan 14, 2025
2 parents ec2c84b + 64d8fb9 commit 67ac848
Show file tree
Hide file tree
Showing 11 changed files with 910 additions and 674 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/deno.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
- uses: actions/checkout@v4
- uses: denoland/setup-deno@v2
with:
deno-version: v1.x
deno-version: v2.x
- name: Collect coverage
run: deno task coverage
if: ${{ always() }}
Expand Down
3 changes: 3 additions & 0 deletions bin/git-annex-remote-openneuro
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/sh
# Use this script as your openneuro special remote when using datalad or git-annex to access annexed objects
# exec replaces this shell with deno so signals and the exit status reach the remote process directly
exec deno run -A jsr:@openneuro/cli special-remote
2 changes: 1 addition & 1 deletion cli/deno.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"coverage": "deno test --allow-read --allow-write --allow-net --allow-env --coverage ./ && deno coverage ./coverage --lcov > ./coverage.lcov"
},
"imports": {
"@bids/validator": "jsr:@bids/validator@^1.14.12",
"@bids/validator": "jsr:@bids/validator@^2.0.1",
"@cliffy/command": "jsr:@cliffy/command@^1.0.0-rc.5",
"@cliffy/prompt": "jsr:@cliffy/prompt@^1.0.0-rc.5",
"@deno-library/progress": "jsr:@deno-library/progress@^1.4.9",
Expand Down
1,356 changes: 725 additions & 631 deletions cli/deno.lock

Large diffs are not rendered by default.

7 changes: 6 additions & 1 deletion cli/mod.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,17 @@ Sentry.init({
release: `openneuro-cli@${denoJson.version}`,
})
import { commandLine } from "./src/options.ts"
import { annexSpecialRemote } from "./src/commands/special-remote.ts"

/**
 * Entrypoint for running OpenNeuro command line tools
 *
 * When the running executable is named `git-annex-remote-openneuro` the
 * git-annex special remote is started directly; any other executable name
 * goes through the regular command line parser. Exactly one of the two runs
 * per invocation.
 */
export async function main() {
  if (Deno.execPath().endsWith("git-annex-remote-openneuro")) {
    await annexSpecialRemote()
  } else {
    await commandLine(Deno.args)
  }
}

await main()
5 changes: 4 additions & 1 deletion cli/src/commands/git-credential.ts
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,10 @@ export const gitCredential = new Command()
.description(
"A git credentials helper for easier datalad or git-annex access to datasets.",
)
.command("fill")
// Credentials here are short lived so store is not useful
.command("store")
.action(() => {})
.command("get")
.action(async () => {
console.log(await gitCredentialAction())
})
120 changes: 120 additions & 0 deletions cli/src/commands/special-remote.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import { Command } from "@cliffy/command"
import * as readline from "node:readline"
import { once } from "node:events"
import {
checkKey,
removeKey,
retrieveKey,
storeKey,
type TransferKeyState,
} from "../worker/transferKey.ts"
import process from "node:process"
import { getRepoAccess } from "./git-credential.ts"

// Protocol version greeting printed to stdout before any git-annex request is handled
const GIT_ANNEX_VERSION = "VERSION 1"

/**
 * Split a `TRANSFER <direction> <key> <file>` request into its key and file.
 *
 * Only the first three spaces act as separators; everything after them is the
 * file, so a path that itself contains spaces is returned intact.
 */
function parseTransferRequest(line: string): { key: string; file: string } {
  const [, , key, ...fileParts] = line.split(" ")
  return { key, file: fileParts.join(" ") }
}

/**
 * Produce the reply for a single git-annex request line.
 *
 * @param line One request line received from git-annex
 * @param state Remote configuration (url and token) passed to the key transfer helpers
 * @returns The reply line to send back to git-annex
 */
export async function handleGitAnnexMessage(
  line: string,
  state: TransferKeyState,
): Promise<string> {
  if (line.startsWith("EXTENSIONS")) {
    // Reply with an empty extension list
    return "EXTENSIONS"
  } else if (line.startsWith("PREPARE")) {
    // Ask for configuration to validate
    return "GETCONFIG url"
  } else if (line.startsWith("VALUE ")) {
    // Check if VALUE is configured already
    if (state.url) {
      return "PREPARE-SUCCESS"
    } else {
      return "PREPARE-FAILURE url must be configured when running initremote or enableremote"
    }
  } else if (line.startsWith("TRANSFER STORE")) {
    // The file argument may contain spaces, so it is taken as the rest of the line
    const { key, file } = parseTransferRequest(line)
    if (await storeKey(state, key, file)) {
      return `TRANSFER-SUCCESS STORE ${key}`
    } else {
      return `TRANSFER-FAILURE STORE ${key}`
    }
  } else if (line.startsWith("TRANSFER RETRIEVE")) {
    // The file argument may contain spaces, so it is taken as the rest of the line
    const { key, file } = parseTransferRequest(line)
    if (await retrieveKey(state, key, file)) {
      return `TRANSFER-SUCCESS RETRIEVE ${key}`
    } else {
      return `TRANSFER-FAILURE RETRIEVE ${key}`
    }
  } else if (line.startsWith("CHECKPRESENT")) {
    const key = line.split("CHECKPRESENT ", 2)[1]
    if (await checkKey(state, key)) {
      return `CHECKPRESENT-SUCCESS ${key}`
    } else {
      return `CHECKPRESENT-FAILURE ${key}`
    }
  } else if (line.startsWith("INITREMOTE")) {
    // No init steps are required - always succeed
    return "INITREMOTE-SUCCESS"
  } else if (line.startsWith("GETAVAILABILITY")) {
    return "AVAILABILITY GLOBAL"
  } else if (line.startsWith("REMOVE")) {
    const key = line.split("REMOVE ", 2)[1]
    if (await removeKey(state, key)) {
      return `REMOVE-SUCCESS ${key}`
    } else {
      return `REMOVE-FAILURE ${key}`
    }
  } else {
    // Anything else is not implemented by this remote
    return "UNSUPPORTED-REQUEST"
  }
}

/**
 * Stateful response handling for git annex protocol
 *
 * The returned handler keeps the configured url and access token between
 * request lines. A `VALUE <url>` line refreshes both: the dataset id is the
 * last path segment of the url and is used to request a token. Every line,
 * including `VALUE`, is then answered by printing the reply to stdout.
 *
 * @returns Async handler that processes one request line
 */
export const response = () => {
  const state: TransferKeyState = {
    url: "",
    token: "",
  }
  return async (line: string) => {
    if (line.startsWith("VALUE ")) {
      // Everything after the prefix is the configured url
      const url = line.slice("VALUE ".length)
      // Start from an unconfigured state; it is only filled in once a token was obtained
      state.url = ""
      state.token = ""
      // An empty value means no url is configured, so there is no dataset to look up
      if (url) {
        try {
          // Obtain the filename (no extensions) in url value
          const datasetId = url.substring(url.lastIndexOf("/") + 1)
          const { token } = await getRepoAccess(datasetId)
          state.url = url
          state.token = token
        } catch (err) {
          // Report the cause on stderr; the protocol reply only says the url is not configured
          console.error(err)
        }
      }
    }
    console.log(await handleGitAnnexMessage(line, state))
  }
}

/**
 * Git annex special remote
 *
 * Prints the protocol version, then answers each line read from stdin until
 * the input is closed. Errors raised while setting up or waiting are written
 * to stderr.
 */
export async function annexSpecialRemote() {
  try {
    const stdinLines = readline.createInterface({ input: process.stdin })
    // The version greeting goes out before any request is handled
    console.log(GIT_ANNEX_VERSION)
    const onLine = response()
    stdinLines.on("line", onLine)
    // Resolve once stdin has been closed
    await once(stdinLines, "close")
  } catch (error) {
    console.error(error)
  }
}

/**
 * `special-remote` subcommand: runs the git-annex special remote on stdin/stdout
 */
export const specialRemote = new Command()
  .name("special-remote")
  .description(
    "git-annex special remote for uploading or downloading from OpenNeuro",
  )
  .action(() => annexSpecialRemote())
3 changes: 3 additions & 0 deletions cli/src/options.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import { login } from "./commands/login.ts"
import { upload } from "./commands/upload.ts"
import { download } from "./commands/download.ts"
import { gitCredential } from "./commands/git-credential.ts"
import { specialRemote } from "./commands/special-remote.ts"

export type OpenNeuroOptions = {
datasetPath: string
Expand All @@ -29,8 +30,10 @@ const openneuroCommand = new Command()
.globalEnv("OPENNEURO_API_KEY=<key:string>", "Specify an OpenNeuro API key.")
.command("login", login)
.command("download", download)
// @ts-expect-error This is typed correctly but not loaded from the dependency as expected
.command("upload", upload)
.command("git-credential", gitCredential)
.command("special-remote", specialRemote)

/**
* Parse command line options and return a OpenNeuroOptions config
Expand Down
10 changes: 5 additions & 5 deletions cli/src/worker/git.ts
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ async function commitAnnexBranch(annexKeys: Record<string, string>) {
try {
uuidLog = await readAnnexPath("uuid.log", context)
} catch (err) {
if (err.name !== "NotFoundError") {
if (err instanceof Error && err.name !== "NotFoundError") {
throw err
}
}
Expand All @@ -204,7 +204,7 @@ async function commitAnnexBranch(annexKeys: Record<string, string>) {
})
} catch (err) {
// Create the branch if it doesn't exist
if (err.name === "NotFoundError") {
if (err instanceof Error && err.name === "NotFoundError") {
await createAnnexBranch()
}
}
Expand All @@ -225,7 +225,7 @@ async function commitAnnexBranch(annexKeys: Record<string, string>) {
{ encoding: "utf8" },
)
} catch (_err) {
if (_err.name !== "NotFound") {
if (_err instanceof Error && _err.name !== "NotFound") {
throw _err
}
} finally {
Expand All @@ -247,7 +247,7 @@ async function commitAnnexBranch(annexKeys: Record<string, string>) {
try {
log = await readAnnexPath(annexBranchPath, context)
} catch (err) {
if (err.name === "NotFoundError") {
if (err instanceof Error && err.name === "NotFoundError") {
logger.debug(`Annex branch object "${annexBranchPath}" not found`)
} else {
throw err
Expand Down Expand Up @@ -283,7 +283,7 @@ async function commitAnnexBranch(annexKeys: Record<string, string>) {
ref: "main",
})
} catch (err) {
if (err.name === "NotFoundError") {
if (err instanceof Error && err.name === "NotFoundError") {
// Fallback to master and error if neither exists
await git.checkout({
...context.config(),
Expand Down
2 changes: 1 addition & 1 deletion cli/src/worker/transferKey.ts
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ export async function storeKey(
try {
fileHandle?.close()
} catch (err) {
if (err.name !== "BadResource") {
if (err instanceof Error && err.name !== "BadResource") {
logger.error(err)
}
}
Expand Down
74 changes: 41 additions & 33 deletions docs/git.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,36 +34,26 @@ LICENSE annex.largefiles=nothing

## Credential Helper

Using openneuro-cli, git can be configured to automatically use your OpenNeuro credentials to allow access to datasets. This is the preferred method for authenticating regular git access. An advanced method of issuing a key is documented below if you cannot use the [git credential helper](https://git-scm.com/docs/gitcredentials) for your use case.
Using [@openneuro/cli](https://jsr.io/@openneuro/cli), git can be configured to automatically use your OpenNeuro credentials to allow access to datasets. This is the preferred method for authenticating regular git access. An advanced method of issuing a key is documented below if you cannot use the [git credential helper](https://git-scm.com/docs/gitcredentials) for your use case.

### Setup

Once you have openneuro-cli installed and you've logged in with `openneuro login`, you can configure git to automatically use your login.
Once you have logged in with `deno run -A jsr:@openneuro/cli login`, you can configure git to automatically use your login.

```shell
# This allows the helper to identify which dataset you are accessing automatically and issue a key for that dataset
git config credential.useHttpPath true
# Point git at the openneuro-cli tool (this must be an absolute path)
git config credential.helper "/path/to/openneuro git-credential"
git config --global credential.https://openneuro.org.useHttpPath true
# Point git at the @openneuro/cli tool (this must be an absolute path)
git config --global credential.https://openneuro.org.helper "/path/to/deno -A jsr:@openneuro/cli git-credential"
```

Alternatively openneuro-cli can be given the name `git-credential-openneuro` and this shorter command will work.
If you are using [Git Credential Manager](https://github.com/git-ecosystem/git-credential-manager) add the provider entry to avoid duplicating entries.

```shell
git config credential.helper "openneuro"
```

This will configure these options for one repository.

To enable for all OpenNeuro repositories add this to your [git configuration file](https://git-scm.com/docs/git-config#FILES).

```cfg
[credential "https://openneuro.org"]
useHttpPath = true
helper = "/path/to/openneuro git-credential"
git config credential.https://openneuro.org.provider generic
```

If you are using [Git Credential Manager](https://github.com/git-ecosystem/git-credential-manager) add the provider entry to avoid duplicating entries.
Or by modifying your .gitconfig:

```cfg
[credential "https://openneuro.org"]
Expand All @@ -76,20 +66,20 @@ If you are using [Git Credential Manager](https://github.com/git-ecosystem/git-c

Most datalad or git operations will work as expected but there are a few limitations. Force pushes or unrelated history will be rejected. Annexed data is accepted but only via the git transport, using other annexes will result in unreachable files or failed validation due to missing data.

To download a new dataset using the credential helper you can start with an empty repo and then configure that repo.
Once the helper has been configured clone a repo:

```shell
mkdir ds000001
# You can use git clone...
git clone https://openneuro.org/git/0/ds0000001
# Or datalad install
datalad install https://openneuro.org/git/0/ds0000001
cd ds0000001
git init
git remote add origin https://openneuro.org/git/0/ds0000001
# Follow the above steps to setup the credential helper
git pull origin master
git pull origin git-annex:git-annex
# From here you can treat this like a datalad dataset and export back to OpenNeuro to deploy changes
```

When you are ready to push changes, make sure to validate them before attempting to push. OpenNeuro will reject some invalid pushes but cannot run the full bids-validator until after your changes have been pushed.
When you are ready to push changes, make sure to validate them before attempting to push. OpenNeuro runs a limited version of BIDS validation on pushes and will reject datasets that cannot pass validation of the file tree. File contents are validated only after upload.

To push annexed files, see `Configuring OpenNeuro special remote` below.

### Advanced authentication

Expand All @@ -115,25 +105,43 @@ For private datasets or to add new data with DataLad or git-annex, a special rem

### Configuring OpenNeuro special remote

```shell
# A script is provided to wrap the CLI as a special remote
curl https://raw.githubusercontent.com/OpenNeuroOrg/openneuro/refs/heads/master/bin/git-annex-remote-openneuro -o git-annex-remote-openneuro
# Make this executable and move this script to your path
chmod +x git-annex-remote-openneuro
```

Deno compile can be used if a single binary without network access is needed:

```shell
# This will create a `git-annex-remote-openneuro` executable you add to your path
deno compile -A --output git-annex-remote-openneuro jsr:@openneuro/cli
```

Obtain the URL from the dataset page and run initremote (or enableremote if you need to update it).

```shell
# Make sure openneuro-cli is installed and available in your path
# You should see 'VERSION 1' 'EXTENSIONS' if this is working
echo "EXTENSIONS" | git-annex-remote-openneuro
echo "EXTENSIONS" | deno run -A jsr:@openneuro/cli special-remote
# Configure the remote with the URL for your dataset
git annex initremote openneuro type=external externaltype=openneuro encryption=none url=https://openneuro.org/git/0/ds0000001
```

After this you can use regular git-annex or datalad commands to upload or download any annexed files by using the openneuro remote.
To download annexed objects from the remote, you may need to manually ask git-annex to update the local state of the OpenNeuro remote. You can force this update for all files:

```shell
# To upload any annexed objects to the remote
git annex copy --to openneuro
git annex fsck --fast --from openneuro
```

To download annexed objects from the remote, you may need to manually ask git-annex to update the local state of the OpenNeuro remote. You can force this update for all files:
After this you can use regular git-annex or datalad commands to upload or download any annexed files by using the openneuro remote.

```shell
git annex fsck --fast --from openneuro
# Download any annexed objects
datalad get .
```

```shell
# To upload any annexed objects to the remote
git annex copy --to openneuro
```

0 comments on commit 67ac848

Please sign in to comment.