Skip to content

Commit

Permalink
feat and ci: grow support with updated branch
Browse files Browse the repository at this point in the history
This currently uses a custom branch, and will need to be
updated when merged into flux-sched. This last change
updates the CI to use the latest noble image, and
adds back the shrink support and test.

Signed-off-by: vsoch <vsoch@users.noreply.github.com>
  • Loading branch information
vsoch committed Jan 15, 2025
1 parent 9d3e31a commit c9e180e
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 13 deletions.
3 changes: 2 additions & 1 deletion .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ RUN wget https://go.dev/dl/go${GO_VERSION}.linux-amd64.tar.gz && tar -xvf go${G
ENV PATH=$PATH:/usr/local/go/bin:/home/vscode/go/bin

# Testing grow/shrink from custom branch
RUN git clone -b debug-resource-error-messages https://github.com/researchapps/flux-sched /opt/flux-sched
# RUN git clone -b debug-resource-error-messages https://github.com/researchapps/flux-sched /opt/flux-sched
RUN git clone -b grow-api https://github.com/milroy/flux-sched /opt/flux-sched
# RUN git clone https://github.com/flux-framework/flux-sched /opt/flux-sched

# We also need to rebuild into the system install
Expand Down
17 changes: 12 additions & 5 deletions .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ on:
pull_request: []
workflow_dispatch:

# Bindings should always be checked against the latest containers
# Bindings should be checked against the latest containers
# 2:10 every Thursday
schedule:
- cron: '10 2 * * 4'
Expand All @@ -21,10 +21,9 @@ jobs:
fail-fast: false
matrix:
# container base and lib prefix
test: [["fluxrm/flux-sched:jammy", "/usr/lib"],
test: [["fluxrm/flux-sched:noble", "/usr/lib"],
["fluxrm/flux-sched:fedora38", "/usr/lib64"],
["fluxrm/flux-sched:bookworm-amd64", "/usr/lib"],
["fluxrm/flux-sched:el8", "/usr/lib64"]]
["fluxrm/flux-sched:bookworm-amd64", "/usr/lib"]]

container:
image: ${{ matrix.test[0] }}
Expand All @@ -38,7 +37,15 @@ jobs:

# TODO: we should consider distributing the header files with the release builds
- name: flux-sched build
run: git clone https://github.com/flux-framework/flux-sched /opt/flux-sched
run: git clone -b grow-api https://github.com/milroy/flux-sched /opt/flux-sched
- name: flux-sched compile
run: |
export FLUX_SCHED_VERSION=0.39.0
cd /opt/flux-sched
cmake -B build
make -C build
make -C build install
cd -
- name: Build
run: LIB_PREFIX=${{ matrix.test[1] }} make build
- name: Test Binary
Expand Down
16 changes: 12 additions & 4 deletions .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,9 @@ jobs:
strategy:
fail-fast: false
matrix:
test: [["fluxrm/flux-sched:jammy", "/usr/lib"],
test: [["fluxrm/flux-sched:noble", "/usr/lib"],
["fluxrm/flux-sched:fedora38", "/usr/lib64"],
["fluxrm/flux-sched:bookworm-amd64", "/usr/lib"],
["fluxrm/flux-sched:el8", "/usr/lib64"]]
["fluxrm/flux-sched:bookworm-amd64", "/usr/lib"]]

container:
image: ${{ matrix.test[0] }}
Expand All @@ -33,7 +32,16 @@ jobs:
go-version: ^1.21

- name: flux-sched build
run: git clone https://github.com/flux-framework/flux-sched /opt/flux-sched
run: git clone -b grow-api https://github.com/milroy/flux-sched /opt/flux-sched
- name: flux-sched compile
run: |
export FLUX_SCHED_VERSION=0.39.0
cd /opt/flux-sched
cmake -B build
make -C build
make -C build install
cd -
- name: Build
run: LIB_PREFIX=${{ matrix.test[1] }} make build
- name: Test
Expand Down
4 changes: 2 additions & 2 deletions cmd/test/data/grow/new-nodes.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
"type": "rack",
"basename": "rack",
"name": "rack0",
"id": 0,
"uniq_id": 1,
"id": 3,
"uniq_id": 3,
"rank": -1,
"exclusive": false,
"unit": "",
Expand Down
19 changes: 19 additions & 0 deletions cmd/test/test.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,25 @@ func main() {
log.Fatalf("Error in ReapiClient MatchSatisfy - asking for 4 nodes should now succeed: %v\n", err)
}

// Shrink (remove subgraph) for node2
fmt.Println("🥕 Asking to Shrink from 4 to 3 Nodes")
err = cli.Shrink("/tiny0/rack0/node2")
if err != nil {
log.Fatalf("Error in ReapiClient Shrink: %s %s\n", err, cli.GetErrMsg())
}
fmt.Printf("Shrink request return value: %v\n", err)

fmt.Println("Asking to MatchSatisfy 4 nodes (again, not possible)")
sat, overhead, err = cli.MatchSatisfy(growJobspec)
checkErrors(cli)
if err != nil {
log.Fatalf("Error in ReapiClient MatchSatisfy: %v\n", err)
}
printSatOutput(sat, err)
if sat {
log.Fatalf("Error in ReapiClient MatchSatisfy - asking for 4 nodes with only 3 should fail: %v\n", err)
}

}

func printOutput(reserved bool, allocated string, at int64, jobid uint64, err error) {
Expand Down
17 changes: 16 additions & 1 deletion pkg/fluxcli/reapi_cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -232,11 +232,26 @@ func (cli *ReapiClient) UpdateAllocate(jobid int, r string) (at int64, overhead
func (cli *ReapiClient) Grow(rSubgraph string) (err error) {
	// Copy the Go string into C-owned memory and make sure it is released
	// once the call below has returned.
	cSubgraph := C.CString(rSubgraph)
	defer C.free(unsafe.Pointer(cSubgraph))

	// Hand the subgraph to the C client and translate its integer return
	// code into a Go error.
	ctx := (*C.struct_reapi_cli_ctx)(cli.ctx)
	rc := int(C.reapi_cli_grow(ctx, cSubgraph))
	return retvalToError(rc, "issue resource api client grow")
}

// Shrink updates the resource state by pruning the resources found at
// rNodePath (for example "/tiny0/rack0/node2").
//
// It wraps the C function:
//
//	int reapi_cli_shrink (reapi_cli_ctx_t *ctx, const char *R_node_path);
//
// \param rNodePath path (R_node_path) of the resource node to prune down
// \return the C return code (documented as 0 on success; -1 on error),
// converted to an error by retvalToError.
func (cli *ReapiClient) Shrink(rNodePath string) (err error) {
	// C.CString allocates C memory; register the free immediately after the
	// allocation so the release is tied to it rather than to the call site.
	nodePath := C.CString(rNodePath)
	defer C.free(unsafe.Pointer(nodePath))

	fluxerr := int(C.reapi_cli_shrink((*C.struct_reapi_cli_ctx)(cli.ctx), nodePath))
	return retvalToError(fluxerr, "issue resource api client shrink")
}

// Cancel cancels the allocation or reservation corresponding to jobid.
//
// \param jobid jobid of the uint64_t type.
Expand Down

0 comments on commit c9e180e

Please sign in to comment.