
Commit 9a0b59d

Xarbirus, slaren, and ggerganov committed
ggml : introduce ggml_status (ggml/750)

* using enum as an exit code instead of macros
* update return type from enum to unsigned int
* indentation fix
* compound update:
  * ggml_compute_exit_code -> ggml_status
  * changed ggml_status from a bit-field type to simple codes
  * ggml_status to string cast
* GGML_CALL was removed

Co-authored-by: slaren <slarengh@gmail.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
1 parent 93a84a1 commit 9a0b59d

11 files changed, +88 -63 lines
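
For context, the status codes themselves are introduced in ggml.h (part of this commit but not shown in the listing below). A minimal sketch of what they look like: GGML_STATUS_SUCCESS, GGML_STATUS_FAILED and GGML_STATUS_ALLOC_FAILED appear in the hunks below, while GGML_STATUS_ABORTED, the exact numeric values, and the name of the "to string cast" are assumptions.

    // ggml.h (sketch, not the verbatim diff)
    enum ggml_status {
        GGML_STATUS_ALLOC_FAILED = -2,
        GGML_STATUS_FAILED       = -1,
        GGML_STATUS_SUCCESS      =  0,
        GGML_STATUS_ABORTED      =  1,   // e.g. when an abort callback stops the compute
    };

    // "ggml_status to string cast" from the commit message; the exact name is assumed
    GGML_API const char * ggml_status_to_string(enum ggml_status status);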

ggml-backend-impl.h

Lines changed: 4 additions & 3 deletions
@@ -91,13 +91,14 @@ extern "C" {
         // (optional) complete all pending operations
         void (*GGML_CALL synchronize)(ggml_backend_t backend);
 
-        // compute graph with a plan
+        // create a plan for ggml_cgraph and free it
         ggml_backend_graph_plan_t (*GGML_CALL graph_plan_create) (ggml_backend_t backend, const struct ggml_cgraph * cgraph);
         void                      (*GGML_CALL graph_plan_free)   (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
-        void                      (*GGML_CALL graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
 
+        // compute graph with a plan
+        enum ggml_status (*GGML_CALL graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
         // compute graph without a plan (async)
-        bool             (*GGML_CALL graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
+        enum ggml_status (*GGML_CALL graph_compute) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
 
         // check if the backend supports an operation
         bool (*GGML_CALL supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
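
For a backend author, the interface change means graph_compute reports an explicit status instead of a bare bool. A hypothetical backend (all my_backend_* names are made up for illustration) would now fill the slot roughly like this:

    // sketch of a third-party backend adapting to the new interface
    GGML_CALL static enum ggml_status my_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
        struct my_backend_context * ctx = (struct my_backend_context *) backend->context;

        for (int i = 0; i < cgraph->n_nodes; i++) {
            // my_backend_run_node() is a hypothetical per-node kernel dispatch
            if (!my_backend_run_node(ctx, cgraph->nodes[i])) {
                return GGML_STATUS_FAILED;   // was: return false;
            }
        }
        return GGML_STATUS_SUCCESS;          // was: return true;
    }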

ggml-backend.c

Lines changed: 18 additions & 21 deletions
@@ -262,11 +262,11 @@ void ggml_backend_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
     backend->iface.graph_plan_free(backend, plan);
 }
 
-void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
-    backend->iface.graph_plan_compute(backend, plan);
+enum ggml_status ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
+    return backend->iface.graph_plan_compute(backend, plan);
 }
 
-bool ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+enum ggml_status ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
     return backend->iface.graph_compute(backend, cgraph);
 }
 
@@ -732,15 +732,15 @@ GGML_CALL static void ggml_backend_cpu_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
     GGML_UNUSED(backend);
 }
 
-GGML_CALL static void ggml_backend_cpu_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
+GGML_CALL static enum ggml_status ggml_backend_cpu_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
     struct ggml_backend_plan_cpu * cpu_plan = (struct ggml_backend_plan_cpu *)plan;
 
-    ggml_graph_compute(&cpu_plan->cgraph, &cpu_plan->cplan);
+    return ggml_graph_compute(&cpu_plan->cgraph, &cpu_plan->cplan);
 
     GGML_UNUSED(backend);
 }
 
-GGML_CALL static bool ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+GGML_CALL static enum ggml_status ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
     struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
 
     struct ggml_cplan cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads);
@@ -755,8 +755,7 @@ GGML_CALL static bool ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
     cplan.abort_callback      = cpu_ctx->abort_callback;
     cplan.abort_callback_data = cpu_ctx->abort_callback_data;
 
-    ggml_graph_compute(cgraph, &cplan);
-    return true;
+    return ggml_graph_compute(cgraph, &cplan);
 }
 
 GGML_CALL static bool ggml_backend_cpu_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
@@ -1437,7 +1436,7 @@ static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {
     return true;
 }
 
-static bool ggml_backend_sched_compute_splits(ggml_backend_sched_t sched) {
+static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t sched) {
     uint64_t copy_us[GGML_MAX_BACKENDS] = {0};
     uint64_t compute_us[GGML_MAX_BACKENDS] = {0};
 
@@ -1472,8 +1471,9 @@ static bool ggml_backend_sched_compute_splits(ggml_backend_sched_t sched) {
 
         uint64_t compute_start_us = ggml_time_us();
         if (!sched->callback_eval) {
-            if (!ggml_backend_graph_compute(split_backend, &split->graph)) {
-                return false;
+            enum ggml_status ec = ggml_backend_graph_compute(split_backend, &split->graph);
+            if (ec != GGML_STATUS_SUCCESS) {
+                return ec;
             }
             //ggml_backend_synchronize(split_backend); // necessary to measure compute time
         } else {
@@ -1494,8 +1494,9 @@ static bool ggml_backend_sched_compute_splits(ggml_backend_sched_t sched) {
 
                 struct ggml_cgraph gv = ggml_graph_view(&split->graph, j0, j1 + 1);
 
-                if (!ggml_backend_graph_compute(split_backend, &gv)) {
-                    return false;
+                enum ggml_status ec = ggml_backend_graph_compute(split_backend, &gv);
+                if (ec != GGML_STATUS_SUCCESS) {
+                    return ec;
                 }
 
                 if (need && !sched->callback_eval(t, false, sched->callback_eval_user_data)) {
@@ -1519,7 +1520,7 @@ static bool ggml_backend_sched_compute_splits(ggml_backend_sched_t sched) {
     }
 #endif
 
-    return true;
+    return GGML_STATUS_SUCCESS;
 }
 
 ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size) {
@@ -1581,7 +1582,7 @@ bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph) {
     return true;
 }
 
-bool ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
+enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
     GGML_ASSERT((int)sched->hash_set.size >= graph->n_nodes + GGML_MAX_SPLITS*GGML_MAX_SPLIT_INPUTS);
 
     if (!sched->is_reset) {
@@ -1590,14 +1591,10 @@ bool ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
 
     ggml_backend_sched_split_graph(sched, graph);
     if (!ggml_backend_sched_alloc_splits(sched)) {
-        return false;
+        return GGML_STATUS_ALLOC_FAILED;
     }
 
-    if (!ggml_backend_sched_compute_splits(sched)) {
-        return false;
-    }
-
-    return true;
+    return ggml_backend_sched_compute_splits(sched);
 }
 
 void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data) {
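
Because the CPU backend now forwards whatever ggml_graph_compute() reports instead of hard-coding true, a caller can tell an intentional abort apart from a real failure. A minimal sketch, assuming the existing ggml_backend_cpu_set_abort_callback() helper and that an abort surfaces as GGML_STATUS_ABORTED:

    #include "ggml.h"
    #include "ggml-backend.h"
    #include <stdbool.h>
    #include <stdio.h>

    static bool stop_requested(void * data) {
        return *(bool *) data;   // returning true asks ggml to stop the compute early
    }

    static enum ggml_status compute_with_abort(ggml_backend_t backend_cpu, struct ggml_cgraph * gf, bool * stop) {
        ggml_backend_cpu_set_abort_callback(backend_cpu, stop_requested, stop);

        enum ggml_status ec = ggml_backend_graph_compute(backend_cpu, gf);
        if (ec == GGML_STATUS_ABORTED) {
            fprintf(stderr, "graph compute aborted by callback\n");
        }
        return ec;
    }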

ggml-backend.h

Lines changed: 16 additions & 15 deletions
@@ -66,12 +66,13 @@ extern "C" {
 
     GGML_API void ggml_backend_synchronize(ggml_backend_t backend);
 
-    GGML_API ggml_backend_graph_plan_t ggml_backend_graph_plan_create (ggml_backend_t backend, struct ggml_cgraph * cgraph);
+    GGML_API ggml_backend_graph_plan_t ggml_backend_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph);
+    GGML_API void ggml_backend_graph_plan_free (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
 
-    GGML_API void ggml_backend_graph_plan_free (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
-    GGML_API void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
-    GGML_API bool ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph);
-    GGML_API bool ggml_backend_supports_op (ggml_backend_t backend, const struct ggml_tensor * op);
+    GGML_API enum ggml_status ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
+    GGML_API enum ggml_status ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph);
+
+    GGML_API bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op);
 
     // tensor copy between different backends
     GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst);
@@ -157,26 +158,26 @@ extern "C" {
     typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data);
 
     // Initialize a backend scheduler
-    GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size);
-    GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
+    GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size);
+    GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
     // Initialize backend buffers from a measure graph
-    GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
+    GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
     // Get the number of splits of the last graph
-    GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched);
+    GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched);
 
-    GGML_API size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend);
+    GGML_API size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend);
 
-    GGML_API void ggml_backend_sched_set_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
-    GGML_API ggml_backend_t ggml_backend_sched_get_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
+    GGML_API void ggml_backend_sched_set_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
+    GGML_API ggml_backend_t ggml_backend_sched_get_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
 
     // Allocate and compute graph on the backend scheduler
-    GGML_API bool ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
+    GGML_API enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
 
     // Reset all assignments and allocators - must be called before changing the node backends
-    GGML_API void ggml_backend_sched_reset(ggml_backend_sched_t sched);
+    GGML_API void ggml_backend_sched_reset(ggml_backend_sched_t sched);
 
     // Set a callback to be called for each resulting node during graph compute
-    GGML_API void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data);
+    GGML_API void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data);
 
     //
     // Utils
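
On the caller side, the boolean check against ggml_backend_sched_graph_compute() becomes a switch over the status, which lets allocation failures be reported separately from compute failures. A minimal sketch, assuming ggml_status_to_string() is the string cast mentioned in the commit message:

    #include "ggml.h"
    #include "ggml-backend.h"
    #include <stdbool.h>
    #include <stdio.h>

    static bool eval_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
        enum ggml_status ec = ggml_backend_sched_graph_compute(sched, graph);

        switch (ec) {
            case GGML_STATUS_SUCCESS:
                return true;
            case GGML_STATUS_ALLOC_FAILED:
                fprintf(stderr, "failed to allocate compute buffers for the graph splits\n");
                return false;
            default:
                fprintf(stderr, "graph compute failed: %s\n", ggml_status_to_string(ec));
                return false;
        }
    }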

ggml-cuda.cu

Lines changed: 2 additions & 2 deletions
@@ -12241,7 +12241,7 @@ GGML_CALL static void ggml_backend_cuda_synchronize(ggml_backend_t backend) {
     UNUSED(backend);
 }
 
-GGML_CALL static bool ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
+GGML_CALL static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
     ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context;
 
     ggml_cuda_set_main_device(cuda_ctx->device);
@@ -12277,7 +12277,7 @@ GGML_CALL static bool ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
         GGML_ASSERT(ok);
     }
 
-    return true;
+    return GGML_STATUS_SUCCESS;
 }
 
 GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, const ggml_tensor * op) {

ggml-kompute.cpp

Lines changed: 2 additions & 2 deletions
@@ -1927,10 +1927,10 @@ static ggml_backend_buffer_type_t ggml_backend_kompute_get_default_buffer_type(ggml_backend_t backend) {
     return ggml_backend_kompute_buffer_type(ctx->device);
 }
 
-static bool ggml_backend_kompute_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+static ggml_status ggml_backend_kompute_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
     auto * ctx = static_cast<ggml_kompute_context *>(backend->context);
     ggml_vk_graph_compute(ctx, cgraph);
-    return true;
+    return GGML_STATUS_SUCCESS;
 }
 
 static bool ggml_backend_kompute_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {

ggml-metal.m

Lines changed: 4 additions & 4 deletions
@@ -748,7 +748,7 @@ static bool ggml_metal_supports_op(const struct ggml_metal_context * ctx, const struct ggml_tensor * op) {
     }
 }
 
-static bool ggml_metal_graph_compute(
+static enum ggml_status ggml_metal_graph_compute(
         struct ggml_metal_context * ctx,
                struct ggml_cgraph * gf) {
 
@@ -2484,7 +2484,7 @@ static bool ggml_metal_graph_compute(
         MTLCommandBufferStatus status = [command_buffer status];
         if (status != MTLCommandBufferStatusCompleted) {
             GGML_METAL_LOG_INFO("%s: command buffer %d failed with status %lu\n", __func__, i, status);
-            return false;
+            return GGML_STATUS_FAILED;
         }
     }
 
@@ -2493,7 +2493,7 @@ static bool ggml_metal_graph_compute(
         }
 
     }
-    return true;
+    return GGML_STATUS_SUCCESS;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -2795,7 +2795,7 @@ GGML_CALL static ggml_backend_buffer_type_t ggml_backend_metal_get_default_buffer_type(ggml_backend_t backend) {
     UNUSED(backend);
 }
 
-GGML_CALL static bool ggml_backend_metal_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+GGML_CALL static enum ggml_status ggml_backend_metal_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
     struct ggml_metal_context * metal_ctx = (struct ggml_metal_context *)backend->context;
 
     return ggml_metal_graph_compute(metal_ctx, cgraph);

ggml-opencl.cpp

Lines changed: 2 additions & 2 deletions
@@ -2231,7 +2231,7 @@ static ggml_backend_buffer_type_t ggml_backend_opencl_get_default_buffer_type(ggml_backend_t backend) {
     GGML_UNUSED(backend);
 }
 
-static bool ggml_backend_opencl_graph_compute(ggml_backend_t backend, ggml_cgraph * graph) {
+static ggml_status ggml_backend_opencl_graph_compute(ggml_backend_t backend, ggml_cgraph * graph) {
     for (int i = 0; i < graph->n_nodes; ++i) {
         ggml_tensor * node = graph->nodes[i];
         switch (node->op) {
@@ -2246,7 +2246,7 @@ static bool ggml_backend_opencl_graph_compute(ggml_backend_t backend, ggml_cgraph * graph) {
         }
     }
 
-    return true;
+    return GGML_STATUS_SUCCESS;
 
     GGML_UNUSED(backend);
 }

ggml-sycl.cpp

Lines changed: 2 additions & 2 deletions
@@ -15581,7 +15581,7 @@ catch (sycl::exception const &exc) {
     std::exit(1);
 }
 
-GGML_CALL static bool ggml_backend_sycl_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
+GGML_CALL static ggml_status ggml_backend_sycl_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
     ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
     ggml_sycl_set_main_device(sycl_ctx->device);
 
@@ -15613,7 +15613,7 @@ GGML_CALL static bool ggml_backend_sycl_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
         GGML_ASSERT(ok);
     }
 
-    return true;
+    return GGML_STATUS_SUCCESS;
 }
 
 GGML_CALL static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, const ggml_tensor * op) {

ggml-vulkan.cpp

Lines changed: 2 additions & 2 deletions
@@ -5092,7 +5092,7 @@ GGML_CALL static void ggml_backend_vk_synchronize(ggml_backend_t backend) {
     ctx->transfer_ctx = nullptr;
 }
 
-GGML_CALL static bool ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
+GGML_CALL static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
     ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context;
 
     for (int i = 0; i < cgraph->n_nodes; i++) {
@@ -5135,7 +5135,7 @@ GGML_CALL static bool ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
 
     ggml_vk_graph_cleanup(ctx);
 
-    return true;
+    return GGML_STATUS_SUCCESS;
 
     UNUSED(backend);
 }
