Skip to content

Commit

Permalink
expose rest api for splits (#3942)
Browse files Browse the repository at this point in the history
* expose rest api for splits

expose rest api for split list
change rest api response data structure
add offset/limit pagniation
change SplitClient list() accordingly

* fix broken test case

fix broken test case

* fix broken test case in index_api

fix broken test case in index_api

* update documents accordingly

update cli.md
update rest-api.md

* Update quickwit/quickwit-serve/src/index_api/rest_handler.rs

Co-authored-by: François Massot <francois.massot@gmail.com>

* fix bug in rest handler

fix bug in rest handler

* code lint

code lint

---------

Co-authored-by: François Massot <francois.massot@gmail.com>
  • Loading branch information
JerryKwan and fmassot authored Oct 25, 2023
1 parent d094784 commit fd1547b
Show file tree
Hide file tree
Showing 7 changed files with 160 additions and 20 deletions.
4 changes: 4 additions & 0 deletions docs/reference/cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,8 @@ Lists the splits of an index.
```bash
quickwit split list
--index <index>
[--offset <offset>]
[--limit <limit>]
[--states <states>]
[--create-date <create-date>]
[--start-date <start-date>]
Expand All @@ -659,6 +661,8 @@ quickwit split list
| Option | Description |
|-----------------|-------------|
| `--index` | Target index ID |
| `--offset` | Number of splits to skip |
| `--limit` | Maximum number of splits to retrieve |
| `--states` | Selects the splits whose states are included in this comma-separated list of states. Possible values are `staged`, `published`, and `marked`. |
| `--create-date` | Selects the splits whose creation dates are before this date. |
| `--start-date` | Selects the splits that contain documents after this date (time-series indexes only). |
Expand Down
79 changes: 79 additions & 0 deletions docs/reference/rest-api.md
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,85 @@ The response is the stats about the requested index, and the content type is `ap
| `min_timestamp` | Starting time of timestamp. | `number` |
| `max_timestamp` | Ending time of timestamp. | `number` |


### Get splits

```
GET api/v1/indexes/<index id>/splits
```
Get splits belongs to an index of ID `index id`.

#### Path variable

| Variable | Description |
| ------------- | ------------- |
| `index id` | The index id |

#### Get parameters

| Variable | Type | Description |
|---------------------|------------|------------------------------------------------------------------------------------------------------------------|
| `offset` | `number` | If set, restrict the number of splits to skip|
| `limit ` | `number` | If set, restrict maximum number of splits to retrieve|
| `split_states` | `usize` | If set, specific split state(s) to filter by|
| `start_timestamp` | `number` | If set, restrict splits to documents with a `timestamp >= start_timestamp|
| `end_timestamp` | `number` | If set, restrict splits to documents with a `timestamp < end_timestamp|
| `end_create_timestamp` | `number` | If set, restrict splits whose creation dates are before this date|


#### Response

The response is the stats about the requested index, and the content type is `application/json; charset=UTF-8.`

| Field | Description | Type |
|-------------------------------------|----------------------------------------------------------|:---------------------:|
| `offset` | Index ID of index. | `String` |
| `size` | Uri of index | `String` |
| `splits` | Number of published splits. | `List` |

#### Examples
```
GET /api/v1/indexes/stackoverflow/splits?offset=0&limit=10
```
```json
{
"offset": 0,
"size": 1,
"splits": [
{
"split_state": "Published",
"update_timestamp": 1695642901,
"publish_timestamp": 1695642901,
"version": "0.6",
"split_id": "01HB632HD8W6WHNM7CZFH3KG1X",
"index_uid": "stackoverflow:01HB6321TDT3SP58D4EZP14KSX",
"partition_id": 0,
"source_id": "_ingest-api-source",
"node_id": "jerry",
"num_docs": 10000,
"uncompressed_docs_size_in_bytes": 6674940,
"time_range": {
"start": 1217540572,
"end": 1219335682
},
"create_timestamp": 1695642900,
"maturity": {
"type": "immature",
"maturation_period_millis": 172800000
},
"tags": [],
"footer_offsets": {
"start": 4714989,
"end": 4719999
},
"delete_opstamp": 0,
"num_merge_ops": 0
}
]
}
```


### Clears an index

```
Expand Down
29 changes: 23 additions & 6 deletions quickwit/quickwit-cli/src/split.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,18 +44,24 @@ pub fn build_split_command() -> Command {
arg!(--index <INDEX> "Target index ID")
.display_order(1)
.required(true),
arg!(--states <SPLIT_STATES> "Selects the splits whose states are included in this comma-separated list of states. Possible values are `staged`, `published`, and `marked`.")
arg!(--"offset" <OFFSET> "Number of splits to skip.")
.display_order(2)
.required(false),
arg!(--"limit" <LIMIT> "Maximum number of splits to retrieve.")
.display_order(3)
.required(false),
arg!(--states <SPLIT_STATES> "Selects the splits whose states are included in this comma-separated list of states. Possible values are `staged`, `published`, and `marked`.")
.display_order(4)
.required(false)
.value_delimiter(','),
arg!(--"create-date" <CREATE_DATE> "Selects the splits whose creation dates are before this date.")
.display_order(3)
.display_order(5)
.required(false),
arg!(--"start-date" <START_DATE> "Selects the splits that contain documents after this date (time-series indexes only).")
.display_order(4)
.display_order(6)
.required(false),
arg!(--"end-date" <END_DATE> "Selects the splits that contain documents before this date (time-series indexes only).")
.display_order(5)
.display_order(7)
.required(false),
// See #2762:
// arg!(--tags <TAGS> "Selects the splits whose tags are all included in this comma-separated list of tags.")
Expand All @@ -64,7 +70,7 @@ pub fn build_split_command() -> Command {
// .use_value_delimiter(true),
arg!(--"output-format" <OUTPUT_FORMAT> "Output format. Possible values are `table`, `json`, and `pretty-json`.")
.alias("format")
.display_order(7)
.display_order(8)
.required(false)
])
)
Expand Down Expand Up @@ -128,6 +134,8 @@ impl FromStr for OutputFormat {
pub struct ListSplitArgs {
pub client_args: ClientArgs,
pub index_id: String,
pub offset: Option<usize>,
pub limit: Option<usize>,
pub split_states: Option<Vec<SplitState>>,
pub create_date: Option<OffsetDateTime>,
pub start_date: Option<OffsetDateTime>,
Expand Down Expand Up @@ -177,6 +185,12 @@ impl SplitCliCommand {
let index_id = matches
.remove_one::<String>("index")
.expect("`index` should be a required arg.");
let offset = matches
.remove_one::<String>("offset")
.and_then(|s| s.parse::<usize>().ok());
let limit = matches
.remove_one::<String>("limit")
.and_then(|s| s.parse::<usize>().ok());
let split_states = matches
.remove_many::<String>("states")
.map(|values| {
Expand Down Expand Up @@ -215,10 +229,11 @@ impl SplitCliCommand {
.map(|s| OutputFormat::from_str(s.as_str()))
.transpose()?
.unwrap_or(OutputFormat::Table);

Ok(Self::List(ListSplitArgs {
client_args,
index_id,
offset,
limit,
split_states,
start_date,
end_date,
Expand Down Expand Up @@ -277,6 +292,8 @@ async fn list_split_cli(args: ListSplitArgs) -> anyhow::Result<()> {
debug!(args=?args, "list-split");
let qw_client = args.client_args.client();
let list_splits_query_params = ListSplitsQueryParams {
offset: args.offset,
limit: args.limit,
split_states: args.split_states,
start_timestamp: args.start_date.map(OffsetDateTime::unix_timestamp),
end_timestamp: args.end_date.map(OffsetDateTime::unix_timestamp),
Expand Down
15 changes: 10 additions & 5 deletions quickwit/quickwit-rest-client/src/rest_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use quickwit_indexing::actors::IndexingServiceCounters;
pub use quickwit_ingest::CommitType;
use quickwit_metastore::{IndexMetadata, Split, SplitInfo};
use quickwit_search::SearchResponseRest;
use quickwit_serve::{ListSplitsQueryParams, SearchRequestQueryString};
use quickwit_serve::{ListSplitsQueryParams, ListSplitsResponse, SearchRequestQueryString};
use reqwest::header::{HeaderMap, HeaderValue, CONTENT_TYPE};
use reqwest::{Client, ClientBuilder, Method, StatusCode, Url};
use serde::Serialize;
Expand Down Expand Up @@ -427,8 +427,8 @@ impl<'a, 'b> SplitClient<'a, 'b> {
self.timeout,
)
.await?;
let splits = response.deserialize().await?;
Ok(splits)
let list_splits_response: ListSplitsResponse = response.deserialize().await?;
Ok(list_splits_response.splits)
}

pub async fn mark_for_deletion(&self, split_ids: Vec<String>) -> Result<(), Error> {
Expand Down Expand Up @@ -652,7 +652,7 @@ mod test {
use quickwit_ingest::CommitType;
use quickwit_metastore::IndexMetadata;
use quickwit_search::SearchResponseRest;
use quickwit_serve::{ListSplitsQueryParams, SearchRequestQueryString};
use quickwit_serve::{ListSplitsQueryParams, ListSplitsResponse, SearchRequestQueryString};
use reqwest::header::CONTENT_TYPE;
use reqwest::{StatusCode, Url};
use serde_json::json;
Expand Down Expand Up @@ -969,10 +969,15 @@ mod test {
start_timestamp: Some(1),
..Default::default()
};
let response = ListSplitsResponse {
offset: 0,
size: 1,
splits: vec![split.clone()],
};
Mock::given(method("GET"))
.and(path("/api/v1/indexes/my-index/splits"))
.and(query_param("start_timestamp", "1"))
.respond_with(ResponseTemplate::new(StatusCode::OK).set_body_json(vec![split.clone()]))
.respond_with(ResponseTemplate::new(StatusCode::OK).set_body_json(response))
.up_to_n_times(1)
.mount(&mock_server)
.await;
Expand Down
3 changes: 2 additions & 1 deletion quickwit/quickwit-serve/src/index_api/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,6 @@
mod rest_handler;

pub use self::rest_handler::{
index_management_handlers, IndexApi, ListSplitsQueryParams, UnsupportedContentType,
index_management_handlers, IndexApi, ListSplitsQueryParams, ListSplitsResponse,
UnsupportedContentType,
};
48 changes: 41 additions & 7 deletions quickwit/quickwit-serve/src/index_api/rest_handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,14 @@ fn describe_index_handler(
#[derive(Debug, Clone, Deserialize, Serialize, utoipa::IntoParams, utoipa::ToSchema, Default)]
#[into_params(parameter_in = Query)]
pub struct ListSplitsQueryParams {
/// If set, define the number of splits to skip
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(default)]
pub offset: Option<usize>,
/// If set, restrict maximum number of splits to retrieve
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(default)]
pub limit: Option<usize>,
/// A specific split state(s) to filter by.
#[serde(deserialize_with = "from_simple_list")]
#[serde(serialize_with = "to_simple_list")]
Expand All @@ -282,12 +290,22 @@ pub struct ListSplitsQueryParams {
pub end_create_timestamp: Option<i64>,
}

#[derive(Serialize, Deserialize, Debug, utoipa::ToSchema)]
pub struct ListSplitsResponse {
#[serde(default)]
pub offset: usize,
#[serde(default)]
pub size: usize,
#[serde(default)]
pub splits: Vec<Split>,
}

#[utoipa::path(
get,
tag = "Indexes",
path = "/indexes/{index_id}/splits",
responses(
(status = 200, description = "Successfully fetched splits.", body = [Split])
(status = 200, description = "Successfully fetched splits.", body = ListSplitsResponse)
),
params(
ListSplitsQueryParams,
Expand All @@ -300,7 +318,7 @@ async fn list_splits(
index_id: String,
list_split_query: ListSplitsQueryParams,
mut metastore: MetastoreServiceClient,
) -> MetastoreResult<Vec<Split>> {
) -> MetastoreResult<ListSplitsResponse> {
let index_metadata_request = IndexMetadataRequest::for_index_id(index_id.to_string());
let index_uid: IndexUid = metastore
.index_metadata(index_metadata_request)
Expand All @@ -309,6 +327,14 @@ async fn list_splits(
.index_uid;
info!(index_id = %index_id, list_split_query = ?list_split_query, "get-splits");
let mut query = ListSplitsQuery::for_index(index_uid);
let mut offset = 0;
if let Some(offset_value) = list_split_query.offset {
query = query.with_offset(offset_value);
offset = offset_value;
}
if let Some(limit) = list_split_query.limit {
query = query.with_limit(limit);
}
if let Some(split_states) = list_split_query.split_states {
query = query.with_split_states(split_states);
}
Expand All @@ -326,7 +352,11 @@ async fn list_splits(
.list_splits(list_splits_request)
.await?
.deserialize_splits()?;
Ok(splits)
Ok(ListSplitsResponse {
offset,
size: splits.len(),
splits,
})
}

fn list_splits_handler(
Expand Down Expand Up @@ -955,10 +985,14 @@ mod tests {
.await;
assert_eq!(resp.status(), 200);
let actual_response_json: JsonValue = serde_json::from_slice(resp.body()).unwrap();
let expected_response_json = serde_json::json!([{
"create_timestamp": 0,
"split_id": "split_1",
}]);
let expected_response_json = serde_json::json!({
"splits": [
{
"create_timestamp": 0,
"split_id": "split_1",
}
]
});
assert_json_include!(
actual: actual_response_json,
expected: expected_response_json
Expand Down
2 changes: 1 addition & 1 deletion quickwit/quickwit-serve/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ use tracing::{debug, error, info, warn};
use warp::{Filter, Rejection};

pub use crate::build_info::{BuildInfo, RuntimeInfo};
pub use crate::index_api::ListSplitsQueryParams;
pub use crate::index_api::{ListSplitsQueryParams, ListSplitsResponse};
pub use crate::metrics::SERVE_METRICS;
use crate::rate_modulator::RateModulator;
#[cfg(test)]
Expand Down

0 comments on commit fd1547b

Please sign in to comment.