Skip to content

Commit

Permalink
spi-search: improve word combination rule (#735)
Browse files Browse the repository at this point in the history
* update

* update

* spi-search: improve word combination rule
  • Loading branch information
ZzIsGod1019 authored May 18, 2024
1 parent 6c5c334 commit c59d860
Show file tree
Hide file tree
Showing 7 changed files with 81 additions and 29 deletions.
4 changes: 2 additions & 2 deletions backend/middlewares/flow/tests/mock_api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,15 +88,15 @@ where
}))
}

pub fn accepted(data: T) -> MockApiResponse<T> {
pub fn _accepted(data: T) -> MockApiResponse<T> {
MockApiResponse::Ok(Json(MockResp {
code: TARDIS_RESULT_ACCEPTED_CODE.to_string(),
message: "".to_string(),
body: Some(data),
}))
}

pub fn err(error: TardisError) -> MockApiResponse<T> {
pub fn _err(error: TardisError) -> MockApiResponse<T> {
MockApiResponse::Err(error.into())
}
}
1 change: 1 addition & 0 deletions backend/spi/spi-search/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ spi-es = ["tardis/web-client"]

[dependencies]
serde.workspace = true
itertools.workspace = true
tardis = { workspace = true, features = [
"reldb-postgres",
"web-server",
Expand Down
15 changes: 15 additions & 0 deletions backend/spi/spi-search/src/api/ci/search_ci_item_api.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use tardis::basic::dto::TardisContext;
use tardis::web::context_extractor::TardisContextExtractor;
use tardis::web::poem::web::Query;

Expand Down Expand Up @@ -65,4 +66,18 @@ impl SearchCiItemApi {
let resp = search_item_serv::query_metrics(&query_req.0, &funs, &ctx.0).await?;
TardisResp::ok(resp)
}

/// Refresh TSV Result By Tag
///
/// Refresh the word segmentation results for the specified tag.
#[oai(path = "/:tag/refresh", method = "put")]
async fn refresh_tsv(&self, tag: Path<String>, ctx: TardisContextExtractor) -> TardisApiResult<Void> {
    let funs = crate::get_tardis_inst();
    // Work on a copy of the caller's context whose `own_paths` is cleared;
    // every other field is carried over unchanged.
    let mut global_ctx = ctx.0.clone();
    global_ctx.own_paths = String::new();
    search_item_serv::refresh_tsv(&tag.0, &funs, &global_ctx).await?;
    TardisResp::ok(Void {})
}
}
4 changes: 2 additions & 2 deletions backend/spi/spi-search/src/dto/search_item_dto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ pub struct SearchItemAddReq {
pub visit_keys: Option<SearchItemVisitKeysReq>,
}

#[derive(poem_openapi::Object, Serialize, Deserialize, Debug)]
#[derive(poem_openapi::Object, Serialize, Deserialize, Debug, Default)]
pub struct SearchItemModifyReq {
#[oai(validator(min_length = "2"))]
pub kind: Option<String>,
Expand Down Expand Up @@ -110,7 +110,7 @@ pub struct SearchItemSearchReq {
pub page: SearchItemSearchPageReq,
}

#[derive(poem_openapi::Object, Serialize, Deserialize, Debug)]
#[derive(poem_openapi::Object, Serialize, Deserialize, Debug, Default)]
pub struct SearchItemSearchCtxReq {
pub accounts: Option<Vec<String>>,
pub apps: Option<Vec<String>>,
Expand Down
4 changes: 4 additions & 0 deletions backend/spi/spi-search/src/serv/es/search_es_item_serv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -834,3 +834,7 @@ fn merge(a: &mut serde_json::Value, b: serde_json::Value) {
/// Metrics querying stub for this backend.
///
/// No query is performed: the function always returns the
/// `500-not-supports` error built from `funs`.
pub async fn query_metrics(_query_req: &SearchQueryMetricsReq, funs: &TardisFunsInst, _ctx: &TardisContext, _inst: &SpiBsInst) -> TardisResult<SearchQueryMetricsResp> {
    let unsupported = funs.err().format_error("search_es_item_serv", "query_metrics", "not supports", "500-not-supports");
    Err(unsupported)
}

/// TSV refresh stub for this backend.
///
/// Nothing is refreshed: the function always returns the
/// `500-not-supports` error built from `funs`. All other parameters are
/// intentionally unused, hence the leading underscores (this also avoids an
/// `unused_variables` warning for the tag).
pub async fn refresh_tsv(_tag: &str, funs: &TardisFunsInst, _ctx: &TardisContext, _inst: &SpiBsInst) -> TardisResult<()> {
    Err(funs.err().format_error("search_es_item_serv", "refresh_tsv", "not supports", "500-not-supports"))
}
81 changes: 56 additions & 25 deletions backend/spi/spi-search/src/serv/pg/search_pg_item_serv.rs
Original file line number Diff line number Diff line change
@@ -1,24 +1,19 @@
use std::collections::HashMap;
use std::{collections::HashMap, vec};

use pinyin::{to_pinyin_vec, Pinyin};
use tardis::{
basic::{dto::TardisContext, error::TardisError, result::TardisResult},
chrono::Utc,
db::{
basic::{dto::TardisContext, error::TardisError, result::TardisResult}, chrono::Utc, db::{
reldb_client::{TardisRelDBClient, TardisRelDBlConnection},
sea_orm::{FromQueryResult, Value},
},
serde_json::{self, json, Map},
web::web_resp::TardisPage,
TardisFuns, TardisFunsInst,
}, futures::future::join_all, serde_json::{self, json, Map}, web::web_resp::TardisPage, TardisFuns, TardisFunsInst
};
use itertools::Itertools;

use bios_basic::{dto::BasicQueryCondInfo, enumeration::BasicQueryOpKind, helper::db_helper, spi::spi_funs::SpiBsInst};

use crate::{
dto::search_item_dto::{
AdvBasicQueryCondInfo, SearchItemAddReq, SearchItemModifyReq, SearchItemSearchQScopeKind, SearchItemSearchReq, SearchItemSearchResp, SearchQueryMetricsReq,
SearchQueryMetricsResp,
AdvBasicQueryCondInfo, SearchItemAddReq, SearchItemModifyReq, SearchItemSearchQScopeKind, SearchItemSearchReq, SearchItemSearchResp, SearchQueryMetricsReq, SearchQueryMetricsResp
},
search_config::SearchConfig,
};
Expand All @@ -34,20 +29,26 @@ pub async fn add(add_req: &mut SearchItemAddReq, funs: &TardisFunsInst, ctx: &Ta
params.push(Value::from(add_req.kind.to_string()));
params.push(Value::from(add_req.key.to_string()));
params.push(Value::from(add_req.title.as_str()));

let pinyin_vec = to_pinyin_vec(add_req.title.as_str(), Pinyin::plain);
if add_req.title.chars().count() > funs.conf::<SearchConfig>().split_strategy_rule_config.specify_word_length.unwrap_or(30) {
params.push(Value::from(format!(
"{} {}",
"{} {} {}",
add_req.title.as_str(),
generate_word_combinations(to_pinyin_vec(add_req.title.as_str(), Pinyin::plain)).join(" ")
pinyin_vec.clone().into_iter().map(|pinyin| pinyin.chars().next().unwrap_or_default()).join(""),
generate_word_combinations(pinyin_vec).join(" ")
)));
} else {
let content = add_req.title.as_str().split(' ').last().unwrap_or_default();
params.push(Value::from(format!(
"{} {} {} {} {}",
"{} {} {} {} {} {} {}",
add_req.title.as_str(),
generate_word_combinations_with_length(add_req.title.as_str(), 1).join(" "),
generate_word_combinations_with_length(add_req.title.as_str(), 2).join(" "),
generate_word_combinations_with_length(add_req.title.as_str(), 3).join(" "),
generate_word_combinations(to_pinyin_vec(add_req.title.as_str(), Pinyin::plain)).join(" ")
pinyin_vec.clone().into_iter().map(|pinyin| pinyin.chars().next().unwrap_or_default()).join(""),
generate_word_combinations_with_length(content, 1).join(" "),
generate_word_combinations_with_length(content, 2).join(" "),
generate_word_combinations_with_length(content, 3).join(" "),
generate_word_combinations_with_symbol(content, vec!["-", "_"]).join(" "),
generate_word_combinations(pinyin_vec).join(" ")
)));
}

Expand Down Expand Up @@ -116,20 +117,26 @@ pub async fn modify(tag: &str, key: &str, modify_req: &mut SearchItemModifyReq,
"simple"
};
sql_sets.push(format!("title_tsv = to_tsvector('{word_combinations_way}', ${})", params.len() + 1));
if title.chars().count() > 15 {

let pinyin_vec = to_pinyin_vec(title, Pinyin::plain);
if title.chars().count() > funs.conf::<SearchConfig>().split_strategy_rule_config.specify_word_length.unwrap_or(30) {
params.push(Value::from(format!(
"{} {}",
"{} {} {}",
title,
generate_word_combinations(to_pinyin_vec(title, Pinyin::plain)).join(" ")
pinyin_vec.clone().into_iter().map(|pinyin| pinyin.chars().next().unwrap_or_default()).join(""),
generate_word_combinations(pinyin_vec).join(" ")
)));
} else {
let content = title.split(' ').last().unwrap_or_default();
params.push(Value::from(format!(
"{} {} {} {} {}",
"{} {} {} {} {} {} {}",
title,
generate_word_combinations_with_length(title, 1).join(" "),
generate_word_combinations_with_length(title, 2).join(" "),
generate_word_combinations_with_length(title, 3).join(" "),
generate_word_combinations(to_pinyin_vec(title, Pinyin::plain)).join(" ")
pinyin_vec.clone().into_iter().map(|pinyin| pinyin.chars().next().unwrap_or_default()).join(""),
generate_word_combinations_with_length(content, 1).join(" "),
generate_word_combinations_with_length(content, 2).join(" "),
generate_word_combinations_with_length(content, 3).join(" "),
generate_word_combinations_with_symbol(content, vec!["-", "_"]).join(" "),
generate_word_combinations(pinyin_vec).join(" ")
)));
}
};
Expand Down Expand Up @@ -199,6 +206,15 @@ fn generate_word_combinations_with_length(original_str: &str, split_len: usize)
combinations
}

/// Splits `original_str` on each separator in `symbols` and returns all
/// resulting fragments, in symbol order.
///
/// Each symbol is applied independently to the whole input, so
/// `"foo-bar_baz"` with `["-", "_"]` yields
/// `["foo", "bar_baz", "foo-bar", "baz"]`.
///
/// Two kinds of useless output are suppressed:
/// * a symbol that does not occur in the input contributes nothing (splitting
///   on it would only return the whole input again, once per such symbol);
/// * empty fragments produced by leading, trailing or consecutive symbols
///   are dropped.
fn generate_word_combinations_with_symbol(original_str: &str, symbols: Vec<&str>) -> Vec<String> {
    symbols
        .into_iter()
        // An absent separator would just echo the unsplit input.
        .filter(|symbol| original_str.contains(*symbol))
        .flat_map(|symbol| original_str.split(symbol))
        .filter(|word| !word.is_empty())
        .map(String::from)
        .collect()
}

fn generate_word_combinations(chars: Vec<&str>) -> Vec<String> {
let mut combinations = Vec::new();
for i in 0..chars.len() {
Expand Down Expand Up @@ -1547,3 +1563,18 @@ fn package_groups_agg(record: serde_json::Value) -> Result<serde_json::Value, St
None => Ok(serde_json::Value::Null),
}
}

/// Re-submits the title of every item stored under `tag` through [`modify`].
///
/// All `key`/`title` pairs of the tag's table are read, and for each row
/// `modify` is called with a request that only sets `title`. The per-row
/// updates are driven concurrently with `join_all`.
///
/// # Errors
///
/// Returns an error if the table/connection cannot be initialised, the
/// select fails, a row's `key` or `title` column cannot be decoded, or any
/// `modify` call fails. Every per-row update is still driven to completion
/// before the first failure is reported, so a bad row no longer panics the
/// service.
// NOTE(review): one update is started per row with no concurrency limit;
// consider batching if tables can be large.
pub async fn refresh_tsv(tag: &str, funs: &TardisFunsInst, ctx: &TardisContext, inst: &SpiBsInst) -> TardisResult<()> {
    let bs_inst = inst.inst::<TardisRelDBClient>();
    let (conn, table_name) = search_pg_initializer::init_table_and_conn(bs_inst, tag, ctx, false).await?;
    let rows = conn.query_all(&format!("SELECT key, title FROM {table_name}"), vec![]).await?;
    let tasks = rows.into_iter().map(|row| async move {
        let key: String = row.try_get("", "key")?;
        let mut modify_req = SearchItemModifyReq {
            title: Some(row.try_get("", "title")?),
            ..Default::default()
        };
        modify(tag, key.as_str(), &mut modify_req, funs, ctx, inst).await
    });
    // Let every update finish, then surface the first failure (if any).
    for outcome in join_all(tasks).await {
        outcome?;
    }
    Ok(())
}
1 change: 1 addition & 0 deletions backend/spi/spi-search/src/serv/search_item_serv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,6 @@ spi_dispatch_service! {
delete_by_ownership(tag: &str, own_paths: &str) -> TardisResult<()>;
search(search_req: &mut SearchItemSearchReq) -> TardisResult<TardisPage<SearchItemSearchResp>>;
query_metrics(query_req: &SearchQueryMetricsReq) -> TardisResult<SearchQueryMetricsResp>;
refresh_tsv(tag: &str) -> TardisResult<()>;
}
}

0 comments on commit c59d860

Please sign in to comment.