From c5c816b981079ab9b63b70805951d4df5da7fe6e Mon Sep 17 00:00:00 2001 From: shaohuzhang1 <80892890+shaohuzhang1@users.noreply.github.com> Date: Wed, 7 Aug 2024 19:52:05 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E5=88=86=E6=AE=B5?= =?UTF-8?q?=E6=97=B6,=E7=89=B9=E6=AE=8A=E6=83=85=E5=86=B5=E4=BC=9A?= =?UTF-8?q?=E4=B8=A2=E5=A4=B1=E6=95=B0=E6=8D=AE=20#938=20(#946)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (cherry picked from commit 0ad5a7659864fe62b99ffc9b6bd694917f7e58a9) --- apps/common/util/split_model.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/apps/common/util/split_model.py b/apps/common/util/split_model.py index c747cb1fc88..e347b60c14a 100644 --- a/apps/common/util/split_model.py +++ b/apps/common/util/split_model.py @@ -27,7 +27,7 @@ def get_level_block(text, level_content_list, level_content_index, cursor): level_content_list) else None start_index = text.index(start_content, cursor) end_index = text.index(next_content, start_index + 1) if next_content is not None else len(text) - return text[start_index+len(start_content):end_index], end_index + return text[start_index + len(start_content):end_index], end_index def to_tree_obj(content, state='title'): @@ -303,17 +303,20 @@ def parse_to_tree(self, text: str, index=0): level_content_list.insert(0, to_tree_obj("")) cursor = 0 - for i in range(len(level_content_list)): - block, cursor = get_level_block(text, level_content_list, i, cursor) + level_title_content_list = [item for item in level_content_list if item.get('state') == 'title'] + for i in range(len(level_title_content_list)): + start_content: str = level_title_content_list[i].get('content') + if cursor < text.index(start_content, cursor): + level_content_list.insert(0, to_tree_obj(text[cursor: text.index(start_content, cursor)], 'block')) + block, cursor = get_level_block(text, level_title_content_list, i, cursor) if len(block) == 0: - level_content_list[i]['children'] = [to_tree_obj("", "block")] continue children = self.parse_to_tree(text=block, index=index + 1) - level_content_list[i]['children'] = children + level_title_content_list[i]['children'] = children first_child_idx_in_block = block.lstrip().index(children[0]["content"].lstrip()) if first_child_idx_in_block != 0: inner_children = self.parse_to_tree(block[:first_child_idx_in_block], index + 1) - level_content_list[i]['children'].extend(inner_children) + level_title_content_list[i]['children'].extend(inner_children) return level_content_list def parse(self, text: str):