diff --git a/lib/Cargo.toml b/lib/Cargo.toml
index 695d503..141d555 100644
--- a/lib/Cargo.toml
+++ b/lib/Cargo.toml
@@ -8,20 +8,21 @@ repository = { workspace = true }
 readme = { workspace = true }
 keywords = { workspace = true }
 rust-version = { workspace = true }
+
 [dependencies]
-zip = "6.0.0"
+zip = "7.0.0"
 anyhow = "1.0.100"
-quick-xml = { version = "0.38.3" }
+quick-xml = { version = "0.38.4" }
 ab_glyph = { version = "0.2.32", optional = true }
-imageproc = { version = "0.25.0", optional = true}
-serde_json = { version = "1.0.145", optional = true }
+imageproc = { version = "0.26.0", optional = true}
+serde_json = { version = "1.0.149", optional = true }
 iepub-derive = { path = "../derive", version = "1.2.6" }
 serde = { version = "1.0.228", features = ["derive"], optional = true }
-image = { version = "0.25.8", default-features = false, features = ["jpeg"], optional = true }
+image = { version = "0.25.9", default-features = false, features = ["jpeg"], optional = true }
 md-5 = {version = "0.10.6", optional = true }
 
 [dev-dependencies]
-reqwest = { version = "0.11", features = ["blocking"] }
+reqwest = { version = "0.13.1", features = ["blocking"] }
 
 [features]
 no_nav=[]
diff --git a/lib/src/common.rs b/lib/src/common.rs
index 96e910c..b5b4f53 100644
--- a/lib/src/common.rs
+++ b/lib/src/common.rs
@@ -139,6 +139,100 @@ impl From<FromUtf8Error> for IError {
         IError::Utf8(value)
     }
 }
+
+/// Kind of content a [`ContentItem`] represents.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum ContentType {
+    /// Paragraph.
+    Paragraph,
+    /// Heading; the payload is the level (1-6).
+    Heading(u8),
+    /// Image.
+    Image,
+    /// Link.
+    Link,
+    /// List item.
+    ListItem,
+    /// Block quote.
+    BlockQuote,
+    /// Code block.
+    CodeBlock,
+    /// Horizontal rule (separator line).
+    HorizontalRule,
+    /// Plain text.
+    Text,
+    /// Any other tag (the payload carries the tag name).
+    Other(String),
+}
+
+/// A parsed content item.
+#[derive(Debug, Clone)]
+pub struct ContentItem {
+    /// Content type.
+    pub content_type: ContentType,
+    /// Text content.
+    pub text: String,
+    /// Attributes as `(name, value)` pairs (e.g. an image's `src`, a link's `href`).
+    pub attributes: Vec<(String, String)>,
+    /// Child content items.
+    pub children: Vec<ContentItem>,
+}
+
+impl ContentItem {
+    /// Creates an item of the given type with no text, attributes or children.
+    pub fn new(content_type: ContentType) -> Self {
+        Self {
+            content_type,
+            text: String::new(),
+            attributes: Vec::new(),
+            children: Vec::new(),
+        }
+    }
+
+    /// Appends a `(key, value)` attribute after those already stored.
+    pub fn add_attribute(&mut self, key: String, value: String) {
+        self.attributes.push((key, value));
+    }
+
+    /// Appends `child` as the last child of this item.
+    pub fn add_child(&mut self, child: ContentItem) {
+        self.children.push(child);
+    }
+
+    /// Appends `text` to the text this item already holds.
+    pub fn add_text(&mut self, text: &str) {
+        self.text.push_str(text);
+    }
+
+    /// Renders this item and its descendants as text, one line per item.
+    ///
+    /// `indent` is the nesting depth: two spaces per level, children at `indent + 1`.
+    /// Text (shown trimmed) and attributes are printed only when the item has any.
+    pub fn format(&self, indent: usize) -> String {
+        let mut out = format!("{}[{:?}]", "  ".repeat(indent), self.content_type);
+        if !self.text.is_empty() {
+            out.push_str(&format!(" 文本: \"{}\"", self.text.trim()));
+        }
+
+        if !self.attributes.is_empty() {
+            out.push_str(" 属性: {");
+            let mut separator = "";
+            for (key, value) in &self.attributes {
+                out.push_str(separator);
+                out.push_str(&format!("{}: \"{}\"", key, value));
+                separator = ", ";
+            }
+            out.push('}');
+        }
+        out.push('\n');
+
+        for child in &self.children {
+            out.push_str(&child.format(indent + 1));
+        }
+        out
+    }
+}
+
 cache_struct! {
     #[derive(Debug, Default)]
     pub(crate) struct BookInfo {
diff --git a/lib/src/epub/core.rs b/lib/src/epub/core.rs
index c1b3ab6..c6b7f89 100644
--- a/lib/src/epub/core.rs
+++ b/lib/src/epub/core.rs
@@ -9,6 +9,7 @@ use crate::cache_struct;
 use crate::common::{escape_xml, urldecode_enhanced, IError, IResult};
 use crate::epub::common::LinkRel;
 use crate::epub::html;
+use crate::parser::HtmlParser;
 crate::cache_enum! {
     #[derive(Clone)]
     pub enum Direction {
@@ -171,8 +172,11 @@ impl Debug for EpubHtml {
 }
 
 impl EpubHtml {
-    pub fn string_data(&self) -> String {
-        if let Some(data) = &self._data {
+    pub fn string_data(&mut self) -> String {
+        if self._data.is_none() {
+            self.data_mut();
+        }
+        if let Some(data) = &mut self._data {
             String::from_utf8(data.clone()).unwrap_or_else(|_e| String::new())
         } else {
             String::new()
@@ -183,6 +187,18 @@ impl EpubHtml {
         self._data.as_deref()
     }
 
+    /// Parses the HTML returned by `string_data()` with a fresh [`HtmlParser`].
+    /// Returns `None` when that string is empty or when parsing fails.
+    pub fn parser(&mut self) -> Option<HtmlParser> {
+        let html = self.string_data();
+        if html.is_empty() {
+            return None;
+        }
+        let mut parser = HtmlParser::new();
+        parser.parse(&html).ok()?;
+        Some(parser)
+    }
+
     pub(crate) fn read_data(&mut self, reader: &mut impl EpubReaderTrait) {
         let (id, origin) = if let Some(index) = self._file_name.find('#') {
             (
diff --git a/lib/src/epub/reader.rs b/lib/src/epub/reader.rs
index 2aae107..73c4776 100644
--- a/lib/src/epub/reader.rs
+++ b/lib/src/epub/reader.rs
@@ -1197,7 +1197,6 @@ html
         assert_ne!(0, nav.len());
         assert_ne!("", nav[0].title());
         let mut chap = book.chapters_mut();
-
         assert_eq!(75, chap.next().unwrap().data_mut().unwrap().len());
 
         // println!("{}", String::from_utf8( chap.next().unwrap().data().unwrap().to_vec()).unwrap());
@@ -1208,11 +1207,15 @@ html
             chap.next().unwrap().data_mut().unwrap().to_vec().len()
         );
 
+        for i in chap {
+            if let Some(p) = i.parser() {
+                assert_ne!(0, p.extract_plain_text().len());
+            }
+        }
         assert!(book.get_chapter("s04.xhtml#pgepubid00536").is_some());
         // assert!(chap.next().is_some());
         // chap.next();
         // chap.next();
-
         // assert_ne!("", chap.next().unwrap().title());
         // assert_ne!(None, book.chapters_mut().next().unwrap().data());
     }
diff --git a/lib/src/lib.rs b/lib/src/lib.rs
index d740b34..1ba5beb 100644
--- a/lib/src/lib.rs
+++ b/lib/src/lib.rs
@@ -3,10 +3,11 @@
 #[allow(clippy::needless_range_loop)]
 extern crate iepub_derive;
 mod adapter;
-mod common;
+pub mod common;
 mod cover;
 mod epub;
 mod mobi;
+pub mod parser;
 pub mod path;
 pub use crate::common::DateTimeFormater;
 
diff --git a/lib/src/parser.rs b/lib/src/parser.rs
new file mode 100644
index 0000000..5664b6c
--- /dev/null
+++ b/lib/src/parser.rs
@@ -0,0 +1,342 @@
+use crate::common::{ContentItem, ContentType};
+use anyhow::{anyhow, Result};
+use quick_xml::{events::Event, reader::Reader};
+
+/// HTML parser that turns markup into a tree of [`ContentItem`]s.
+pub struct HtmlParser {
+    /// Parse results: the top-level items collected so far.
+    pub items: Vec<ContentItem>,
+}
+
+impl HtmlParser {
+    pub fn new() -> Self {
+        Self { items: Vec::new() } // a new parser starts with no parsed items
+    }
+
+    /// Parses an HTML/XHTML string and appends the resulting items to `self.items`.
+    ///
+    /// Only content inside `<body>` is kept. For a fragment without a `<body>` tag,
+    /// collection starts at the first element that is not `html`, `head`, `meta`,
+    /// `title`, `link` or `style`. Elements still open at end of input are closed
+    /// implicitly.
+    ///
+    /// Adjacent text, CDATA and entity/character references form one text run. A run
+    /// is appended to the innermost open element, or stored as a top-level
+    /// [`ContentType::Text`] item when no element is open; whitespace-only runs are
+    /// dropped. References such as `&amp;` or `&#8217;` are resolved to the characters
+    /// they stand for; a reference that cannot be resolved is kept as written.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error as soon as the XML reader reports one. Items completed before
+    /// the failure stay in `self.items`.
+    ///
+    /// # Examples
+    ///
+    /// ```ignore
+    /// let mut parser = HtmlParser::new();
+    /// parser.parse("<p>Tom &amp; Jerry</p>")?;
+    /// assert_eq!(parser.extract_paragraphs(), ["Tom & Jerry"]);
+    /// ```
+    pub fn parse(&mut self, html: &str) -> Result<()> {
+        let mut reader = Reader::from_str(html);
+        reader.config_mut().trim_text(false);
+        // Self-closing tags such as `<br/>` are delivered as a Start + End pair, so
+        // `Event::Empty` never occurs and needs no arm of its own.
+        reader.config_mut().expand_empty_elements = true;
+        reader.config_mut().check_end_names = false;
+
+        let mut buf = Vec::new();
+        let mut stack: Vec<ContentItem> = Vec::new();
+        // Text run collected since the last tag boundary.
+        let mut pending = String::new();
+        let mut in_body = false;
+        let mut has_body_tag = false;
+
+        loop {
+            match reader.read_event_into(&mut buf) {
+                Ok(Event::Eof) => break,
+
+                Ok(Event::Start(ref e)) => {
+                    // Text read so far belongs to the state before this tag.
+                    self.flush_text(&mut stack, &mut pending, in_body);
+                    let tag_name = String::from_utf8_lossy(e.name().as_ref()).into_owned();
+
+                    if tag_name.eq_ignore_ascii_case("body") {
+                        in_body = true;
+                        has_body_tag = true;
+                    } else {
+                        // Fragment without <body>: the first content tag starts collection.
+                        if !has_body_tag && !Self::is_document_level_tag(&tag_name) {
+                            in_body = true;
+                        }
+                        if in_body {
+                            stack.push(Self::element_item(&tag_name, e));
+                        }
+                    }
+                }
+
+                Ok(Event::End(ref e)) => {
+                    self.flush_text(&mut stack, &mut pending, in_body);
+                    let tag_name = String::from_utf8_lossy(e.name().as_ref()).into_owned();
+
+                    if tag_name.eq_ignore_ascii_case("body") {
+                        in_body = false;
+                    } else if in_body {
+                        if let Some(item) = stack.pop() {
+                            self.attach(&mut stack, item);
+                        }
+                    }
+                }
+
+                Ok(Event::Text(ref e)) => {
+                    pending.push_str(&String::from_utf8_lossy(e.as_ref()));
+                }
+
+                Ok(Event::CData(ref e)) => {
+                    pending.push_str(&String::from_utf8_lossy(e.as_ref()));
+                }
+
+                // quick-xml 0.38+ reports `&name;` / `&#N;` references as their own
+                // event instead of inside `Text`; without this arm they would be lost.
+                Ok(Event::GeneralRef(ref e)) => {
+                    let name = String::from_utf8_lossy(e);
+                    match e.resolve_char_ref() {
+                        // Numeric reference such as `&#160;` or `&#xA0;`.
+                        Ok(Some(ch)) => pending.push(ch),
+                        _ => match quick_xml::escape::resolve_predefined_entity(&name) {
+                            Some(replacement) => pending.push_str(replacement),
+                            // Unknown entity: keep the reference exactly as written.
+                            None => pending.push_str(&format!("&{};", name)),
+                        },
+                    }
+                }
+
+                Err(e) => {
+                    return Err(anyhow!("解析错误: {:?}", e));
+                }
+
+                _ => {}
+            }
+
+            buf.clear();
+        }
+
+        // Flush trailing text, then close any elements left open.
+        self.flush_text(&mut stack, &mut pending, in_body);
+        while let Some(item) = stack.pop() {
+            self.attach(&mut stack, item);
+        }
+
+        Ok(())
+    }
+
+    /// Returns `true` for tags that wrap or describe the document instead of holding
+    /// content (`html`, `head`, `meta`, `title`, `link`, `style`).
+    fn is_document_level_tag(tag: &str) -> bool {
+        ["html", "head", "meta", "title", "link", "style"]
+            .iter()
+            .any(|known| tag.eq_ignore_ascii_case(known))
+    }
+
+    /// Builds the item for an opening tag and copies every readable attribute onto it.
+    fn element_item(tag_name: &str, e: &quick_xml::events::BytesStart<'_>) -> ContentItem {
+        let mut item = ContentItem::new(Self::tag_to_content_type(tag_name));
+        for attr in e.attributes().flatten() {
+            let key = String::from_utf8_lossy(attr.key.as_ref()).into_owned();
+            // A value that fails to unescape is stored as an empty string.
+            let value = attr.unescape_value().map(|v| v.into_owned()).unwrap_or_default();
+            item.add_attribute(key, value);
+        }
+        item
+    }
+
+    /// Hands a finished item to its parent, or to `self.items` when nothing is open.
+    fn attach(&mut self, stack: &mut [ContentItem], item: ContentItem) {
+        match stack.last_mut() {
+            Some(parent) => parent.add_child(item),
+            None => self.items.push(item),
+        }
+    }
+
+    /// Moves the buffered text run into the tree and leaves the buffer empty.
+    /// The run goes to the innermost open element, or becomes a top-level text item.
+    fn flush_text(&mut self, stack: &mut [ContentItem], pending: &mut String, in_body: bool) {
+        let run = std::mem::take(pending);
+        // Text outside the body and whitespace-only runs are discarded.
+        if !in_body || run.trim().is_empty() {
+            return;
+        }
+        match stack.last_mut() {
+            Some(open) => open.add_text(&run),
+            None => {
+                let mut text_item = ContentItem::new(ContentType::Text);
+                text_item.add_text(&run);
+                self.items.push(text_item);
+            }
+        }
+    }
+
+    /// Maps an HTML tag name (matched case-insensitively) to its [`ContentType`]; tags
+    /// without a dedicated variant become `Other`, keeping the name as passed in.
+    fn tag_to_content_type(tag: &str) -> ContentType {
+        let lowered = tag.to_lowercase();
+        // `h1`..`h6` share one variant; the digit is the heading level.
+        if let [b'h', digit @ b'1'..=b'6'] = lowered.as_bytes() {
+            return ContentType::Heading(*digit - b'0');
+        }
+        match lowered.as_str() {
+            "p" => ContentType::Paragraph,
+            "img" => ContentType::Image,
+            "a" => ContentType::Link,
+            "li" => ContentType::ListItem,
+            "blockquote" => ContentType::BlockQuote,
+            "pre" | "code" => ContentType::CodeBlock,
+            "hr" => ContentType::HorizontalRule,
+            _ => ContentType::Other(tag.to_string()),
+        }
+    }
+
+    /// Prints a numbered dump of every top-level item to standard output.
+    pub fn print_result(&self) {
+        println!("=== 解析结果 ===\n");
+        for (number, item) in (1usize..).zip(&self.items) {
+            println!("--- 内容项 {} ---", number);
+            print!("{}", item.format(0));
+        }
+    }
+
+    /// Returns the trimmed text of every paragraph with non-blank text, depth-first.
+    pub fn extract_paragraphs(&self) -> Vec<String> {
+        let mut collected = Vec::new();
+        self.extract_paragraphs_recursive(self.items.as_slice(), &mut collected);
+        collected
+    }
+
+    /// Depth-first helper for [`Self::extract_paragraphs`]: appends the trimmed text of
+    /// every paragraph in `items` to `result`, recursing into the children of each item
+    /// regardless of its type.
+    fn extract_paragraphs_recursive(&self, items: &[ContentItem], result: &mut Vec<String>) {
+        for item in items {
+            let body = item.text.trim();
+            // Paragraphs whose text is empty after trimming are skipped.
+            if matches!(item.content_type, ContentType::Paragraph) && !body.is_empty() {
+                result.push(body.to_string());
+            }
+            // Nested items are visited whatever the type of the current one.
+            self.extract_paragraphs_recursive(&item.children, result);
+        }
+    }
+
+    /// Returns `(level, trimmed text)` for every heading with non-blank text, depth-first.
+    pub fn extract_headings(&self) -> Vec<(u8, String)> {
+        let mut found = Vec::new();
+        self.extract_headings_recursive(self.items.as_slice(), &mut found);
+        found
+    }
+
+    /// Depth-first helper for [`Self::extract_headings`]; headings with blank text are skipped.
+    fn extract_headings_recursive(&self, items: &[ContentItem], result: &mut Vec<(u8, String)>) {
+        for item in items {
+            let title = item.text.trim();
+            if let (ContentType::Heading(level), false) = (&item.content_type, title.is_empty()) {
+                result.push((*level, title.to_string()));
+            }
+            // Nested items are visited whatever the type of the current one.
+            self.extract_headings_recursive(&item.children, result);
+        }
+    }
+
+    /// Returns the `src` value of every image item that has one, gathered depth-first.
+    pub fn extract_images(&self) -> Vec<String> {
+        let mut sources = Vec::new();
+        self.extract_images_recursive(self.items.as_slice(), &mut sources);
+        sources
+    }
+
+    /// Depth-first helper for [`Self::extract_images`]: for each image item, appends the
+    /// value of its first `src` attribute (name matched case-insensitively), if any.
+    fn extract_images_recursive(&self, items: &[ContentItem], result: &mut Vec<String>) {
+        for item in items {
+            if matches!(item.content_type, ContentType::Image) {
+                let src = item.attributes.iter().find(|(key, _)| key.to_lowercase() == "src");
+                if let Some((_, value)) = src {
+                    result.push(value.clone());
+                }
+            }
+            // Nested items are visited whatever the type of the current one.
+            self.extract_images_recursive(&item.children, result);
+        }
+    }
+
+    /// Joins the trimmed text of every item that has text, each followed by one space.
+    pub fn extract_plain_text(&self) -> String {
+        let mut joined = String::new();
+        self.extract_text_recursive(self.items.as_slice(), &mut joined);
+        joined
+    }
+
+    /// Depth-first helper for [`Self::extract_plain_text`]: appends each item's trimmed
+    /// text followed by one space, then the text of its children.
+    fn extract_text_recursive(&self, items: &[ContentItem], result: &mut String) {
+        items.iter().for_each(|item| {
+            if !item.text.is_empty() {
+                result.extend([item.text.trim(), " "]);
+            }
+            self.extract_text_recursive(&item.children, result);
+        });
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_parse_html_without_body_tag() {
+        // An HTML fragment that has no <body> tag.
+        let html = r#"<div class="center"><span>171</span></div>
+<h3>INTRODUCTORY</h3>
+<p>The difficulties of classification are very apparent here.</p>
+<p>Another paragraph with some text.</p>"#;
+
+        let mut parser = HtmlParser::new();
+        parser.parse(html).unwrap();
+
+        println!("解析到 {} 个顶层元素", parser.items.len());
+        assert!(!parser.items.is_empty(), "应该解析到至少一个元素");
+
+        let paragraphs = parser.extract_paragraphs();
+        println!("提取到 {} 个段落", paragraphs.len());
+        assert_eq!(paragraphs.len(), 2, "应该提取到 2 个段落");
+
+        let headings = parser.extract_headings();
+        println!("提取到 {} 个标题", headings.len());
+        assert_eq!(headings.len(), 1, "应该提取到 1 个标题");
+        assert_eq!(headings[0].0, 3, "标题级别应该是 3");
+        assert_eq!(headings[0].1, "INTRODUCTORY", "标题内容应该是 INTRODUCTORY");
+    }
+
+    #[test]
+    fn test_parse_html_with_body_tag() {
+        // A full document that has a <body> tag.
+        let html = r#"<html>
+<body>
+<h1>章节标题</h1>
+<p>这是第一段内容。</p>
+</body>
+</html>"#;
+
+        let mut parser = HtmlParser::new();
+        parser.parse(html).unwrap();
+
+        assert!(!parser.items.is_empty());
+
+        let paragraphs = parser.extract_paragraphs();
+        assert_eq!(paragraphs.len(), 1);
+
+        let headings = parser.extract_headings();
+        assert_eq!(headings.len(), 1);
+        assert_eq!(headings[0].0, 1);
+    }
+}