From 9933f44411c0b38fdddb66a07b17a0a2fe7f8cd4 Mon Sep 17 00:00:00 2001 From: "michael.ding" Date: Fri, 23 Jan 2026 17:55:50 +0800 Subject: [PATCH 1/3] code fix --- lib/Cargo.toml | 4 ++-- lib/src/epub/reader.rs | 15 +++++++++------ 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 0736806..f25621d 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -10,9 +10,9 @@ keywords = { workspace = true } rust-version = { workspace = true } [dependencies] -zip = "7.0.0" +zip = "7.2.0" anyhow = "1.0.100" -quick-xml = { version = "0.38.4" } +quick-xml = { version = "0.39.0" } ab_glyph = { version = "0.2.32", optional = true } imageproc = { version = "0.26.0", optional = true} serde_json = { version = "1.0.149", optional = true } diff --git a/lib/src/epub/reader.rs b/lib/src/epub/reader.rs index 5fee397..d89f8fd 100644 --- a/lib/src/epub/reader.rs +++ b/lib/src/epub/reader.rs @@ -721,7 +721,7 @@ fn read_nav_xhtml(xhtml: &str, root_path: String, book: &mut EpubBook) -> IResul b"a" if in_toc_nav => { if let Some(href) = e .attributes() - .find(|a| a.as_ref().unwrap().key.as_ref() == b"href") + .find(|a| a.as_ref().map(|a| a.key.0 == b"href").unwrap_or(false)) { let mut href = String::from_utf8_lossy(&href.unwrap().value).to_string(); if !href.starts_with(&root_path) { @@ -733,9 +733,9 @@ fn read_nav_xhtml(xhtml: &str, root_path: String, book: &mut EpubBook) -> IResul b"span" => { if let Some(class) = e .attributes() - .find(|a| a.as_ref().unwrap().key.as_ref() == b"class") + .find(|a| a.as_ref().map(|a| a.key.0 == b"class").unwrap_or(false)) { - if class.unwrap().value.as_ref() == b"toc-label" { + if class.as_ref().map(|a| &*a.value == b"toc-label").unwrap_or(false) { in_label = true } } @@ -791,8 +791,11 @@ fn read_nav_xhtml(xhtml: &str, root_path: String, book: &mut EpubBook) -> IResul fn has_epub_type(e: &BytesStart, value: &str) -> bool { e.attributes().any(|a| { - let attr = a.as_ref().unwrap(); - attr.key.as_ref() == b"epub:type" && attr.value.as_ref() == value.as_bytes() + if let Ok(attr) = a { + attr.key.0 == b"epub:type" && &*attr.value == value.as_bytes() + } else { + false + } }) } @@ -1278,7 +1281,7 @@ html .metadata("h", "m") .file(f) .unwrap(); - let mut book = read_from_file(f).unwrap(); + let book = read_from_file(f).unwrap(); assert_eq!(1, book.nav().len()); assert_eq!( "1. Test Title `~!@#$%^&*()_+ and []\\{}| and2 ;':\" and3 ,./<>?", From 272b8b3f21e56b446998657afbf2373074a09a1b Mon Sep 17 00:00:00 2001 From: michael Date: Wed, 4 Feb 2026 14:15:36 +0800 Subject: [PATCH 2/3] fix cover in xhtml --- lib/src/epub/core.rs | 12 +++-- lib/src/epub/reader.rs | 114 ++++++++++++++++++++++------------------- 2 files changed, 70 insertions(+), 56 deletions(-) diff --git a/lib/src/epub/core.rs b/lib/src/epub/core.rs index c6b7f89..fd37885 100644 --- a/lib/src/epub/core.rs +++ b/lib/src/epub/core.rs @@ -62,9 +62,9 @@ macro_rules! epub_base_field{ $(#[$meta])* pub struct $struct_name{ - id:String, - _file_name:String, - media_type:String, + pub(crate) id:String, + pub(crate) _file_name:String, + pub(crate) media_type:String, _data: Option>, #[cfg(not(feature="cache"))] reader:Option>>>, @@ -448,6 +448,7 @@ impl EpubAssets { pub fn data_mut(&mut self) -> Option<&[u8]> { let mut f = String::from(self._file_name.as_str()); + println!("reader {:?}",self.reader.is_some()); if self._data.is_none() && self.reader.is_some() && !f.is_empty() { let prefixs = ["", common::EPUB, common::EPUB3]; if self._data.is_none() && self.reader.is_some() && !f.is_empty() { @@ -913,7 +914,10 @@ impl EpubBook { self.nav.iter() } - pub fn set_cover(&mut self, cover: EpubAssets) { + pub fn set_cover(&mut self, mut cover: EpubAssets) { + if let Some(r) = &self.reader { + cover.reader = Some(Arc::clone(r)); + } self.cover = Some(cover); } diff --git a/lib/src/epub/reader.rs b/lib/src/epub/reader.rs index d89f8fd..3f84f57 100644 --- a/lib/src/epub/reader.rs +++ b/lib/src/epub/reader.rs @@ -382,20 +382,40 @@ fn read_manifest_xml( match e.name().as_ref() { b"item" => { let mut a = EpubAssets::default(); - if let Ok(href) = e.try_get_attribute("href") { - if let Some(h) = href.map(|f| { - f.unescape_value() - .map_or_else(|_| String::new(), |v| v.to_string()) - }) { - a.set_file_name(h.as_str()); + let mut is_cover = false; + for attr in e.attributes().flatten() { + match attr.key.as_ref() { + b"href" => { + let h = attr.unescape_value().unwrap_or_default().to_string(); + a.set_file_name(h.as_str()); + } + b"id" => { + let h = attr.unescape_value().unwrap_or_default().to_string(); + if h.eq_ignore_ascii_case("cover") { + is_cover = true; + } + a.set_id(h.as_str()); + } + b"media-type" => { + let h = attr.unescape_value().unwrap_or_default().to_string(); + a.media_type = h; + } + b"properties" => { + let h = attr.unescape_value().unwrap_or_default().to_string(); + if h.split_whitespace().any(|s| s == "cover-image") { + is_cover = true; + } + } + _ => {} } } - if let Ok(href) = e.try_get_attribute("id") { - if let Some(h) = href.map(|f| { - f.unescape_value() - .map_or_else(|_| String::new(), |v| v.to_string()) - }) { - a.set_id(h.as_str()); + + if is_cover { + let href = a.file_name(); + if href.ends_with(".xhtml") || href.ends_with(".html") { + book.cover_chapter = Some(EpubHtml::default().with_file_name(href)); + } else if a.media_type.starts_with("image/") { + book.set_cover(a.clone()); } } assets.push(a); @@ -887,6 +907,7 @@ impl EpubReaderTrait for EpubReader { // 查找第一个img标签,然后找到对应的文件 if let Some(cover) = get_img_src(co.0, "img", "src") .or_else(|| get_img_src(co.0, "image", "xlink:href")) + .or_else(|| get_img_src(co.0, "image", "href")) .and_then(|f| String::from_utf8(f).ok()) .map(|src| crate::path::Path::system(co.1).pop().join(src).to_str()) .and_then(|img| book.assets().find(|f| f.file_name() == img.as_str())) @@ -895,6 +916,7 @@ impl EpubReaderTrait for EpubReader { } } } + book.update_chapter(); book.update_assets(); Ok(()) @@ -941,41 +963,28 @@ impl EpubReaderTrait for EpubReader { /// 获取img的src值 pub(crate) fn get_img_src(html: &[u8], tag: &str, attribute: &str) -> Option> { - let mut index: usize = 0; - let chars = html; - - let tag = format!("<{tag} "); - let key = tag.as_bytes(); - - while index < chars.len() { - let mut now = chars[index]; - let mut j = 0; - while j < key.len() { - if now == key[j] { - now = chars[index + j + 1]; - } else { - break; - } - j += 1; - } - if j == key.len() { - // 找到 img 标签,接下来查找 src 属性 - index += j; - // 查找完后数据被分成三段,第一段 为开头到 src=,第二段是src=到value结束,第三段是value结束到之后 - // 第一段原样添加,第二段如果找到值替换recindex,没找到则原样添加,第三段继续循环 - - let att = crate::mobi::image::get_attr_value( - &chars[index..], - format!("{attribute}=").as_str(), - ); - if let Some(v) = att.0 { - return Some(v); - } + use quick_xml::events::Event; + use quick_xml::reader::Reader; - index += att.1; - } else { - index += 1; + let mut reader = Reader::from_reader(html); + reader.config_mut().trim_text(true); + let mut buf = Vec::new(); + loop { + match reader.read_event_into(&mut buf) { + Ok(Event::Start(e)) | Ok(Event::Empty(e)) => { + if e.name().as_ref().eq_ignore_ascii_case(tag.as_bytes()) { + for attr in e.attributes().flatten() { + if attr.key.as_ref().eq_ignore_ascii_case(attribute.as_bytes()) { + return Some(attr.value.to_vec()); + } + } + } + } + Ok(Event::Eof) => break, + Err(_) => break, + _ => {} } + buf.clear(); } None @@ -1019,6 +1028,7 @@ pub fn is_epub(value: &mut T) -> IResult { #[cfg(test)] mod tests { + use std::fs; use crate::{ common::tests::download_epub_file, epub::reader::{get_img_src, read_meta_xml}, @@ -1164,9 +1174,9 @@ html name, "https://github.com/user-attachments/files/19544787/epub-book.epub.zip", ) - .as_str(), + .as_str(), ) - .unwrap(); + .unwrap(); let nav = book.nav().as_slice(); @@ -1184,7 +1194,7 @@ html #[test] fn test_read_epub3() { let name = "../target/epub3.epub"; - let url = "https://github.com/IDPF/epub3-samples/releases/download/20230704/childrens-literature.epub"; + let url = "https://github.com/IDPF/epub3-samples/releases/download/20230704/childrens-literature.epub"; download_epub_file(name, url); let mut book = read_from_file(name).unwrap(); @@ -1237,15 +1247,15 @@ html .unwrap() .to_vec() ) - .unwrap() - .len() + .unwrap() + .len() ); } /// 测试epub3的读取资源文件 #[test] fn test_read_epub3_assets() { let name = "../target/epub3.epub"; - let url = "https://github.com/IDPF/epub3-samples/releases/download/20230704/childrens-literature.epub"; + let url = "https://github.com/IDPF/epub3-samples/releases/download/20230704/childrens-literature.epub"; download_epub_file(name, url); let mut book = read_from_file(name).unwrap(); @@ -1371,4 +1381,4 @@ html let book = read_from_file(name).unwrap(); assert!(book.cover().is_some()); } -} +} \ No newline at end of file From 482acf88dde59632db075df76b5d0cd9b50aa675 Mon Sep 17 00:00:00 2001 From: michael Date: Wed, 4 Feb 2026 14:22:01 +0800 Subject: [PATCH 3/3] clean print --- lib/src/epub/core.rs | 1 - lib/src/epub/reader.rs | 2 -- 2 files changed, 3 deletions(-) diff --git a/lib/src/epub/core.rs b/lib/src/epub/core.rs index fd37885..5c6c963 100644 --- a/lib/src/epub/core.rs +++ b/lib/src/epub/core.rs @@ -448,7 +448,6 @@ impl EpubAssets { pub fn data_mut(&mut self) -> Option<&[u8]> { let mut f = String::from(self._file_name.as_str()); - println!("reader {:?}",self.reader.is_some()); if self._data.is_none() && self.reader.is_some() && !f.is_empty() { let prefixs = ["", common::EPUB, common::EPUB3]; if self._data.is_none() && self.reader.is_some() && !f.is_empty() { diff --git a/lib/src/epub/reader.rs b/lib/src/epub/reader.rs index 3f84f57..4fe5332 100644 --- a/lib/src/epub/reader.rs +++ b/lib/src/epub/reader.rs @@ -206,8 +206,6 @@ fn read_meta_xml( if !parent.is_empty() && parent[parent.len() - 1] == name { parent.remove(parent.len() - 1); } - - // println!("end {}",String::from_utf8(e.name().as_ref().to_vec()).unwrap()); } _ => {} }