From 3f3ab5a7be7ead74962da969c2e4d217021211c7 Mon Sep 17 00:00:00 2001 From: AwkwardPeak7 <48650614+AwkwardPeak7@users.noreply.github.com> Date: Sat, 23 Dec 2023 19:14:51 +0500 Subject: [PATCH 1/6] Hitomi.la --- .../parsers/site/all/HitomiLaParser.kt | 596 ++++++++++++++++++ .../koitharu/kotatsu/parsers/util/Parse.kt | 6 + 2 files changed, 602 insertions(+) create mode 100644 src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt new file mode 100644 index 000000000..caf525bc7 --- /dev/null +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt @@ -0,0 +1,596 @@ +package org.koitharu.kotatsu.parsers.site.all + +import kotlinx.coroutines.* +import kotlinx.coroutines.sync.* +import okhttp3.Headers +import org.json.* +import org.koitharu.kotatsu.parsers.* +import org.koitharu.kotatsu.parsers.config.ConfigKey +import org.koitharu.kotatsu.parsers.model.* +import org.koitharu.kotatsu.parsers.util.* +import org.koitharu.kotatsu.parsers.util.json.getStringOrNull +import org.koitharu.kotatsu.parsers.util.json.mapJSON +import java.nio.ByteBuffer +import java.nio.ByteOrder +import java.security.MessageDigest +import java.text.SimpleDateFormat +import java.util.* +import kotlin.math.min + +@OptIn(ExperimentalUnsignedTypes::class) +@MangaSourceParser("HITOMILA", "Hitomi.La", type = ContentType.HENTAI) +class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSource.HITOMILA) { + + override val configKeyDomain = ConfigKey.Domain("hitomi.la") + + private val ltnBaseUrl get() = "https://${getDomain("ltn")}" + + override val availableSortOrders: Set = EnumSet.of( + SortOrder.NEWEST, + SortOrder.POPULARITY, + ) + + override suspend fun getAvailableTags(): Set { + return coroutineScope { + ('a'..'z').map { alphabet -> + async { + val doc = webClient.httpGet("https://$domain/alltags-$alphabet.html").parseHtml() + + doc.select(".posts > li").mapNotNull { element -> + val num = element.ownText().let { + Regex("""\((\d+)\)""").find(it)?.groupValues?.get(1)?.toIntOrNull() ?: 0 + } + + if (num > 100) { + val url = element.selectFirst("a") + val href = url?.attrAsRelativeUrl("href") + ?: return@mapNotNull null + + MangaTag( + title = url.ownText().toCamelCase(), + key = href.tagUrlToTag(), + source = source, + ) + } else { + null + } + } + } + }.awaitAll().flatten().toSet() + } + } + + private var cachedSearchIds: List = emptyList() + + override suspend fun getList(offset: Int, filter: MangaListFilter?): List { + return when (filter) { + is MangaListFilter.Advanced -> { + if (filter.tags.isEmpty()) { + when (filter.sortOrder) { + SortOrder.POPULARITY -> { + getGalleryIDsFromNozomi("popular", "today", "all", offset.nextOffsetRange()) + } + + else -> { + getGalleryIDsFromNozomi(null, "index", "all", offset.nextOffsetRange()) + } + } + } else { + if (offset == 0) { + cachedSearchIds = hitomiSearch( + filter.tags.joinToString(" ") { it.key }, + filter.sortOrder == SortOrder.POPULARITY + ).toList() + } + cachedSearchIds.subList(offset, min(offset+25, cachedSearchIds.size)) + } + } + + is MangaListFilter.Search -> { + if (offset == 0) { + cachedSearchIds = hitomiSearch( + filter.query, + filter.sortOrder == SortOrder.POPULARITY + ).toList() + } + cachedSearchIds.subList(offset, min(offset+25, cachedSearchIds.size)) + } + + else -> getGalleryIDsFromNozomi(null, "popular", "all", offset.nextOffsetRange()) + }.toMangaList() + } + + private fun Int.nextOffsetRange(): LongRange { + val bytes = this*4L + return bytes.until(bytes+100L) + } + + private suspend fun hitomiSearch(query: String, sortByPopularity: Boolean = false) : Set = coroutineScope { + val terms = query + .trim() + .replace(Regex("""^\?"""), "") + .lowercase() + .split(Regex("\\s+")) + .map { + it.replace('_', ' ') + } + + val positiveTerms = LinkedList() + val negativeTerms = LinkedList() + + for (term in terms) { + if (term.startsWith("-")) + negativeTerms.push(term.removePrefix("-")) + else if (term.isNotBlank()) + positiveTerms.push(term) + } + + val positiveResults = positiveTerms.map { + async { + runCatching { + getGalleryIDsForQuery(it) + }.getOrDefault(emptySet()) + } + } + + val negativeResults = negativeTerms.map { + async { + runCatching { + getGalleryIDsForQuery(it) + }.getOrDefault(emptySet()) + } + } + + val results = when { + sortByPopularity -> getGalleryIDsFromNozomi(null, "popular", "all") + positiveTerms.isEmpty() -> getGalleryIDsFromNozomi(null, "index", "all") + else -> emptySet() + }.toMutableSet() + + fun filterPositive(newResults: Set) { + when { + results.isEmpty() -> results.addAll(newResults) + else -> results.retainAll(newResults) + } + } + + fun filterNegative(newResults: Set) { + results.removeAll(newResults) + } + + //positive results + positiveResults.forEach { + filterPositive(it.await()) + } + + //negative results + negativeResults.forEach { + filterNegative(it.await()) + } + + results + } + + //search.js + private suspend fun getGalleryIDsForQuery(query: String) : Set { + query.replace("_", " ").let { + if (it.indexOf(':') > -1) { + val sides = it.split(":") + val ns = sides[0] + var tag = sides[1] + + var area : String? = ns + var language = "all" + when (ns) { + "female", "male" -> { + area = "tag" + tag = it + } + "language" -> { + area = null + language = tag + tag = "index" + } + } + + return getGalleryIDsFromNozomi(area, tag, language) + } + + val key = hashTerm(it) + val field = "galleries" + + val node = getNodeAtAddress(field, 0) + + val data = bSearch(field, key, node) + + if (data != null) + return getGalleryIDsFromData(data) + + return emptySet() + } + } + + private suspend fun getGalleryIDsFromData(data: Pair) : Set { + val url = "$ltnBaseUrl/galleriesindex/galleries.${galleriesIndexVersion.get()}.data" + val (offset, length) = data + if (length > 100000000 || length <= 0) + throw Exception("length $length is too long") + + val inbuf = getURLAtRange(url, offset.until(offset+length)) + + val galleryIDs = mutableSetOf() + + val buffer = ByteBuffer + .wrap(inbuf) + .order(ByteOrder.BIG_ENDIAN) + + val numberOfGalleryIDs = buffer.int + + val expectedLength = numberOfGalleryIDs*4+4 + + if (numberOfGalleryIDs > 10000000 || numberOfGalleryIDs <= 0) + throw Exception("number_of_galleryids $numberOfGalleryIDs is too long") + else if (inbuf.size != expectedLength) + throw Exception("inbuf.byteLength ${inbuf.size} != expected_length $expectedLength") + + for (i in 0.until(numberOfGalleryIDs)) + galleryIDs.add(buffer.int) + + return galleryIDs + } + + private suspend fun bSearch(field: String, key: UByteArray, node: Node) : Pair? { + fun compareArrayBuffers(dv1: UByteArray, dv2: UByteArray) : Int { + val top = min(dv1.size, dv2.size) + + for (i in 0.until(top)) { + if (dv1[i] < dv2[i]) + return -1 + else if (dv1[i] > dv2[i]) + return 1 + } + + return 0 + } + + fun locateKey(key: UByteArray, node: Node) : Pair { + for (i in node.keys.indices) { + val cmpResult = compareArrayBuffers(key, node.keys[i]) + + if (cmpResult <= 0) + return Pair(cmpResult==0, i) + } + + return Pair(false, node.keys.size) + } + + fun isLeaf(node: Node) : Boolean { + for (subnode in node.subNodeAddresses) + if (subnode != 0L) + return false + + return true + } + + if (node.keys.isEmpty()) + return null + + val (there, where) = locateKey(key, node) + if (there) + return node.datas[where] + else if (isLeaf(node)) + return null + + val nextNode = getNodeAtAddress(field, node.subNodeAddresses[where]) + + return bSearch(field, key, nextNode) + } + + private suspend fun getGalleryIDsFromNozomi(area: String?, tag: String, language: String, range: LongRange? = null) : Set { + val nozomiAddress = when(area) { + null -> "$ltnBaseUrl/$tag-$language.nozomi" + else -> "$ltnBaseUrl/$area/$tag-$language.nozomi" + } + + val bytes = getURLAtRange(nozomiAddress, range) + val nozomi = mutableSetOf() + + val arrayBuffer = ByteBuffer + .wrap(bytes) + .order(ByteOrder.BIG_ENDIAN) + + while (arrayBuffer.hasRemaining()) + nozomi.add(arrayBuffer.int) + + return nozomi + } + + private val tagIndexVersion = SuspendLazy { getIndexVersion("tagindex") } + private val galleriesIndexVersion = SuspendLazy { getIndexVersion("galleriesindex") } + + private suspend fun getIndexVersion(name: String) = + webClient.httpGet("$ltnBaseUrl/$name/version?_=${System.currentTimeMillis()}").parseRaw() + + private data class Node( + val keys: List, + val datas: List>, + val subNodeAddresses: List, + ) + + private fun decodeNode(data: ByteArray) : Node { + val buffer = ByteBuffer + .wrap(data) + .order(ByteOrder.BIG_ENDIAN) + + val uData = data.toUByteArray() + + val numberOfKeys = buffer.int + val keys = ArrayList() + + for (i in 0.until(numberOfKeys)) { + val keySize = buffer.int + + if (keySize == 0 || keySize > 32) + throw Exception("fatal: !keySize || keySize > 32") + + keys.add(uData.sliceArray(buffer.position().until(buffer.position()+keySize))) + buffer.position(buffer.position()+keySize) + } + + val numberOfDatas = buffer.int + val datas = ArrayList>() + + for (i in 0.until(numberOfDatas)) { + val offset = buffer.long + val length = buffer.int + + datas.add(Pair(offset, length)) + } + + val numberOfSubNodeAddresses = 16 + 1 + val subNodeAddresses = ArrayList() + + for (i in 0.until(numberOfSubNodeAddresses)) { + val subNodeAddress = buffer.long + subNodeAddresses.add(subNodeAddress) + } + + return Node(keys, datas, subNodeAddresses) + } + + private suspend fun getNodeAtAddress(field: String, address: Long) : Node { + val url = + when(field) { + "galleries" -> "$ltnBaseUrl/galleriesindex/galleries.${galleriesIndexVersion.get()}.index" + "languages" -> "$ltnBaseUrl/galleriesindex/languages.${galleriesIndexVersion.get()}.index" + "nozomiurl" -> "$ltnBaseUrl/galleriesindex/nozomiurl.${galleriesIndexVersion.get()}.index" + else -> "$ltnBaseUrl/tagindex/$field.${tagIndexVersion.get()}.index" + } + + val nodedata = getURLAtRange(url, address.until(address + 464)) + + return decodeNode(nodedata) + } + + private suspend fun getURLAtRange(url: String, range: LongRange? = null) : ByteArray { + val rangeHeaders = when (range) { + null -> Headers.headersOf() + else -> Headers.headersOf("Range", "bytes=${range.first}-${range.last}") + } + + return webClient.httpGet(url, rangeHeaders).parseBytes() + } + + private fun hashTerm(term: String) : UByteArray { + return sha256(term.toByteArray()).copyOfRange(0, 4).toUByteArray() + } + + private fun sha256(data: ByteArray) : ByteArray { + return MessageDigest.getInstance("SHA-256").digest(data) + } + + private suspend fun Collection.toMangaList(): List { + return coroutineScope { + map { id -> + async { + runCatching { + val doc = webClient.httpGet("$ltnBaseUrl/galleryblock/$id.html").parseHtml() + + Manga( + id = generateUid(id.toString()), + title = doc.selectFirstOrThrow("h1").text(), + url = id.toString(), + coverUrl = "https:" + doc.selectFirstOrThrow("picture > source") + .attr("data-srcset") + .substringBefore(" "), + publicUrl = doc.selectFirstOrThrow("h1 > a") + .attrAsRelativeUrl("href") + .toAbsoluteUrl(domain), + author = null, + tags = emptySet(), + isNsfw = true, + rating = RATING_UNKNOWN, + altTitle = null, + state = null, + source = source, + ) + }.getOrNull() + } + }.awaitAll().filterNotNull() + } + } + + override suspend fun getDetails(manga: Manga): Manga { + val json = webClient.httpGet("$ltnBaseUrl/galleries/${manga.url}.js") + .parseRaw() + .substringAfter("var galleryinfo = ") + .let(::JSONObject) + + return manga.copy( + title = json.getString("title"), + largeCoverUrl = json.getJSONArray("files").getJSONObject(0).let { + val hash = it.getString("hash") + val commonId = commonImageId() + val imageId = imageIdFromHash(hash) + val subDomain = 'a' + subdomainOffset(imageId) + + "https://${getDomain("${subDomain}a")}/webp/$commonId$imageId/$hash.webp" + }, + author = json.optJSONArray("artists") + ?.mapJSON { it.getString("artist").toCamelCase() } + ?.joinToString(), + publicUrl = json.getString("galleryurl").toAbsoluteUrl(domain), + tags = buildSet { + json.optJSONArray("characters") + ?.mapToTags("character") + ?.let(::addAll) + json.optJSONArray("tags") + ?.mapToTags("tag") + ?.let(::addAll) + json.optJSONArray("artists") + ?.mapToTags("artist") + ?.let(::addAll) + json.optJSONArray("parodys") + ?.mapToTags("parody") + ?.let(::addAll) + json.optJSONArray("groups") + ?.mapToTags("group") + ?.let(::addAll) + }, + chapters = listOf( + MangaChapter( + id = generateUid(manga.url), + url = manga.url, + name = json.getString("title"), + scanlator = json.getString("type").toTitleCase(), + number = 1, + branch = json.getString("language_localname"), + source = source, + uploadDate = dateFormat.tryParse(json.getString("date").substringBeforeLast("-")), + ) + ) + ) + } + + companion object { + private val dateFormat = SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ENGLISH) + } + + private fun JSONArray.mapToTags(key: String): Set { + val tags = mutableSetOf() + mapJSON { + MangaTag( + title = it.getString(key).toCamelCase().let { title -> + if (it.getStringOrNull("female")?.toIntOrNull() == 1) { + "$title ♀" + } else if (it.getStringOrNull("male")?.toIntOrNull() == 1) { + "$title ♂" + } else { + title + } + }, + key = it.getString("url").tagUrlToTag(), + source = source + ).let(tags::add) + } + return tags + } + + private fun String.tagUrlToTag(): String { + val urlContent = this.split("/") + val ns = urlContent[1] + val tag = urlContent[2] + .substringBeforeLast("-") + .urlDecode() + .replace(" ", "_") + + return if (tag.split(":")[0] in listOf("female", "male")) { + tag + } else { + "$ns:$tag" + } + } + + override suspend fun getRelatedManga(seed: Manga): List { + val json = webClient.httpGet("$ltnBaseUrl/galleries/${seed.url}.js") + .parseRaw() + .substringAfter("var galleryinfo = ") + .let(::JSONObject) + + // any better way to get List from this json? + return json.getJSONArray("related").let { + 0.until(it.length()).map { i -> it.getInt(i) } + }.toMangaList() + } + + override suspend fun getPages(chapter: MangaChapter): List { + val json = webClient.httpGet("$ltnBaseUrl/galleries/${chapter.url}.js") + .parseRaw() + .substringAfter("var galleryinfo = ") + .let(::JSONObject) + + return json.getJSONArray("files").mapJSON { image -> + val hash = image.getString("hash") + val commonId = commonImageId() + val imageId = imageIdFromHash(hash) + val subDomain = 'a' + subdomainOffset(imageId) + + MangaPage( + id= generateUid(hash), + url = "https://${getDomain("${subDomain}a")}/webp/$commonId$imageId/$hash.webp", + preview = "https://${getDomain("${subDomain}tn")}/webpsmalltn/${thumbPathFromHash(hash)}/$hash.webp", + source = source + ) + } + } + + /// ---> + + private var scriptLastRetrieval: Long? = null + private val mutex = Mutex() + private var subdomainOffsetDefault = 0 + private val subdomainOffsetMap = mutableMapOf() + private var commonImageId = "" + + private suspend fun refreshScript() = mutex.withLock { + if (scriptLastRetrieval == null || (scriptLastRetrieval!! + 60000) < System.currentTimeMillis()) { + val ggScript = webClient.httpGet("$ltnBaseUrl/gg.js?_=${System.currentTimeMillis()}").parseRaw() + + subdomainOffsetDefault = Regex("var o = (\\d)").find(ggScript)!!.groupValues[1].toInt() + val o = Regex("o = (\\d); break;").find(ggScript)!!.groupValues[1].toInt() + + subdomainOffsetMap.clear() + Regex("case (\\d+):").findAll(ggScript).forEach { + val case = it.groupValues[1].toInt() + subdomainOffsetMap[case] = o + } + + commonImageId = Regex("b: '(.+)'").find(ggScript)!!.groupValues[1] + + scriptLastRetrieval = System.currentTimeMillis() + } + } + + // m <-- gg.js + private suspend fun subdomainOffset(imageId: Int): Int { + refreshScript() + return subdomainOffsetMap[imageId] ?: subdomainOffsetDefault + } + + // b <-- gg.js + private suspend fun commonImageId(): String { + refreshScript() + return commonImageId + } + + // s <-- gg.js + private fun imageIdFromHash(hash: String): Int { + val match = Regex("(..)(.)$").find(hash) + return match!!.groupValues.let { it[2]+it[1] }.toInt(16) + } + + // real_full_path_from_hash <-- common.js + private fun thumbPathFromHash(hash: String): String { + return hash.replace(Regex("""^.*(..)(.)$"""), "$2/$1") + } +} diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/util/Parse.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/util/Parse.kt index 342b71cfa..5ebd88071 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/util/Parse.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/util/Parse.kt @@ -52,6 +52,12 @@ fun Response.parseRaw(): String = try { closeQuietly() } +fun Response.parseBytes(): ByteArray = try { + requireBody().bytes() +} finally { + closeQuietly() +} + /** * Convert url to relative if it is on [domain] * @return an url relative to the [domain] or absolute, if domain is mismatching From a40a8d329abd0a8c609fbd84b8e48b1e47102309 Mon Sep 17 00:00:00 2001 From: AwkwardPeak7 <48650614+AwkwardPeak7@users.noreply.github.com> Date: Sat, 23 Dec 2023 19:55:21 +0500 Subject: [PATCH 2/6] simplify search a bit --- .../parsers/site/all/HitomiLaParser.kt | 46 +++++++------------ 1 file changed, 16 insertions(+), 30 deletions(-) diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt index caf525bc7..2c16566d5 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt @@ -197,16 +197,11 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo } val key = hashTerm(it) - val field = "galleries" + val node = getGalleryNodeAtAddress(0) + val data = bSearch(key, node) + ?: return emptySet() - val node = getNodeAtAddress(field, 0) - - val data = bSearch(field, key, node) - - if (data != null) - return getGalleryIDsFromData(data) - - return emptySet() + return getGalleryIDsFromData(data) } } @@ -216,7 +211,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo if (length > 100000000 || length <= 0) throw Exception("length $length is too long") - val inbuf = getURLAtRange(url, offset.until(offset+length)) + val inbuf = getRangedResponse(url, offset.until(offset+length)) val galleryIDs = mutableSetOf() @@ -239,7 +234,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo return galleryIDs } - private suspend fun bSearch(field: String, key: UByteArray, node: Node) : Pair? { + private suspend fun bSearch(key: UByteArray, node: Node) : Pair? { fun compareArrayBuffers(dv1: UByteArray, dv2: UByteArray) : Int { val top = min(dv1.size, dv2.size) @@ -281,9 +276,8 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo else if (isLeaf(node)) return null - val nextNode = getNodeAtAddress(field, node.subNodeAddresses[where]) - - return bSearch(field, key, nextNode) + val nextNode = getGalleryNodeAtAddress(node.subNodeAddresses[where]) + return bSearch(key, nextNode) } private suspend fun getGalleryIDsFromNozomi(area: String?, tag: String, language: String, range: LongRange? = null) : Set { @@ -292,7 +286,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo else -> "$ltnBaseUrl/$area/$tag-$language.nozomi" } - val bytes = getURLAtRange(nozomiAddress, range) + val bytes = getRangedResponse(nozomiAddress, range) val nozomi = mutableSetOf() val arrayBuffer = ByteBuffer @@ -305,11 +299,9 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo return nozomi } - private val tagIndexVersion = SuspendLazy { getIndexVersion("tagindex") } - private val galleriesIndexVersion = SuspendLazy { getIndexVersion("galleriesindex") } - - private suspend fun getIndexVersion(name: String) = - webClient.httpGet("$ltnBaseUrl/$name/version?_=${System.currentTimeMillis()}").parseRaw() + private val galleriesIndexVersion = SuspendLazy { + webClient.httpGet("$ltnBaseUrl/galleriesindex/version?_=${System.currentTimeMillis()}").parseRaw() + } private data class Node( val keys: List, @@ -358,21 +350,15 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo return Node(keys, datas, subNodeAddresses) } - private suspend fun getNodeAtAddress(field: String, address: Long) : Node { - val url = - when(field) { - "galleries" -> "$ltnBaseUrl/galleriesindex/galleries.${galleriesIndexVersion.get()}.index" - "languages" -> "$ltnBaseUrl/galleriesindex/languages.${galleriesIndexVersion.get()}.index" - "nozomiurl" -> "$ltnBaseUrl/galleriesindex/nozomiurl.${galleriesIndexVersion.get()}.index" - else -> "$ltnBaseUrl/tagindex/$field.${tagIndexVersion.get()}.index" - } + private suspend fun getGalleryNodeAtAddress(address: Long) : Node { + val url = "$ltnBaseUrl/galleriesindex/galleries.${galleriesIndexVersion.get()}.index" - val nodedata = getURLAtRange(url, address.until(address + 464)) + val nodedata = getRangedResponse(url, address.until(address + 464)) return decodeNode(nodedata) } - private suspend fun getURLAtRange(url: String, range: LongRange? = null) : ByteArray { + private suspend fun getRangedResponse(url: String, range: LongRange? = null) : ByteArray { val rangeHeaders = when (range) { null -> Headers.headersOf() else -> Headers.headersOf("Range", "bytes=${range.first}-${range.last}") From b61c5e8f12319ac2bfbd9cd0222a35d0cd4d5eb9 Mon Sep 17 00:00:00 2001 From: AwkwardPeak7 <48650614+AwkwardPeak7@users.noreply.github.com> Date: Sat, 23 Dec 2023 20:29:16 +0500 Subject: [PATCH 3/6] hitomi: locales --- .../parsers/site/all/HitomiLaParser.kt | 62 ++++++++++++++++--- 1 file changed, 52 insertions(+), 10 deletions(-) diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt index 2c16566d5..5ecee0ce7 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt @@ -30,6 +30,45 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo SortOrder.POPULARITY, ) + private val localeMap: Map = mapOf( + Locale("id") to "indonesian", + Locale("jv") to "javanese", + Locale("ca") to "catalan", + Locale("ceb") to "cebuano", + Locale("cs") to "czech", + Locale("da") to "danish", + Locale("de") to "german", + Locale("et") to "estonian", + Locale.ENGLISH to "english", + Locale("es") to "spanish", + Locale("eo") to "esperanto", + Locale("fr") to "french", + Locale("it") to "italian", + Locale("hi") to "hindi", + Locale("hu") to "hungarian", + Locale("pl") to "polish", + Locale("pt") to "portuguese", + Locale("vi") to "vietnamese", + Locale("tr") to "turkish", + Locale("ru") to "russian", + Locale("uk") to "ukrainian", + Locale("ar") to "arabic", + Locale.KOREAN to "korean", + Locale.CHINESE to "chinese", + Locale.JAPANESE to "japanese", + ) + + private fun Locale?.getSiteLang(): String { + return when (this) { + null -> "all" + else -> localeMap[this] ?: "all" + } + } + + override suspend fun getAvailableLocales(): Set { + return localeMap.keys + } + override suspend fun getAvailableTags(): Set { return coroutineScope { ('a'..'z').map { alphabet -> @@ -68,19 +107,25 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo if (filter.tags.isEmpty()) { when (filter.sortOrder) { SortOrder.POPULARITY -> { - getGalleryIDsFromNozomi("popular", "today", "all", offset.nextOffsetRange()) + getGalleryIDsFromNozomi("popular", "today", filter.locale.getSiteLang(), offset.nextOffsetRange()) } else -> { - getGalleryIDsFromNozomi(null, "index", "all", offset.nextOffsetRange()) + getGalleryIDsFromNozomi(null, "index", filter.locale.getSiteLang(), offset.nextOffsetRange()) } } } else { if (offset == 0) { - cachedSearchIds = hitomiSearch( - filter.tags.joinToString(" ") { it.key }, - filter.sortOrder == SortOrder.POPULARITY - ).toList() + val query = filter.tags.joinToString(" ") { it.key }.let { + val lang = filter.locale.getSiteLang() + if (lang != "all") { + "$it language:$lang" + } else { + it + } + } + + cachedSearchIds = hitomiSearch(query,filter.sortOrder == SortOrder.POPULARITY).toList() } cachedSearchIds.subList(offset, min(offset+25, cachedSearchIds.size)) } @@ -88,10 +133,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo is MangaListFilter.Search -> { if (offset == 0) { - cachedSearchIds = hitomiSearch( - filter.query, - filter.sortOrder == SortOrder.POPULARITY - ).toList() + cachedSearchIds = hitomiSearch(filter.query, filter.sortOrder == SortOrder.POPULARITY).toList() } cachedSearchIds.subList(offset, min(offset+25, cachedSearchIds.size)) } From 495c9fad33ade61157b551280c064e0ece8e7080 Mon Sep 17 00:00:00 2001 From: AwkwardPeak7 <48650614+AwkwardPeak7@users.noreply.github.com> Date: Sat, 23 Dec 2023 22:23:43 +0500 Subject: [PATCH 4/6] hitomi: formatting --- .../parsers/site/all/HitomiLaParser.kt | 549 ++++++++++-------- 1 file changed, 305 insertions(+), 244 deletions(-) diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt index 5ecee0ce7..af141efd1 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt @@ -20,43 +20,44 @@ import kotlin.math.min @OptIn(ExperimentalUnsignedTypes::class) @MangaSourceParser("HITOMILA", "Hitomi.La", type = ContentType.HENTAI) class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSource.HITOMILA) { - override val configKeyDomain = ConfigKey.Domain("hitomi.la") private val ltnBaseUrl get() = "https://${getDomain("ltn")}" - override val availableSortOrders: Set = EnumSet.of( - SortOrder.NEWEST, - SortOrder.POPULARITY, - ) + override val availableSortOrders: Set = + EnumSet.of( + SortOrder.NEWEST, + SortOrder.POPULARITY, + ) - private val localeMap: Map = mapOf( - Locale("id") to "indonesian", - Locale("jv") to "javanese", - Locale("ca") to "catalan", - Locale("ceb") to "cebuano", - Locale("cs") to "czech", - Locale("da") to "danish", - Locale("de") to "german", - Locale("et") to "estonian", - Locale.ENGLISH to "english", - Locale("es") to "spanish", - Locale("eo") to "esperanto", - Locale("fr") to "french", - Locale("it") to "italian", - Locale("hi") to "hindi", - Locale("hu") to "hungarian", - Locale("pl") to "polish", - Locale("pt") to "portuguese", - Locale("vi") to "vietnamese", - Locale("tr") to "turkish", - Locale("ru") to "russian", - Locale("uk") to "ukrainian", - Locale("ar") to "arabic", - Locale.KOREAN to "korean", - Locale.CHINESE to "chinese", - Locale.JAPANESE to "japanese", - ) + private val localeMap: Map = + mapOf( + Locale("id") to "indonesian", + Locale("jv") to "javanese", + Locale("ca") to "catalan", + Locale("ceb") to "cebuano", + Locale("cs") to "czech", + Locale("da") to "danish", + Locale("de") to "german", + Locale("et") to "estonian", + Locale.ENGLISH to "english", + Locale("es") to "spanish", + Locale("eo") to "esperanto", + Locale("fr") to "french", + Locale("it") to "italian", + Locale("hi") to "hindi", + Locale("hu") to "hungarian", + Locale("pl") to "polish", + Locale("pt") to "portuguese", + Locale("vi") to "vietnamese", + Locale("tr") to "turkish", + Locale("ru") to "russian", + Locale("uk") to "ukrainian", + Locale("ar") to "arabic", + Locale.KOREAN to "korean", + Locale.CHINESE to "chinese", + Locale.JAPANESE to "japanese", + ) private fun Locale?.getSiteLang(): String { return when (this) { @@ -76,14 +77,16 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo val doc = webClient.httpGet("https://$domain/alltags-$alphabet.html").parseHtml() doc.select(".posts > li").mapNotNull { element -> - val num = element.ownText().let { - Regex("""\((\d+)\)""").find(it)?.groupValues?.get(1)?.toIntOrNull() ?: 0 - } + val num = + element.ownText().let { + Regex("""\((\d+)\)""").find(it)?.groupValues?.get(1)?.toIntOrNull() ?: 0 + } if (num > 100) { val url = element.selectFirst("a") - val href = url?.attrAsRelativeUrl("href") - ?: return@mapNotNull null + val href = + url?.attrAsRelativeUrl("href") + ?: return@mapNotNull null MangaTag( title = url.ownText().toCamelCase(), @@ -101,7 +104,10 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo private var cachedSearchIds: List = emptyList() - override suspend fun getList(offset: Int, filter: MangaListFilter?): List { + override suspend fun getList( + offset: Int, + filter: MangaListFilter?, + ): List { return when (filter) { is MangaListFilter.Advanced -> { if (filter.tags.isEmpty()) { @@ -116,18 +122,19 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo } } else { if (offset == 0) { - val query = filter.tags.joinToString(" ") { it.key }.let { - val lang = filter.locale.getSiteLang() - if (lang != "all") { - "$it language:$lang" - } else { - it + val query = + filter.tags.joinToString(" ") { it.key }.let { + val lang = filter.locale.getSiteLang() + if (lang != "all") { + "$it language:$lang" + } else { + it + } } - } - cachedSearchIds = hitomiSearch(query,filter.sortOrder == SortOrder.POPULARITY).toList() + cachedSearchIds = hitomiSearch(query, filter.sortOrder == SortOrder.POPULARITY).toList() } - cachedSearchIds.subList(offset, min(offset+25, cachedSearchIds.size)) + cachedSearchIds.subList(offset, min(offset + 25, cachedSearchIds.size)) } } @@ -135,7 +142,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo if (offset == 0) { cachedSearchIds = hitomiSearch(filter.query, filter.sortOrder == SortOrder.POPULARITY).toList() } - cachedSearchIds.subList(offset, min(offset+25, cachedSearchIds.size)) + cachedSearchIds.subList(offset, min(offset + 25, cachedSearchIds.size)) } else -> getGalleryIDsFromNozomi(null, "popular", "all", offset.nextOffsetRange()) @@ -143,85 +150,94 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo } private fun Int.nextOffsetRange(): LongRange { - val bytes = this*4L - return bytes.until(bytes+100L) + val bytes = this * 4L + return bytes.until(bytes + 100L) } - private suspend fun hitomiSearch(query: String, sortByPopularity: Boolean = false) : Set = coroutineScope { - val terms = query - .trim() - .replace(Regex("""^\?"""), "") - .lowercase() - .split(Regex("\\s+")) - .map { - it.replace('_', ' ') + private suspend fun hitomiSearch( + query: String, + sortByPopularity: Boolean = false, + ): Set = + coroutineScope { + val terms = + query + .trim() + .replace(Regex("""^\?"""), "") + .lowercase() + .split(Regex("\\s+")) + .map { + it.replace('_', ' ') + } + + val positiveTerms = LinkedList() + val negativeTerms = LinkedList() + + for (term in terms) { + if (term.startsWith("-")) { + negativeTerms.push(term.removePrefix("-")) + } else if (term.isNotBlank()) { + positiveTerms.push(term) + } } - val positiveTerms = LinkedList() - val negativeTerms = LinkedList() + val positiveResults = + positiveTerms.map { + async { + runCatching { + getGalleryIDsForQuery(it) + }.getOrDefault(emptySet()) + } + } - for (term in terms) { - if (term.startsWith("-")) - negativeTerms.push(term.removePrefix("-")) - else if (term.isNotBlank()) - positiveTerms.push(term) - } + val negativeResults = + negativeTerms.map { + async { + runCatching { + getGalleryIDsForQuery(it) + }.getOrDefault(emptySet()) + } + } - val positiveResults = positiveTerms.map { - async { - runCatching { - getGalleryIDsForQuery(it) - }.getOrDefault(emptySet()) + val results = + when { + sortByPopularity -> getGalleryIDsFromNozomi(null, "popular", "all") + positiveTerms.isEmpty() -> getGalleryIDsFromNozomi(null, "index", "all") + else -> emptySet() + }.toMutableSet() + + fun filterPositive(newResults: Set) { + when { + results.isEmpty() -> results.addAll(newResults) + else -> results.retainAll(newResults) + } } - } - val negativeResults = negativeTerms.map { - async { - runCatching { - getGalleryIDsForQuery(it) - }.getOrDefault(emptySet()) + fun filterNegative(newResults: Set) { + results.removeAll(newResults) } - } - - val results = when { - sortByPopularity -> getGalleryIDsFromNozomi(null, "popular", "all") - positiveTerms.isEmpty() -> getGalleryIDsFromNozomi(null, "index", "all") - else -> emptySet() - }.toMutableSet() - fun filterPositive(newResults: Set) { - when { - results.isEmpty() -> results.addAll(newResults) - else -> results.retainAll(newResults) + // positive results + positiveResults.forEach { + filterPositive(it.await()) } - } - - fun filterNegative(newResults: Set) { - results.removeAll(newResults) - } - //positive results - positiveResults.forEach { - filterPositive(it.await()) - } + // negative results + negativeResults.forEach { + filterNegative(it.await()) + } - //negative results - negativeResults.forEach { - filterNegative(it.await()) + results } - results - } - - //search.js - private suspend fun getGalleryIDsForQuery(query: String) : Set { + // search.js + private suspend fun getGalleryIDsForQuery(query: String): Set { query.replace("_", " ").let { if (it.indexOf(':') > -1) { val sides = it.split(":") val ns = sides[0] var tag = sides[1] - var area : String? = ns + var area: String? = ns var language = "all" when (ns) { "female", "male" -> { @@ -240,35 +256,39 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo val key = hashTerm(it) val node = getGalleryNodeAtAddress(0) - val data = bSearch(key, node) - ?: return emptySet() + val data = + bSearch(key, node) + ?: return emptySet() return getGalleryIDsFromData(data) } } - private suspend fun getGalleryIDsFromData(data: Pair) : Set { + private suspend fun getGalleryIDsFromData(data: Pair): Set { val url = "$ltnBaseUrl/galleriesindex/galleries.${galleriesIndexVersion.get()}.data" val (offset, length) = data - if (length > 100000000 || length <= 0) + if (length > 100000000 || length <= 0) { throw Exception("length $length is too long") + } - val inbuf = getRangedResponse(url, offset.until(offset+length)) + val inbuf = getRangedResponse(url, offset.until(offset + length)) val galleryIDs = mutableSetOf() - val buffer = ByteBuffer - .wrap(inbuf) - .order(ByteOrder.BIG_ENDIAN) + val buffer = + ByteBuffer + .wrap(inbuf) + .order(ByteOrder.BIG_ENDIAN) val numberOfGalleryIDs = buffer.int - val expectedLength = numberOfGalleryIDs*4+4 + val expectedLength = numberOfGalleryIDs * 4 + 4 - if (numberOfGalleryIDs > 10000000 || numberOfGalleryIDs <= 0) + if (numberOfGalleryIDs > 10000000 || numberOfGalleryIDs <= 0) { throw Exception("number_of_galleryids $numberOfGalleryIDs is too long") - else if (inbuf.size != expectedLength) + } else if (inbuf.size != expectedLength) { throw Exception("inbuf.byteLength ${inbuf.size} != expected_length $expectedLength") + } for (i in 0.until(numberOfGalleryIDs)) galleryIDs.add(buffer.int) @@ -276,64 +296,85 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo return galleryIDs } - private suspend fun bSearch(key: UByteArray, node: Node) : Pair? { - fun compareArrayBuffers(dv1: UByteArray, dv2: UByteArray) : Int { + private suspend fun bSearch( + key: UByteArray, + node: Node, + ): Pair? { + fun compareArrayBuffers( + dv1: UByteArray, + dv2: UByteArray, + ): Int { val top = min(dv1.size, dv2.size) for (i in 0.until(top)) { - if (dv1[i] < dv2[i]) + if (dv1[i] < dv2[i]) { return -1 - else if (dv1[i] > dv2[i]) + } else if (dv1[i] > dv2[i]) { return 1 + } } return 0 } - fun locateKey(key: UByteArray, node: Node) : Pair { + fun locateKey( + key: UByteArray, + node: Node, + ): Pair { for (i in node.keys.indices) { val cmpResult = compareArrayBuffers(key, node.keys[i]) - if (cmpResult <= 0) - return Pair(cmpResult==0, i) + if (cmpResult <= 0) { + return Pair(cmpResult == 0, i) + } } return Pair(false, node.keys.size) } - fun isLeaf(node: Node) : Boolean { + fun isLeaf(node: Node): Boolean { for (subnode in node.subNodeAddresses) - if (subnode != 0L) + if (subnode != 0L) { return false + } return true } - if (node.keys.isEmpty()) + if (node.keys.isEmpty()) { return null + } val (there, where) = locateKey(key, node) - if (there) + if (there) { return node.datas[where] - else if (isLeaf(node)) + } else if (isLeaf(node)) { return null + } val nextNode = getGalleryNodeAtAddress(node.subNodeAddresses[where]) return bSearch(key, nextNode) } - private suspend fun getGalleryIDsFromNozomi(area: String?, tag: String, language: String, range: LongRange? = null) : Set { - val nozomiAddress = when(area) { - null -> "$ltnBaseUrl/$tag-$language.nozomi" - else -> "$ltnBaseUrl/$area/$tag-$language.nozomi" - } + private suspend fun getGalleryIDsFromNozomi( + area: String?, + tag: String, + language: String, + range: LongRange? = null, + ): Set { + val nozomiAddress = + when (area) { + null -> "$ltnBaseUrl/$tag-$language.nozomi" + else -> "$ltnBaseUrl/$area/$tag-$language.nozomi" + } val bytes = getRangedResponse(nozomiAddress, range) val nozomi = mutableSetOf() - val arrayBuffer = ByteBuffer - .wrap(bytes) - .order(ByteOrder.BIG_ENDIAN) + val arrayBuffer = + ByteBuffer + .wrap(bytes) + .order(ByteOrder.BIG_ENDIAN) while (arrayBuffer.hasRemaining()) nozomi.add(arrayBuffer.int) @@ -341,9 +382,10 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo return nozomi } - private val galleriesIndexVersion = SuspendLazy { - webClient.httpGet("$ltnBaseUrl/galleriesindex/version?_=${System.currentTimeMillis()}").parseRaw() - } + private val galleriesIndexVersion = + SuspendLazy { + webClient.httpGet("$ltnBaseUrl/galleriesindex/version?_=${System.currentTimeMillis()}").parseRaw() + } private data class Node( val keys: List, @@ -351,10 +393,11 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo val subNodeAddresses: List, ) - private fun decodeNode(data: ByteArray) : Node { - val buffer = ByteBuffer - .wrap(data) - .order(ByteOrder.BIG_ENDIAN) + private fun decodeNode(data: ByteArray): Node { + val buffer = + ByteBuffer + .wrap(data) + .order(ByteOrder.BIG_ENDIAN) val uData = data.toUByteArray() @@ -364,11 +407,12 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo for (i in 0.until(numberOfKeys)) { val keySize = buffer.int - if (keySize == 0 || keySize > 32) + if (keySize == 0 || keySize > 32) { throw Exception("fatal: !keySize || keySize > 32") + } - keys.add(uData.sliceArray(buffer.position().until(buffer.position()+keySize))) - buffer.position(buffer.position()+keySize) + keys.add(uData.sliceArray(buffer.position().until(buffer.position() + keySize))) + buffer.position(buffer.position() + keySize) } val numberOfDatas = buffer.int @@ -392,7 +436,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo return Node(keys, datas, subNodeAddresses) } - private suspend fun getGalleryNodeAtAddress(address: Long) : Node { + private suspend fun getGalleryNodeAtAddress(address: Long): Node { val url = "$ltnBaseUrl/galleriesindex/galleries.${galleriesIndexVersion.get()}.index" val nodedata = getRangedResponse(url, address.until(address + 464)) @@ -400,20 +444,24 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo return decodeNode(nodedata) } - private suspend fun getRangedResponse(url: String, range: LongRange? = null) : ByteArray { - val rangeHeaders = when (range) { - null -> Headers.headersOf() - else -> Headers.headersOf("Range", "bytes=${range.first}-${range.last}") - } + private suspend fun getRangedResponse( + url: String, + range: LongRange? = null, + ): ByteArray { + val rangeHeaders = + when (range) { + null -> Headers.headersOf() + else -> Headers.headersOf("Range", "bytes=${range.first}-${range.last}") + } return webClient.httpGet(url, rangeHeaders).parseBytes() } - private fun hashTerm(term: String) : UByteArray { + private fun hashTerm(term: String): UByteArray { return sha256(term.toByteArray()).copyOfRange(0, 4).toUByteArray() } - private fun sha256(data: ByteArray) : ByteArray { + private fun sha256(data: ByteArray): ByteArray { return MessageDigest.getInstance("SHA-256").digest(data) } @@ -428,12 +476,15 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo id = generateUid(id.toString()), title = doc.selectFirstOrThrow("h1").text(), url = id.toString(), - coverUrl = "https:" + doc.selectFirstOrThrow("picture > source") - .attr("data-srcset") - .substringBefore(" "), - publicUrl = doc.selectFirstOrThrow("h1 > a") - .attrAsRelativeUrl("href") - .toAbsoluteUrl(domain), + coverUrl = + "https:" + + doc.selectFirstOrThrow("picture > source") + .attr("data-srcset") + .substringBefore(" "), + publicUrl = + doc.selectFirstOrThrow("h1 > a") + .attrAsRelativeUrl("href") + .toAbsoluteUrl(domain), author = null, tags = emptySet(), isNsfw = true, @@ -449,54 +500,59 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo } override suspend fun getDetails(manga: Manga): Manga { - val json = webClient.httpGet("$ltnBaseUrl/galleries/${manga.url}.js") - .parseRaw() - .substringAfter("var galleryinfo = ") - .let(::JSONObject) + val json = + webClient.httpGet("$ltnBaseUrl/galleries/${manga.url}.js") + .parseRaw() + .substringAfter("var galleryinfo = ") + .let(::JSONObject) return manga.copy( title = json.getString("title"), - largeCoverUrl = json.getJSONArray("files").getJSONObject(0).let { - val hash = it.getString("hash") - val commonId = commonImageId() - val imageId = imageIdFromHash(hash) - val subDomain = 'a' + subdomainOffset(imageId) - - "https://${getDomain("${subDomain}a")}/webp/$commonId$imageId/$hash.webp" - }, - author = json.optJSONArray("artists") - ?.mapJSON { it.getString("artist").toCamelCase() } - ?.joinToString(), - publicUrl = json.getString("galleryurl").toAbsoluteUrl(domain), - tags = buildSet { - json.optJSONArray("characters") - ?.mapToTags("character") - ?.let(::addAll) - json.optJSONArray("tags") - ?.mapToTags("tag") - ?.let(::addAll) + largeCoverUrl = + json.getJSONArray("files").getJSONObject(0).let { + val hash = it.getString("hash") + val commonId = commonImageId() + val imageId = imageIdFromHash(hash) + val subDomain = 'a' + subdomainOffset(imageId) + + "https://${getDomain("${subDomain}a")}/webp/$commonId$imageId/$hash.webp" + }, + author = json.optJSONArray("artists") - ?.mapToTags("artist") - ?.let(::addAll) - json.optJSONArray("parodys") - ?.mapToTags("parody") - ?.let(::addAll) - json.optJSONArray("groups") - ?.mapToTags("group") - ?.let(::addAll) - }, - chapters = listOf( - MangaChapter( - id = generateUid(manga.url), - url = manga.url, - name = json.getString("title"), - scanlator = json.getString("type").toTitleCase(), - number = 1, - branch = json.getString("language_localname"), - source = source, - uploadDate = dateFormat.tryParse(json.getString("date").substringBeforeLast("-")), - ) - ) + ?.mapJSON { it.getString("artist").toCamelCase() } + ?.joinToString(), + publicUrl = json.getString("galleryurl").toAbsoluteUrl(domain), + tags = + buildSet { + json.optJSONArray("characters") + ?.mapToTags("character") + ?.let(::addAll) + json.optJSONArray("tags") + ?.mapToTags("tag") + ?.let(::addAll) + json.optJSONArray("artists") + ?.mapToTags("artist") + ?.let(::addAll) + json.optJSONArray("parodys") + ?.mapToTags("parody") + ?.let(::addAll) + json.optJSONArray("groups") + ?.mapToTags("group") + ?.let(::addAll) + }, + chapters = + listOf( + MangaChapter( + id = generateUid(manga.url), + url = manga.url, + name = json.getString("title"), + scanlator = json.getString("type").toTitleCase(), + number = 1, + branch = json.getString("language_localname"), + source = source, + uploadDate = dateFormat.tryParse(json.getString("date").substringBeforeLast("-")), + ), + ), ) } @@ -508,17 +564,18 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo val tags = mutableSetOf() mapJSON { MangaTag( - title = it.getString(key).toCamelCase().let { title -> - if (it.getStringOrNull("female")?.toIntOrNull() == 1) { - "$title ♀" - } else if (it.getStringOrNull("male")?.toIntOrNull() == 1) { - "$title ♂" - } else { - title - } - }, + title = + it.getString(key).toCamelCase().let { title -> + if (it.getStringOrNull("female")?.toIntOrNull() == 1) { + "$title ♀" + } else if (it.getStringOrNull("male")?.toIntOrNull() == 1) { + "$title ♂" + } else { + title + } + }, key = it.getString("url").tagUrlToTag(), - source = source + source = source, ).let(tags::add) } return tags @@ -527,10 +584,11 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo private fun String.tagUrlToTag(): String { val urlContent = this.split("/") val ns = urlContent[1] - val tag = urlContent[2] - .substringBeforeLast("-") - .urlDecode() - .replace(" ", "_") + val tag = + urlContent[2] + .substringBeforeLast("-") + .urlDecode() + .replace(" ", "_") return if (tag.split(":")[0] in listOf("female", "male")) { tag @@ -540,10 +598,11 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo } override suspend fun getRelatedManga(seed: Manga): List { - val json = webClient.httpGet("$ltnBaseUrl/galleries/${seed.url}.js") - .parseRaw() - .substringAfter("var galleryinfo = ") - .let(::JSONObject) + val json = + webClient.httpGet("$ltnBaseUrl/galleries/${seed.url}.js") + .parseRaw() + .substringAfter("var galleryinfo = ") + .let(::JSONObject) // any better way to get List from this json? return json.getJSONArray("related").let { @@ -552,10 +611,11 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo } override suspend fun getPages(chapter: MangaChapter): List { - val json = webClient.httpGet("$ltnBaseUrl/galleries/${chapter.url}.js") - .parseRaw() - .substringAfter("var galleryinfo = ") - .let(::JSONObject) + val json = + webClient.httpGet("$ltnBaseUrl/galleries/${chapter.url}.js") + .parseRaw() + .substringAfter("var galleryinfo = ") + .let(::JSONObject) return json.getJSONArray("files").mapJSON { image -> val hash = image.getString("hash") @@ -564,15 +624,15 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo val subDomain = 'a' + subdomainOffset(imageId) MangaPage( - id= generateUid(hash), + id = generateUid(hash), url = "https://${getDomain("${subDomain}a")}/webp/$commonId$imageId/$hash.webp", preview = "https://${getDomain("${subDomain}tn")}/webpsmalltn/${thumbPathFromHash(hash)}/$hash.webp", - source = source + source = source, ) } } - /// ---> + // / ---> private var scriptLastRetrieval: Long? = null private val mutex = Mutex() @@ -580,24 +640,25 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo private val subdomainOffsetMap = mutableMapOf() private var commonImageId = "" - private suspend fun refreshScript() = mutex.withLock { - if (scriptLastRetrieval == null || (scriptLastRetrieval!! + 60000) < System.currentTimeMillis()) { - val ggScript = webClient.httpGet("$ltnBaseUrl/gg.js?_=${System.currentTimeMillis()}").parseRaw() + private suspend fun refreshScript() = + mutex.withLock { + if (scriptLastRetrieval == null || (scriptLastRetrieval!! + 60000) < System.currentTimeMillis()) { + val ggScript = webClient.httpGet("$ltnBaseUrl/gg.js?_=${System.currentTimeMillis()}").parseRaw() - subdomainOffsetDefault = Regex("var o = (\\d)").find(ggScript)!!.groupValues[1].toInt() - val o = Regex("o = (\\d); break;").find(ggScript)!!.groupValues[1].toInt() + subdomainOffsetDefault = Regex("var o = (\\d)").find(ggScript)!!.groupValues[1].toInt() + val o = Regex("o = (\\d); break;").find(ggScript)!!.groupValues[1].toInt() - subdomainOffsetMap.clear() - Regex("case (\\d+):").findAll(ggScript).forEach { - val case = it.groupValues[1].toInt() - subdomainOffsetMap[case] = o - } + subdomainOffsetMap.clear() + Regex("case (\\d+):").findAll(ggScript).forEach { + val case = it.groupValues[1].toInt() + subdomainOffsetMap[case] = o + } - commonImageId = Regex("b: '(.+)'").find(ggScript)!!.groupValues[1] + commonImageId = Regex("b: '(.+)'").find(ggScript)!!.groupValues[1] - scriptLastRetrieval = System.currentTimeMillis() + scriptLastRetrieval = System.currentTimeMillis() + } } - } // m <-- gg.js private suspend fun subdomainOffset(imageId: Int): Int { @@ -614,7 +675,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo // s <-- gg.js private fun imageIdFromHash(hash: String): Int { val match = Regex("(..)(.)$").find(hash) - return match!!.groupValues.let { it[2]+it[1] }.toInt(16) + return match!!.groupValues.let { it[2] + it[1] }.toInt(16) } // real_full_path_from_hash <-- common.js From 039075086e295a6a8f1dee0fb747671e55a07c74 Mon Sep 17 00:00:00 2001 From: AwkwardPeak7 <48650614+AwkwardPeak7@users.noreply.github.com> Date: Sat, 23 Dec 2023 22:48:13 +0500 Subject: [PATCH 5/6] hitomi: better locale in tag search --- .../parsers/site/all/HitomiLaParser.kt | 37 +++++++++---------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt index af141efd1..21027f8b8 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt @@ -122,17 +122,12 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo } } else { if (offset == 0) { - val query = - filter.tags.joinToString(" ") { it.key }.let { - val lang = filter.locale.getSiteLang() - if (lang != "all") { - "$it language:$lang" - } else { - it - } - } - - cachedSearchIds = hitomiSearch(query, filter.sortOrder == SortOrder.POPULARITY).toList() + cachedSearchIds = + hitomiSearch( + filter.tags.joinToString(" ") { it.key }, + filter.sortOrder == SortOrder.POPULARITY, + filter.locale.getSiteLang(), + ).toList() } cachedSearchIds.subList(offset, min(offset + 25, cachedSearchIds.size)) } @@ -157,6 +152,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo private suspend fun hitomiSearch( query: String, sortByPopularity: Boolean = false, + language: String = "all", ): Set = coroutineScope { val terms = @@ -184,7 +180,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo positiveTerms.map { async { runCatching { - getGalleryIDsForQuery(it) + getGalleryIDsForQuery(it, language) }.getOrDefault(emptySet()) } } @@ -193,15 +189,15 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo negativeTerms.map { async { runCatching { - getGalleryIDsForQuery(it) + getGalleryIDsForQuery(it, language) }.getOrDefault(emptySet()) } } val results = when { - sortByPopularity -> getGalleryIDsFromNozomi(null, "popular", "all") - positiveTerms.isEmpty() -> getGalleryIDsFromNozomi(null, "index", "all") + sortByPopularity -> getGalleryIDsFromNozomi(null, "popular", language) + positiveTerms.isEmpty() -> getGalleryIDsFromNozomi(null, "index", language) else -> emptySet() }.toMutableSet() @@ -230,7 +226,10 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo } // search.js - private suspend fun getGalleryIDsForQuery(query: String): Set { + private suspend fun getGalleryIDsForQuery( + query: String, + language: String = "all", + ): Set { query.replace("_", " ").let { if (it.indexOf(':') > -1) { val sides = it.split(":") @@ -238,7 +237,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo var tag = sides[1] var area: String? = ns - var language = "all" + var lang = language when (ns) { "female", "male" -> { area = "tag" @@ -246,12 +245,12 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo } "language" -> { area = null - language = tag + lang = tag tag = "index" } } - return getGalleryIDsFromNozomi(area, tag, language) + return getGalleryIDsFromNozomi(area, tag, lang) } val key = hashTerm(it) From 3d10456a87abdaaaf674ba92b848f2ae0d83e33f Mon Sep 17 00:00:00 2001 From: Koitharu Date: Mon, 25 Dec 2023 12:50:56 +0200 Subject: [PATCH 6/6] Apply suggestions from code review --- .../kotatsu/parsers/site/all/HitomiLaParser.kt | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt index 21027f8b8..5cfe0c8f8 100644 --- a/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt +++ b/src/main/kotlin/org/koitharu/kotatsu/parsers/site/all/HitomiLaParser.kt @@ -266,8 +266,8 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo private suspend fun getGalleryIDsFromData(data: Pair): Set { val url = "$ltnBaseUrl/galleriesindex/galleries.${galleriesIndexVersion.get()}.data" val (offset, length) = data - if (length > 100000000 || length <= 0) { - throw Exception("length $length is too long") + require(length in 0..100000000) { + "Length $length is too long" } val inbuf = getRangedResponse(url, offset.until(offset + length)) @@ -284,9 +284,9 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo val expectedLength = numberOfGalleryIDs * 4 + 4 if (numberOfGalleryIDs > 10000000 || numberOfGalleryIDs <= 0) { - throw Exception("number_of_galleryids $numberOfGalleryIDs is too long") + throw IllegalArgumentException("number_of_galleryids $numberOfGalleryIDs is too long") } else if (inbuf.size != expectedLength) { - throw Exception("inbuf.byteLength ${inbuf.size} != expected_length $expectedLength") + throw IllegalArgumentException("inbuf.byteLength ${inbuf.size} != expected_length $expectedLength") } for (i in 0.until(numberOfGalleryIDs)) @@ -316,7 +316,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo return 0 } - fun locateKey( + private fun locateKey( key: UByteArray, node: Node, ): Pair { @@ -331,7 +331,7 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo return Pair(false, node.keys.size) } - fun isLeaf(node: Node): Boolean { + private fun isLeaf(node: Node): Boolean { for (subnode in node.subNodeAddresses) if (subnode != 0L) { return false @@ -555,12 +555,10 @@ class HitomiLaParser(context: MangaLoaderContext) : MangaParser(context, MangaSo ) } - companion object { - private val dateFormat = SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ENGLISH) - } + private val dateFormat = SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ENGLISH) private fun JSONArray.mapToTags(key: String): Set { - val tags = mutableSetOf() + val tags = ArraySet(length()) mapJSON { MangaTag( title =