-
Notifications
You must be signed in to change notification settings - Fork 9.2k
Embed the public suffix database list directly inside a class #8589
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
c038ba7
6b776ec
7bd9eca
c980268
7c0424f
b85aecf
02805af
eb9c8a2
d8d8c87
4d20d66
7e4ba40
3daaa99
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,41 +15,19 @@ | |
*/ | ||
package okhttp3.internal.publicsuffix | ||
|
||
import java.io.IOException | ||
import java.io.InterruptedIOException | ||
import java.net.IDN | ||
import java.util.concurrent.CountDownLatch | ||
import java.util.concurrent.atomic.AtomicBoolean | ||
import okhttp3.internal.and | ||
import okhttp3.internal.platform.Platform | ||
import okio.FileSystem | ||
import okio.GzipSource | ||
import okio.Path | ||
import okio.Path.Companion.toPath | ||
import okio.buffer | ||
import okio.ByteString | ||
import okio.ByteString.Companion.encodeUtf8 | ||
|
||
/** | ||
* A database of public suffixes provided by [publicsuffix.org][publicsuffix_org]. | ||
* | ||
* [publicsuffix_org]: https://publicsuffix.org/ | ||
*/ | ||
class PublicSuffixDatabase internal constructor( | ||
val path: Path = PUBLIC_SUFFIX_RESOURCE, | ||
val fileSystem: FileSystem = FileSystem.RESOURCES, | ||
private val publicSuffixList: PublicSuffixList, | ||
) { | ||
/** True after we've attempted to read the list for the first time. */ | ||
private val listRead = AtomicBoolean(false) | ||
|
||
/** Used for concurrent threads reading the list for the first time. */ | ||
private val readCompleteLatch = CountDownLatch(1) | ||
|
||
// The lists are held as a large array of UTF-8 bytes. This is to avoid allocating lots of strings | ||
// that will likely never be used. Each rule is separated by '\n'. Please see the | ||
// PublicSuffixListGenerator class for how these lists are generated. | ||
// Guarded by this. | ||
private lateinit var publicSuffixListBytes: ByteArray | ||
private lateinit var publicSuffixExceptionListBytes: ByteArray | ||
|
||
/** | ||
* Returns the effective top-level domain plus one (eTLD+1) by referencing the public suffix list. | ||
* Returns null if the domain is a public suffix or a private address. | ||
|
@@ -101,29 +79,16 @@ class PublicSuffixDatabase internal constructor( | |
} | ||
|
||
private fun findMatchingRule(domainLabels: List<String>): List<String> { | ||
if (!listRead.get() && listRead.compareAndSet(false, true)) { | ||
readTheListUninterruptibly() | ||
} else { | ||
try { | ||
readCompleteLatch.await() | ||
} catch (_: InterruptedException) { | ||
Thread.currentThread().interrupt() // Retain interrupted status. | ||
} | ||
} | ||
|
||
check(::publicSuffixListBytes.isInitialized) { | ||
// May have failed with an IOException | ||
"Unable to load $PUBLIC_SUFFIX_RESOURCE resource from the classpath." | ||
} | ||
publicSuffixList.ensureLoaded() | ||
|
||
// Break apart the domain into UTF-8 labels, i.e. foo.bar.com turns into [foo, bar, com]. | ||
val domainLabelsUtf8Bytes = Array(domainLabels.size) { i -> domainLabels[i].toByteArray() } | ||
val domainLabelsUtf8Bytes = Array(domainLabels.size) { i -> domainLabels[i].encodeUtf8() } | ||
|
||
// Start by looking for exact matches. We start at the leftmost label. For example, foo.bar.com | ||
// will look like: [foo, bar, com], [bar, com], [com]. The longest matching rule wins. | ||
var exactMatch: String? = null | ||
for (i in domainLabelsUtf8Bytes.indices) { | ||
val rule = publicSuffixListBytes.binarySearch(domainLabelsUtf8Bytes, i) | ||
val rule = publicSuffixList.bytes.binarySearch(domainLabelsUtf8Bytes, i) | ||
if (rule != null) { | ||
exactMatch = rule | ||
break | ||
|
@@ -140,7 +105,7 @@ class PublicSuffixDatabase internal constructor( | |
val labelsWithWildcard = domainLabelsUtf8Bytes.clone() | ||
for (labelIndex in 0 until labelsWithWildcard.size - 1) { | ||
labelsWithWildcard[labelIndex] = WILDCARD_LABEL | ||
val rule = publicSuffixListBytes.binarySearch(labelsWithWildcard, labelIndex) | ||
val rule = publicSuffixList.bytes.binarySearch(labelsWithWildcard, labelIndex) | ||
if (rule != null) { | ||
wildcardMatch = rule | ||
break | ||
|
@@ -153,7 +118,7 @@ class PublicSuffixDatabase internal constructor( | |
if (wildcardMatch != null) { | ||
for (labelIndex in 0 until domainLabelsUtf8Bytes.size - 1) { | ||
val rule = | ||
publicSuffixExceptionListBytes.binarySearch( | ||
publicSuffixList.exceptionBytes.binarySearch( | ||
domainLabelsUtf8Bytes, | ||
labelIndex, | ||
) | ||
|
@@ -182,84 +147,20 @@ class PublicSuffixDatabase internal constructor( | |
} | ||
} | ||
|
||
/** | ||
* Reads the public suffix list treating the operation as uninterruptible. We always want to read | ||
* the list otherwise we'll be left in a bad state. If the thread was interrupted prior to this | ||
* operation, it will be re-interrupted after the list is read. | ||
*/ | ||
private fun readTheListUninterruptibly() { | ||
var interrupted = false | ||
try { | ||
while (true) { | ||
try { | ||
readTheList() | ||
return | ||
} catch (_: InterruptedIOException) { | ||
Thread.interrupted() // Temporarily clear the interrupted state. | ||
interrupted = true | ||
} catch (e: IOException) { | ||
Platform.get().log("Failed to read public suffix list", Platform.WARN, e) | ||
return | ||
} | ||
} | ||
} finally { | ||
if (interrupted) { | ||
Thread.currentThread().interrupt() // Retain interrupted status. | ||
} | ||
} | ||
} | ||
|
||
@Throws(IOException::class) | ||
private fun readTheList() { | ||
var publicSuffixListBytes: ByteArray? | ||
var publicSuffixExceptionListBytes: ByteArray? | ||
|
||
try { | ||
GzipSource(fileSystem.source(path)).buffer().use { bufferedSource -> | ||
val totalBytes = bufferedSource.readInt() | ||
publicSuffixListBytes = bufferedSource.readByteArray(totalBytes.toLong()) | ||
|
||
val totalExceptionBytes = bufferedSource.readInt() | ||
publicSuffixExceptionListBytes = bufferedSource.readByteArray(totalExceptionBytes.toLong()) | ||
} | ||
|
||
synchronized(this) { | ||
this.publicSuffixListBytes = publicSuffixListBytes!! | ||
this.publicSuffixExceptionListBytes = publicSuffixExceptionListBytes!! | ||
} | ||
} finally { | ||
readCompleteLatch.countDown() | ||
} | ||
} | ||
|
||
/** Visible for testing. */ | ||
fun setListBytes( | ||
publicSuffixListBytes: ByteArray, | ||
publicSuffixExceptionListBytes: ByteArray, | ||
) { | ||
this.publicSuffixListBytes = publicSuffixListBytes | ||
this.publicSuffixExceptionListBytes = publicSuffixExceptionListBytes | ||
listRead.set(true) | ||
readCompleteLatch.countDown() | ||
} | ||
|
||
companion object { | ||
@JvmField | ||
val PUBLIC_SUFFIX_RESOURCE = "/okhttp3/internal/publicsuffix/${PublicSuffixDatabase::class.java.simpleName}.gz".toPath() | ||
|
||
private val WILDCARD_LABEL = byteArrayOf('*'.code.toByte()) | ||
private val WILDCARD_LABEL = ByteString.of('*'.code.toByte()) | ||
private val PREVAILING_RULE = listOf("*") | ||
|
||
private const val EXCEPTION_MARKER = '!' | ||
|
||
private val instance = PublicSuffixDatabase() | ||
private val instance = PublicSuffixDatabase(EmbeddedPublicSuffixList) | ||
|
||
fun get(): PublicSuffixDatabase { | ||
return instance | ||
} | ||
|
||
private fun ByteArray.binarySearch( | ||
labels: Array<ByteArray>, | ||
private fun ByteString.binarySearch( | ||
labels: Array<ByteString>, | ||
labelIndex: Int, | ||
): String? { | ||
var low = 0 | ||
|
@@ -338,7 +239,7 @@ class PublicSuffixDatabase internal constructor( | |
low = mid + end + 1 | ||
} else { | ||
// Found a match. | ||
match = String(this, mid, publicSuffixLength) | ||
match = this.substring(mid, mid + publicSuffixLength).string(Charsets.UTF_8) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Seems like There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Will raise a PR. |
||
break | ||
} | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
/* | ||
* Copyright (C) 2024 Block, Inc. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package okhttp3.internal.publicsuffix | ||
|
||
import okio.ByteString | ||
|
||
/** | ||
* Basic I/O for the PublicSuffixDatabase.gz. | ||
* | ||
* Supplies the two byte strings that PublicSuffixDatabase binary-searches when matching a | ||
* domain: [bytes] is searched for exact and wildcard rules, and [exceptionBytes] is searched | ||
* for exception rules. PublicSuffixDatabase calls [ensureLoaded] before reading either | ||
* property, so implementations must have both available once that call returns. | ||
*/ | ||
internal interface PublicSuffixList { | ||
fun ensureLoaded() | ||
|
||
val bytes: ByteString | ||
val exceptionBytes: ByteString | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
/* | ||
* Copyright (C) 2024 Block, Inc. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package okhttp3.internal.publicsuffix | ||
|
||
// Note that PublicSuffixDatabase.gz is compiled from The Public Suffix List: | ||
// https://publicsuffix.org/list/public_suffix_list.dat | ||
// | ||
// It is subject to the terms of the Mozilla Public License, v. 2.0: | ||
// https://mozilla.org/MPL/2.0/ | ||
|
||
import okio.Buffer | ||
import okio.ByteString | ||
import okio.ByteString.Companion.decodeBase64 | ||
import okio.GzipSource | ||
import okio.buffer | ||
|
||
/** | ||
* An implementation of I/O for PublicSuffixDatabase.gz by directly encoding | ||
* the relevant byte arrays in a class file. | ||
* | ||
* Both [bytes] and [exceptionBytes] are assigned once, in the `init` block, by decompressing | ||
* the embedded payload; nothing is read from the file system or classpath resources here. | ||
*/ | ||
internal object EmbeddedPublicSuffixList: PublicSuffixList { | ||
// No-op: bytes and exceptionBytes are assigned in the init block, so there is nothing
// left to load when this is called.
override fun ensureLoaded() { | ||
} | ||
|
||
override val bytes: ByteString | ||
|
||
override val exceptionBytes: ByteString | ||
|
||
init { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not done in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I figured the JVM classloader would be a nice simple mutex. Ther s so much locking code in the other impl |
||
Buffer().use { buffer -> | ||
// NOTE(review): $publicSuffixListBytes is not defined anywhere in this file; presumably a
// placeholder that a code generator replaces with the embedded payload — confirm.
buffer.write($publicSuffixListBytes) | ||
// Gunzip what was just written, then read two length-prefixed sections in order:
// an Int length followed by that many bytes for bytes, then the same for exceptionBytes.
GzipSource(buffer).buffer().use { source -> | ||
val totalBytes = source.readInt() | ||
bytes = source.readByteString(totalBytes.toLong()) | ||
|
||
val totalExceptionBytes = source.readInt() | ||
exceptionBytes = source.readByteString(totalExceptionBytes.toLong()) | ||
} | ||
} | ||
} | ||
} |
Uh oh!
There was an error while loading. Please reload this page.