-
Notifications
You must be signed in to change notification settings - Fork 2.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Replaces uses of BufferedInputStream. Advantages: (1) can recycle the byte[] buffer, so there is a significant reduction in GC load; (2) if the consumer is reading into an array and there is no mark, there is no need to allocate a buffer; (3) doesn't aim to support multi-threaded reads, so no syncs or locking. Also reduced the DefaultBufferSize to 8K from 32K.
- Loading branch information
Showing
3 changed files
with
142 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
132 changes: 132 additions & 0 deletions
132
src/main/java/org/jsoup/internal/SimpleBufferedInput.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
package org.jsoup.internal; | ||
|
||
import org.jsoup.helper.Validate; | ||
import org.jspecify.annotations.Nullable; | ||
|
||
import java.io.FilterInputStream; | ||
import java.io.IOException; | ||
import java.io.InputStream; | ||
|
||
import static org.jsoup.internal.SharedConstants.DefaultBufferSize; | ||
|
||
/** | ||
A simple implemented of a buffered input stream, in which we can control the byte[] buffer to recycle it. Not safe for | ||
use between threads; no sync or locks. The buffer is borrowed on initial demand in fill. */ | ||
class SimpleBufferedInput extends FilterInputStream { | ||
static final int BufferSize = DefaultBufferSize; | ||
static final SoftPool<byte[]> BufferPool = new SoftPool<>(() -> new byte[BufferSize]); | ||
|
||
byte @Nullable [] byteBuf; // the byte buffer; recycled via SoftPool. Created in fill if required | ||
int bufPos; | ||
int bufLength; | ||
int bufMark = -1; | ||
|
||
SimpleBufferedInput(InputStream in) { | ||
super(in); | ||
} | ||
|
||
@Override | ||
public int read() throws IOException { | ||
if (bufPos >= bufLength) { | ||
fill(); | ||
if (bufPos >= bufLength) | ||
return -1; | ||
} | ||
return getBuf()[bufPos++] & 0xff; | ||
} | ||
|
||
@Override | ||
public int read(byte[] dest, int offset, int desiredLen) throws IOException { | ||
Validate.notNull(dest); | ||
if (offset < 0 || desiredLen < 0 || desiredLen > dest.length - offset) { | ||
throw new IndexOutOfBoundsException(); | ||
} else if (desiredLen == 0) { | ||
return 0; | ||
} | ||
|
||
int bufAvail = bufLength - bufPos; | ||
if (bufAvail <= 0) { | ||
if (desiredLen >= BufferSize && bufMark < 0) { | ||
// We can skip creating / copying into a local buffer; just pass through | ||
return in.read(dest, offset, desiredLen); | ||
} | ||
fill(); | ||
bufAvail = bufLength - bufPos; | ||
} | ||
|
||
int read = Math.min(bufAvail, desiredLen); | ||
if (read <= 0) { | ||
return -1; | ||
} | ||
|
||
System.arraycopy(getBuf(), bufPos, dest, offset, read); | ||
bufPos += read; | ||
return read; | ||
} | ||
|
||
private void fill() throws IOException { | ||
if (byteBuf == null) { // get one on first demand | ||
byteBuf = BufferPool.borrow(); | ||
} | ||
|
||
if (bufMark < 0) { // no mark, can lose buffer (assumes we've read to bufLen) | ||
bufPos = 0; | ||
} else if (bufPos >= BufferSize) { // no room left in buffer | ||
if (bufMark > 0) { // can throw away early part of the buffer | ||
int size = bufPos - bufMark; | ||
System.arraycopy(byteBuf, bufMark, byteBuf, 0, size); | ||
bufPos = size; | ||
bufMark = 0; | ||
} else { // invalidate mark | ||
bufMark = -1; | ||
bufPos = 0; | ||
} | ||
} | ||
bufLength = bufPos; | ||
int read = in.read(byteBuf, bufPos, byteBuf.length - bufPos); | ||
if (read > 0) { | ||
bufLength = read + bufPos; | ||
while (byteBuf.length - bufLength > 0) { // read in more if we have space, without blocking | ||
if (in.available() < 1) break; | ||
read = in.read(byteBuf, bufLength, byteBuf.length - bufLength); | ||
if (read <= 0) break; | ||
bufLength += read; | ||
} | ||
} | ||
} | ||
|
||
byte[] getBuf() { | ||
Validate.notNull(byteBuf); | ||
return byteBuf; | ||
} | ||
|
||
@Override | ||
public int available() throws IOException { | ||
if (byteBuf != null && bufLength - bufPos > 0) | ||
return bufLength - bufPos; // doesn't include those in.available(), but mostly used as a block test | ||
return in.available(); | ||
} | ||
|
||
@Override | ||
public void mark(int readlimit) { | ||
Check warning Code scanning / CodeQL Non-synchronized override of synchronized method Warning
Method 'mark' overrides a synchronized method in
java.io.FilterInputStream Error loading related location Loading |
||
if (readlimit > BufferSize) { | ||
throw new IllegalArgumentException("Read-ahead limit is greater than buffer size"); | ||
} | ||
bufMark = bufPos; | ||
} | ||
|
||
@Override | ||
public void reset() throws IOException { | ||
Check warning Code scanning / CodeQL Non-synchronized override of synchronized method Warning
Method 'reset' overrides a synchronized method in
java.io.FilterInputStream Error loading related location Loading |
||
if (bufMark < 0) | ||
throw new IOException("Resetting to invalid mark"); | ||
bufPos = bufMark; | ||
} | ||
|
||
@Override | ||
public void close() throws IOException { | ||
super.close(); | ||
if (byteBuf == null) return; // already closed, or never allocated | ||
BufferPool.release(byteBuf); // return the buffer to the pool | ||
byteBuf = null; // NPE further attempts to read | ||
} | ||
} |