Skip to content

Commit

Permalink
Added SimpleBufferedInput
Browse files Browse the repository at this point in the history
Replaces uses of BufferedInputStreams

Advantages:
- can recycle the byte[] buffer; so significant reduction in GC load
- if consumer is reading into an array and there is no mark, no need to allocate a buffer
- doesn't aim to support multi-threaded reads, so no syncs or locking

Also, reduced the DefaultBufferSize to 8K from 32K
  • Loading branch information
jhy committed Aug 6, 2024
1 parent 9e56e93 commit 2e64811
Show file tree
Hide file tree
Showing 3 changed files with 142 additions and 6 deletions.
14 changes: 9 additions & 5 deletions src/main/java/org/jsoup/internal/ControllableInputStream.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
*/
// reimplemented from ConstrainableInputStream for JDK21 - extending BufferedInputStream will pin threads during read
public class ControllableInputStream extends FilterInputStream {
private final BufferedInputStream buff;
private final InputStream buff;
private final boolean capped;
private final int maxSize;
private long startTime;
Expand All @@ -35,7 +35,7 @@ public class ControllableInputStream extends FilterInputStream {
private int contentLength = -1;
private int readPos = 0; // amount read; can be reset()

private ControllableInputStream(BufferedInputStream in, int maxSize) {
private ControllableInputStream(InputStream in, int maxSize) {
super(in);
Validate.isTrue(maxSize >= 0);
buff = in;
Expand All @@ -54,12 +54,13 @@ private ControllableInputStream(BufferedInputStream in, int maxSize) {
* @return a controllable input stream
*/
public static ControllableInputStream wrap(InputStream in, int bufferSize, int maxSize) {
// bufferSize currently unused; consider implementing as a min size in the SoftPool recycler
if (in instanceof ControllableInputStream)
return (ControllableInputStream) in;
else if (in instanceof BufferedInputStream)
return new ControllableInputStream((BufferedInputStream) in, maxSize);
return new ControllableInputStream(in, maxSize);
else
return new ControllableInputStream(new BufferedInputStream(in, bufferSize), maxSize);
return new ControllableInputStream(new SimpleBufferedInput(in), maxSize);
}

@Override
Expand Down Expand Up @@ -173,6 +174,9 @@ private boolean expired() {
}

public BufferedInputStream inputStream() {
return buff;
// called via HttpConnection.Response.bodyStream(), needs an OG BufferedInputStream
if (buff instanceof BufferedInputStream)
return (BufferedInputStream) buff; // if originally supplied a BIS in .wrap()
else return new BufferedInputStream(buff);
}
}
2 changes: 1 addition & 1 deletion src/main/java/org/jsoup/internal/SharedConstants.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ public final class SharedConstants {
public static final String RangeKey = "jsoup.start";
public static final String EndRangeKey = "jsoup.end";

public static final int DefaultBufferSize = 1024 * 32;
public static final int DefaultBufferSize = 8 * 1024;

public static final String[] FormSubmitTags = {
"input", "keygen", "object", "select", "textarea"
Expand Down
132 changes: 132 additions & 0 deletions src/main/java/org/jsoup/internal/SimpleBufferedInput.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
package org.jsoup.internal;

import org.jsoup.helper.Validate;
import org.jspecify.annotations.Nullable;

import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;

import static org.jsoup.internal.SharedConstants.DefaultBufferSize;

/**
A simple implemented of a buffered input stream, in which we can control the byte[] buffer to recycle it. Not safe for
use between threads; no sync or locks. The buffer is borrowed on initial demand in fill. */
class SimpleBufferedInput extends FilterInputStream {
static final int BufferSize = DefaultBufferSize;
static final SoftPool<byte[]> BufferPool = new SoftPool<>(() -> new byte[BufferSize]);

byte @Nullable [] byteBuf; // the byte buffer; recycled via SoftPool. Created in fill if required
int bufPos;
int bufLength;
int bufMark = -1;

SimpleBufferedInput(InputStream in) {
super(in);
}

@Override
public int read() throws IOException {
if (bufPos >= bufLength) {
fill();
if (bufPos >= bufLength)
return -1;
}
return getBuf()[bufPos++] & 0xff;
}

@Override
public int read(byte[] dest, int offset, int desiredLen) throws IOException {
Validate.notNull(dest);
if (offset < 0 || desiredLen < 0 || desiredLen > dest.length - offset) {
throw new IndexOutOfBoundsException();
} else if (desiredLen == 0) {
return 0;
}

int bufAvail = bufLength - bufPos;
if (bufAvail <= 0) {
if (desiredLen >= BufferSize && bufMark < 0) {
// We can skip creating / copying into a local buffer; just pass through
return in.read(dest, offset, desiredLen);
}
fill();
bufAvail = bufLength - bufPos;
}

int read = Math.min(bufAvail, desiredLen);
if (read <= 0) {
return -1;
}

System.arraycopy(getBuf(), bufPos, dest, offset, read);
bufPos += read;
return read;
}

private void fill() throws IOException {
if (byteBuf == null) { // get one on first demand
byteBuf = BufferPool.borrow();
}

if (bufMark < 0) { // no mark, can lose buffer (assumes we've read to bufLen)
bufPos = 0;
} else if (bufPos >= BufferSize) { // no room left in buffer
if (bufMark > 0) { // can throw away early part of the buffer
int size = bufPos - bufMark;
System.arraycopy(byteBuf, bufMark, byteBuf, 0, size);
bufPos = size;
bufMark = 0;
} else { // invalidate mark
bufMark = -1;
bufPos = 0;
}
}
bufLength = bufPos;
int read = in.read(byteBuf, bufPos, byteBuf.length - bufPos);
if (read > 0) {
bufLength = read + bufPos;
while (byteBuf.length - bufLength > 0) { // read in more if we have space, without blocking
if (in.available() < 1) break;
read = in.read(byteBuf, bufLength, byteBuf.length - bufLength);
if (read <= 0) break;
bufLength += read;
}
}
}

byte[] getBuf() {
Validate.notNull(byteBuf);
return byteBuf;
}

@Override
public int available() throws IOException {
if (byteBuf != null && bufLength - bufPos > 0)
return bufLength - bufPos; // doesn't include those in.available(), but mostly used as a block test
return in.available();
}

@Override
public void mark(int readlimit) {

Check warning

Code scanning / CodeQL

Non-synchronized override of synchronized method Warning

Method 'mark' overrides a synchronized method in
java.io.FilterInputStream
but is not synchronized.
if (readlimit > BufferSize) {
throw new IllegalArgumentException("Read-ahead limit is greater than buffer size");
}
bufMark = bufPos;
}

@Override
public void reset() throws IOException {

Check warning

Code scanning / CodeQL

Non-synchronized override of synchronized method Warning

Method 'reset' overrides a synchronized method in
java.io.FilterInputStream
but is not synchronized.
if (bufMark < 0)
throw new IOException("Resetting to invalid mark");
bufPos = bufMark;
}

@Override
public void close() throws IOException {
super.close();
if (byteBuf == null) return; // already closed, or never allocated
BufferPool.release(byteBuf); // return the buffer to the pool
byteBuf = null; // NPE further attempts to read
}
}

0 comments on commit 2e64811

Please sign in to comment.