Skip to content

Commit 1fd1735

Browse files
authored
Merge pull request #37 from prdoyle/bson
Add bsok-bson library
2 parents 32c5a6a + 45684ff commit 1fd1735

29 files changed

+566
-385
lines changed

bosk-bson/README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
## bosk-bson
2+
3+
This is the subproject for the published `bosk-bson` library,
4+
containing facilities for manipulating MongoDB's binary JSON data structures.
5+
Outside of MongoDB itself, we also use BSON as a handy utility
6+
to divide up and re-combine large JSON documents,
7+
which is useful even outside of MongoDB.

bosk-bson/build.gradle

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
2+
plugins {
3+
id 'bosk.development'
4+
id 'bosk.maven-publish'
5+
id 'info.solidsoft.pitest' version '1.15.0'
6+
id 'com.github.spotbugs' version '5.1.5'
7+
}
8+
9+
base {
10+
archivesName = 'bosk-bson'
11+
}
12+
13+
java {
14+
toolchain {
15+
languageVersion = JavaLanguageVersion.of(jdkVersion)
16+
}
17+
}
18+
19+
compileJava {
20+
options.release = prodJavaVersion
21+
}
22+
23+
compileTestJava {
24+
options.release = null
25+
}
26+
27+
dependencies {
28+
api project(":bosk-core")
29+
api 'org.mongodb:bson:5.1.2'
30+
implementation 'com.github.spotbugs:spotbugs-annotations:4.8.6' // To stop warnings about When from MongoDB driver
31+
32+
testImplementation project(":bosk-logback")
33+
testImplementation project(":bosk-testing")
34+
testImplementation project(":lib-testing")
35+
}
36+
37+
pitest {
38+
pitestVersion = '1.15.0'
39+
junit5PluginVersion = '1.2.0'
40+
jvmArgs = ['-ea'] // Our unit tests check for assert statements
41+
targetClasses = ['works.bosk.drivers.mongo.BsonSurgeon']
42+
targetTests = ['works.bosk.drivers.mongo.BsonSurgeonTest']
43+
threads = 4
44+
outputFormats = ['XML', 'HTML']
45+
timestampedReports = false
46+
//verbose = true
47+
}

bosk-mongo/src/main/java/works/bosk/drivers/mongo/BsonFormatException.java renamed to bosk-bson/src/main/java/works/bosk/bson/BsonFormatException.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package works.bosk.drivers.mongo;
1+
package works.bosk.bson;
22

33
class BsonFormatException extends IllegalStateException {
44
public BsonFormatException(String s) { super(s); }
Lines changed: 332 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,332 @@
1+
package works.bosk.bson;
2+
3+
import java.lang.reflect.Type;
4+
import java.net.URLDecoder;
5+
import java.nio.charset.StandardCharsets;
6+
import java.util.ArrayList;
7+
import java.util.Arrays;
8+
import java.util.Iterator;
9+
import java.util.List;
10+
import java.util.NoSuchElementException;
11+
import java.util.function.Function;
12+
import java.util.function.UnaryOperator;
13+
import java.util.regex.Pattern;
14+
import org.bson.BsonDocument;
15+
import org.bson.BsonDocumentWriter;
16+
import org.bson.BsonReader;
17+
import org.bson.BsonValue;
18+
import org.bson.codecs.Codec;
19+
import org.bson.codecs.DecoderContext;
20+
import org.bson.codecs.DocumentCodecProvider;
21+
import org.bson.codecs.EncoderContext;
22+
import org.bson.codecs.ValueCodecProvider;
23+
import org.bson.codecs.configuration.CodecRegistries;
24+
import org.bson.codecs.configuration.CodecRegistry;
25+
import works.bosk.BoskDiagnosticContext;
26+
import works.bosk.BoskInfo;
27+
import works.bosk.Listing;
28+
import works.bosk.Path;
29+
import works.bosk.Reference;
30+
import works.bosk.SerializationPlugin;
31+
import works.bosk.SideTable;
32+
import works.bosk.exceptions.InvalidTypeException;
33+
34+
import static works.bosk.Path.validSegment;
35+
import static works.bosk.ReferenceUtils.rawClass;
36+
37+
/**
38+
* Utility class for encoding and decoding bosk state trees as BSON.
39+
*/
40+
public class BsonFormatter {
41+
protected static final UnaryOperator<String> DECODER;
42+
protected static final UnaryOperator<String> ENCODER;
43+
protected final CodecRegistry simpleCodecs;
44+
protected final Function<Type, Codec<?>> preferredBoskCodecs;
45+
protected final Function<Reference<?>, SerializationPlugin.DeserializationScope> deserializationScopeFunction;
46+
47+
/**
 * Builds a formatter whose codecs come from the given plugin.
 *
 * @param boskInfo describes the bosk whose state will be encoded and decoded
 * @param bsonPlugin supplies the codec provider, the per-type codecs,
 *                   and the deserialization scopes used by this formatter
 */
public BsonFormatter(BoskInfo<?> boskInfo, BsonPlugin bsonPlugin) {
	// The plugin's provider is listed first, ahead of the stock value and document providers.
	CodecRegistry registry = CodecRegistries.fromProviders(
		bsonPlugin.codecProviderFor(boskInfo),
		new ValueCodecProvider(),
		new DocumentCodecProvider());
	this.simpleCodecs = registry;
	this.preferredBoskCodecs = type -> bsonPlugin.getCodec(type, rawClass(type), registry, boskInfo);
	this.deserializationScopeFunction = bsonPlugin::newDeserializationScope;
}
55+
56+
public static String dottedFieldNameSegment(String segment) {
57+
return ENCODER.apply(validSegment(segment));
58+
}
59+
60+
public static String undottedFieldNameSegment(String dottedSegment) {
61+
return DECODER.apply(dottedSegment);
62+
}
63+
64+
/**
65+
* @param refLength behave as though <code>ref</code> were truncated to this length without actually having to do it
66+
*/
67+
private static <T> void buildDottedFieldNameOf(Reference<T> ref, int startLength, int refLength, ArrayList<String> segments) {
68+
if (ref.path().length() > startLength) {
69+
Reference<?> enclosingReference = ref.enclosingReference(Object.class);
70+
BsonFormatter.buildDottedFieldNameOf(enclosingReference, startLength, refLength, segments);
71+
if (ref.path().length() <= refLength) {
72+
if (Listing.class.isAssignableFrom(enclosingReference.targetClass())) {
73+
segments.add("ids");
74+
} else if (SideTable.class.isAssignableFrom(enclosingReference.targetClass())) {
75+
segments.add("valuesById");
76+
}
77+
segments.add(BsonFormatter.dottedFieldNameSegment(ref.path().lastSegment()));
78+
}
79+
}
80+
}
81+
82+
static {
83+
DECODER = s->{
84+
return URLDecoder.decode(s, StandardCharsets.UTF_8);
85+
};
86+
87+
ENCODER = s->{
88+
// Selective percent-encoding of characters MongoDB doesn't like.
89+
// Standard percent-encoding doesn't handle the period character, which
90+
// we want, so if we're already diverging from the standard, we might
91+
// as well do something that suits our needs.
92+
// Good to stay compatible with standard percent-DEcoding, though.
93+
StringBuilder sb = new StringBuilder();
94+
for (int i = 0; i < s.length(); ) {
95+
int cp = s.codePointAt(i);
96+
switch (cp) {
97+
case '%': // For percent-encoding
98+
case '+': case ' ': // These two are affected by URLDecoder
99+
case '$': // MongoDB treats these specially
100+
case '.': // MongoDB separator for dotted field names
101+
case 0: // Can MongoDB handle nulls? Probably. Do we want to find out? Not really.
102+
case '|': // (These are reserved for internal use)
103+
case '!':
104+
case '~':
105+
case '[':
106+
case ']':
107+
appendPercentEncoded(sb, cp);
108+
break;
109+
default:
110+
sb.appendCodePoint(cp);
111+
break;
112+
}
113+
i += Character.charCount(cp);
114+
}
115+
return sb.toString();
116+
};
117+
}
118+
119+
private static void appendPercentEncoded(StringBuilder sb, int cp) {
120+
assert 0 <= cp && cp <= 255;
121+
sb
122+
.append('%')
123+
.append(hexCharForDigit(cp / 16))
124+
.append(hexCharForDigit(cp % 16));
125+
}
126+
127+
/**
128+
* An uppercase version of {@link Character#forDigit} with a radix of 16.
129+
*/
130+
private static char hexCharForDigit(int value) {
131+
if (value < 10) {
132+
return (char)('0' + value);
133+
} else {
134+
return (char)('A' + value - 10);
135+
}
136+
}
137+
138+
/**
139+
* @param ref the bosk node whose field name is required
140+
* @param startingRef the bosk node corresponding to the MongoDB document in which {@code ref} is located
141+
* @return MongoDB field name corresponding to {@code ref} within the document for {@code startingRef}, starting with {@link DocumentFields#state state}
142+
* @see #referenceTo(String, Reference)
143+
*/
144+
public static <T> String dottedFieldNameOf(Reference<T> ref, Reference<?> startingRef) {
145+
assert startingRef.encloses(ref);
146+
return dottedFieldNameOf(ref, ref.path().length(), startingRef);
147+
}
148+
149+
/**
150+
* @param refLength behave as though <code>ref</code> were truncated to this length, without actually having to do it
151+
* @return MongoDB field name corresponding to the given Reference
152+
* @see #referenceTo(String, Reference)
153+
*/
154+
static <T> String dottedFieldNameOf(Reference<T> ref, int refLength, Reference<?> startingRef) {
155+
// TODO: Is this required? It's currently only really called by tests
156+
ArrayList<String> segments = dottedFieldNameSegments(ref, refLength, startingRef);
157+
return String.join(".", segments.toArray(new String[0]));
158+
}
159+
160+
/**
161+
* @return Reference corresponding to the given field name within a document representing {@code startingReference}
162+
* @see #dottedFieldNameOf
163+
*/
164+
@SuppressWarnings("unchecked")
165+
public static <T> Reference<T> referenceTo(String dottedName, Reference<?> startingReference) throws InvalidTypeException {
166+
Reference<?> ref = startingReference;
167+
Iterator<String> iter = Arrays.asList(dottedName.split(Pattern.quote("."))).iterator();
168+
BsonFormatter.skipField(ref, iter, DocumentFields.state.name()); // The entire Bosk state is in this field
169+
while (iter.hasNext()) {
170+
if (Listing.class.isAssignableFrom(ref.targetClass())) {
171+
BsonFormatter.skipField(ref, iter, "ids");
172+
} else if (SideTable.class.isAssignableFrom(ref.targetClass())) {
173+
BsonFormatter.skipField(ref, iter, "valuesById");
174+
}
175+
if (iter.hasNext()) {
176+
String segment = undottedFieldNameSegment(iter.next());
177+
ref = ref.then(Object.class, segment);
178+
}
179+
}
180+
return (Reference<T>) ref;
181+
}
182+
183+
private static void skipField(Reference<?> ref, Iterator<String> iter, String expectedName) {
184+
String actualName;
185+
try {
186+
actualName = iter.next();
187+
} catch (NoSuchElementException e) {
188+
throw new IllegalStateException("Expected '" + expectedName + "' for " + ref.targetClass().getSimpleName() + "; encountered end of dotted field name");
189+
}
190+
if (!expectedName.equals(actualName)) {
191+
throw new IllegalStateException("Expected '" + expectedName + "' for " + ref.targetClass().getSimpleName() + "; was: " + actualName);
192+
}
193+
}
194+
195+
/**
196+
* A <em>BSON path</em> is a pipe-delimited sequence of BSON fields.
197+
*
198+
* <p>
199+
* Note that the BSON structure is such that the BSON path's segments may
200+
* not exactly match those of {@code ref.path()}.
201+
*
202+
* <p>
203+
* This computes the BSON path for {@code ref} within a document representing {@code startingRef}.
204+
*
205+
* @param ref reference to the node whose path is desired
206+
* @param startingRef reference to the node corresponding to the document containing the {@code ref} node
207+
* @return the BSON path leading to {@code ref} within a document representing {@code startingRef}
208+
*/
209+
public static String docBsonPath(Reference<?> ref, Reference<?> startingRef) {
210+
return "|" + String.join("|", docSegments(ref, startingRef));
211+
}
212+
213+
/**
214+
* @return list of field names suitable for {@link #lookup} to find the document corresponding
215+
* to <code>docRef</code> inside a document corresponding to <code>rootRef</code>
216+
*/
217+
private static List<String> docSegments(Reference<?> docRef, Reference<?> rootRef) {
218+
ArrayList<String> allSegments = dottedFieldNameSegments(docRef, docRef.path().length(), rootRef);
219+
return allSegments
220+
.subList(1, allSegments.size()); // Skip the "state" field
221+
}
222+
223+
protected Codec<?> codecFor(Type type) {
224+
// BsonPlugin gives better codecs than CodecRegistry, because BsonPlugin is aware of generics,
225+
// so we always try that first. The CodecSupplier protocol uses "null" to indicate that another
226+
// CodecSupplier should be used, so we follow that protocol and fall back on the CodecRegistry.
227+
// TODO: Should this logic be in BsonPlugin? It has nothing to do with MongoDriver really.
228+
Codec<?> result = preferredBoskCodecs.apply(type);
229+
if (result == null) {
230+
return simpleCodecs.get(rawClass(type));
231+
} else {
232+
return result;
233+
}
234+
}
235+
236+
/**
237+
* Converts a bosk state node to BSON.
238+
*
239+
* <p>
240+
* A common way to call this is, for a given {@link Reference} {@code ref}, is:
241+
* <pre>
242+
* ref.value(), ref.targetType()
243+
* </pre>
244+
* @param object the bosk state node to convert
245+
* @param type the type of {@code object}
246+
* @see #bsonValue2object(BsonValue, Reference)
247+
*/
248+
@SuppressWarnings("unchecked")
249+
public <T> BsonValue object2bsonValue(T object, Type type) {
250+
rawClass(type).cast(object);
251+
Codec<T> objectCodec = (Codec<T>) codecFor(type);
252+
BsonDocument document = new BsonDocument();
253+
try (BsonDocumentWriter writer = new BsonDocumentWriter(document)) {
254+
// To support arbitrary values, not just whole documents, we put the result INSIDE a document.
255+
writer.writeStartDocument();
256+
writer.writeName("value");
257+
objectCodec.encode(writer, object, EncoderContext.builder().build());
258+
writer.writeEndDocument();
259+
}
260+
return document.get("value");
261+
}
262+
263+
/**
264+
* Converts a BSON value to a bosk state node.
265+
* @param bson the value to convert
266+
* @param target the bosk location of the resulting node
267+
* @see #object2bsonValue(Object, Type)
268+
*/
269+
@SuppressWarnings("unchecked")
270+
public <T> T bsonValue2object(BsonValue bson, Reference<T> target) {
271+
Codec<T> objectCodec = (Codec<T>) codecFor(target.targetType());
272+
BsonDocument document = new BsonDocument();
273+
document.append("value", bson);
274+
try (
275+
@SuppressWarnings("unused") SerializationPlugin.DeserializationScope scope = deserializationScopeFunction.apply(target);
276+
BsonReader reader = document.asBsonReader()
277+
) {
278+
reader.readStartDocument();
279+
reader.readName("value");
280+
return objectCodec.decode(reader, DecoderContext.builder().build());
281+
}
282+
}
283+
284+
/**
 * Names of the fields in the main MongoDB document. These are case-sensitive.
 *
 * <p>
 * No name may be a prefix of another.
 */
public enum DocumentFields {
	/**
	 * Where this document's state belongs within the conceptual giant document,
	 * expressed as a pipe-separated list of BSON field names.
	 */
	path,

	/**
	 * The portion of the bosk state that this document represents, encoded as BSON.
	 */
	state,

	/**
	 * A 64-bit long that only ever increases, incremented on every change to the document.
	 */
	revision,

	/**
	 * The {@link BoskDiagnosticContext} contents that accompanied
	 * the most recent update to this document.
	 */
	diagnostics,
}
313+
314+
static <T> ArrayList<String> dottedFieldNameSegments(Reference<T> ref, int refLength, Reference<?> startingRef) {
315+
assert startingRef.path().matchesPrefixOf(ref.path()): "'" + ref + "' must be under '" + startingRef + "'";
316+
ArrayList<String> segments = new ArrayList<>();
317+
segments.add(DocumentFields.state.name());
318+
buildDottedFieldNameOf(ref, startingRef.path().length(), refLength, segments);
319+
return segments;
320+
}
321+
322+
/**
323+
* @param elementRefLength behave as though <code>elementRef</code> were truncated to this length, without actually having to do it
324+
* @return MongoDB field name corresponding to the object that contains the given element
325+
* @see #referenceTo(String, Reference)
326+
*/
327+
static <T> List<String> containerSegments(Reference<T> elementRef, int elementRefLength, Reference<?> startingRef) {
328+
List<String> elementSegments = dottedFieldNameSegments(elementRef, elementRefLength, startingRef);
329+
return elementSegments.subList(0, elementSegments.size()-1); // Trim off the element itself
330+
}
331+
332+
}

0 commit comments

Comments
 (0)