|
| 1 | +package works.bosk.bson; |
| 2 | + |
| 3 | +import java.lang.reflect.Type; |
| 4 | +import java.net.URLDecoder; |
| 5 | +import java.nio.charset.StandardCharsets; |
| 6 | +import java.util.ArrayList; |
| 7 | +import java.util.Arrays; |
| 8 | +import java.util.Iterator; |
| 9 | +import java.util.List; |
| 10 | +import java.util.NoSuchElementException; |
| 11 | +import java.util.function.Function; |
| 12 | +import java.util.function.UnaryOperator; |
| 13 | +import java.util.regex.Pattern; |
| 14 | +import org.bson.BsonDocument; |
| 15 | +import org.bson.BsonDocumentWriter; |
| 16 | +import org.bson.BsonReader; |
| 17 | +import org.bson.BsonValue; |
| 18 | +import org.bson.codecs.Codec; |
| 19 | +import org.bson.codecs.DecoderContext; |
| 20 | +import org.bson.codecs.DocumentCodecProvider; |
| 21 | +import org.bson.codecs.EncoderContext; |
| 22 | +import org.bson.codecs.ValueCodecProvider; |
| 23 | +import org.bson.codecs.configuration.CodecRegistries; |
| 24 | +import org.bson.codecs.configuration.CodecRegistry; |
| 25 | +import works.bosk.BoskDiagnosticContext; |
| 26 | +import works.bosk.BoskInfo; |
| 27 | +import works.bosk.Listing; |
| 28 | +import works.bosk.Path; |
| 29 | +import works.bosk.Reference; |
| 30 | +import works.bosk.SerializationPlugin; |
| 31 | +import works.bosk.SideTable; |
| 32 | +import works.bosk.exceptions.InvalidTypeException; |
| 33 | + |
| 34 | +import static works.bosk.Path.validSegment; |
| 35 | +import static works.bosk.ReferenceUtils.rawClass; |
| 36 | + |
| 37 | +/** |
| 38 | + * Utility class for encoding and decoding bosk state trees as BSON. |
| 39 | + */ |
| 40 | +public class BsonFormatter { |
| 41 | + protected static final UnaryOperator<String> DECODER; |
| 42 | + protected static final UnaryOperator<String> ENCODER; |
| 43 | + protected final CodecRegistry simpleCodecs; |
| 44 | + protected final Function<Type, Codec<?>> preferredBoskCodecs; |
| 45 | + protected final Function<Reference<?>, SerializationPlugin.DeserializationScope> deserializationScopeFunction; |
| 46 | + |
| 47 | + public BsonFormatter(BoskInfo<?> boskInfo, BsonPlugin bsonPlugin) { |
| 48 | + this.simpleCodecs = CodecRegistries.fromProviders( |
| 49 | + bsonPlugin.codecProviderFor(boskInfo), |
| 50 | + new ValueCodecProvider(), |
| 51 | + new DocumentCodecProvider()); |
| 52 | + this.preferredBoskCodecs = type -> bsonPlugin.getCodec(type, rawClass(type), simpleCodecs, boskInfo); |
| 53 | + this.deserializationScopeFunction = bsonPlugin::newDeserializationScope; |
| 54 | + } |
| 55 | + |
| 56 | + public static String dottedFieldNameSegment(String segment) { |
| 57 | + return ENCODER.apply(validSegment(segment)); |
| 58 | + } |
| 59 | + |
| 60 | + public static String undottedFieldNameSegment(String dottedSegment) { |
| 61 | + return DECODER.apply(dottedSegment); |
| 62 | + } |
| 63 | + |
| 64 | + /** |
| 65 | + * @param refLength behave as though <code>ref</code> were truncated to this length without actually having to do it |
| 66 | + */ |
| 67 | + private static <T> void buildDottedFieldNameOf(Reference<T> ref, int startLength, int refLength, ArrayList<String> segments) { |
| 68 | + if (ref.path().length() > startLength) { |
| 69 | + Reference<?> enclosingReference = ref.enclosingReference(Object.class); |
| 70 | + BsonFormatter.buildDottedFieldNameOf(enclosingReference, startLength, refLength, segments); |
| 71 | + if (ref.path().length() <= refLength) { |
| 72 | + if (Listing.class.isAssignableFrom(enclosingReference.targetClass())) { |
| 73 | + segments.add("ids"); |
| 74 | + } else if (SideTable.class.isAssignableFrom(enclosingReference.targetClass())) { |
| 75 | + segments.add("valuesById"); |
| 76 | + } |
| 77 | + segments.add(BsonFormatter.dottedFieldNameSegment(ref.path().lastSegment())); |
| 78 | + } |
| 79 | + } |
| 80 | + } |
| 81 | + |
| 82 | + static { |
| 83 | + DECODER = s->{ |
| 84 | + return URLDecoder.decode(s, StandardCharsets.UTF_8); |
| 85 | + }; |
| 86 | + |
| 87 | + ENCODER = s->{ |
| 88 | + // Selective percent-encoding of characters MongoDB doesn't like. |
| 89 | + // Standard percent-encoding doesn't handle the period character, which |
| 90 | + // we want, so if we're already diverging from the standard, we might |
| 91 | + // as well do something that suits our needs. |
| 92 | + // Good to stay compatible with standard percent-DEcoding, though. |
| 93 | + StringBuilder sb = new StringBuilder(); |
| 94 | + for (int i = 0; i < s.length(); ) { |
| 95 | + int cp = s.codePointAt(i); |
| 96 | + switch (cp) { |
| 97 | + case '%': // For percent-encoding |
| 98 | + case '+': case ' ': // These two are affected by URLDecoder |
| 99 | + case '$': // MongoDB treats these specially |
| 100 | + case '.': // MongoDB separator for dotted field names |
| 101 | + case 0: // Can MongoDB handle nulls? Probably. Do we want to find out? Not really. |
| 102 | + case '|': // (These are reserved for internal use) |
| 103 | + case '!': |
| 104 | + case '~': |
| 105 | + case '[': |
| 106 | + case ']': |
| 107 | + appendPercentEncoded(sb, cp); |
| 108 | + break; |
| 109 | + default: |
| 110 | + sb.appendCodePoint(cp); |
| 111 | + break; |
| 112 | + } |
| 113 | + i += Character.charCount(cp); |
| 114 | + } |
| 115 | + return sb.toString(); |
| 116 | + }; |
| 117 | + } |
| 118 | + |
| 119 | + private static void appendPercentEncoded(StringBuilder sb, int cp) { |
| 120 | + assert 0 <= cp && cp <= 255; |
| 121 | + sb |
| 122 | + .append('%') |
| 123 | + .append(hexCharForDigit(cp / 16)) |
| 124 | + .append(hexCharForDigit(cp % 16)); |
| 125 | + } |
| 126 | + |
| 127 | + /** |
| 128 | + * An uppercase version of {@link Character#forDigit} with a radix of 16. |
| 129 | + */ |
| 130 | + private static char hexCharForDigit(int value) { |
| 131 | + if (value < 10) { |
| 132 | + return (char)('0' + value); |
| 133 | + } else { |
| 134 | + return (char)('A' + value - 10); |
| 135 | + } |
| 136 | + } |
| 137 | + |
| 138 | + /** |
| 139 | + * @param ref the bosk node whose field name is required |
| 140 | + * @param startingRef the bosk node corresponding to the MongoDB document in which {@code ref} is located |
| 141 | + * @return MongoDB field name corresponding to {@code ref} within the document for {@code startingRef}, starting with {@link DocumentFields#state state} |
| 142 | + * @see #referenceTo(String, Reference) |
| 143 | + */ |
| 144 | + public static <T> String dottedFieldNameOf(Reference<T> ref, Reference<?> startingRef) { |
| 145 | + assert startingRef.encloses(ref); |
| 146 | + return dottedFieldNameOf(ref, ref.path().length(), startingRef); |
| 147 | + } |
| 148 | + |
| 149 | + /** |
| 150 | + * @param refLength behave as though <code>ref</code> were truncated to this length, without actually having to do it |
| 151 | + * @return MongoDB field name corresponding to the given Reference |
| 152 | + * @see #referenceTo(String, Reference) |
| 153 | + */ |
| 154 | + static <T> String dottedFieldNameOf(Reference<T> ref, int refLength, Reference<?> startingRef) { |
| 155 | + // TODO: Is this required? It's currently only really called by tests |
| 156 | + ArrayList<String> segments = dottedFieldNameSegments(ref, refLength, startingRef); |
| 157 | + return String.join(".", segments.toArray(new String[0])); |
| 158 | + } |
| 159 | + |
| 160 | + /** |
| 161 | + * @return Reference corresponding to the given field name within a document representing {@code startingReference} |
| 162 | + * @see #dottedFieldNameOf |
| 163 | + */ |
| 164 | + @SuppressWarnings("unchecked") |
| 165 | + public static <T> Reference<T> referenceTo(String dottedName, Reference<?> startingReference) throws InvalidTypeException { |
| 166 | + Reference<?> ref = startingReference; |
| 167 | + Iterator<String> iter = Arrays.asList(dottedName.split(Pattern.quote("."))).iterator(); |
| 168 | + BsonFormatter.skipField(ref, iter, DocumentFields.state.name()); // The entire Bosk state is in this field |
| 169 | + while (iter.hasNext()) { |
| 170 | + if (Listing.class.isAssignableFrom(ref.targetClass())) { |
| 171 | + BsonFormatter.skipField(ref, iter, "ids"); |
| 172 | + } else if (SideTable.class.isAssignableFrom(ref.targetClass())) { |
| 173 | + BsonFormatter.skipField(ref, iter, "valuesById"); |
| 174 | + } |
| 175 | + if (iter.hasNext()) { |
| 176 | + String segment = undottedFieldNameSegment(iter.next()); |
| 177 | + ref = ref.then(Object.class, segment); |
| 178 | + } |
| 179 | + } |
| 180 | + return (Reference<T>) ref; |
| 181 | + } |
| 182 | + |
| 183 | + private static void skipField(Reference<?> ref, Iterator<String> iter, String expectedName) { |
| 184 | + String actualName; |
| 185 | + try { |
| 186 | + actualName = iter.next(); |
| 187 | + } catch (NoSuchElementException e) { |
| 188 | + throw new IllegalStateException("Expected '" + expectedName + "' for " + ref.targetClass().getSimpleName() + "; encountered end of dotted field name"); |
| 189 | + } |
| 190 | + if (!expectedName.equals(actualName)) { |
| 191 | + throw new IllegalStateException("Expected '" + expectedName + "' for " + ref.targetClass().getSimpleName() + "; was: " + actualName); |
| 192 | + } |
| 193 | + } |
| 194 | + |
| 195 | + /** |
| 196 | + * A <em>BSON path</em> is a pipe-delimited sequence of BSON fields. |
| 197 | + * |
| 198 | + * <p> |
| 199 | + * Note that the BSON structure is such that the BSON path's segments may |
| 200 | + * not exactly match those of {@code ref.path()}. |
| 201 | + * |
| 202 | + * <p> |
| 203 | + * This computes the BSON path for {@code ref} within a document representing {@code startingRef}. |
| 204 | + * |
| 205 | + * @param ref reference to the node whose path is desired |
| 206 | + * @param startingRef reference to the node corresponding to the document containing the {@code ref} node |
| 207 | + * @return the BSON path leading to {@code ref} within a document representing {@code startingRef} |
| 208 | + */ |
| 209 | + public static String docBsonPath(Reference<?> ref, Reference<?> startingRef) { |
| 210 | + return "|" + String.join("|", docSegments(ref, startingRef)); |
| 211 | + } |
| 212 | + |
| 213 | + /** |
| 214 | + * @return list of field names suitable for {@link #lookup} to find the document corresponding |
| 215 | + * to <code>docRef</code> inside a document corresponding to <code>rootRef</code> |
| 216 | + */ |
| 217 | + private static List<String> docSegments(Reference<?> docRef, Reference<?> rootRef) { |
| 218 | + ArrayList<String> allSegments = dottedFieldNameSegments(docRef, docRef.path().length(), rootRef); |
| 219 | + return allSegments |
| 220 | + .subList(1, allSegments.size()); // Skip the "state" field |
| 221 | + } |
| 222 | + |
| 223 | + protected Codec<?> codecFor(Type type) { |
| 224 | + // BsonPlugin gives better codecs than CodecRegistry, because BsonPlugin is aware of generics, |
| 225 | + // so we always try that first. The CodecSupplier protocol uses "null" to indicate that another |
| 226 | + // CodecSupplier should be used, so we follow that protocol and fall back on the CodecRegistry. |
| 227 | + // TODO: Should this logic be in BsonPlugin? It has nothing to do with MongoDriver really. |
| 228 | + Codec<?> result = preferredBoskCodecs.apply(type); |
| 229 | + if (result == null) { |
| 230 | + return simpleCodecs.get(rawClass(type)); |
| 231 | + } else { |
| 232 | + return result; |
| 233 | + } |
| 234 | + } |
| 235 | + |
| 236 | + /** |
| 237 | + * Converts a bosk state node to BSON. |
| 238 | + * |
| 239 | + * <p> |
| 240 | + * A common way to call this is, for a given {@link Reference} {@code ref}, is: |
| 241 | + * <pre> |
| 242 | + * ref.value(), ref.targetType() |
| 243 | + * </pre> |
| 244 | + * @param object the bosk state node to convert |
| 245 | + * @param type the type of {@code object} |
| 246 | + * @see #bsonValue2object(BsonValue, Reference) |
| 247 | + */ |
| 248 | + @SuppressWarnings("unchecked") |
| 249 | + public <T> BsonValue object2bsonValue(T object, Type type) { |
| 250 | + rawClass(type).cast(object); |
| 251 | + Codec<T> objectCodec = (Codec<T>) codecFor(type); |
| 252 | + BsonDocument document = new BsonDocument(); |
| 253 | + try (BsonDocumentWriter writer = new BsonDocumentWriter(document)) { |
| 254 | + // To support arbitrary values, not just whole documents, we put the result INSIDE a document. |
| 255 | + writer.writeStartDocument(); |
| 256 | + writer.writeName("value"); |
| 257 | + objectCodec.encode(writer, object, EncoderContext.builder().build()); |
| 258 | + writer.writeEndDocument(); |
| 259 | + } |
| 260 | + return document.get("value"); |
| 261 | + } |
| 262 | + |
| 263 | + /** |
| 264 | + * Converts a BSON value to a bosk state node. |
| 265 | + * @param bson the value to convert |
| 266 | + * @param target the bosk location of the resulting node |
| 267 | + * @see #object2bsonValue(Object, Type) |
| 268 | + */ |
| 269 | + @SuppressWarnings("unchecked") |
| 270 | + public <T> T bsonValue2object(BsonValue bson, Reference<T> target) { |
| 271 | + Codec<T> objectCodec = (Codec<T>) codecFor(target.targetType()); |
| 272 | + BsonDocument document = new BsonDocument(); |
| 273 | + document.append("value", bson); |
| 274 | + try ( |
| 275 | + @SuppressWarnings("unused") SerializationPlugin.DeserializationScope scope = deserializationScopeFunction.apply(target); |
| 276 | + BsonReader reader = document.asBsonReader() |
| 277 | + ) { |
| 278 | + reader.readStartDocument(); |
| 279 | + reader.readName("value"); |
| 280 | + return objectCodec.decode(reader, DecoderContext.builder().build()); |
| 281 | + } |
| 282 | + } |
| 283 | + |
| 284 | + /** |
| 285 | + * The fields of the main MongoDB document. Case-sensitive. |
| 286 | + * |
| 287 | + * <p> |
| 288 | + * No field name should be a prefix of any other. |
| 289 | + */ |
| 290 | + public enum DocumentFields { |
| 291 | + /** |
| 292 | + * The location of this document's state within the conceptual giant document. |
| 293 | + * A pipe-separated list of BSON field names. |
| 294 | + */ |
| 295 | + path, |
| 296 | + |
| 297 | + /** |
| 298 | + * The BSON-encoded portion of the bosk state represented by this document. |
| 299 | + */ |
| 300 | + state, |
| 301 | + |
| 302 | + /** |
| 303 | + * An ever-increasing 64-bit long that is incremented every time the document changes. |
| 304 | + */ |
| 305 | + revision, |
| 306 | + |
| 307 | + /** |
| 308 | + * The contents of {@link BoskDiagnosticContext} corresponding to this |
| 309 | + * document's last update. |
| 310 | + */ |
| 311 | + diagnostics, |
| 312 | + } |
| 313 | + |
| 314 | + static <T> ArrayList<String> dottedFieldNameSegments(Reference<T> ref, int refLength, Reference<?> startingRef) { |
| 315 | + assert startingRef.path().matchesPrefixOf(ref.path()): "'" + ref + "' must be under '" + startingRef + "'"; |
| 316 | + ArrayList<String> segments = new ArrayList<>(); |
| 317 | + segments.add(DocumentFields.state.name()); |
| 318 | + buildDottedFieldNameOf(ref, startingRef.path().length(), refLength, segments); |
| 319 | + return segments; |
| 320 | + } |
| 321 | + |
| 322 | + /** |
| 323 | + * @param elementRefLength behave as though <code>elementRef</code> were truncated to this length, without actually having to do it |
| 324 | + * @return MongoDB field name corresponding to the object that contains the given element |
| 325 | + * @see #referenceTo(String, Reference) |
| 326 | + */ |
| 327 | + static <T> List<String> containerSegments(Reference<T> elementRef, int elementRefLength, Reference<?> startingRef) { |
| 328 | + List<String> elementSegments = dottedFieldNameSegments(elementRef, elementRefLength, startingRef); |
| 329 | + return elementSegments.subList(0, elementSegments.size()-1); // Trim off the element itself |
| 330 | + } |
| 331 | + |
| 332 | +} |
0 commit comments