microsoft · msfluid-bot · Dec 6, 2023 · Dec 6, 2023 · Dec 6, 2023 · Dec 7, 2023
@@ -0,0 +1,9 @@
+---
+"@fluidframework/container-runtime": minor
+---
+
+GC: Tombstoned objects will fail to load by default
+
+Previously, by default Tombstoned objects would merely trigger informational logs, with an option via config
+to also cause errors to be thrown on load. Now failure to load is the default, with an option to disable it if necessary.
+This reflects the purpose of Tombstone stage which is to mimic the user experience of having objects deleted.
@@ -1,9 +1,6 @@
 {
 	"$schema": "https://developer.microsoft.com/json-schemas/api-extractor/v7/api-extractor.schema.json",
 	"extends": "../../../common/build/build-common/api-extractor-base.json",
-	"dtsRollup": {
-		"enabled": true
-	},
 	"messages": {
 		"extractorMessageReporting": {
 			// TODO: Add missing documentation and remove this rule override

@@ -1,7 +1,4 @@
 {
 	"$schema": "https://developer.microsoft.com/json-schemas/api-extractor/v7/api-extractor.schema.json",
-	"extends": "../../../common/build/build-common/api-extractor-base.json",
-	"dtsRollup": {
-		"enabled": true
-	}
+	"extends": "../../../common/build/build-common/api-extractor-base.json"
 }
@@ -35,7 +35,7 @@
 	 * Configures how the .d.ts rollup file will be generated.
 	 */
 	"dtsRollup": {
-		"enabled": false,
+		"enabled": true,
 		"alphaTrimmedFilePath": "<projectFolder>/dist/<unscopedPackageName>-alpha.d.ts",
 		"betaTrimmedFilePath": "<projectFolder>/dist/<unscopedPackageName>-beta.d.ts",
 		"publicTrimmedFilePath": "<projectFolder>/dist/<unscopedPackageName>-public.d.ts",

@@ -1,9 +1,6 @@
 {
 	"$schema": "https://developer.microsoft.com/json-schemas/api-extractor/v7/api-extractor.schema.json",
 	"extends": "../../../common/build/build-common/api-extractor-base.json",
-	"dtsRollup": {
-		"enabled": true
-	},
 	"messages": {
 		"extractorMessageReporting": {
 			// TODO: Add missing documentation and remove this rule override

@@ -828,7 +828,10 @@ export interface IDefaultEditBuilder {
     // (undocumented)
     optionalField(field: FieldUpPath): OptionalFieldEditBuilder;
     // (undocumented)
-    sequenceField(field: FieldUpPath): SequenceFieldEditBuilder;
+    sequenceField(field: FieldUpPath, shapeInfo?: {
+        schema: StoredSchemaCollection;
+        policy: FullSchemaPolicy;
+    }): SequenceFieldEditBuilder;
     // (undocumented)
     valueField(field: FieldUpPath): ValueFieldEditBuilder;
 }

@@ -19,3 +19,4 @@ List of technical/design documents (to be organized into appropriate sections at
 -   [Undo](./main/undo.md)
 -   [V1 Undo Example Flow](./main/v1-undo-example-flow.md)
 -   [V1 Undo](./main/v1-undo.md)
+-   [Compatibility](./main/compatibility.md)
@@ -0,0 +1,149 @@
+# Compatibility
+
+This document provides concrete guidelines and strategies for organizing code that impacts SharedTree's persisted format.
+
+## Prerequisites
+
+[This document](../../../../../packages/dds/SchemaVersioning.md) provides general "best practices" for working with persisted data within the Fluid Framework.
+It's strongly recommended to read through and understand its rationale before continuing with this document,
+as most of the concrete recommendations presented henceforth fall out of those best practices.
+
+## What State is Persisted?
+
+A DDS's persisted format encompasses the format it uses for its summaries as well as its ops (due to [trailing ops](../../../README.md))
+including transitively referenced structured blob data.
+
+Since documents are stored outside of Fluid control (i.e. no type of central data migration is possible),
+DDSes necessarily commit to backwards compatibility of their format for all time.
+
+## Format Management
+
+The persisted format version should be a configuration option an application author can specify using `SharedTreeFactory`:
+this ensures applications can control rollout of configuration changes which require code saturation of some prior version.
+It also empowers the container author (rather than the host--if they differ) to control their data model.
+
+In the SharedTree MVP, there is currently no mechanism for safely changing the persisted format version.
+However, it is feasible to add such a mechanism in the future, and specifying the persisted format explicity in configuration sets us up to easily do so.
+One example of prior art in the space is `@fluid-experimental/tree`'s [format-breaking migration strategy](../../../tree/docs/Breaking-Change-Migration.md),
+though we would likely want to make the mechanism usable across the Fluid Framework.
+
+## Code Organization
+
+Each part of SharedTree which contributes to the persisted format should define:
+
+1. Types defining the _in-memory format_ needed to load or work with its data
+1. A set of versioned _persisted formats_ which encompass all supported formats in the current and past.
+1. EITHER An `IJsonCodec` capable of transcoding between the in-memory format and a particular persisted format, OR an `ICodecFamily` of such `IJsonCodec`s (for different persisted format versions)
+
+Split the above components into files as is reasonable.
+Current organizational standards put:
+
+-   The in-memory format into a file ending in `Types.ts`
+-   The persisted format into a file ending in `Format.ts`
+-   Codecs into a file ending in `Codec.ts` / `Codecs.ts`
+
+Having consistent conventions for these files helps to make changes to persisted formats obvious at review time.
+Schemas for primitive types which are used in persisted formats but don't intrinsically define formats (such as branded strings) can be defined where convenient.
+Codec logic should generally be self-contained: all imports should either be of the form `import type`, or should import from another persisted format file.
+Importing Fluid Framework libraries that have the same guarantees (e.g. `SummaryTreeBuilder`) is also acceptable.
+Codecs should expose the minimal necessary set of types.
+Encoding should take care to only include necessary object properties. **In particular, avoid constructs like object spread**.
+Decoding should validate that the data is not malformed: see [encoding validation](#encoding-validation) for more details.
+
+With the exception of primitives, storage format types should never be exposed in the public API.
+
+> Note: due to API-extractor implementation details, the typebox schemas for primitive types _cannot_ share a name with the primitive type,
+> as it exposes _both_ the value and the type exported under the same name, even if the export is specified via `export type`.
+> For example, the typebox schema for `ChangesetLocalId` is named `ChangesetLocalIdSchema`.
+
+Using this structure, SharedTree will have access to a library of codecs capable of encoding/decoding between
+the in-memory format and some persisted format.
+
+## Encoding Validation
+
+Validating that an encoded format thoroughly matches what the code expects and failing fast otherwise is valuable to reduce the risk of data corruption:
+it's a lot easier to investigate and deploy fixes for documents when incompatible clients haven't also changed the contents of a file.
+
+For this reason, encoded data formats should declare JSON schema for the purpose of runtime validation.
+
+> In the near term, we're using [typebox](https://github.com/sinclairzx81/typebox) to declare these schemas.
+> This choice is a matter of convenience: its API naturally matches the expressiveness of typescript types.
+
+Since format validation does incur runtime and bundle-size cost to obtain additional safety,
+whether or not to perform it should ultimately be left as a policy choice to the user of shared-tree.
+This choice will probably also be made in the shared-tree factory by providing a `JsonValidator`,
+but it doesn't need to be persisted and can be changed at will
+(there's no issue with collaboration between clients that have different policies around how much
+of the persisted data should be validated).
+
+An out-of-the-box implementation of `JsonValidator` based on Typebox's JSON validator is provided,
+but application authors may feel free to implement their own.
+
+## Test Strategy
+
+This section covers types of tests to include when adding new persisted configuration.
+
+There are a couple different dimensions to consider with respect to testing:
+
+1. SharedTree works correctly for all configurations it can be initialized in when collaborating with SharedTrees with similar configuration
+1. SharedTree is compatible with clients using different source code versions of SharedTree (and the documents those clients may create)
+1. Once supported, SharedTree can correctly execute document upgrade processes (changes to persisted configuration such as write format)
+
+### Configuration Unit Tests
+
+Each codec family should contain a suite of unit tests which verify the in-memory representation can be round-tripped through encoding and decoding.
+When adding a new codec version, the test data for this suite should be augmented if existing data doesn't yield 100% code coverage on the new
+codec version.
+
+If the persisted configuration impacts more than just the data encoding step,
+appropriate unit tests should be added for whatever components that configuration impacts.
+As a simple example, a persisted configuration flag which controls whether SharedTree stores attribution information
+should have unit tests which verify processing ops of various sorts yield reasonable attribution on the parts of the tree they affect.
+
+Example: [experimental/dds/tree2/src/test/feature-libraries/editManagerCodecs.spec.ts](../../src/test/feature-libraries/editManagerCodecs.spec.ts)
+
+### Multiple-configuration Functional Tests
+
+Once SharedTree supports multiple persisted formats, we should modify a small set of functional acceptance tests
+(e.g. `sharedTree.spec.ts`) to run for larger sets of configurations.
+Using `generatePairwiseOptions` will help mitigate the combinatorial explosion concern.
+
+These tests in aggregate will verify that SharedTree works when initialized with some particular configuration
+and collaborates with other SharedTree instances initialized with the same configuration.
+They would reasonably detect basic defects in codecs or problems unrelated to backwards compatibility or any upgrade process.
+
+In the same vein, fuzz tests should cover a variety of valid configurations.
+
+### Snapshot Tests
+
+The last dimension of compatibility concerns direct or indirect collaboration between clients using different versions of SharedTree source code.
+This is a vast area that could use more well-established framework testing support, but snapshot testing is a relatively effective category for
+catching regressions.
+
+The idea behind snapshot testing is to verify a document produced using one version of the code is still usable using another version of the code.
+It's typically implemented by writing some code to generate a set of fixed documents "from scratch," then source-controlling the serialized form
+of those documents after summarization.
+Since the serialized form of the documents correspond to documents produced by an older version of the code, this enables writing a test suite that verifies:
+
+1. The current version of the code serializes each document to exactly match how the older version of the code serialized each document.
+1. The current version of the code is capable of loading documents written using older versions of the code.
+
+A few examples (which may not be exhaustive) of snapshot tests are:
+
+-   [Legacy SharedTree](../../../../../experimental/dds/tree/src/test/Summary.tests.ts)
+-   [Sequence / SharedString](../../../sequence/src/test/snapshotVersion.spec.ts)
+-   [e2e Snapshot tests](../../../../test/snapshots/README.md)
+
+The first two examples generate their "from scratch" documents by directly calling DDS APIs on a newly created document.
+The e2e snapshot tests accomplish "from scratch" generation by serializing the op stream alongside the snapshots and replaying it.
+In addition to verifying serialized states line up between old and current version of the code, it can also be helpful to
+verify equivalence at runtime, which typically gives more friendly error messages.
+
+> Aside: this approach is also a bit more flexible: it's possible that different snapshots can load to produce logically equivalent DDSes.
+> Matrix is an example of this: it uses a pair of permutation vectors mapping logical indices (i.e. "row 5, column 3") to in-memory indices for the contents of cells.
+> Thus, permuting both the permutation vectors and the cell data contained within a snapshot would not logically change its data.
+
+Snapshot tests are effective at catching changes which inadvertently modify the document format over time.
+
+Tree2's full-scale snapshot tests can be found at [experimental/dds/tree2/src/test/snapshots/summary.spec.ts](../../src/test/snapshots/summary.spec.ts),
+with smaller-scale snapshot tests (e.g. snapshot testing just the SchemaIndex format) nearby.
@@ -78,6 +78,7 @@
 		"@fluidframework/runtime-definitions": "workspace:~",
 		"@fluidframework/runtime-utils": "workspace:~",
 		"@fluidframework/shared-object-base": "workspace:~",
+		"@fluidframework/telemetry-utils": "workspace:~",
 		"@sinclair/typebox": "^0.29.4",
 		"@ungap/structured-clone": "^1.2.0",
 		"sorted-btree": "^1.8.0",
@@ -95,7 +96,6 @@
 		"@fluidframework/container-loader": "workspace:~",
 		"@fluidframework/eslint-config-fluid": "^3.1.0",
 		"@fluidframework/mocha-test-setup": "workspace:~",
-		"@fluidframework/telemetry-utils": "workspace:~",
 		"@fluidframework/test-runtime-utils": "workspace:~",
 		"@fluidframework/test-utils": "workspace:~",
 		"@microsoft/api-extractor": "^7.38.3",

@@ -158,7 +158,7 @@ export class SchemaFactory<TScope extends string, TName extends number | string
 	 *
 	 * @remarks
 	 * There are good [reasons to avoid using null](https://www.npmjs.com/package/%40rushstack/eslint-plugin#rushstackno-new-null) in JavaScript, however sometimes it is desired.
-	 * This {@link TreeNodeSchema} node provide the option to include nulls in trees when desired.
+	 * This {@link TreeNodeSchema} node provides the option to include nulls in trees when desired.
 	 * Unless directly inter-operating with existing data using null, consider other approaches, like wrapping the value in an optional field, or using a more specifically named empty object node.
 	 */
 	public readonly null = nullSchema;

@@ -26,34 +26,35 @@ export interface ITree extends IChannel {
 	 * Returns a tree that is known to be compatible with the provided schema. The returned tree exposes an API that is schema-aware.
 	 *
 	 * @remarks
-	 * If the tree is uninitialized (has no schema and no content), the tree is initialized with the provided `initialTree` and `schema`.
+	 * If the tree is uninitialized (has no schema and no content), the tree is initialized with the `initialTree` and
+	 * view `schema` in the provided `config`.
 	 *
-	 * The tree (now known to have been initialized) has its stored schema checked against the provided view `schema`.
+	 * The tree (now known to have been initialized) has its stored schema checked against the provided view schema.
 	 *
-	 * If the schema are compatible (including updating the stored schema if permitted via `allowedSchemaModifications`),
-	 * a {@link TreeView} is returned, with a schema aware API based on the provided view schema.
+	 * If the schemas are compatible (including updating the stored schema if permitted via `allowedSchemaModifications`),
+	 * a {@link TreeView} is returned, with a schema-aware API based on the provided view schema.
 	 *
-	 * If the schema are not compatible, and exception is thrown.
+	 * If the schemas are not compatible, an exception is thrown.
 	 *
 	 * @privateRemarks
 	 * TODO: make the mismatch case recoverable.
 	 * - Provide a way to make a generic view schema for any document.
-	 * - Produce/throw the error in a document recoverable way (ex: specific exception type or return value).
+	 * - Produce/throw the error in a document-recoverable way (ex: specific exception type or return value).
 	 * TODO: Document and handle what happens when the stored schema changes after schematize has returned. Is some invalidation contract needed? How does editable tree behave?
 	 * TODO: Clarify lifetimes. Ensure calling schematize multiple times does not leak.
-	 * TODO: Support adapters for handling out of schema data.
+	 * TODO: Support adapters for handling out-of-schema data.
 	 *
 	 * Doing initialization here, regardless of `AllowedUpdateType`, allows a small API that is hard to use incorrectly.
-	 * Other approach tend to have leave easy to make mistakes.
-	 * For example, having a separate initialization function means apps can forget to call it, making an app that can only open existing document,
+	 * Other approaches tend to have easy-to-make mistakes.
+	 * For example, having a separate initialization function means apps can forget to call it, making an app that can only open existing documents,
 	 * or call it unconditionally leaving an app that can only create new documents.
-	 * It also would require the schema to be passed into to separate places and could cause issues if they didn't match.
+	 * It also would require the schema to be passed into separate places and could cause issues if they didn't match.
 	 * Since the initialization function couldn't return a typed tree, the type checking wouldn't help catch that.
 	 * Also, if an app manages to create a document, but the initialization fails to get persisted, an app that only calls the initialization function
 	 * on the create code-path (for example how a schematized factory might do it),
 	 * would leave the document in an unusable state which could not be repaired when it is reopened (by the same or other clients).
 	 * Additionally, once out of schema content adapters are properly supported (with lazy document updates),
-	 * this initialization could become just another out of schema content adapter: at tha point it clearly belong here in schematize.
+	 * this initialization could become just another out of schema content adapter: at that point it clearly belongs here in schematize.
 	 */
 	schematize<TRoot extends ImplicitFieldSchema>(
 		config: TreeConfiguration<TRoot>,
@@ -67,9 +68,9 @@ export interface ITree extends IChannel {
 export class TreeConfiguration<TSchema extends ImplicitFieldSchema = ImplicitFieldSchema> {
 	/**
 	 * @param schema - The schema which the application wants to view the tree with.
-	 * @param initialTree - Default tree content to initialize the tree with iff the tree is uninitialized
+	 * @param initialTree - A function that returns the default tree content to initialize the tree with iff the tree is uninitialized
 	 * (meaning it does not even have any schema set at all).
-	 * If the `initialTree` returns any actual node instances, they should be recreated each time the `initialTree` runs.
+	 * If `initialTree` returns any actual node instances, they should be recreated each time `initialTree` runs.
 	 * This is because if the config is used a second time any nodes that were not recreated could error since nodes cannot be inserted into the tree multiple times.
 	 */
 	public constructor(

@@ -171,6 +171,8 @@ export {
 	RevisionMetadataSource,
 	revisionMetadataSourceFromInfo,
 	RevisionInfo,
+	EncodedRevisionTag,
+	EncodedChangeAtomId,
 } from "./rebase";
 
 export {

@@ -12,6 +12,8 @@ export {
 	GraphCommit,
 	RevisionTag,
 	RevisionTagSchema,
+	EncodedRevisionTag,
+	EncodedChangeAtomId,
 	ChangesetLocalId,
 	ChangeAtomId,
 	ChangeAtomIdMap,

@@ -21,7 +21,8 @@ export const SessionIdSchema = brandedStringType<SessionId>();
  */
 // TODO: These can be compressed by an `IdCompressor` in the future
 export type RevisionTag = StableId;
-export const RevisionTagSchema = brandedStringType<StableId>();
+export type EncodedRevisionTag = Brand<string, "EncodedRevisionTag">;
+export const RevisionTagSchema = brandedStringType<EncodedRevisionTag>();
 
 /**
  * An ID which is unique within a revision of a `ModularChangeset`.
@@ -50,6 +51,11 @@ export interface ChangeAtomId {
 	readonly localId: ChangesetLocalId;
 }
 
+export interface EncodedChangeAtomId {
+	readonly revision?: EncodedRevisionTag;
+	readonly localId: ChangesetLocalId;
+}
+
 /**
  * @alpha
  */