Skip to content

Commit

Permalink
[SNOW-172] Add filter to file_latest and add comments (#128)
Browse files Browse the repository at this point in the history
* add filter to file_latest and add comments

* Update V2.38.0__reintroduce_file_latest_dynamic_table.sql

Add NOT IS_PREVIEW filter

* Update V2.38.0__reintroduce_file_latest_dynamic_table.sql

move not is_preview filter
  • Loading branch information
danlu1 authored and philerooski committed Feb 6, 2025
1 parent d5a6482 commit 948e550
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
-- Introduce the dynamic table
USE SCHEMA {{database_name}}.synapse; --noqa: JJ01,PRS,TMP
CREATE OR REPLACE DYNAMIC TABLE FILE_LATEST
TARGET_LAG = '1 day'
WAREHOUSE = compute_xsmall
AS
WITH dedup_filesnapshots AS (
SELECT
*
FROM {{database_name}}.SYNAPSE_RAW.FILESNAPSHOTS --noqa: TMP
WHERE
SNAPSHOT_DATE >= CURRENT_TIMESTAMP - INTERVAL '30 days' AND NOT IS_PREVIEW
QUALIFY
ROW_NUMBER() OVER (
PARTITION BY ID
ORDER BY CHANGE_TIMESTAMP DESC, SNAPSHOT_TIMESTAMP DESC
) = 1
)
SELECT
CHANGE_TYPE,
CHANGE_TIMESTAMP,
CHANGE_USER_ID,
SNAPSHOT_TIMESTAMP,
ID,
CREATED_BY,
CREATED_ON,
MODIFIED_ON,
CONCRETE_TYPE,
CONTENT_MD5,
CONTENT_TYPE,
FILE_NAME,
STORAGE_LOCATION_ID,
CONTENT_SIZE,
BUCKET,
KEY,
PREVIEW_ID,
IS_PREVIEW,
STATUS,
SNAPSHOT_DATE
FROM
dedup_filesnapshots
WHERE
CHANGE_TYPE != 'DELETE';
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
-- Add table and column comments to userprofile_latest dynamic table
USE SCHEMA {{database_name}}.synapse; --noqa: JJ01,PRS,TMP
-- Table comments
COMMENT ON DYNAMIC TABLE FILE_LATEST IS 'This dynamic table contains the latest snapshot of files during the past 30 days. Snapshots are taken when files are created or modified. Note: Snapshots are also taken periodically and independently of the changes. The snapshot_timestamp records when the snapshot was taken.';
-- Column comments
COMMENT ON COLUMN FILE_LATEST.CHANGE_TYPE IS 'The type of change that occurred on the file handle, e.g., CREATE, UPDATE, DELETE.';
COMMENT ON COLUMN FILE_LATEST.CHANGE_TIMESTAMP IS 'The time when the change (created/updated/deleted) on the file is pushed to the queue for snapshotting.';
COMMENT ON COLUMN FILE_LATEST.CHANGE_USER_ID IS 'The unique identifier of the user who made the change to the file.';
COMMENT ON COLUMN FILE_LATEST.SNAPSHOT_TIMESTAMP IS 'The time when the snapshot was taken (It is usually after the change happened).';
COMMENT ON COLUMN FILE_LATEST.ID IS 'The unique identifier of the file handle.';
COMMENT ON COLUMN FILE_LATEST.CREATED_BY IS 'The unique identifier of the user who created the file handle.';
COMMENT ON COLUMN FILE_LATEST.CREATED_ON IS 'The creation timestamp of the file handle.';
COMMENT ON COLUMN FILE_LATEST.MODIFIED_ON IS 'The most recent change time of the file handle.';
COMMENT ON COLUMN FILE_LATEST.CONCRETE_TYPE IS 'The type of the file handle. Allowed file handles are: S3FileHandle, ProxyFileHandle, ExternalFileHandle, ExternalObjectStoreFileHandle, GoogleCloudFileHandle.';
COMMENT ON COLUMN FILE_LATEST.CONTENT_MD5 IS 'The md5 hash (using MD5 algorithm) of the file referenced by the file handle.';
COMMENT ON COLUMN FILE_LATEST.CONTENT_TYPE IS 'Metadata about the content of the file, e.g., application/json, application/zip, application/octet-stream.';
COMMENT ON COLUMN FILE_LATEST.FILE_NAME IS 'The name of the file referenced by the file handle.';
COMMENT ON COLUMN FILE_LATEST.STORAGE_LOCATION_ID IS 'The identifier of the environment, where the physical files are stored.';
COMMENT ON COLUMN FILE_LATEST.CONTENT_SIZE IS 'The size of the file referenced by the file handle.';
COMMENT ON COLUMN FILE_LATEST.BUCKET IS 'The bucket where the file is physically stored. Applicable for s3 and GCP, otherwise empty.';
COMMENT ON COLUMN FILE_LATEST.KEY IS 'The key name uniquely identifies the object (file) in the bucket.';
COMMENT ON COLUMN FILE_LATEST.PREVIEW_ID IS 'The identifier of the file handle that contains a preview of the file referenced by this file handle.';
COMMENT ON COLUMN FILE_LATEST.IS_PREVIEW IS 'If true, the file referenced by this file handle is a preview of another file.';
COMMENT ON COLUMN FILE_LATEST.STATUS IS 'The availability status of the file referenced by the file handle. AVAILABLE: accessible via Synapse; UNLINKED: not referenced by Synapse and therefore available for garbage collection; ARCHIVED: the file has been garbage collected.';
COMMENT ON COLUMN FILE_LATEST.SNAPSHOT_DATE IS 'The data is partitioned for fast and cost effective queries. The snapshot_timestamp field is converted into a date and stored in the snapshot_date field for partitioning. The date should be used as a condition (WHERE CLAUSE) in the queries.';

0 comments on commit 948e550

Please sign in to comment.