diff --git a/.github/workflows/ci_cd.yml b/.github/workflows/ci_cd.yml index da492db..e86959e 100644 --- a/.github/workflows/ci_cd.yml +++ b/.github/workflows/ci_cd.yml @@ -50,7 +50,13 @@ jobs: npx playwright install fi - - name: Run tests on CI + - name: Run CLI smoke tests + run: npm run test:cli + + - name: Validate Malloy models + run: npm run test:models + + - name: Run E2E tests run: npm test - uses: actions/upload-artifact@v6 @@ -72,11 +78,13 @@ jobs: node-version: lts/* - name: Install dependencies run: npm ci - - name: Build website - run: npm run build - # For gh pages deployment, base path is repository name + - name: Build CLI + run: npm run build:cli + + - name: Build all example sites with landing page + run: npm run build:all env: - BASE_PUBLIC_PATH: /${{ github.event.repository.name }}/ + BASE_PATH: /${{ github.event.repository.name }}/ - name: Upload Build Artifact # only run on pushes to master or workflow_dispatch @@ -123,9 +131,10 @@ jobs: - name: Install Playwright Browsers run: npx playwright install --with-deps - name: Run tests on deployed site + # Tests expect the sample-data example which contains the invoices model run: npm test env: - URL: ${{ needs.deploy.outputs.page_url }} + URL: ${{ needs.deploy.outputs.page_url }}sample-data/ - uses: actions/upload-artifact@v6 if: ${{ !cancelled() }} with: diff --git a/.gitignore b/.gitignore index 8208276..1b9eb04 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,4 @@ dist-ssr /playwright-report/ /blob-report/ /playwright/.cache/ +dist-cli/ diff --git a/models/E-commerce Orders.malloynb b/examples/ecommerce/E-commerce Orders.malloynb similarity index 100% rename from models/E-commerce Orders.malloynb rename to examples/ecommerce/E-commerce Orders.malloynb diff --git a/models/data/contracts.csv b/examples/ecommerce/data/contracts.csv similarity index 100% rename from models/data/contracts.csv rename to examples/ecommerce/data/contracts.csv diff --git a/models/data/orders.csv 
b/examples/ecommerce/data/orders.csv similarity index 100% rename from models/data/orders.csv rename to examples/ecommerce/data/orders.csv diff --git a/models/ecommerce_orders.malloy b/examples/ecommerce/ecommerce_orders.malloy similarity index 100% rename from models/ecommerce_orders.malloy rename to examples/ecommerce/ecommerce_orders.malloy diff --git a/examples/huggingface/GitHub Events.malloynb b/examples/huggingface/GitHub Events.malloynb new file mode 100644 index 0000000..a7d437f --- /dev/null +++ b/examples/huggingface/GitHub Events.malloynb @@ -0,0 +1,41 @@ +>>>markdown +# GitHub Events Analysis + +Explore GitHub activity data from the Hugging Face datasets. This model analyzes push events, pull requests, issues, and other GitHub activities. + +**Data Source:** [alvarobartt/github-events](https://huggingface.co/datasets/alvarobartt/github-events) on Hugging Face + +>>>malloy +import "github_events.malloy" + +>>>markdown +## Overview Dashboard + +Let's start with an overview of all GitHub events in the dataset: + +>>>malloy +run: github_events -> overview + +>>>markdown +## Event Type Distribution + +Breaking down events by their type: + +>>>malloy +run: github_events -> by_event_type + +>>>markdown +## Top Repositories + +The most active repositories in the dataset: + +>>>malloy +run: github_events -> by_repo + +>>>markdown +## Top Contributors + +Who are the most active contributors? + +>>>malloy +run: github_events -> top_contributors diff --git a/examples/huggingface/IMDB Movies.malloynb b/examples/huggingface/IMDB Movies.malloynb new file mode 100644 index 0000000..cf553b0 --- /dev/null +++ b/examples/huggingface/IMDB Movies.malloynb @@ -0,0 +1,49 @@ +>>>markdown +# IMDB Movies Analysis + +Explore movie ratings, genres, and trends from the IMDB dataset. This data is sourced from Hugging Face and includes movie titles, ratings, vote counts, and more. 
+ +**Data Source:** [Pablinho/imdb-data](https://huggingface.co/datasets/Pablinho/imdb-data) on Hugging Face + +>>>malloy +import "imdb_movies.malloy" + +>>>markdown +## Overview Dashboard + +A comprehensive look at the movie dataset: + +>>>malloy +run: movies -> overview + +>>>markdown +## Top Rated Movies + +The highest-rated movies with significant vote counts (>10,000 votes): + +>>>malloy +run: movies -> top_rated + +>>>markdown +## Most Popular Movies + +Movies sorted by number of votes: + +>>>malloy +run: movies -> most_popular + +>>>markdown +## Genre Analysis + +Deep dive into movie genres with rating trends over time: + +>>>malloy +run: movies -> genre_analysis + +>>>markdown +## Movies by Decade + +How has movie production and quality changed over the decades? + +>>>malloy +run: movies -> by_decade diff --git a/examples/huggingface/NYC Taxi Trips.malloynb b/examples/huggingface/NYC Taxi Trips.malloynb new file mode 100644 index 0000000..016c313 --- /dev/null +++ b/examples/huggingface/NYC Taxi Trips.malloynb @@ -0,0 +1,57 @@ +>>>markdown +# NYC Taxi Trips Analysis + +Analyze New York City yellow taxi trip data. This dataset contains trip records including pickup/dropoff times, distances, fares, tips, and payment methods. + +**Data Source:** [codelion/nyctaxi](https://huggingface.co/datasets/codelion/nyctaxi) on Hugging Face + +>>>malloy +import "nyc_taxi.malloy" + +>>>markdown +## Overview Dashboard + +A comprehensive look at taxi trip patterns: + +>>>malloy +run: taxi_trips -> overview + +>>>markdown +## Hourly Patterns + +When do New Yorkers take taxi rides? Let's look at trips by hour of day: + +>>>malloy +run: taxi_trips -> by_hour + +>>>markdown +## Daily Patterns + +Trip distribution across days of the week: + +>>>malloy +run: taxi_trips -> by_day + +>>>markdown +## Payment Analysis + +How do people pay for their rides, and how does tipping vary by payment method? 
+
+>>>malloy
+run: taxi_trips -> by_payment
+
+>>>markdown
+## Fare Analysis by Hour
+
+Detailed breakdown of fares throughout the day:
+
+>>>malloy
+run: taxi_trips -> fare_analysis
+
+>>>markdown
+## Long Distance Trips
+
+The longest trips in the dataset:
+
+>>>malloy
+run: taxi_trips -> long_trips
diff --git a/examples/huggingface/github_events.malloy b/examples/huggingface/github_events.malloy
new file mode 100644
index 0000000..a4bfd7b
--- /dev/null
+++ b/examples/huggingface/github_events.malloy
@@ -0,0 +1,104 @@
+-- GitHub Events Analysis Model
+-- Analyzes GitHub activity data from Hugging Face datasets
+-- Uses: hf://datasets/alvarobartt/github-events/data/train-00000-of-00001.parquet
+
+source: github_events is duckdb.table('hf://datasets/alvarobartt/github-events/data/train-00000-of-00001.parquet') extend {
+  -- Core measures
+  measure: event_count is count()
+  measure: unique_repos is count(distinct repo.name)
+  measure: unique_actors is count(distinct actor.login)
+
+  -- Event type measures (filtered counts, one per GitHub event type)
+  measure: push_events is count() { where: `type` = 'PushEvent' }
+  measure: pr_events is count() { where: `type` = 'PullRequestEvent' }
+  measure: issue_events is count() { where: `type` = 'IssuesEvent' }
+  measure: watch_events is count() { where: `type` = 'WatchEvent' }
+  measure: fork_events is count() { where: `type` = 'ForkEvent' }
+  measure: create_events is count() { where: `type` = 'CreateEvent' }
+
+  -- Dimensions (flat aliases for the nested repo/actor fields and the event day)
+  dimension: event_type is `type`
+  dimension: repo_name is repo.name
+  dimension: actor_login is actor.login
+  dimension: event_date is created_at::date
+
+  -- Views
+  view: by_event_type is {
+    group_by: event_type
+    aggregate:
+      event_count
+      unique_repos
+      unique_actors
+    order_by: event_count desc
+  }
+
+  view: by_repo is {
+    group_by: repo_name
+    aggregate:
+      event_count
+      unique_actors
+    order_by: event_count desc
+    limit: 20
+  }
+
+  view: by_actor is {
+    group_by: actor_login
+    aggregate:
+      event_count
+      unique_repos
+    order_by: event_count desc
+    limit: 20
+  }
+
+  view: activity_timeline is {
+    group_by: event_date
+    aggregate:
+      event_count
+      unique_repos
+      unique_actors
+    order_by: event_date
+  }
+
+  # dashboard
+  view: overview is {
+    aggregate:
+      event_count
+      unique_repos
+      unique_actors
+      push_events
+      pr_events
+      issue_events
+      watch_events
+      fork_events
+    # bar_chart
+    nest: by_event_type
+    # bar_chart
+    nest: top_repos is by_repo
+    # line_chart
+    nest: activity_timeline
+  }
+
+  -- Top contributors view (top 15 actors, each with a per-event-type breakdown)
+  view: top_contributors is {
+    group_by: actor_login
+    aggregate:
+      event_count
+      push_events
+      pr_events
+      issue_events
+    # bar_chart
+    nest: activity_by_type is {
+      group_by: event_type
+      aggregate: event_count
+    }
+    order_by: event_count desc
+    limit: 15
+  }
+}
+
+-- Named queries
+query: github_overview is github_events -> overview
+query: event_breakdown is github_events -> by_event_type
+query: top_repos is github_events -> by_repo
+query: contributors is github_events -> top_contributors
+query: timeline is github_events -> activity_timeline
diff --git a/examples/huggingface/imdb_movies.malloy b/examples/huggingface/imdb_movies.malloy
new file mode 100644
index 0000000..ef61af9
--- /dev/null
+++ b/examples/huggingface/imdb_movies.malloy
@@ -0,0 +1,140 @@
+-- IMDB Movies Analysis Model
+-- Analyzes movie data from the IMDB dataset on Hugging Face
+-- Uses: hf://datasets/Pablinho/imdb-data/data/train-00000-of-00001.parquet
+
+source: movies is duckdb.table('hf://datasets/Pablinho/imdb-data/data/train-00000-of-00001.parquet') extend {
+  -- Core measures
+  measure: movie_count is count()
+  measure: avg_rating is avg(score)
+  measure: total_votes is sum(votes)
+  measure: avg_votes is avg(votes)
+  measure: avg_runtime is avg(runtime)
+
+  -- Rating distribution (counts of movies at the top and bottom of the score range)
+  measure: highly_rated is count() { where: score >= 8.0 }
+  measure: low_rated is count() { where: score < 5.0 }
+
+  -- Dimensions (`year` is a Malloy keyword, so the column is back-quoted wherever it is referenced)
+  dimension: decade is floor(`year` / 10) * 10
+  dimension: rating_tier is
+    pick 'Excellent (8+)' when score >= 8.0
+    pick 'Good (7-8)' when score >= 7.0
+    pick 'Average (5-7)' when score >= 5.0
+    else 'Poor (<5)'
+
+  -- Basic views
+  view: all_movies is {
+    select:
+      title
+      `year`
+      score
+      votes
+      runtime
+      genre
+    order_by: score desc
+    limit: 100
+  }
+
+  view: by_year is {
+    group_by: `year`
+    aggregate:
+      movie_count
+      avg_rating
+      total_votes
+    order_by: `year`
+  }
+
+  view: by_decade is {
+    group_by: decade
+    aggregate:
+      movie_count
+      avg_rating
+      total_votes
+      highly_rated
+    order_by: decade
+  }
+
+  view: by_genre is {
+    group_by: genre
+    aggregate:
+      movie_count
+      avg_rating
+      avg_votes
+    order_by: movie_count desc
+  }
+
+  view: by_rating_tier is {
+    group_by: rating_tier
+    aggregate:
+      movie_count
+      avg_votes
+    order_by: movie_count desc
+  }
+
+  # dashboard
+  view: overview is {
+    aggregate:
+      movie_count
+      avg_rating
+      total_votes
+      highly_rated
+      low_rated
+    # bar_chart
+    nest: by_decade
+    # bar_chart
+    nest: by_genre
+    # bar_chart
+    nest: by_rating_tier
+  }
+
+  -- Top rated movies view (only titles with more than 10000 votes)
+  view: top_rated is {
+    select:
+      title
+      `year`
+      score
+      votes
+      genre
+      runtime
+    where: votes > 10000
+    order_by: score desc
+    limit: 50
+  }
+
+  -- Most voted movies
+  view: most_popular is {
+    select:
+      title
+      `year`
+      score
+      votes
+      genre
+    order_by: votes desc
+    limit: 50
+  }
+
+  -- Genre analysis with nested details (average rating per decade inside each genre)
+  view: genre_analysis is {
+    group_by: genre
+    aggregate:
+      movie_count
+      avg_rating
+      total_votes
+      highly_rated
+    # line_chart
+    nest: rating_over_time is {
+      group_by: decade
+      aggregate: avg_rating
+      order_by: decade
+    }
+    order_by: movie_count desc
+  }
+}
+
+-- Named queries
+query: movies_overview is movies -> overview
+query: top_movies is movies -> top_rated
+query: popular_movies is movies -> most_popular
+query: movies_by_year is movies -> by_year
+query: movies_by_genre is movies -> genre_analysis
+query: all_movies_list is movies -> all_movies
diff --git a/examples/huggingface/nyc_taxi.malloy b/examples/huggingface/nyc_taxi.malloy
new file mode 100644
index 0000000..e3b3299
---
/dev/null
+++ b/examples/huggingface/nyc_taxi.malloy
@@ -0,0 +1,164 @@
+-- NYC Taxi Trips Analysis Model
+-- Analyzes New York City taxi trip data
+-- Uses: hf://datasets/codelion/nyctaxi/yellow_tripdata_2023-01.parquet
+
+source: taxi_trips is duckdb.table('hf://datasets/codelion/nyctaxi/yellow_tripdata_2023-01.parquet') extend {
+  -- Core measures
+  measure: trip_count is count()
+  measure: total_fare is sum(fare_amount)
+  measure: total_tips is sum(tip_amount)
+  measure: total_distance is sum(trip_distance)
+  measure: avg_fare is avg(fare_amount)
+  measure: avg_tip is avg(tip_amount)
+  measure: avg_distance is avg(trip_distance)
+  measure: avg_passengers is avg(passenger_count)
+
+  -- Trip duration in minutes (Malloy cannot subtract timestamps; measure the interval in seconds, then scale)
+  dimension: trip_duration_mins is
+    seconds(tpep_pickup_datetime to tpep_dropoff_datetime) / 60
+
+  measure: avg_duration_mins is avg(trip_duration_mins)
+
+  -- Tip percentage (0 when the fare is not positive, which also avoids dividing by zero)
+  dimension: tip_percentage is
+    case when fare_amount > 0 then (tip_amount / fare_amount) * 100 else 0 end
+  measure: avg_tip_percentage is avg(tip_percentage)
+
+  -- Time dimensions (day_of_week() yields 1..7 with 1 = Sunday; pickup_day is shifted to 0 = Sunday .. 6 = Saturday)
+  dimension: pickup_hour is hour(tpep_pickup_datetime)
+  dimension: pickup_day is day_of_week(tpep_pickup_datetime) - 1
+  dimension: pickup_date is tpep_pickup_datetime::date
+
+  -- Day name (every branch of a pick expression needs its own `pick` keyword)
+  dimension: day_name is
+    pick 'Sunday' when pickup_day = 0
+    pick 'Monday' when pickup_day = 1
+    pick 'Tuesday' when pickup_day = 2
+    pick 'Wednesday' when pickup_day = 3
+    pick 'Thursday' when pickup_day = 4
+    pick 'Friday' when pickup_day = 5
+    pick 'Saturday' when pickup_day = 6
+    else 'Unknown'
+
+  -- Payment type dimension (labels for the numeric payment_type codes)
+  dimension: payment_type_name is
+    pick 'Credit Card' when payment_type = 1
+    pick 'Cash' when payment_type = 2
+    pick 'No Charge' when payment_type = 3
+    pick 'Dispute' when payment_type = 4
+    pick 'Unknown' when payment_type = 5
+    pick 'Voided' when payment_type = 6
+    else 'Other'
+
+  -- Views
+  view: by_hour is {
+    group_by: pickup_hour
+    aggregate:
+      trip_count
+      avg_fare
+      avg_distance
+      avg_tip_percentage
+    order_by: pickup_hour
+  }
+
+  view: by_day is {
+    group_by: day_name
+    aggregate:
+      trip_count
+      total_fare
+      avg_fare
+      avg_tip
+    order_by: trip_count desc
+  }
+
+  view: by_payment is {
+    group_by: payment_type_name
+    aggregate:
+      trip_count
+      total_fare
+      avg_tip
+      avg_tip_percentage
+    order_by: trip_count desc
+  }
+
+  view: by_passenger_count is {
+    group_by: passenger_count
+    aggregate:
+      trip_count
+      avg_fare
+      avg_distance
+    order_by: passenger_count
+  }
+
+  view: daily_trends is {
+    group_by: pickup_date
+    aggregate:
+      trip_count
+      total_fare
+      avg_fare
+      avg_distance
+    order_by: pickup_date
+  }
+
+  # dashboard
+  view: overview is {
+    aggregate:
+      trip_count
+      total_fare
+      total_tips
+      total_distance
+      avg_fare
+      avg_tip
+      avg_distance
+      avg_duration_mins
+      avg_tip_percentage
+    # bar_chart
+    nest: by_hour
+    # bar_chart
+    nest: by_day
+    # bar_chart
+    nest: by_payment
+    # line_chart
+    nest: daily_trends
+  }
+
+  -- Fare analysis (hourly fares with a nested payment-method breakdown)
+  view: fare_analysis is {
+    group_by: pickup_hour
+    aggregate:
+      trip_count
+      avg_fare
+      avg_tip
+      avg_distance
+    # bar_chart
+    nest: by_payment_method is {
+      group_by: payment_type_name
+      aggregate:
+        trip_count
+        avg_fare
+    }
+    order_by: pickup_hour
+  }
+
+  -- Long trips analysis (rows with trip_distance > 20, longest first)
+  view: long_trips is {
+    select:
+      tpep_pickup_datetime
+      tpep_dropoff_datetime
+      trip_distance
+      fare_amount
+      tip_amount
+      passenger_count
+    where: trip_distance > 20
+    order_by: trip_distance desc
+    limit: 100
+  }
+}
+
+-- Named queries
+query: taxi_overview is taxi_trips -> overview
+query: hourly_analysis is taxi_trips -> by_hour
+query: payment_analysis is taxi_trips -> by_payment
+query: fare_breakdown is taxi_trips -> fare_analysis
+query: long_distance_trips is taxi_trips -> long_trips
+query: daily_summary is taxi_trips -> daily_trends
diff --git a/models/Invoices.malloynb b/examples/sample-data/Invoices.malloynb
similarity index 100%
rename from models/Invoices.malloynb
rename to examples/sample-data/Invoices.malloynb
diff --git a/models/Kids Screen Time.malloynb
b/examples/sample-data/Kids Screen Time.malloynb similarity index 100% rename from models/Kids Screen Time.malloynb rename to examples/sample-data/Kids Screen Time.malloynb diff --git a/models/Sales Orders.malloynb b/examples/sample-data/Sales Orders.malloynb similarity index 100% rename from models/Sales Orders.malloynb rename to examples/sample-data/Sales Orders.malloynb diff --git a/models/Sample Data.malloynb b/examples/sample-data/Sample Data.malloynb similarity index 100% rename from models/Sample Data.malloynb rename to examples/sample-data/Sample Data.malloynb diff --git a/models/SuperStore.malloynb b/examples/sample-data/SuperStore.malloynb similarity index 100% rename from models/SuperStore.malloynb rename to examples/sample-data/SuperStore.malloynb diff --git a/models/Users and Products.malloynb b/examples/sample-data/Users and Products.malloynb similarity index 100% rename from models/Users and Products.malloynb rename to examples/sample-data/Users and Products.malloynb diff --git a/models/business_overview.malloy b/examples/sample-data/business_overview.malloy similarity index 100% rename from models/business_overview.malloy rename to examples/sample-data/business_overview.malloy diff --git a/models/data/Indian_Kids_Screen_Time.csv b/examples/sample-data/data/Indian_Kids_Screen_Time.csv similarity index 100% rename from models/data/Indian_Kids_Screen_Time.csv rename to examples/sample-data/data/Indian_Kids_Screen_Time.csv diff --git a/models/data/invoices.parquet b/examples/sample-data/data/invoices.parquet similarity index 100% rename from models/data/invoices.parquet rename to examples/sample-data/data/invoices.parquet diff --git a/models/data/products.jsonl b/examples/sample-data/data/products.jsonl similarity index 100% rename from models/data/products.jsonl rename to examples/sample-data/data/products.jsonl diff --git a/models/data/sales_orders.xlsx b/examples/sample-data/data/sales_orders.xlsx similarity index 100% rename from 
models/data/sales_orders.xlsx rename to examples/sample-data/data/sales_orders.xlsx diff --git a/models/data/users.json b/examples/sample-data/data/users.json similarity index 100% rename from models/data/users.json rename to examples/sample-data/data/users.json diff --git a/models/invoices.malloy b/examples/sample-data/invoices.malloy similarity index 100% rename from models/invoices.malloy rename to examples/sample-data/invoices.malloy diff --git a/models/kids_screen_time.malloy b/examples/sample-data/kids_screen_time.malloy similarity index 100% rename from models/kids_screen_time.malloy rename to examples/sample-data/kids_screen_time.malloy diff --git a/models/sales_orders.malloy b/examples/sample-data/sales_orders.malloy similarity index 100% rename from models/sales_orders.malloy rename to examples/sample-data/sales_orders.malloy diff --git a/models/sample_data.malloy b/examples/sample-data/sample_data.malloy similarity index 100% rename from models/sample_data.malloy rename to examples/sample-data/sample_data.malloy diff --git a/models/superstore.malloy b/examples/sample-data/superstore.malloy similarity index 100% rename from models/superstore.malloy rename to examples/sample-data/superstore.malloy diff --git a/models/users_products.malloy b/examples/sample-data/users_products.malloy similarity index 100% rename from models/users_products.malloy rename to examples/sample-data/users_products.malloy diff --git a/landing/index.html b/landing/index.html new file mode 100644 index 0000000..c37b3fc --- /dev/null +++ b/landing/index.html @@ -0,0 +1,233 @@ + + +
+ + + + + + +Static site generator for Malloy data models and notebooks
+Orders, products, and customer analysis with multi-table joins and visualizations.
+ Local CSV + + +GitHub Events, IMDB Movies, and NYC Taxi data from remote Hugging Face datasets.
+ Remote Data + + +Various local data formats including CSV, Parquet, JSON, and Excel files.
+ Mixed Formats + +# Build a site from your Malloy models
+npx @aszenz/data-explorer build ./models -o ./dist
+# Preview the built site
+npx @aszenz/data-explorer preview ./dist
+# Customize title and description
+npx @aszenz/data-explorer build ./models \
+ --title "My Analytics" \
+ --description "Explore data" \
+ --output ./dist
+ ${ex.description}
+ ${ex.tag} + ` + ).join(""); + + return ` + + + + + + + + +Static site generator for Malloy data models and notebooks
+# Build a site from your Malloy models
+npx @aszenz/data-explorer build ./models -o ./dist
+# Preview the built site
+npx @aszenz/data-explorer preview ./dist
+# Customize title and description
+npx @aszenz/data-explorer build ./models \\
+ --title "My Analytics" \\
+ --description "Explore data" \\
+ --output ./dist
+ - Explore and analyze your{" "} + {siteConfig.description}{" "} - Malloy models and notebooks + Powered by Malloy