posit-dev · simonpcouch · Apr 29, 2025 · Apr 29, 2025 · Apr 29, 2025 · Apr 29, 2025
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -22,7 +22,9 @@ BugReports: https://github.com/posit-dev/btw/issues
 Imports:
     cli,
     clipr,
+    DBI,
     dplyr,
+    duckdb,
     ellmer (>= 0.1.1.9000),
     fs,
     jsonlite,

diff --git a/R/tool-query.R b/R/tool-query.R
@@ -0,0 +1,47 @@
+#' Perform a SQL query on the data, and return the results as JSON.
+#'
+#' The object named by `data_frame` is looked up with [get()] and registered
+#' on the shared DuckDB connection under that same name, so `query` can refer
+#' to it as a table.
+#'
+#' @param query A DuckDB SQL query; must be a SELECT statement.
+#' @param data_frame The name of the data frame.
+#' @return The results of the query as a JSON string.
+btw_tool_env_query_data_frame <- function(query, data_frame) {
+  d <- get(data_frame)
+
+  # `get()` resolves any object with that name (e.g. a function), so fail
+  # early with a clear message instead of an obscure registration error.
+  if (!is.data.frame(d)) {
+    cli::cli_abort("{.val {data_frame}} is not a data frame.")
+  }
+
+  conn <- btw_connection()
+
+  # Always (re-)register the data frame: registering only when the name is
+  # not yet known would keep serving the object from the first call even
+  # after the data frame has been modified in R.
+  duckdb::duckdb_register(
+    conn,
+    data_frame,
+    d,
+    overwrite = TRUE,
+    experimental = FALSE
+  )
+
+  res <- DBI::dbGetQuery(conn, query)
+
+  btw_tool_env_describe_data_frame(res, format = "json", dims = c(Inf, Inf))
+}
+
+# Register the query tool in the "env" group. `tool` is a function, so the
+# ellmer tool definition is only built when that function is called.
+.btw_add_to_tools(
+  name = "btw_tool_env_query_data_frame",
+  group = "env",
+  tool = function() {
+    description <-
+      "Run a DuckDB SQL query against a data frame.
+         Use this tool instead of btw_tool_env_describe_data_frame to run more
+         targeted queries, e.g. calculating statistics on specific columns."
+    query_type <- ellmer::type_string("A DuckDB SQL query, as a string.")
+    data_frame_type <- ellmer::type_string(
+      "The name of the data frame, as a string."
+    )
+
+    ellmer::tool(
+      btw_tool_env_query_data_frame,
+      .name = "btw_tool_env_query_data_frame",
+      .description = description,
+      query = query_type,
+      data_frame = data_frame_type
+    )
+  }
+)
+
+# Lazily open the DuckDB connection cached in `.globals$conn`.
+#
+# A new in-memory connection is created when none is cached yet, or when the
+# cached one is no longer valid (e.g. it has been disconnected). A valid
+# cached connection is left untouched.
+btw_connect <- function() {
+  conn <- .globals$conn
+
+  # `||` short-circuits, so `dbIsValid()` is never called on NULL.
+  if (is.null(conn) || !DBI::dbIsValid(conn)) {
+    .globals$conn <- DBI::dbConnect(duckdb::duckdb(), dbdir = ":memory:")
+  }
+}
+
+# Return the DuckDB connection stored in `.globals`, after `btw_connect()`
+# has had the chance to create it.
+btw_connection <- function() {
+  btw_connect()
+  .globals[["conn"]]
+}
diff --git a/R/utils.R b/R/utils.R
@@ -1,3 +1,5 @@
+.globals <- new_environment()  # package state, e.g. the DuckDB connection
+
 pandoc_convert <- function(path, ..., from = "html", to = "markdown") {
   tmp_file <- withr::local_tempfile()
 

diff --git a/man/btw_register_tools.Rd b/man/btw_register_tools.Rd
diff --git a/man/btw_tool_env_query_data_frame.Rd b/man/btw_tool_env_query_data_frame.Rd
diff --git a/tests/testthat/_snaps/tool-query.md b/tests/testthat/_snaps/tool-query.md
@@ -0,0 +1,18 @@
+# btw_tool_env_query_data_frame() works
+
+    Code
+      btw_tool_env_query_data_frame("SELECT mpg FROM mtcars LIMIT 5;", "mtcars")
+    Output
+      [1] "```json"                                                                                       
+      [2] "[\n  {\"mpg\":21},\n  {\"mpg\":21},\n  {\"mpg\":22.8},\n  {\"mpg\":21.4},\n  {\"mpg\":18.7}\n]"
+      [3] "```"                                                                                           
+
+---
+
+    Code
+      btw_tool_env_query_data_frame("SELECT mpg FROM mtcars LIMIT 5;", "mtcars")
+    Output
+      [1] "```json"                                                                                       
+      [2] "[\n  {\"mpg\":21},\n  {\"mpg\":21},\n  {\"mpg\":22.8},\n  {\"mpg\":21.4},\n  {\"mpg\":18.7}\n]"
+      [3] "```"                                                                                           
+
diff --git a/tests/testthat/test-tool-query.R b/tests/testthat/test-tool-query.R
@@ -0,0 +1,17 @@
+test_that("btw_tool_env_query_data_frame() works", {
+  # can run a simple query (the data frame is looked up by its name)
+  expect_snapshot(
+    btw_tool_env_query_data_frame(
+      "SELECT mpg FROM mtcars LIMIT 5;",
+      "mtcars"
+    )
+  )
+
+  # can run a query against the same table twice (name already registered)
+  expect_snapshot(
+    btw_tool_env_query_data_frame(
+      "SELECT mpg FROM mtcars LIMIT 5;",
+      "mtcars"
+    )
+  )
+})
-Original file line number
+Diff line change
@@ Expand Up / @@ -22,7 +22,9 @@ BugReports: https://github.com/posit-dev/btw/issues @@
     Imports:
         cli,
         clipr,
+        DBI,
         dplyr,
+        duckdb,
         ellmer (>= 0.1.1.9000),
         fs,
         jsonlite,
@@ Expand Down @@