From 24bfd66adfce959326a23198ed447cb24ed85634 Mon Sep 17 00:00:00 2001 From: puchacz Date: Fri, 24 Jan 2025 20:52:14 +0100 Subject: [PATCH] add documentation article --- .../operating-system-as-a-data-source.md | 29 ++++++--- .../turning-csv-into-json.md | 60 +++++++++++++++++++ 2 files changed, 80 insertions(+), 9 deletions(-) create mode 100644 docs/practical-examples-and-applications/turning-csv-into-json.md diff --git a/docs/practical-examples-and-applications/operating-system-as-a-data-source.md b/docs/practical-examples-and-applications/operating-system-as-a-data-source.md index 36c915c..5b86fb1 100644 --- a/docs/practical-examples-and-applications/operating-system-as-a-data-source.md +++ b/docs/practical-examples-and-applications/operating-system-as-a-data-source.md @@ -18,15 +18,15 @@ SELECT ProcessName, Directory, FileName -FROM #os.processes() where ProcessName like '%Musoq%' +FROM #os.processes() WHERE ProcessName LIKE '%Musoq%' ``` ## Finding `.cfg` and `.tmp` Files in Downloads -This query retrieves the file size (`Length`) and the full path (`FullName`) of all files located in the `Downloads` directory of the user `{USER}` that have either a `.cfg` or `.tmp` extension. It searches through all the subdirectories (`true` parameter indicates recursive search) within the specified path for files matching the criteria. +This query retrieves the file size (`Length`) and the full path (`FullPath`) of all files located in the `Downloads` directory of the user `{USER}` that have either a `.cfg` or `.tmp` extension. It searches through all the subdirectories (`true` parameter indicates recursive search) within the specified path for files matching the criteria. 
```sql -SELECT Length, FullName FROM #os.files('C:\Users\{USER}\Downloads', true) WHERE FullName LIKE '%.cfg' OR FullName LIKE '%.tmp' +SELECT Length, FullPath FROM #os.files('C:\Users\{USER}\Downloads', true) WHERE FullPath LIKE '%.cfg' OR FullPath LIKE '%.tmp' ``` ## Listing Non-empty Files @@ -55,10 +55,10 @@ SELECT Name FROM #os.files('C:\Users\{USER}\Downloads', true) skip 5 take 5 ## Finding CSV Files Containing 'Frames' Word in File Name -This query searches for `.csv` files that contain the word 'Frames' within their full path (`FullName`) in the `Downloads` directory of the user `{USER}`. It leverages the `rlike` operator for regex pattern matching to filter files. The `true` parameter ensures that the search is conducted recursively through all subdirectories within the specified path, targeting only those `.csv` files whose names include 'Frames'. +This query searches for `.csv` files that contain the word 'Frames' within their full path (`FullPath`) in the `Downloads` directory of the user `{USER}`. It leverages the `rlike` operator for regex pattern matching to filter files. The `true` parameter ensures that the search is conducted recursively through all subdirectories within the specified path, targeting only those `.csv` files whose names include 'Frames'. ```sql -SELECT Name FROM #os.files('C:\Users\{USER}\Downloads', true) WHERE FullName rlike '.*Frames.*.csv' +SELECT Name FROM #os.files('C:\Users\{USER}\Downloads', true) WHERE FullPath rlike '.*Frames.*.csv' ``` ## Filtering `.tmp` and `.cfg` Files by Size @@ -67,10 +67,21 @@ This query selects the names (`Name`) of files within the `Downloads` directory ## Combining JPG Files from Two Folders -This query aggregates the full paths (`FullName`) of `.jpg` files from two specific locations: `Folder1` and `Folder2` within the user `{USER}`'s directory. It uses the `UNION ALL` operation to combine the results from both folders into a single list, including duplicates if they exist. 
The `true` parameter for each `#os.files` function call ensures that the search includes all subdirectories within both specified paths, targeting `.jpg` files exclusively. +This query aggregates the full paths (`FullPath`) of `.jpg` files from two specific locations: `Folder1` and `Folder2` within the user `{USER}`'s directory. It uses the `UNION ALL` operation to combine the results from both folders into a single list, including duplicates if they exist. The `true` parameter for each `#os.files` function call ensures that the search includes all subdirectories within both specified paths, targeting `.jpg` files exclusively. ```sql -SELECT FullName FROM #os.files('C:\Users\{USER}\Folder1', true) WHERE Name LIKE '%.jpg' -UNION ALL (FullName) -SELECT FullName FROM #os.files('C:\Users\{USER}\Folder2', true) WHERE Name LIKE '%.jpg' +SELECT FullPath FROM #os.files('C:\Users\{USER}\Folder1', true) WHERE Name LIKE '%.jpg' +UNION ALL (FullPath) +SELECT FullPath FROM #os.files('C:\Users\{USER}\Folder2', true) WHERE Name LIKE '%.jpg' +``` + +Alternatively, you can use the `CROSS APPLY` operator. Note that this variant returns the directory and file name of every file under both folders; add `AND f.Name LIKE '%.jpg'` to the `WHERE` clause to keep only `.jpg` files: + +```sql +SELECT + f.DirectoryName, + f.FileName +FROM #os.directories('C:\Users\{USER}', false) d +CROSS APPLY #os.files(d.FullName, true) f +WHERE d.Name IN ('Folder1', 'Folder2') ``` \ No newline at end of file diff --git a/docs/practical-examples-and-applications/turning-csv-into-json.md b/docs/practical-examples-and-applications/turning-csv-into-json.md new file mode 100644 index 0000000..b372f7c --- /dev/null +++ b/docs/practical-examples-and-applications/turning-csv-into-json.md @@ -0,0 +1,60 @@ +--- +title: Turning CSV into JSON +layout: default +parent: Practical Examples and Applications +nav_order: 8 +--- + +# Converting CSV to JSON with Musoq - Quick Guide + +This guide shows you how to convert CSV files to JSON using Musoq, with special attention to creating structured JSON objects.
+ +## Basic Table View + +To view your CSV data in table format, use this command: + +```powershell +./Musoq.exe run query "select * from #separatedvalues.comma('cities.csv', true, 0)" +``` + +You'll see your data in a clear table format: + +```cli +┌────────┬──────────┬────────────────┬──────────┬─────────────┬─────────┬───────────┬───────────────────┐ +│ cityId │ cityName │ cityPopulation │ cityArea │ postOffices │ schools │ isCapitol │ isVoivodeshipCity │ +├────────┼──────────┼────────────────┼──────────┼─────────────┼─────────┼───────────┼───────────────────┤ +│ 1 │ Warsaw │ 1793579 │ 517.24 │ 218 │ 456 │ true │ true │ +│ 2 │ Krakow │ 779115 │ 326.85 │ 156 │ 324 │ false │ true │ +│ 3 │ Lodz │ 679941 │ 293.25 │ 98 │ 278 │ false │ true │ +│ 4 │ Zakopane │ 27000 │ 84.23 │ 12 │ 15 │ false │ false │ +│ 5 │ Gdansk │ 470907 │ 262.58 │ 87 │ 198 │ false │ true │ +└────────┴──────────┴────────────────┴──────────┴─────────────┴─────────┴───────────┴───────────────────┘ +``` + +## Simple JSON Output + +To convert the same data to flat JSON, add the `--format json` flag: + +```powershell +./Musoq.exe run query "select * from #separatedvalues.comma('cities.csv', true, 0)" --format json +``` + +This produces: + +```json
+[{"cityId":"1","cityName":"Warsaw","cityPopulation":"1793579","cityArea":"517.24","postOffices":"218","schools":"456","isCapitol":"true","isVoivodeshipCity":"true"},{"cityId":"2","cityName":"Krakow","cityPopulation":"779115","cityArea":"326.85","postOffices":"156","schools":"324","isCapitol":"false","isVoivodeshipCity":"true"},{"cityId":"3","cityName":"Lodz","cityPopulation":"679941","cityArea":"293.25","postOffices":"98","schools":"278","isCapitol":"false","isVoivodeshipCity":"true"},{"cityId":"4","cityName":"Zakopane","cityPopulation":"27000","cityArea":"84.23","postOffices":"12","schools":"15","isCapitol":"false","isVoivodeshipCity":"false"},{"cityId":"5","cityName":"Gdansk","cityPopulation":"470907","cityArea":"262.58","postOffices":"87","schools":"198","isCapitol":"false","isVoivodeshipCity":"true"}] +``` + +## Nested JSON Output + +The `interpreted_json` format treats each column name as a dot-separated path into a JSON object, so aliases such as `[city.features.population]` are expanded into nested objects. Here's how to use it: + +```powershell +./Musoq.exe run query "select cityId as [city.id], cityName as [city.name], cityPopulation as [city.features.population], cityArea as [city.features.area], postOffices as [city.features.postOffices], schools as [city.features.schools], isCapitol as [city.features.isCapitol], isVoivodeshipCity as [city.features.isVoivodeship] from #separatedvalues.comma('cities.csv', true, 0)" --format interpreted_json +``` + +This creates a structured JSON output: + +```json
+[{"city":{"id":1,"name":"Warsaw","features":{"population":1793579,"area":517.24,"postOffices":218,"schools":456,"isCapitol":true,"isVoivodeship":true}}},{"city":{"id":2,"name":"Krakow","features":{"population":779115,"area":326.85,"postOffices":156,"schools":324,"isCapitol":false,"isVoivodeship":true}}},{"city":{"id":3,"name":"Lodz","features":{"population":679941,"area":293.25,"postOffices":98,"schools":278,"isCapitol":false,"isVoivodeship":true}}},{"city":{"id":4,"name":"Zakopane","features":{"population":27000,"area":84.23,"postOffices":12,"schools":15,"isCapitol":false,"isVoivodeship":false}}},{"city":{"id":5,"name":"Gdansk","features":{"population":470907,"area":262.58,"postOffices":87,"schools":198,"isCapitol":false,"isVoivodeship":true}}}] +``` \ No newline at end of file