From 53cee25e33430d8687add2f9e5b7b5ec0e1c1deb Mon Sep 17 00:00:00 2001 From: "copilot-developer-agent-robch[bot]" <175728472+Copilot@users.noreply.github.com> Date: Fri, 21 Feb 2025 04:38:53 +0000 Subject: [PATCH 1/2] Initial plan for issue From d9908fe57ea6e25546acc4767910af2d4aecb38d Mon Sep 17 00:00:00 2001 From: "copilot-developer-agent-robch[bot]" <175728472+Copilot@users.noreply.github.com> Date: Fri, 21 Feb 2025 04:44:46 +0000 Subject: [PATCH 2/2] Implement web screenshot functionality --- src/Commands/CommandLineOptions.cs | 7 +++++++ src/Commands/WebCommand.cs | 2 ++ src/Helpers/PlaywrightHelpers.cs | 13 ++++++++++++- src/Program.cs | 22 +++++++++++++++------- src/assets/help/web get examples.txt | 6 +++++- src/assets/help/web get options.txt | 3 +++ 6 files changed, 44 insertions(+), 9 deletions(-) diff --git a/src/Commands/CommandLineOptions.cs b/src/Commands/CommandLineOptions.cs index 87536bd..447219d 100644 --- a/src/Commands/CommandLineOptions.cs +++ b/src/Commands/CommandLineOptions.cs @@ -555,6 +555,13 @@ private static bool TryParseWebCommandOptions(WebCommand command, string[] args, command.SavePageOutput = savePageOutput; i += max1Arg.Count(); } + else if (arg == "--screenshot") + { + var max1Arg = GetInputOptionArgs(i + 1, args, max: 1); + var screenshot = max1Arg.FirstOrDefault() ?? 
DefaultSavePageOutputTemplate.Replace(".md", ".png"); + command.Screenshot = screenshot; + i += max1Arg.Count(); + } else { parsed = false; diff --git a/src/Commands/WebCommand.cs b/src/Commands/WebCommand.cs index c135131..54867df 100644 --- a/src/Commands/WebCommand.cs +++ b/src/Commands/WebCommand.cs @@ -37,4 +37,6 @@ public WebCommand() public List> PageInstructionsList; public string SavePageOutput { get; set; } + + public string Screenshot { get; set; } } diff --git a/src/Helpers/PlaywrightHelpers.cs b/src/Helpers/PlaywrightHelpers.cs index 35a3870..8a00466 100644 --- a/src/Helpers/PlaywrightHelpers.cs +++ b/src/Helpers/PlaywrightHelpers.cs @@ -48,7 +48,7 @@ public static async Task> GetWebSearchResultUrlsAsync(string search return urls; } - public static async Task<(string, string)> GetPageAndTitle(string url, bool stripHtml, string saveToFolder, BrowserType browserType, bool interactive) + public static async Task<(string, string)> GetPageAndTitle(string url, bool stripHtml, string saveToFolder, BrowserType browserType, bool interactive, string screenshot = null) { // Initialize Playwright using var playwright = await Playwright.CreateAsync(); @@ -63,6 +63,17 @@ public static async Task> GetWebSearchResultUrlsAsync(string search var content = await FetchPageContent(page, url, stripHtml, saveToFolder); var title = await page.TitleAsync(); + // Take screenshot if requested + if (!string.IsNullOrEmpty(screenshot)) + { + var screenshotDir = Path.GetDirectoryName(screenshot); + if (!string.IsNullOrEmpty(screenshotDir)) + { + Directory.CreateDirectory(screenshotDir); + } + await page.ScreenshotAsync(new PageScreenshotOptions { Path = screenshot, FullPage = true }); + } + // Return the content and title return (content, title); } diff --git a/src/Program.cs b/src/Program.cs index 8e6b9f2..d898e3d 100644 --- a/src/Program.cs +++ b/src/Program.cs @@ -261,6 +261,7 @@ private static List> HandleWebGetCommand(CommandLineOptions command var useBuiltInFunctions = 
command.UseBuiltInFunctions; var saveChatHistory = command.SaveChatHistory; var savePageOutput = command.SavePageOutput; + var screenshot = command.Screenshot; var badUrls = command.Urls.Where(l => !l.StartsWith("http")).ToList(); if (badUrls.Any()) @@ -274,7 +275,7 @@ private static List> HandleWebGetCommand(CommandLineOptions command var tasks = new List>(); foreach (var url in urls) { - var getCheckSaveTask = GetCheckSaveWebPageContentAsync(url, stripHtml, saveToFolder, browserType, interactive, pageInstructionsList, useBuiltInFunctions, saveChatHistory, savePageOutput); + var getCheckSaveTask = GetCheckSaveWebPageContentAsync(url, stripHtml, saveToFolder, browserType, interactive, pageInstructionsList, useBuiltInFunctions, saveChatHistory, savePageOutput, screenshot); var taskToAdd = delayOutputToApplyInstructions ? getCheckSaveTask : getCheckSaveTask.ContinueWith(t => @@ -597,12 +598,12 @@ private static string GetContentFilteredAndFormatted(string content, List return string.Join("\n", output); } - private static async Task GetCheckSaveWebPageContentAsync(string url, bool stripHtml, string saveToFolder, BrowserType browserType, bool interactive, List> pageInstructionsList, bool useBuiltInFunctions, string saveChatHistory, string savePageOutput) + private static async Task GetCheckSaveWebPageContentAsync(string url, bool stripHtml, string saveToFolder, BrowserType browserType, bool interactive, List> pageInstructionsList, bool useBuiltInFunctions, string saveChatHistory, string savePageOutput, string screenshot) { try { ConsoleHelpers.PrintStatus($"Processing: {url} ..."); - var finalContent = await GetFinalWebPageContentAsync(url, stripHtml, saveToFolder, browserType, interactive, pageInstructionsList, useBuiltInFunctions, saveChatHistory); + var finalContent = await GetFinalWebPageContentAsync(url, stripHtml, saveToFolder, browserType, interactive, pageInstructionsList, useBuiltInFunctions, saveChatHistory, screenshot); if 
(!string.IsNullOrEmpty(savePageOutput)) { @@ -620,9 +621,9 @@ private static async Task GetCheckSaveWebPageContentAsync(string url, bo } } - private static async Task GetFinalWebPageContentAsync(string url, bool stripHtml, string saveToFolder, BrowserType browserType, bool interactive, List> pageInstructionsList, bool useBuiltInFunctions, string saveChatHistory) + private static async Task GetFinalWebPageContentAsync(string url, bool stripHtml, string saveToFolder, BrowserType browserType, bool interactive, List> pageInstructionsList, bool useBuiltInFunctions, string saveChatHistory, string screenshot) { - var formatted = await GetFormattedWebPageContentAsync(url, stripHtml, saveToFolder, browserType, interactive); + var formatted = await GetFormattedWebPageContentAsync(url, stripHtml, saveToFolder, browserType, interactive, screenshot); var instructionsForThisPage = pageInstructionsList .Where(x => WebPageMatchesInstructionsCriteria(url, x.Item2)) @@ -643,15 +644,22 @@ private static bool WebPageMatchesInstructionsCriteria(string url, string webPag url == webPageCriteria; } - private static async Task GetFormattedWebPageContentAsync(string url, bool stripHtml, string saveToFolder, BrowserType browserType, bool interactive) + private static async Task GetFormattedWebPageContentAsync(string url, bool stripHtml, string saveToFolder, BrowserType browserType, bool interactive, string screenshot = null) { try { - var (content, title) = await PlaywrightHelpers.GetPageAndTitle(url, stripHtml, saveToFolder, browserType, interactive); + var (content, title) = await PlaywrightHelpers.GetPageAndTitle(url, stripHtml, saveToFolder, browserType, interactive, screenshot); var sb = new StringBuilder(); sb.AppendLine($"## {title}\n"); sb.AppendLine($"url: {url}\n"); + + if (!string.IsNullOrEmpty(screenshot)) + { + var fullPath = Path.GetFullPath(screenshot); /* absolute path, so the reported location is unambiguous regardless of the working directory */ + sb.AppendLine($"screenshot: {fullPath}\n"); + } + sb.AppendLine("```"); sb.AppendLine(content); sb.AppendLine("```\n"); 
diff --git a/src/assets/help/web get examples.txt b/src/assets/help/web get examples.txt index 50bd23d..0fd8ca7 100644 --- a/src/assets/help/web get examples.txt +++ b/src/assets/help/web get examples.txt @@ -19,7 +19,11 @@ EXAMPLES mdx web get https://learnxinyminutes.com/yaml/ --page-instructions @step1-instructions.txt @step2-instructions.txt - EXAMPLE 4: Apply AI to the final output + EXAMPLE 4: Capture page content and screenshot + + mdx web get https://example.com --save-page-output "{filePath}/{fileBase}-content.md" --screenshot "{filePath}/{fileBase}-screenshot.png" + + EXAMPLE 5: Apply AI to the final output mdx web get https://example.com https://mbers.us/bio --instructions "style example.com as the other site" diff --git a/src/assets/help/web get options.txt b/src/assets/help/web get options.txt index 92d0a7d..180c763 100644 --- a/src/assets/help/web get options.txt +++ b/src/assets/help/web get options.txt @@ -32,6 +32,9 @@ OPTIONS --save-page-output [FILE] Save each web page output to the specified template file (e.g. {filePath}/{fileBase}-output.md) + --screenshot [FILE] Save a screenshot of each web page to the specified template file + (e.g. {filePath}/{fileBase}-output.png) + --save-output [FILE] Save command output to the specified template file --save-alias ALIAS Save current options as an alias (usable via --{ALIAS})