Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/Commands/CommandLineOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -555,6 +555,13 @@ private static bool TryParseWebCommandOptions(WebCommand command, string[] args,
command.SavePageOutput = savePageOutput;
i += max1Arg.Count();
}
else if (arg == "--screenshot")
{
var max1Arg = GetInputOptionArgs(i + 1, args, max: 1);
var screenshot = max1Arg.FirstOrDefault() ?? DefaultSavePageOutputTemplate.Replace(".md", ".png");
command.Screenshot = screenshot;
i += max1Arg.Count();
}
else
{
parsed = false;
Expand Down
2 changes: 2 additions & 0 deletions src/Commands/WebCommand.cs
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,6 @@ public WebCommand()
public List<Tuple<string, string>> PageInstructionsList;

public string SavePageOutput { get; set; }

public string Screenshot { get; set; }
}
13 changes: 12 additions & 1 deletion src/Helpers/PlaywrightHelpers.cs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ public static async Task<List<string>> GetWebSearchResultUrlsAsync(string search
return urls;
}

public static async Task<(string, string)> GetPageAndTitle(string url, bool stripHtml, string saveToFolder, BrowserType browserType, bool interactive)
public static async Task<(string, string)> GetPageAndTitle(string url, bool stripHtml, string saveToFolder, BrowserType browserType, bool interactive, string screenshot = null)
{
// Initialize Playwright
using var playwright = await Playwright.CreateAsync();
Expand All @@ -63,6 +63,17 @@ public static async Task<List<string>> GetWebSearchResultUrlsAsync(string search
var content = await FetchPageContent(page, url, stripHtml, saveToFolder);
var title = await page.TitleAsync();

// Take screenshot if requested
if (!string.IsNullOrEmpty(screenshot))
{
var screenshotDir = Path.GetDirectoryName(screenshot);
if (!string.IsNullOrEmpty(screenshotDir))
{
Directory.CreateDirectory(screenshotDir);
}
await page.ScreenshotAsync(new PageScreenshotOptions { Path = screenshot, FullPage = true });
}

// Return the content and title
return (content, title);
}
Expand Down
22 changes: 15 additions & 7 deletions src/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@ private static List<Task<string>> HandleWebGetCommand(CommandLineOptions command
var useBuiltInFunctions = command.UseBuiltInFunctions;
var saveChatHistory = command.SaveChatHistory;
var savePageOutput = command.SavePageOutput;
var screenshot = command.Screenshot;

var badUrls = command.Urls.Where(l => !l.StartsWith("http")).ToList();
if (badUrls.Any())
Expand All @@ -274,7 +275,7 @@ private static List<Task<string>> HandleWebGetCommand(CommandLineOptions command
var tasks = new List<Task<string>>();
foreach (var url in urls)
{
var getCheckSaveTask = GetCheckSaveWebPageContentAsync(url, stripHtml, saveToFolder, browserType, interactive, pageInstructionsList, useBuiltInFunctions, saveChatHistory, savePageOutput);
var getCheckSaveTask = GetCheckSaveWebPageContentAsync(url, stripHtml, saveToFolder, browserType, interactive, pageInstructionsList, useBuiltInFunctions, saveChatHistory, savePageOutput, screenshot);
var taskToAdd = delayOutputToApplyInstructions
? getCheckSaveTask
: getCheckSaveTask.ContinueWith(t =>
Expand Down Expand Up @@ -597,12 +598,12 @@ private static string GetContentFilteredAndFormatted(string content, List<Regex>
return string.Join("\n", output);
}

private static async Task<string> GetCheckSaveWebPageContentAsync(string url, bool stripHtml, string saveToFolder, BrowserType browserType, bool interactive, List<Tuple<string, string>> pageInstructionsList, bool useBuiltInFunctions, string saveChatHistory, string savePageOutput)
private static async Task<string> GetCheckSaveWebPageContentAsync(string url, bool stripHtml, string saveToFolder, BrowserType browserType, bool interactive, List<Tuple<string, string>> pageInstructionsList, bool useBuiltInFunctions, string saveChatHistory, string savePageOutput, string screenshot)
{
try
{
ConsoleHelpers.PrintStatus($"Processing: {url} ...");
var finalContent = await GetFinalWebPageContentAsync(url, stripHtml, saveToFolder, browserType, interactive, pageInstructionsList, useBuiltInFunctions, saveChatHistory);
var finalContent = await GetFinalWebPageContentAsync(url, stripHtml, saveToFolder, browserType, interactive, pageInstructionsList, useBuiltInFunctions, saveChatHistory, screenshot);

if (!string.IsNullOrEmpty(savePageOutput))
{
Expand All @@ -620,9 +621,9 @@ private static async Task<string> GetCheckSaveWebPageContentAsync(string url, bo
}
}

private static async Task<string> GetFinalWebPageContentAsync(string url, bool stripHtml, string saveToFolder, BrowserType browserType, bool interactive, List<Tuple<string, string>> pageInstructionsList, bool useBuiltInFunctions, string saveChatHistory)
private static async Task<string> GetFinalWebPageContentAsync(string url, bool stripHtml, string saveToFolder, BrowserType browserType, bool interactive, List<Tuple<string, string>> pageInstructionsList, bool useBuiltInFunctions, string saveChatHistory, string screenshot)
{
var formatted = await GetFormattedWebPageContentAsync(url, stripHtml, saveToFolder, browserType, interactive);
var formatted = await GetFormattedWebPageContentAsync(url, stripHtml, saveToFolder, browserType, interactive, screenshot);

var instructionsForThisPage = pageInstructionsList
.Where(x => WebPageMatchesInstructionsCriteria(url, x.Item2))
Expand All @@ -643,15 +644,22 @@ private static bool WebPageMatchesInstructionsCriteria(string url, string webPag
url == webPageCriteria;
}

private static async Task<string> GetFormattedWebPageContentAsync(string url, bool stripHtml, string saveToFolder, BrowserType browserType, bool interactive)
private static async Task<string> GetFormattedWebPageContentAsync(string url, bool stripHtml, string saveToFolder, BrowserType browserType, bool interactive, string screenshot = null)
{
try
{
var (content, title) = await PlaywrightHelpers.GetPageAndTitle(url, stripHtml, saveToFolder, browserType, interactive);
var (content, title) = await PlaywrightHelpers.GetPageAndTitle(url, stripHtml, saveToFolder, browserType, interactive, screenshot);

var sb = new StringBuilder();
sb.AppendLine($"## {title}\n");
sb.AppendLine($"url: {url}\n");

if (!string.IsNullOrEmpty(screenshot))
{
var fullPath = Path.GetFullPath(screenshot);
sb.AppendLine($"screenshot: {screenshot}\n");
}

sb.AppendLine("```");
sb.AppendLine(content);
sb.AppendLine("```\n");
Expand Down
6 changes: 5 additions & 1 deletion src/assets/help/web get examples.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,11 @@ EXAMPLES

mdx web get https://learnxinyminutes.com/yaml/ --page-instructions @step1-instructions.txt @step2-instructions.txt

EXAMPLE 4: Apply AI to the final output
EXAMPLE 4: Capture page content and screenshot

mdx web get https://example.com --save-page-output "{filePath}/{fileBase}-content.md" --screenshot "{filePath}/{fileBase}-screenshot.png"

EXAMPLE 5: Apply AI to the final output

mdx web get https://example.com https://mbers.us/bio --instructions "style example.com as the other site"

Expand Down
3 changes: 3 additions & 0 deletions src/assets/help/web get options.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ OPTIONS
--save-page-output [FILE] Save each web page output to the specified template file
(e.g. {filePath}/{fileBase}-output.md)

--screenshot [FILE] Save a full-page screenshot of each web page to the specified template file
(e.g. {filePath}/{fileBase}-output.png)

--save-output [FILE] Save command output to the specified template file
--save-alias ALIAS Save current options as an alias (usable via --{ALIAS})

Expand Down