diff --git a/CHANGELOG.md b/CHANGELOG.md index 7869159..55648bc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.0.9] - 2025-11-14 +### Changed +- Disabled thinking for Google Models + ## [0.0.8] - 2025-11-12 ### Changed - Navbar spacing rules diff --git a/core/agents/__init__.py b/core/agents/__init__.py index 2cd151f..995e009 100644 --- a/core/agents/__init__.py +++ b/core/agents/__init__.py @@ -12,14 +12,13 @@ from core.agents.generate_blog_post_content_agent import ( create_generate_blog_post_content_agent, ) +from core.agents.insert_internal_links_agent import create_insert_internal_links_agent from core.agents.populate_competitor_details_agent import ( create_populate_competitor_details_agent, ) from core.agents.summarize_page_agent import create_summarize_page_agent from core.agents.title_suggestions_agent import create_title_suggestions_agent -from core.agents.validate_blog_post_ending_agent import ( - create_validate_blog_post_ending_agent, -) +from core.agents.validate_blog_post_agent import create_validate_blog_post_agent __all__ = [ "create_analyze_competitor_agent", @@ -30,8 +29,9 @@ "create_extract_links_agent", "create_find_competitors_agent", "create_generate_blog_post_content_agent", + "create_insert_internal_links_agent", "create_populate_competitor_details_agent", "create_summarize_page_agent", "create_title_suggestions_agent", - "create_validate_blog_post_ending_agent", + "create_validate_blog_post_agent", ] diff --git a/core/agents/analyze_competitor_agent.py b/core/agents/analyze_competitor_agent.py index be90f3c..7ef23af 100644 --- a/core/agents/analyze_competitor_agent.py +++ b/core/agents/analyze_competitor_agent.py @@ -1,8 +1,8 @@ from django.utils import timezone from pydantic_ai import Agent, RunContext +from core.agents.models import 
get_default_ai_model from core.agents.schemas import CompetitorAnalysis, CompetitorAnalysisContext -from core.choices import get_default_ai_model def create_analyze_competitor_agent(model=None): @@ -23,7 +23,7 @@ def create_analyze_competitor_agent(model=None): """ You are an expert marketer. Based on the competitor details and homepage content provided, - extract and infer the requested information. Make reasonable inferences based + extract and infer the requested information. Make reasonable inferences based on available content, context, and industry knowledge. """ ), diff --git a/core/agents/analyze_project_agent.py b/core/agents/analyze_project_agent.py index ceec583..b3bf1c1 100644 --- a/core/agents/analyze_project_agent.py +++ b/core/agents/analyze_project_agent.py @@ -1,8 +1,8 @@ from pydantic_ai import Agent +from core.agents.models import get_default_ai_model from core.agents.schemas import ProjectDetails, WebPageContent from core.agents.system_prompts import add_webpage_content -from core.choices import get_default_ai_model def create_analyze_project_agent(model=None): @@ -25,6 +25,7 @@ def create_analyze_project_agent(model=None): "on available content, context, and industry knowledge." 
), retries=2, + model_settings={"temperature": 0.8, "google_thinking_config": {"thinking_budget": 0}}, ) agent.system_prompt(add_webpage_content) diff --git a/core/agents/competitor_vs_blog_post_agent.py b/core/agents/competitor_vs_blog_post_agent.py index ec02053..c264389 100644 --- a/core/agents/competitor_vs_blog_post_agent.py +++ b/core/agents/competitor_vs_blog_post_agent.py @@ -3,9 +3,9 @@ from pydantic_ai.models.openai import OpenAIChatModel from pydantic_ai.providers.openai import OpenAIProvider +from core.agents.models import AIModel from core.agents.schemas import CompetitorVsPostContext from core.agents.system_prompts import add_project_pages, markdown_lists -from core.choices import AIModel def create_competitor_vs_blog_post_agent(model=None): @@ -64,7 +64,7 @@ def create_competitor_vs_blog_post_agent(model=None): @agent.system_prompt def output_format() -> str: return """ - IMPORTANT: Return only the text. Don't surround the text with ```markdown or ```. + Return only the text. Don't surround the text with ```markdown or ```. """ @agent.system_prompt diff --git a/core/agents/content_editor_agent.py b/core/agents/content_editor_agent.py index c31bb4c..e6ea247 100644 --- a/core/agents/content_editor_agent.py +++ b/core/agents/content_editor_agent.py @@ -1,5 +1,6 @@ from pydantic_ai import Agent +from core.agents.models import get_default_ai_model from core.agents.schemas import BlogPostGenerationContext from core.agents.system_prompts import ( add_language_specification, @@ -8,7 +9,6 @@ add_target_keywords, add_title_details, ) -from core.choices import get_default_ai_model def create_content_editor_agent(model=None): @@ -31,13 +31,13 @@ def create_content_editor_agent(model=None): Your task is to edit the blog post content based on the requested changes. 
""", retries=2, - model_settings={"temperature": 0.3}, + model_settings={"temperature": 0.3, "google_thinking_config": {"thinking_budget": 0}}, ) @agent.system_prompt def only_return_the_edited_content() -> str: return """ - IMPORTANT: Only return the edited content, no other text. + Only return the edited content, no other text. """ agent.system_prompt(add_project_details) diff --git a/core/agents/extract_competitors_data_agent.py b/core/agents/extract_competitors_data_agent.py index 19709b9..b42cc44 100644 --- a/core/agents/extract_competitors_data_agent.py +++ b/core/agents/extract_competitors_data_agent.py @@ -1,7 +1,7 @@ from pydantic_ai import Agent, RunContext +from core.agents.models import get_default_ai_model from core.agents.schemas import CompetitorDetails -from core.choices import get_default_ai_model def create_extract_competitors_data_agent(model=None): @@ -22,6 +22,7 @@ def create_extract_competitors_data_agent(model=None): Extract all the data from the text provided. """, retries=2, + model_settings={"temperature": 0.2, "google_thinking_config": {"thinking_budget": 0}}, ) @agent.system_prompt diff --git a/core/agents/extract_links_agent.py b/core/agents/extract_links_agent.py index 217b949..22673b9 100644 --- a/core/agents/extract_links_agent.py +++ b/core/agents/extract_links_agent.py @@ -1,6 +1,6 @@ from pydantic_ai import Agent, RunContext -from core.choices import get_default_ai_model +from core.agents.models import get_default_ai_model def create_extract_links_agent(model=None): @@ -24,6 +24,7 @@ def create_extract_links_agent(model=None): If the text contains no valid URLs, return an empty list. 
""", retries=2, + model_settings={"temperature": 0.2, "google_thinking_config": {"thinking_budget": 0}}, ) @agent.system_prompt diff --git a/core/agents/find_competitors_agent.py b/core/agents/find_competitors_agent.py index 8146f8f..2bf8ce2 100644 --- a/core/agents/find_competitors_agent.py +++ b/core/agents/find_competitors_agent.py @@ -3,8 +3,8 @@ from pydantic_ai.models.openai import OpenAIChatModel from pydantic_ai.providers.openai import OpenAIProvider +from core.agents.models import AIModel from core.agents.schemas import ProjectDetails -from core.choices import AIModel def create_find_competitors_agent(is_on_free_plan: bool): @@ -70,22 +70,14 @@ def number_of_competitors(ctx: RunContext[ProjectDetails]) -> str: @agent.system_prompt def language_specification(ctx: RunContext[ProjectDetails]) -> str: project = ctx.deps - return f""" - IMPORTANT: Be mindful that competitors are likely to speak in - {project.language} language. - """ + return f"Be mindful that competitors are likely to speak in {project.language} language." # noqa: E501 @agent.system_prompt def location_specification(ctx: RunContext[ProjectDetails]) -> str: project = ctx.deps if project.location != "Global": - return f""" - IMPORTANT: Only return competitors whose target audience is in - {project.location}. - """ + return f"Only return competitors whose target audience is in {project.location}." # noqa: E501 else: - return """ - IMPORTANT: Return competitors from all over the world. - """ + return "Return competitors from all over the world." 
return agent diff --git a/core/agents/generate_blog_post_content_agent.py b/core/agents/generate_blog_post_content_agent.py index 0119cef..0268ba2 100644 --- a/core/agents/generate_blog_post_content_agent.py +++ b/core/agents/generate_blog_post_content_agent.py @@ -1,19 +1,20 @@ from pydantic_ai import Agent +from core.agents.models import get_default_ai_model from core.agents.schemas import BlogPostGenerationContext, GeneratedBlogPostSchema from core.agents.system_prompts import ( add_language_specification, add_project_details, - add_project_pages, add_target_keywords, add_title_details, add_todays_date, + add_validation_feedback, filler_content, markdown_lists, post_structure, valid_markdown_format, ) -from core.choices import ContentType, get_default_ai_model +from core.choices import ContentType from core.prompts import GENERATE_CONTENT_SYSTEM_PROMPTS @@ -23,6 +24,9 @@ def create_generate_blog_post_content_agent( """ Create an agent to generate blog post content. + Note: This agent generates content WITHOUT internal links. Links will be inserted + in a separate step using the insert_internal_links_agent. + Args: content_type: The type of content to generate (SHARING, ACTIONABLE, THOUGHT_LEADERSHIP). model: Optional AI model to use. Defaults to the default AI model. 
@@ -36,15 +40,15 @@ def create_generate_blog_post_content_agent( deps_type=BlogPostGenerationContext, system_prompt=GENERATE_CONTENT_SYSTEM_PROMPTS[content_type], retries=2, - model_settings={"max_tokens": 65500, "temperature": 0.8}, + model_settings={"max_tokens": 65500, "temperature": 0.3, "google_thinking_config": {"thinking_budget": 0}}, # noqa: E501 ) agent.system_prompt(add_project_details) - agent.system_prompt(add_project_pages) agent.system_prompt(add_title_details) agent.system_prompt(add_todays_date) agent.system_prompt(add_language_specification) agent.system_prompt(add_target_keywords) + agent.system_prompt(add_validation_feedback) agent.system_prompt(valid_markdown_format) agent.system_prompt(markdown_lists) agent.system_prompt(post_structure) diff --git a/core/agents/insert_internal_links_agent.py b/core/agents/insert_internal_links_agent.py new file mode 100644 index 0000000..da9a46a --- /dev/null +++ b/core/agents/insert_internal_links_agent.py @@ -0,0 +1,85 @@ +from pydantic_ai import Agent, RunContext + +from core.agents.models import get_default_ai_model +from core.agents.schemas import InsertedLinksOutput, InsertInternalLinksContext + + +def create_insert_internal_links_agent(model=None): + """ + Create an agent to insert internal links into blog post content. + + The agent takes existing blog post content and a list of internal pages, + then intelligently inserts links where they are contextually relevant. + + Args: + model: Optional AI model to use. Defaults to the default AI model. + + Returns: + Configured Agent instance + """ + system_prompt = """ +You are an expert content editor specializing in internal linking strategies for SEO optimization. + +Your task is to take blog post content and intelligently insert internal links to relevant pages +where they naturally fit and add value to the reader. + +## Guidelines for Link Insertion: + +1. **Natural Integration**: Insert links only where they enhance the reader's understanding or provide valuable additional context. 
Links should feel organic and not forced. + +2. **Contextual Relevance**: Match the page's topic with the surrounding content. Only link when there's a clear topical connection. + +3. **Anchor Text**: Use descriptive, natural anchor text that clearly indicates what the reader will find when clicking the link. Avoid generic phrases like "click here" or "this page". + +4. **Strategic Placement**: + - Prefer linking in the main body content rather than introductions or conclusions + - Space out links appropriately - avoid clustering multiple links in a single paragraph + - Link to each page at most once or twice in the entire article + +5. **Must-Use Pages**: These are high-priority pages that should be linked if there's any reasonable contextual fit. Be creative in finding natural places to mention and link to these pages. + +6. **Optional Pages**: Only link to these if they are highly relevant to the specific section of content where they would be inserted. + +7. **Markdown Format**: Use proper Markdown link syntax: `[anchor text](URL)` + +8. **Preserve Content**: Do not modify the existing content except to add links. Maintain all formatting, structure, and tone. + +9. **Quality Over Quantity**: It's better to have fewer, highly relevant links than many loosely related ones. + +## Output Requirements: +Return the complete blog post content with internal links inserted. The content should be in Markdown format with proper link syntax. 
+""" # noqa: E501 + + agent = Agent( + model or get_default_ai_model(), + output_type=InsertedLinksOutput, + deps_type=InsertInternalLinksContext, + system_prompt=system_prompt, + retries=2, + model_settings={"max_tokens": 65500, "temperature": 0.3, "google_thinking_config": {"thinking_budget": 0}}, # noqa: E501 + ) + + @agent.system_prompt + def add_must_use_and_optional_pages(ctx: RunContext[InsertInternalLinksContext]) -> str: + return f""" + Must-use pages: + {ctx.deps.must_use_pages} + -------------------------------- + Optional pages: + {ctx.deps.optional_pages} + """ + + @agent.system_prompt + def add_content(ctx: RunContext[InsertInternalLinksContext]) -> str: + return f""" + Content: + -------------------------------- + {ctx.deps.content} + -------------------------------- + """ + + @agent.system_prompt + def output() -> str: + return "Return only the post that I gave you, but with links." + + return agent diff --git a/core/agents/models.py b/core/agents/models.py new file mode 100644 index 0000000..d5ddd7a --- /dev/null +++ b/core/agents/models.py @@ -0,0 +1,14 @@ +from django.db import models + + +class AIModel(models.TextChoices): + GEMINI_FLASH = "google-gla:gemini-2.5-flash", "Gemini 2.5 Flash" + PERPLEXITY_SONAR = "sonar", "Perplexity Sonar" + + +DEFAULT_AI_MODEL = AIModel.GEMINI_FLASH + + +def get_default_ai_model() -> str: + """Returns the default AI model to use across the application.""" + return DEFAULT_AI_MODEL diff --git a/core/agents/populate_competitor_details_agent.py b/core/agents/populate_competitor_details_agent.py index eed5759..0ed1839 100644 --- a/core/agents/populate_competitor_details_agent.py +++ b/core/agents/populate_competitor_details_agent.py @@ -1,7 +1,7 @@ from pydantic_ai import Agent, RunContext +from core.agents.models import get_default_ai_model from core.agents.schemas import CompetitorDetails, WebPageContent -from core.choices import get_default_ai_model def create_populate_competitor_details_agent(model=None): @@ -27,6 +27,7 @@ def 
create_populate_competitor_details_agent(model=None): """ ), retries=2, + model_settings={"temperature": 0.5, "google_thinking_config": {"thinking_budget": 0}}, ) @agent.system_prompt diff --git a/core/agents/schemas.py b/core/agents/schemas.py index b4937fb..cdd71f3 100644 --- a/core/agents/schemas.py +++ b/core/agents/schemas.py @@ -185,8 +185,11 @@ class BlogPostGenerationContext(BaseModel): project_details: ProjectDetails title_suggestion: TitleSuggestion project_keywords: list[str] = [] - project_pages: list[ProjectPageContext] = [] content_type: str = Field(description="Type of content to generate (SEO or SHARING)") + previous_validation_issues: list[str] = Field( + default_factory=list, + description="Previous validation issues to avoid in the new content generation", + ) class GeneratedBlogPostSchema(BaseModel): @@ -275,3 +278,25 @@ class CompetitorVsPostContext(BaseModel): project_pages: list[ProjectPageContext] = Field( default_factory=list, description="List of project pages available for linking" ) + + +class InsertInternalLinksContext(BaseModel): + """Context for inserting internal links into blog post content.""" + + content: str = Field(description="The blog post content in Markdown format") + must_use_pages: list[ProjectPageContext] = Field( + default_factory=list, + description="Pages that must be linked in the content where contextually relevant", + ) + optional_pages: list[ProjectPageContext] = Field( + default_factory=list, + description="Pages that can be linked if they are contextually relevant to the content", + ) + + +class InsertedLinksOutput(BaseModel): + """Output schema for the insert internal links agent.""" + + content: str = Field( + description="The blog post content with internal links inserted in Markdown format" + ) diff --git a/core/agents/summarize_page_agent.py b/core/agents/summarize_page_agent.py index 200bcbc..e77f8a0 100644 --- a/core/agents/summarize_page_agent.py +++ b/core/agents/summarize_page_agent.py @@ -1,8 +1,8 @@ from pydantic_ai import Agent 
+from core.agents.models import get_default_ai_model from core.agents.schemas import ProjectPageDetails, WebPageContent from core.agents.system_prompts import add_webpage_content -from core.choices import get_default_ai_model def create_summarize_page_agent(model=None): @@ -25,7 +25,7 @@ def create_summarize_page_agent(model=None): "information of the page. Focus on what the page is about and its main value proposition." # noqa: E501 ), retries=2, - model_settings={"temperature": 0.5}, + model_settings={"temperature": 0.5, "google_thinking_config": {"thinking_budget": 0}}, ) agent.system_prompt(add_webpage_content) diff --git a/core/agents/system_prompts.py b/core/agents/system_prompts.py index 540d301..e247779 100644 --- a/core/agents/system_prompts.py +++ b/core/agents/system_prompts.py @@ -9,39 +9,30 @@ def add_todays_date() -> str: def valid_markdown_format() -> str: - return """ - IMPORTANT: Generate the content in valid markdown format. - Make sure the content is formatted correctly with: - - headings - - paragraphs - - lists - - links - """ + return "Generate the content in valid markdown format." def post_structure() -> str: return """ - - Don't start with a title, header or a subheader (#, ##, ###). Instead start with a plain text as intro. + - Start with a plain text as intro. Don't start with a title, header or a subheader (#, ##, ###). - Use '##' (h2 headers) for sections of the post where necessary. - - Don't use 3rd levle subheaders (###) or deeper. That should not be necessary for the post. + - Don't use 3rd level subheaders (###) or deeper. That should not be necessary for the post. """ # noqa: E501 def markdown_lists() -> str: - return """ - - Add an empty line before the first item in the list. - - Use lists for bullet points where necessary. - - Use numbered lists for steps or instructions. - - Use nested lists for sub-points. - """ # noqa: E501 + return "Add an empty line before the first item in the markdown list." 
def filler_content() -> str: return """ - Do not add content that needs to be filled in later. + - No link placeholders + - No image placeholders - No placeholders either. This means no: - Image Suggestion: [Image] - Link Suggestion: [Link] + - or anything like that. ... """ @@ -116,7 +107,7 @@ def add_project_pages(ctx: RunContext) -> str: def add_title_details(ctx: RunContext[BlogPostGenerationContext]) -> str: title = ctx.deps.title_suggestion return f""" - This is the title suggestion gnerate by AI using project information: + This is the title suggestion generated by AI using project information: - Title: {title.title} - Description: {title.description} - Category: {title.category} @@ -131,7 +122,7 @@ def add_title_details(ctx: RunContext[BlogPostGenerationContext]) -> str: def add_language_specification(ctx: RunContext[BlogPostGenerationContext]) -> str: return f""" - IMPORTANT: Generate the content in {ctx.deps.project_details.language} language. + Generate the content in {ctx.deps.project_details.language} language. Make sure the content is grammatically correct and culturally appropriate for {ctx.deps.project_details.language}-speaking audiences. """ @@ -160,3 +151,23 @@ def add_webpage_content(ctx: RunContext[WebPageContent]) -> str: f"Description: {ctx.deps.description}" f"Content: {ctx.deps.markdown_content}" ) + + +def add_validation_feedback(ctx: RunContext[BlogPostGenerationContext]) -> str: + """ + Add previous validation issues as feedback to help avoid making the same mistakes. + + This helps the agent learn from previous validation failures and improve content quality. + """ + if hasattr(ctx.deps, "previous_validation_issues") and ctx.deps.previous_validation_issues: + issues_text = "\n - ".join(ctx.deps.previous_validation_issues) + return f""" + IMPORTANT - Previous Validation Feedback: + The previous version of this content failed validation with the following issues. 
+ Please avoid making these same mistakes: + + - {issues_text} + + Make sure to address all of these issues in the content you generate. + """ + return "" diff --git a/core/agents/title_suggestions_agent.py b/core/agents/title_suggestions_agent.py index 417e92e..4dadfba 100644 --- a/core/agents/title_suggestions_agent.py +++ b/core/agents/title_suggestions_agent.py @@ -1,8 +1,9 @@ from pydantic_ai import Agent, RunContext +from core.agents.models import get_default_ai_model from core.agents.schemas import TitleSuggestionContext, TitleSuggestions from core.agents.system_prompts import add_todays_date -from core.choices import ContentType, get_default_ai_model +from core.choices import ContentType from core.prompts import TITLE_SUGGESTION_SYSTEM_PROMPTS @@ -14,7 +15,7 @@ def create_title_suggestions_agent(content_type=ContentType.SHARING, model=None) deps_type=TitleSuggestionContext, system_prompt=TITLE_SUGGESTION_SYSTEM_PROMPTS[content_type], retries=2, - model_settings={"temperature": 0.9}, + model_settings={"temperature": 0.9, "google_thinking_config": {"thinking_budget": 0}}, ) agent.system_prompt(add_todays_date) @@ -37,13 +38,13 @@ def add_project_details(ctx: RunContext[TitleSuggestionContext]) -> str: @agent.system_prompt def add_number_of_titles_to_generate(ctx: RunContext[TitleSuggestionContext]) -> str: - return f"""IMPORTANT: Generate only {ctx.deps.num_titles} titles.""" + return f"""Generate only {ctx.deps.num_titles} titles.""" @agent.system_prompt def add_language_specification(ctx: RunContext[TitleSuggestionContext]) -> str: project = ctx.deps.project_details return f""" - IMPORTANT: Generate all titles in {project.language} language. + Generate all titles in {project.language} language. Make sure the titles are grammatically correct and culturally appropriate for {project.language}-speaking audiences. 
""" diff --git a/core/agents/validate_blog_post_agent.py b/core/agents/validate_blog_post_agent.py new file mode 100644 index 0000000..8a2ec62 --- /dev/null +++ b/core/agents/validate_blog_post_agent.py @@ -0,0 +1,41 @@ +from pydantic import BaseModel +from pydantic_ai import Agent + +from core.agents.models import get_default_ai_model + + +class BlogPostValidationResult(BaseModel): + is_valid: bool + issues: list[str] = [] + + +def create_validate_blog_post_agent(model=None): + agent = Agent( + model or get_default_ai_model(), + output_type=BlogPostValidationResult, + system_prompt=""" +You are an expert content quality validator for blog posts. + +Analyze the provided blog post content and determine if it meets publication quality standards. + +Be thorough but fair. Minor imperfections are okay if the content is publishable. + """, # noqa: E501 + retries=1, + model_settings={"temperature": 0.1, "google_thinking_config": {"thinking_budget": 0}}, + ) + + @agent.system_prompt + def validations_to_check() -> str: + return """ + Validations to check: + 1. **Completeness**: + Does the post have a proper ending? Is it cut off mid-sentence or mid-thought? + 2. **Length**: + Is there substantial content (at least 2500-3000 characters)? + 3. **Placeholders**: + Are there any placeholder text like [INSERT X], [TODO], [EXAMPLE], {placeholder}, etc.? + 4. **Structure**: + Does it start with regular text (not a header like # or ##)? + """ + + return agent diff --git a/core/agents/validate_blog_post_ending_agent.py b/core/agents/validate_blog_post_ending_agent.py deleted file mode 100644 index c053e27..0000000 --- a/core/agents/validate_blog_post_ending_agent.py +++ /dev/null @@ -1,41 +0,0 @@ -from pydantic_ai import Agent - -from core.choices import get_default_ai_model - - -def create_validate_blog_post_ending_agent(model=None): - """ - Create an agent to validate if a blog post has a complete, proper ending. - - Args: - model: Optional AI model to use. Defaults to the default AI model. 
- - Returns: - Configured Agent instance that returns a boolean - """ - agent = Agent( - model or get_default_ai_model(), - output_type=bool, - system_prompt=""" - You are an expert content editor analyzing blog post endings. Your task is to determine - whether the provided text represents a complete, proper conclusion to a blog post. - - A valid blog post ending should: - - Complete the final thought or sentence - - Provide closure to the topic being discussed - - Feel like a natural conclusion (not abruptly cut off) - - May include calls-to-action, summaries, or closing remarks - - An invalid ending would be: - - Cut off mid-sentence - - Ending abruptly without proper conclusion - - Incomplete thoughts or paragraphs - - Missing expected closing elements for the content type - - Analyze the text carefully and provide your assessment. Return True if the ending is valid, False if not. - """, # noqa: E501 - retries=2, - model_settings={"temperature": 0.1}, - ) - - return agent diff --git a/core/choices.py b/core/choices.py index 883d4ee..9b370e5 100644 --- a/core/choices.py +++ b/core/choices.py @@ -123,16 +123,3 @@ class EmailType(models.TextChoices): EMAIL_CONFIRMATION = "EMAIL_CONFIRMATION", "Email Confirmation" WELCOME = "WELCOME", "Welcome" FEEDBACK_NOTIFICATION = "FEEDBACK_NOTIFICATION", "Feedback Notification" - - -class AIModel(models.TextChoices): - GEMINI_FLASH = "google-gla:gemini-2.5-flash", "Gemini 2.5 Flash" - PERPLEXITY_SONAR = "sonar", "Perplexity Sonar" - - -DEFAULT_AI_MODEL = AIModel.GEMINI_FLASH - - -def get_default_ai_model() -> str: - """Returns the default AI model to use across the application.""" - return DEFAULT_AI_MODEL diff --git a/core/migrations/0043_remove_generatedblogpost_content_too_short_and_more.py b/core/migrations/0043_remove_generatedblogpost_content_too_short_and_more.py new file mode 100644 index 0000000..7d17d05 --- /dev/null +++ b/core/migrations/0043_remove_generatedblogpost_content_too_short_and_more.py @@ -0,0 +1,44 @@ +# 
Generated by Django 5.2.7 on 2025-11-10 12:57 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0042_autosubmissionsetting_deleted_at_blogpost_deleted_at_and_more'), + ] + + operations = [ + migrations.RemoveField( + model_name='generatedblogpost', + name='content_too_short', + ), + migrations.RemoveField( + model_name='generatedblogpost', + name='has_valid_ending', + ), + migrations.RemoveField( + model_name='generatedblogpost', + name='placeholders', + ), + migrations.RemoveField( + model_name='generatedblogpost', + name='starts_with_header', + ), + migrations.AddField( + model_name='generatedblogpost', + name='is_content_valid', + field=models.BooleanField(default=False), + ), + migrations.AddField( + model_name='generatedblogpost', + name='validation_attempts', + field=models.IntegerField(default=0), + ), + migrations.AddField( + model_name='generatedblogpost', + name='validation_issues', + field=models.JSONField(blank=True, default=list), + ), + ] diff --git a/core/models.py b/core/models.py index 3daea9e..4b1809e 100644 --- a/core/models.py +++ b/core/models.py @@ -1,4 +1,3 @@ -import re from decimal import Decimal, InvalidOperation from urllib.request import urlopen @@ -23,16 +22,17 @@ create_extract_links_agent, create_find_competitors_agent, create_generate_blog_post_content_agent, + create_insert_internal_links_agent, create_populate_competitor_details_agent, create_summarize_page_agent, create_title_suggestions_agent, - create_validate_blog_post_ending_agent, ) from core.agents.schemas import ( BlogPostGenerationContext, CompetitorAnalysisContext, CompetitorDetails, GeneratedBlogPostSchema, + InsertInternalLinksContext, ProjectDetails, ProjectPageContext, TitleSuggestion, @@ -54,7 +54,6 @@ ProjectStyle, ProjectType, ) -from core.constants import PLACEHOLDER_BRACKET_PATTERNS, PLACEHOLDER_PATTERNS from core.utils import ( generate_random_key, get_jina_embedding, @@ -747,58 +746,100 @@ 
def title_suggestion_schema(self): ) def generate_content(self, content_type=ContentType.SHARING, model=None): - agent = create_generate_blog_post_content_agent(content_type, model) + """Generate blog post content with automatic retry on validation failure.""" + MAX_ATTEMPTS = 2 + previous_validation_issues = [] - # Get all analyzed project pages (from AI and sitemap sources) - project_pages = [ - ProjectPageContext( - url=page.url, - title=page.title, - description=page.description, - summary=page.summary, - always_use=page.always_use, + for attempt in range(1, MAX_ATTEMPTS + 1): + logger.info( + "[Generate Content] Attempt %s/%s", + attempt, + MAX_ATTEMPTS, + project_id=self.project.id, + title=self.title, ) - for page in self.project.project_pages.filter(date_analyzed__isnull=False) - ] - - project_keywords = [ - pk.keyword.keyword_text - for pk in self.project.project_keywords.filter(use=True).select_related("keyword") - ] - deps = BlogPostGenerationContext( - project_details=self.project.project_details, - title_suggestion=self.title_suggestion_schema, - project_pages=project_pages, - content_type=content_type, - project_keywords=project_keywords, - ) + agent = create_generate_blog_post_content_agent(content_type, model) - result = run_agent_synchronously( - agent, - "Generate an article based on the project details and title suggestions.", - deps=deps, - function_name="generate_content", - model_name="BlogPostTitleSuggestion", - ) + project_keywords = [ + pk.keyword.keyword_text + for pk in self.project.project_keywords.filter(use=True).select_related("keyword") + ] - blog_post = GeneratedBlogPost.objects.create_and_validate( - project=self.project, - title=self, - description=result.output.description, - slug=result.output.slug, - tags=result.output.tags, - content=result.output.content, - ) + deps = BlogPostGenerationContext( + project_details=self.project.project_details, + title_suggestion=self.title_suggestion_schema, + content_type=content_type, + 
project_keywords=project_keywords, + previous_validation_issues=previous_validation_issues, + ) - if self.project.enable_automatic_og_image_generation: - async_task( - "core.tasks.generate_og_image_for_blog_post", - blog_post.id, - group="Generate OG Image", + result = run_agent_synchronously( + agent, + "Generate an article based on the project details and title suggestions.", + deps=deps, + function_name="generate_content", + model_name="BlogPostTitleSuggestion", ) - return blog_post + # Create blog post (or recreate on retry) + if attempt == 1: + blog_post = GeneratedBlogPost.objects.create_and_validate( + project=self.project, + title=self, + description=result.output.description, + slug=result.output.slug, + tags=result.output.tags, + content=result.output.content, + ) + else: + # On retry, update existing blog post + blog_post.description = result.output.description + blog_post.slug = result.output.slug + blog_post.tags = result.output.tags + blog_post.content = result.output.content + blog_post.save(update_fields=["description", "slug", "tags", "content"]) + blog_post.run_validation() + + # Check if valid + if blog_post.is_content_valid: + logger.info( + "[Generate Content] Validation passed, inserting links", + blog_post_id=blog_post.id, + attempt=attempt, + ) + blog_post.insert_internal_links(model=model) + + if self.project.enable_automatic_og_image_generation: + async_task( + "core.tasks.generate_og_image_for_blog_post", + blog_post.id, + group="Generate OG Image", + ) + + return blog_post + else: + logger.warning( + "[Generate Content] Validation failed", + blog_post_id=blog_post.id, + attempt=attempt, + issues=blog_post.validation_issues, + ) + + if attempt < MAX_ATTEMPTS: + logger.info( + "[Generate Content] Retrying generation with validation feedback", + blog_post_id=blog_post.id, + validation_issues=blog_post.validation_issues, + ) + previous_validation_issues = blog_post.validation_issues or [] + continue + else: + logger.error( + "[Generate 
Content] Max attempts reached, returning invalid post", + blog_post_id=blog_post.id, + ) + return blog_post class AutoSubmissionSetting(BaseModel): @@ -865,10 +906,9 @@ class GeneratedBlogPost(BaseModel): date_posted = models.DateTimeField(null=True, blank=True) # Validation Issues - Innocent until proven guilty - content_too_short = models.BooleanField(default=False) - has_valid_ending = models.BooleanField(default=True) - placeholders = models.BooleanField(default=False) - starts_with_header = models.BooleanField(default=False) + is_content_valid = models.BooleanField(default=False) + validation_issues = models.JSONField(default=list, blank=True) + validation_attempts = models.IntegerField(default=0) objects = GeneratedBlogPostManager() @@ -881,12 +921,7 @@ def post_title(self): @property def blog_post_content_is_valid(self): - return ( - self.content_too_short is False - and self.has_valid_ending is True - and self.placeholders is False - and self.starts_with_header is False - ) + return self.is_content_valid @property def generated_blog_post_schema(self): @@ -897,154 +932,69 @@ def generated_blog_post_schema(self): content=self.content, ) - @property - def has_placeholders(self) -> bool: - content = self.content or "" - content_lower = content.lower() - - for pattern in PLACEHOLDER_PATTERNS: - if pattern in content_lower: - logger.warning( - "[Blog Post Has Placeholders] Placeholder found", - pattern=pattern, - blog_post_id=self.id, - ) - return True - - for pattern in PLACEHOLDER_BRACKET_PATTERNS: - matches = re.findall(pattern, content_lower) - if matches: - logger.warning( - "[Blog Post Has Placeholders] Bracket Placeholder found", - pattern=pattern, - blog_post_id=self.id, - ) - return True - - logger.info( - "[Blog Post Has Placeholders] No placeholders found", - blog_post_id=self.id, - ) - - return False - - @property - def content_starts_with_header(self) -> bool: - content = self.content or "" - content = content.strip() - - if not content: - return 
False - - header_or_asterisk_pattern = r"^(#{1,6}\s+|\*)" - starts_with_header_or_asterisk = bool(re.match(header_or_asterisk_pattern, content)) - - if starts_with_header_or_asterisk: - logger.warning( - "[Blog Post Starts With Header] Content starts with header or asterisk", - blog_post_id=self.id, - ) - else: - logger.info( - "[Blog Post Starts With Header] Content starts with regular text", - blog_post_id=self.id, - ) - - return starts_with_header_or_asterisk + def run_validation(self): + """Run AI-powered validation and update fields.""" + from core.agents.validate_blog_post_agent import create_validate_blog_post_agent + from core.utils import run_agent_synchronously - def blog_post_has_valid_ending(self) -> bool: - """ - Validate if a blog post has a complete, proper ending using AI analysis. + base_logger_info = { + "blog_post_id": self.id, + "project_id": self.project_id, + "project_name": self.project.name, + "profile_id": self.project.profile.id, + "profile_email": self.project.profile.user.email, + } - Args: - blog_post: GeneratedBlogPost instance to validate + logger.info("[Validation] Running AI validation", **base_logger_info) - Returns: - True if the ending is valid, False otherwise - """ - content = self.content or "" - content = content.strip() + if not self.content: + self.is_content_valid = False + self.validation_issues = ["No content provided"] + self.save(update_fields=["is_content_valid", "validation_issues"]) + logger.warning("[Validation] No content to validate", **base_logger_info) + return - agent = create_validate_blog_post_ending_agent() + agent = create_validate_blog_post_agent() try: result = run_agent_synchronously( agent, - f"Please analyze this blog post and determine if it has a complete ending:\n\n{content}", # noqa: E501 - function_name="blog_post_has_valid_ending", + f"Please validate this blog post content:\n\n{self.content}", + function_name="run_validation", ) - ending_is_valid = result.output + self.is_content_valid = 
result.output.is_valid + self.validation_issues = result.output.issues or [] + self.validation_attempts += 1 + + self.save( + update_fields=["is_content_valid", "validation_issues", "validation_attempts"] + ) - if ending_is_valid: + if self.is_content_valid: logger.info( - "[Blog Post Has Valid Ending] Valid ending", - result=ending_is_valid, - blog_post_id=self.id, + "[Validation] Content validated successfully", + **base_logger_info, + attempts=self.validation_attempts, ) else: logger.warning( - "[Blog Post Has Valid Ending] Invalid ending", - result=ending_is_valid, - blog_post_id=self.id, + "[Validation] Content validation failed", + **base_logger_info, + issues=self.validation_issues, + attempts=self.validation_attempts, ) - return ending_is_valid - except Exception as error: logger.error( - "[Blog Post Has Valid Ending] AI analysis failed", + "[Validation] Validation failed with error", error=str(error), exc_info=True, - content_length=len(content), + **base_logger_info, ) - return False - - def run_validation(self): - """Run validation and update fields in a single query.""" - from core.utils import blog_post_has_valid_ending - - base_logger_info = { - "blog_post_id": self.id, - "project_id": self.project_id, - "project_name": self.project.name, - "profile_id": self.project.profile.id, - "profile_email": self.project.profile.user.email, - } - - logger.info("[Validation] Running validation", **base_logger_info) - - if not self.content: - self.content_too_short = True - self.has_valid_ending = False - self.placeholders = False - self.starts_with_header = False - - else: - content = self.content.strip() - self.content_too_short = len(content) < 3000 - self.has_valid_ending = blog_post_has_valid_ending(self) - self.placeholders = self.has_placeholders - self.starts_with_header = self.content_starts_with_header - - self.save( - update_fields=[ - "content_too_short", - "has_valid_ending", - "placeholders", - "starts_with_header", - ] - ) - - logger.info( - 
"[Validation] Blog post validation complete", - **base_logger_info, - blog_post_title=self.title.title, - content_too_short=self.content_too_short, - has_valid_ending=self.has_valid_ending, - placeholders=self.placeholders, - starts_with_header=self.starts_with_header, - ) + self.is_content_valid = False + self.validation_issues = [f"Validation error: {str(error)}"] + self.save(update_fields=["is_content_valid", "validation_issues"]) def _build_fix_context(self): """Build full context for content editor agent to ensure accurate regeneration.""" @@ -1073,32 +1023,6 @@ def _build_fix_context(self): project_keywords=project_keywords, ) - def fix_header_start(self): - self.refresh_from_db() - self.title.refresh_from_db() - - context = self._build_fix_context() - - agent = create_content_editor_agent() - - result = run_agent_synchronously( - agent, - """ - This blog post starts with a header (like # or ##) instead of regular text. - - Please remove it such that the content starts with regular text, usually an introduction. 
- """, # noqa: E501 - deps=context, - function_name="fix_header_start", - model_name="GeneratedBlogPost", - ) - - self.content = result.output - self.save(update_fields=["content"]) - self.run_validation() - - return True - def submit_blog_post_to_endpoint(self): from core.utils import replace_placeholders @@ -1150,23 +1074,33 @@ def submit_blog_post_to_endpoint(self): ) return False - def fix_content_length(self): + def fix_generated_blog_post(self): + """Attempt to fix validation issues using AI editor.""" self.refresh_from_db() self.title.refresh_from_db() - context = self._build_fix_context() + if self.is_content_valid: + logger.info( + "[Fix Generated Blog Post] Content already valid", + blog_post_id=self.id, + ) + return + context = self._build_fix_context() agent = create_content_editor_agent() + issues_text = "\n".join(f"- {issue}" for issue in self.validation_issues) + result = run_agent_synchronously( agent, - """ - This blog post is too short. - I think something went wrong during generation. - Please regenerate. - """, + f""" +The blog post has the following validation issues: +{issues_text} + +Please fix these issues and return the corrected content. + """, deps=context, - function_name="fix_content_length", + function_name="fix_generated_blog_post", model_name="GeneratedBlogPost", ) @@ -1174,65 +1108,124 @@ def fix_content_length(self): self.save(update_fields=["content"]) self.run_validation() - def fix_valid_ending(self): - self.refresh_from_db() - self.title.refresh_from_db() + # Insert links if now valid + if self.is_content_valid: + self.insert_internal_links() + logger.info( + "[Fix Generated Blog Post] Content fixed and links inserted", + blog_post_id=self.id, + ) - context = self._build_fix_context() + def insert_internal_links(self, model=None): + """ + Insert internal links into the blog post content. 
- agent = create_content_editor_agent() + This method uses an AI agent to intelligently insert links to project pages + within the blog post content where they are contextually relevant. - result = run_agent_synchronously( - agent, - """ - This blog post does not end on an ending that makes sense. - Most likely generation failed at some point and returned half completed content. - Please regenerate the blog post. - """, - deps=context, - function_name="fix_valid_ending", - model_name="GeneratedBlogPost", + Pages marked with always_use=True will be prioritized for linking. + Other pages will be included based on semantic similarity to the content. + + Args: + model: Optional AI model to use. Defaults to the default AI model. + + Returns: + bool: True if successful, False otherwise + """ + from core.utils import get_jina_embedding + + logger.info( + "[Insert Internal Links] Starting internal link insertion", + blog_post_id=self.id, + project_id=self.project_id, ) - self.content = result.output - self.save(update_fields=["content"]) - self.run_validation() + # Get all pages marked as must-use + must_use_pages = self.project.project_pages.filter( + date_analyzed__isnull=False, always_use=True + ) - def fix_placeholders(self): - self.refresh_from_db() - self.title.refresh_from_db() + # Get all other analyzed pages with embeddings + optional_pages_queryset = self.project.project_pages.filter( + date_analyzed__isnull=False, always_use=False, embedding__isnull=False + ) - context = self._build_fix_context() + # Generate embedding for the blog post content to find similar pages + content_embedding = get_jina_embedding(self.content) - agent = create_content_editor_agent() + if content_embedding and optional_pages_queryset.exists(): + # Find semantically similar pages using vector similarity + # Order by cosine distance (lower is more similar) and limit to top 5 + from pgvector.django import CosineDistance - result = run_agent_synchronously( - agent, - """ - The content 
contains placeholders. - Please regenerate the blog post without placeholders. - """, - deps=context, - function_name="fix_placeholders", - model_name="GeneratedBlogPost", + similar_pages = optional_pages_queryset.order_by( + CosineDistance("embedding", content_embedding) + )[:5] + else: + similar_pages = [] + + # Build context for the agent + must_use_page_contexts = [ + ProjectPageContext( + url=page.url, + title=page.title, + description=page.description, + summary=page.summary, + always_use=page.always_use, + ) + for page in must_use_pages + ] + + optional_page_contexts = [ + ProjectPageContext( + url=page.url, + title=page.title, + description=page.description, + summary=page.summary, + always_use=page.always_use, + ) + for page in similar_pages + ] + + deps = InsertInternalLinksContext( + content=self.content, + must_use_pages=must_use_page_contexts, + optional_pages=optional_page_contexts, ) - self.content = result.output - self.save(update_fields=["content"]) - self.run_validation() + agent = create_insert_internal_links_agent(model) - def fix_generated_blog_post(self): - if self.content_too_short is True: - self.fix_content_length() + try: + result = run_agent_synchronously( + agent, + "Please insert internal links into the blog post content where contextually relevant.", # noqa: E501 + deps=deps, + function_name="insert_internal_links", + model_name="GeneratedBlogPost", + ) - if self.has_valid_ending is False: - self.fix_valid_ending() + self.content = result.output.content + self.save(update_fields=["content"]) - if self.placeholders is True: - self.fix_placeholders() + logger.info( + "[Insert Internal Links] Successfully inserted internal links", + blog_post_id=self.id, + project_id=self.project_id, + must_use_pages_count=len(must_use_page_contexts), + optional_pages_count=len(optional_page_contexts), + ) - if self.starts_with_header is True: - self.fix_header_start() + return True + + except Exception as error: + logger.error( + "[Insert Internal 
Links] Failed to insert internal links", + error=str(error), + exc_info=True, + blog_post_id=self.id, + project_id=self.project_id, + ) + return False def generate_og_image(self) -> tuple[bool, str]: """ diff --git a/frontend/templates/components/blog_post_validation_warning.html b/frontend/templates/components/blog_post_validation_warning.html index b3c5a4c..8e054a8 100644 --- a/frontend/templates/components/blog_post_validation_warning.html +++ b/frontend/templates/components/blog_post_validation_warning.html @@ -14,15 +14,9 @@
This blog post has some quality issues that should be addressed before posting: