diff --git a/.env.example b/.env.example index ca16b4c..5851b0f 100644 --- a/.env.example +++ b/.env.example @@ -8,6 +8,13 @@ GOOGLE_SHEETS_ID=14pX6dV6-5KLHYPuU1YsZSzu5SLHVbUxU78YQvIIRV_c # For local development: Put your service account JSON in google-credentials.json # For GitHub Actions: Set as GOOGLE_SHEETS_CREDENTIALS secret +# Notion Configuration +NOTION_TOKEN=secret_xxx +NOTION_TRENDS_DB_ID=xxx +NOTION_CONTENT_PIPELINE_DB_ID=xxx +NOTION_PAIN_POINTS_DB_ID=xxx +NOTION_CASE_STUDIES_DB_ID=xxx + # Optional: Logging level LOG_LEVEL=info diff --git a/.github/workflows/intelligence-hub.yml b/.github/workflows/intelligence-hub.yml deleted file mode 100644 index 25eeb86..0000000 --- a/.github/workflows/intelligence-hub.yml +++ /dev/null @@ -1,124 +0,0 @@ -name: Intelligence Hub - Weekly Scraping - -on: - schedule: - - cron: '0 7 * * 1' # Monday 08:00 CET (07:00 UTC) - - workflow_dispatch: - inputs: - scraper: - description: 'Which scraper to run' - required: true - default: 'all' - type: choice - options: - - all - - market-trends - - icp-monitor - - concurrent-tracker - - test-only - -permissions: - contents: read - -jobs: - scrape-intelligence: - runs-on: ubuntu-latest - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Setup Node.js - uses: actions/setup-node@v4 - with: - node-version: '20' - cache: 'npm' - - - name: Install dependencies - run: npm install - - - name: Run Test (if test-only) - if: github.event.inputs.scraper == 'test-only' - run: node test-deployment.js - - - name: Run Market Trends Scraper - if: github.event.inputs.scraper == 'all' || github.event.inputs.scraper == 'market-trends' || github.event_name == 'schedule' - run: node scrapers/market-trends-scraper.js - env: - BRAVE_API_KEY: ${{ secrets.BRAVE_API_KEY }} - - - name: Run ICP Monitor - if: github.event.inputs.scraper == 'all' || github.event.inputs.scraper == 'icp-monitor' || github.event_name == 'schedule' - run: node scrapers/icp-monitor.js - env: - BRAVE_API_KEY: ${{ secrets.BRAVE_API_KEY }} - - - name: Run Concurrent Tracker - if: github.event.inputs.scraper == 'all' || github.event.inputs.scraper == 'concurrent-tracker' || github.event_name == 'schedule' - run: node scrapers/concurrent-tracker.js - env: - BRAVE_API_KEY: ${{ secrets.BRAVE_API_KEY }} - - - name: Upload CSV Artifacts - if: github.event.inputs.scraper != 'test-only' - uses: actions/upload-artifact@v4 - with: - name: intelligence-hub-${{ github.run_number }} - path: | - scraper-output/*.csv - scraper-output/*_summary_*.txt - retention-days: 30 - - - name: Create Summary - if: always() - run: | - echo "## Intelligence Hub Run Summary" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "**Date:** $(date)" >> $GITHUB_STEP_SUMMARY - echo "**Run Number:** ${{ github.run_number }}" >> $GITHUB_STEP_SUMMARY - echo "**Scraper:** ${{ github.event.inputs.scraper || 'all (scheduled)' }}" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "### Output Files" >> $GITHUB_STEP_SUMMARY - ls -lh scraper-output/*.csv 2>/dev/null || echo "No CSV files generated" - echo "" >> $GITHUB_STEP_SUMMARY - echo "### Next Steps" >> $GITHUB_STEP_SUMMARY - echo "1. Download CSV artifacts" >> $GITHUB_STEP_SUMMARY - echo "2. Import to Google Sheets" >> $GITHUB_STEP_SUMMARY - echo "3. Review insights in Dashboard" >> $GITHUB_STEP_SUMMARY - - - name: Send Slack notification on success - if: success() - run: | - curl -X POST ${{ secrets.SLACK_WEBHOOK_URL }} \ - -H 'Content-Type: application/json' \ - -d '{ - "text": "βœ… Intelligence Hub Completed Successfully", - "blocks": [ - { - "type": "section", - "text": { - "type": "mrkdwn", - "text": "*Intelligence Hub* 🟒\nβœ… Market intelligence updated\nπŸ“Š Data synced to Notion databases\n\n<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View Run Details>" - } - } - ] - }' - - - name: Send Slack notification on failure - if: failure() - run: | - curl -X POST ${{ secrets.SLACK_WEBHOOK_URL }} \ - -H 'Content-Type: application/json' \ - -d '{ - "text": "❌ Intelligence Hub workflow failed", - "blocks": [ - { - "type": "section", - "text": { - "type": "mrkdwn", - "text": "*Intelligence Hub Failed* πŸ”΄\nWorkflow: ${{ github.workflow }}\nRun: ${{ github.run_number }}\n\n<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|Check Logs>" - } - } - ] - }' diff --git a/.github/workflows/weekly-concurrent-activity.yml b/.github/workflows/weekly-concurrent-activity.yml index 0c462a3..4906a72 100644 --- a/.github/workflows/weekly-concurrent-activity.yml +++ b/.github/workflows/weekly-concurrent-activity.yml @@ -17,15 +17,15 @@ jobs: node-version: '20' cache: 'npm' - run: npm install - - name: Decode credentials - run: echo "${{ secrets.GOOGLE_CREDENTIALS }}" | base64 -d > credentials.json + - name: Run concurrent-tracker run: node concurrent-tracker.js - - name: Push to Sheets + + - name: Push to Google Sheets + if: ${{ secrets.GOOGLE_CREDENTIALS != '' }} env: + GOOGLE_CREDENTIALS: ${{ secrets.GOOGLE_CREDENTIALS }} SHEET_ID: ${{ secrets.SHEET_ID }} SHEET_TAB: "Concurrent Activity" run: node push-to-sheets.js - - name: Cleanup credentials - if: always() - run: rm -f credentials.json + continue-on-error: true diff --git a/.github/workflows/weekly-icp-activity.yml b/.github/workflows/weekly-icp-activity.yml index 10e45b4..4ba2c50 100644 --- a/.github/workflows/weekly-icp-activity.yml +++ b/.github/workflows/weekly-icp-activity.yml @@ -17,15 +17,15 @@ jobs: node-version: '20' cache: 'npm' - run: npm install - - name: Decode credentials - run: echo "${{ secrets.GOOGLE_CREDENTIALS }}" | base64 -d > credentials.json + - name: Run icp-monitor run: node icp-monitor.js - - name: Push to Sheets + + - name: Push to Google Sheets + if: ${{ secrets.GOOGLE_CREDENTIALS != '' }} env: + GOOGLE_CREDENTIALS: ${{ secrets.GOOGLE_CREDENTIALS }} SHEET_ID: ${{ secrets.SHEET_ID }} SHEET_TAB: "ICP Monitor" run: node push-to-sheets.js - - name: Cleanup credentials - if: always() - run: rm -f credentials.json + continue-on-error: true diff --git a/.github/workflows/weekly-market-trends.yml b/.github/workflows/weekly-market-trends.yml index e5b548d..733b123 100644 --- a/.github/workflows/weekly-market-trends.yml +++ b/.github/workflows/weekly-market-trends.yml @@ -17,15 +17,21 @@ jobs: node-version: '20' cache: 'npm' - run: npm install - - name: Decode credentials - run: echo "${{ secrets.GOOGLE_CREDENTIALS }}" | base64 -d > credentials.json + - name: Run market-trends-scraper run: node market-trends-scraper.js - - name: Push to Sheets + + - name: Push to Google Sheets + if: ${{ secrets.GOOGLE_CREDENTIALS != '' }} env: + GOOGLE_CREDENTIALS: ${{ secrets.GOOGLE_CREDENTIALS }} SHEET_ID: ${{ secrets.SHEET_ID }} SHEET_TAB: "Market Trends" run: node push-to-sheets.js - - name: Cleanup credentials - if: always() - run: rm -f credentials.json + continue-on-error: true + + - name: Push to Notion + env: + NOTION_TOKEN: ${{ secrets.NOTION_TOKEN }} + NOTION_TRENDS_DB_ID: ${{ secrets.NOTION_TRENDS_DB_ID }} + run: node push-to-notion.js diff --git a/.gitignore b/.gitignore index 9ece4ad..8fbc1c4 100644 --- a/.gitignore +++ b/.gitignore @@ -36,7 +36,6 @@ create-automation-db.js create-notion-db.js inspect-database.js linkedin-activity-tracker.js -push-to-notion.js # Legacy directories intelligence-hub/ diff --git a/intelligence-hub/.github/workflows/intelligence-hub.yml b/intelligence-hub/.github/workflows/intelligence-hub.yml deleted file mode 100644 index ed301d9..0000000 --- a/intelligence-hub/.github/workflows/intelligence-hub.yml +++ /dev/null @@ -1,63 +0,0 @@ -name: Intelligence Hub - Weekly Scraping - -on: - schedule: - - cron: '0 9 * * 1' # Every Monday 9 AM CET - workflow_dispatch: # Manual trigger - -jobs: - gather_market_data: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v3 - - - name: Setup Node.js - uses: actions/setup-node@v3 - with: - node-version: '18' - - - name: Install dependencies - run: | - npm install @notionhq/client - npm install dotenv - - - name: Run scrapers - env: - NOTION_TOKEN: ${{ secrets.NOTION_TOKEN }} - NOTION_COMPANIES_DB_ID: ${{ secrets.NOTION_COMPANIES_DB_ID }} - NOTION_VACANCIES_DB_ID: ${{ secrets.NOTION_VACANCIES_DB_ID }} - NOTION_TRENDS_DB_ID: ${{ secrets.NOTION_TRENDS_DB_ID }} - NOTION_METADATA_DB_ID: ${{ secrets.NOTION_METADATA_DB_ID }} - NODE_ENV: production - run: | - echo "πŸš€ Starting Intelligence Hub scrapers..." - - # Market Trends Scraper - echo "πŸ“Š Running: Market Trends Scraper" - node scrapers/market-trends-scraper.js || true - - # ICP Monitor - echo "🎯 Running: ICP Monitor" - node scrapers/icp-monitor.js || true - - # Concurrent Tracker - echo "βš”οΈ Running: Concurrent Tracker" - node scrapers/concurrent-tracker.js || true - - echo "βœ… All scrapers completed" - - - name: Commit & push data - run: | - git config user.name "Recruitin Bot" - git config user.email "bot@recruitin.nl" - git add data/intelligence/ 2>/dev/null || true - git commit -m "πŸ“Š Intelligence Hub update - $(date +'%Y-%m-%d %H:%M')" 2>/dev/null || true - git push || true - - - name: Notify completion - if: always() - run: | - echo "βœ… Intelligence Hub pipeline complete" - echo "πŸ“Š Data synced to Notion" - echo "πŸ”” Check Notion databases for updates" diff --git a/intelligence-hub/.github/workflows/linkedin-newsletter.yml b/intelligence-hub/.github/workflows/linkedin-newsletter.yml deleted file mode 100644 index 2f857bb..0000000 --- a/intelligence-hub/.github/workflows/linkedin-newsletter.yml +++ /dev/null @@ -1,55 +0,0 @@ -name: LinkedIn Newsletter Automation - -on: - schedule: - - cron: '0 6 * * 1' # Monday 6 AM UTC (7 AM CET) - workflow_dispatch: - -jobs: - generate-linkedin-content: - runs-on: ubuntu-latest - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.11' - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - - - name: Run LinkedIn Newsletter Automation - env: - NOTION_TOKEN: ${{ secrets.NOTION_TOKEN }} - TRENDS_DB_ID: ${{ secrets.TRENDS_DB_ID }} - CONTENT_PIPELINE_DB_ID: ${{ secrets.CONTENT_PIPELINE_DB_ID }} - PAIN_POINTS_DB_ID: ${{ secrets.PAIN_POINTS_DB_ID }} - CASE_STUDIES_DB_ID: ${{ secrets.CASE_STUDIES_DB_ID }} - CLAUDE_API_KEY: ${{ secrets.CLAUDE_API_KEY }} - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} - run: | - python linkedin-newsletter-automation.py - - - name: Send Slack notification on failure - if: failure() - uses: slackapi/slack-github-action@v1 - with: - webhook-url: ${{ secrets.SLACK_WEBHOOK }} - payload: | - { - "text": "❌ LinkedIn Newsletter Automation failed", - "blocks": [ - { - "type": "section", - "text": { - "type": "mrkdwn", - "text": "*LinkedIn Newsletter Automation*\n❌ Workflow failed\n<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View Details>" - } - } - ] - } diff --git a/intelligence-hub/linkedin-newsletter-automation.py b/intelligence-hub/linkedin-newsletter-automation.py deleted file mode 100644 index f74c179..0000000 --- a/intelligence-hub/linkedin-newsletter-automation.py +++ /dev/null @@ -1,538 +0,0 @@ -#!/usr/bin/env python3 -""" -LinkedIn Newsletter Automation Script -Notion Trends β†’ Claude generates content β†’ Notion drafts -Manual publishing (copy-paste to LinkedIn/Email) - -Usage: - python linkedin-newsletter-automation.py - -Requires: - - NOTION_TOKEN (in .env) - - CLAUDE_API_KEY (in .env) - - Notion databases: Trends, CP Pain Points, Case Studies, Content Pipeline -""" - -import os -import json -from datetime import datetime -from typing import Optional -import anthropic -from notion_client import Client -from dotenv import load_dotenv - -# Load environment variables -load_dotenv() - -# Initialize clients -NOTION_TOKEN = os.getenv("NOTION_TOKEN") -CLAUDE_API_KEY = os.getenv("CLAUDE_API_KEY") -TRENDS_DB_ID = os.getenv("TRENDS_DB_ID") -CONTENT_PIPELINE_DB_ID = os.getenv("CONTENT_PIPELINE_DB_ID") -PAIN_POINTS_DB_ID = os.getenv("PAIN_POINTS_DB_ID") -CASE_STUDIES_DB_ID = os.getenv("CASE_STUDIES_DB_ID") - -notion = Client(auth=NOTION_TOKEN) -os.environ["ANTHROPIC_API_KEY"] = CLAUDE_API_KEY -claude_client = anthropic.Anthropic() - -class LinkedInNewsletterAutomation: - """Main automation class""" - - def __init__(self): - self.trend_data = {} - self.pain_points = [] - self.case_study = {} - self.content = { - "post": None, - "artikel": None, - "company_post": None, - "email": None - } - - def get_latest_trend(self) -> dict: - """Get latest trend page from Notion""" - print("πŸ“Š Fetching latest trend...") - - response = notion.databases.query( - database_id=TRENDS_DB_ID, - sorts=[{"timestamp": "last_edited_time", "direction": "descending"}], - page_size=1 - ) - - if not response["results"]: - print("❌ No trends found") - return None - - page = response["results"][0] - trend_id = page["id"] - - # Extract properties - properties = page["properties"] - - trend_data = { - "id": trend_id, - "period": self._get_property(properties, "Period"), - "companies": self._get_property(properties, "Active Companies"), - "analysis": self._get_property(properties, "Analysis"), - "salary": self._get_property(properties, "Avg Salary"), - "growth": self._get_property(properties, "Growth Rate"), - "sector": self._get_property(properties, "Sector"), - } - - print(f"βœ… Trend found: {trend_data['period']} - {trend_data['sector']}") - return trend_data - - def get_cp_pain_points(self, limit: int = 3) -> list: - """Get current CP pain points""" - print("πŸ’‘ Fetching CP pain points...") - - response = notion.databases.query( - database_id=PAIN_POINTS_DB_ID, - sorts=[{"timestamp": "last_edited_time", "direction": "descending"}], - page_size=limit - ) - - pain_points = [] - for page in response["results"]: - properties = page["properties"] - pain = { - "name": self._get_property(properties, "Pain Point"), - "description": self._get_property(properties, "Description"), - "sector": self._get_property(properties, "Sector"), - } - pain_points.append(pain) - - print(f"βœ… Found {len(pain_points)} pain points") - return pain_points - - def get_latest_case_study(self) -> dict: - """Get latest usable case study""" - print("🎯 Fetching case study...") - - response = notion.databases.query( - database_id=CASE_STUDIES_DB_ID, - filter={ - "property": "Usable in content?", - "select": {"equals": "Yes"} - }, - sorts=[{"timestamp": "last_edited_time", "direction": "descending"}], - page_size=1 - ) - - if not response["results"]: - print("⚠️ No usable case studies found") - return { - "company": "Anonymous Client", - "challenge": "Hiring challenge", - "result": "Successfully hired" - } - - page = response["results"][0] - properties = page["properties"] - - case = { - "company": self._get_property(properties, "Client Company"), - "sector": self._get_property(properties, "Sector"), - "challenge": self._get_property(properties, "Challenge"), - "solution": self._get_property(properties, "Solution"), - "result": self._get_property(properties, "Result/Metrics"), - } - - print(f"βœ… Case study: {case['company']}") - return case - - def _get_property(self, properties: dict, name: str) -> str: - """Extract property value from Notion page""" - if name not in properties: - return "" - - prop = properties[name] - prop_type = prop["type"] - - if prop_type == "title": - return prop["title"][0]["plain_text"] if prop["title"] else "" - elif prop_type == "rich_text": - return prop["rich_text"][0]["plain_text"] if prop["rich_text"] else "" - elif prop_type == "number": - return str(prop["number"]) if prop["number"] else "0" - elif prop_type == "select": - return prop["select"]["name"] if prop["select"] else "" - elif prop_type == "multi_select": - return ", ".join([item["name"] for item in prop["multi_select"]]) - else: - return "" - - def generate_linkedin_post(self) -> str: - """Generate LinkedIn personal post""" - print("\nπŸš€ Generating LinkedIn Post...") - - pain_summary = "\n".join([f"- {p['name']}: {p['description']}" for p in self.pain_points[:2]]) - - prompt = f"""You are a recruitment expert positioning yourself as a thought leader to mid-market companies (50-800 FTE). - -TRENDS DATA: -- Period: {self.trend_data['period']} -- Active Companies: {self.trend_data['companies']} -- Analysis: {self.trend_data['analysis']} -- Avg Salary: {self.trend_data['salary']} -- Growth Rate: {self.trend_data['growth']}% -- Sector: {self.trend_data['sector']} - -CP PAIN POINTS: -{pain_summary} - -CURRENT CASE STUDY: -Company: {self.case_study['company']} -Challenge: {self.case_study['challenge']} -Result: {self.case_study['result']} - -REQUIREMENTS: -- Length: 120-200 words -- Tone: 70% thought leadership, 30% customer intimacy -- Structure: - 1. Hook: Data + emoji - 2. Market insight (20-30 words) - 3. Customer pain: 'Here's why YOUR team is struggling' (30-40 words) - 4. What works: Reference case study proof (30-40 words) - 5. Action: 'What to do now' (20-30 words) - 6. CTA: Link to article/case/consultation - -TONE RULES: -- Lead with data (TL) -- Acknowledge pain (CI) -- Reference success (proof) -- Direct, confident, helpful -- NO jargon, NO corporate speak -- Short sentences -- Emoji at start - -OUTPUT: Post content only (no markdown, no headers, just the post text)""" - - response = claude_client.messages.create( - model="claude-opus-4-1", - max_tokens=600, - messages=[{"role": "user", "content": prompt}] - ) - - content = response.content[0].text - print("βœ… LinkedIn Post generated") - return content - - def generate_linkedin_artikel(self) -> str: - """Generate LinkedIn personal artikel""" - print("\nπŸ“š Generating LinkedIn Artikel...") - - pain_summary = "\n".join([f"- {p['name']}: {p['description']}" for p in self.pain_points]) - - prompt = f"""You are a recruitment thought leader writing for mid-market company decision-makers. - -TRENDS: -- Period: {self.trend_data['period']} -- Sector: {self.trend_data['sector']} -- Analysis: {self.trend_data['analysis']} -- Growth: {self.trend_data['growth']}% - -PAIN POINTS: -{pain_summary} - -CASE STUDY: -{self.case_study['company']}: {self.case_study['challenge']} β†’ {self.case_study['result']} - -ARTICLE STRUCTURE: -1. Title: Catchy, keyword-rich, frames problem -2. Intro (150w): Hook + problem statement -3. Section 1: 'The Market Reality' (200w) - └─ Trend data + what it means -4. Section 2: 'Your Team's Real Problem' (250w) - └─ Pain points + why it matters -5. Section 3: 'What Works' (300w) - └─ Solutions + case study proof -6. Section 4: 'Next Steps' (150w) - └─ Action plan -7. CTA: Consultation offer - -TONE: -- 70% thought leadership (what's happening, why, what works) -- 30% customer intimacy (your pain, your solution) -- Confident, direct, insider perspective -- Data-backed -- Contrarian where possible - -VOICE: -- Like talking to CEO/HR leader -- No fluff -- Every paragraph has value -- Short paragraphs -- Stories > generic advice - -OUTPUT: Full article with headers (use ## for headers, not ###)""" - - response = claude_client.messages.create( - model="claude-opus-4-1", - max_tokens=2500, - messages=[{"role": "user", "content": prompt}] - ) - - content = response.content[0].text - print("βœ… LinkedIn Artikel generated") - return content - - def generate_company_post(self) -> str: - """Generate Recruitin company page post""" - print("\n🏒 Generating Company Post...") - - prompt = f"""You are writing for Recruitin B.V. company LinkedIn page. - -TRENDS: -- Period: {self.trend_data['period']} -- Sector: {self.trend_data['sector']} -- Companies: {self.trend_data['companies']} -- Salary: {self.trend_data['salary']} -- Growth: {self.trend_data['growth']}% - -COMPANY POST REQUIREMENTS: -- Length: 80-150 words -- Audience: Job seekers + Companies + Recruiters -- Tone: Authority + helpfulness + brand voice -- Structure: - 1. Trend headline - 2. Data (3 key numbers) - 3. 'What this means for candidates' - 4. 'What this means for companies' - 5. Recruitin positioning (what we do) - 6. CTA (Open roles / Consult) - -VOICE: -- Professional but friendly -- Authoritative (we know market) -- Value-first -- Brand relevant - -OUTPUT: Company post only (no markdown)""" - - response = claude_client.messages.create( - model="claude-opus-4-1", - max_tokens=500, - messages=[{"role": "user", "content": prompt}] - ) - - content = response.content[0].text - print("βœ… Company Post generated") - return content - - def generate_email_newsletter(self) -> dict: - """Generate email newsletter""" - print("\nπŸ“§ Generating Email Newsletter...") - - pain_summary = "\n".join([f"- {p['name']}: {p['description']}" for p in self.pain_points[:3]]) - - prompt = f"""You are a recruitment email marketer writing weekly to prospects and clients. - -TRENDS: -- Period: {self.trend_data['period']} -- Sector: {self.trend_data['sector']} -- Companies: {self.trend_data['companies']} -- Salary: {self.trend_data['salary']} -- Growth: {self.trend_data['growth']}% - -PAIN POINTS: -{pain_summary} - -CASE STUDY: -{self.case_study['company']}: {self.case_study['challenge']} β†’ {self.case_study['result']} - -EMAIL STRUCTURE: -1. Subject line: 40-50 chars, emoji + hook (SEPARATE FROM BODY) -2. Greeting: "Hi [First name]," -3. Hook (50w): Why this email matters -4. The Numbers (75w): 3 key trends + interpretation -5. What This Means (150w): Pain points for 2 audiences (hiring + candidates) -6. Here's What Works (100w): Solutions + case reference -7. Resource Link (50w): Suggest a resource (article/benchmark) -8. CTA: One clear action -9. Sign-off: "[Your name]\nRecruitin B.V." - -TONE: -- Conversational (like friend) -- Insider wisdom -- No jargon -- Scannable -- Story + data -- Helpful, not salesy - -OUTPUT FORMAT: -SUBJECT: [subject line here] -BODY: -[email body here]""" - - response = claude_client.messages.create( - model="claude-opus-4-1", - max_tokens=1500, - messages=[{"role": "user", "content": prompt}] - ) - - content = response.content[0].text - - # Parse subject and body - parts = content.split("BODY:", 1) - subject = parts[0].replace("SUBJECT:", "").strip() - body = parts[1].strip() if len(parts) > 1 else content - - print("βœ… Email Newsletter generated") - return {"subject": subject, "body": body} - - def save_to_notion(self, post: str, artikel: str, company_post: str, email: dict) -> None: - """Save all content to Notion Content Pipeline""" - print("\nπŸ’Ύ Saving to Notion...") - - trend_relation = [{"id": self.trend_data["id"]}] - - # Save LinkedIn Post - self._create_content_item( - title=f"Post: {self.trend_data['period']}", - type="Post", - body=post, - platform="Personal", - trend_id=self.trend_data["id"] - ) - print("βœ… LinkedIn Post saved") - - # Save Artikel - self._create_content_item( - title=f"Artikel: {self.trend_data['period']}", - type="Artikel", - body=artikel, - platform="Personal", - trend_id=self.trend_data["id"] - ) - print("βœ… Artikel saved") - - # Save Company Post - self._create_content_item( - title=f"Company: {self.trend_data['period']}", - type="Company Post", - body=company_post, - platform="Company", - trend_id=self.trend_data["id"] - ) - print("βœ… Company Post saved") - - # Save Email - email_body = f"Subject: {email['subject']}\n\n{email['body']}" - self._create_content_item( - title=f"Email: {self.trend_data['period']}", - type="Email", - body=email_body, - platform="Email", - trend_id=self.trend_data["id"] - ) - print("βœ… Email saved") - - def _create_content_item(self, title: str, type: str, body: str, platform: str, trend_id: str) -> None: - """Create content item in Notion""" - notion.pages.create( - parent={"database_id": CONTENT_PIPELINE_DB_ID}, - properties={ - "Title": { - "title": [{"text": {"content": title}}] - }, - "Type": { - "select": {"name": type} - }, - "Body": { - "rich_text": [{"text": {"content": body}}] - }, - "Status": { - "select": {"name": "Ready"} - }, - "Platform": { - "select": {"name": platform} - }, - "Week": { - "relation": [{"id": trend_id}] - }, - "Tone Check": { - "checkbox": True - } - } - ) - - def print_preview(self) -> None: - """Print content preview""" - print("\n" + "="*80) - print("πŸ“‹ CONTENT PREVIEW") - print("="*80) - - print("\nπŸ“± LINKEDIN POST:") - print("-" * 80) - print(self.content["post"][:300] + "...\n") - - print("πŸ“„ LINKEDIN ARTIKEL:") - print("-" * 80) - print(self.content["artikel"][:300] + "...\n") - - print("🏒 COMPANY POST:") - print("-" * 80) - print(self.content["company_post"][:200] + "...\n") - - print("πŸ“§ EMAIL:") - print("-" * 80) - print(f"Subject: {self.content['email']['subject']}") - print(self.content["email"]["body"][:200] + "...\n") - - def run(self) -> None: - """Run full automation""" - print("πŸš€ LinkedIn Newsletter Automation Starting...\n") - - # Fetch data - self.trend_data = self.get_latest_trend() - if not self.trend_data: - print("❌ No trends to process. Exiting.") - return - - self.pain_points = self.get_cp_pain_points() - self.case_study = self.get_latest_case_study() - - # Generate content - self.content["post"] = self.generate_linkedin_post() - self.content["artikel"] = self.generate_linkedin_artikel() - self.content["company_post"] = self.generate_company_post() - self.content["email"] = self.generate_email_newsletter() - - # Save to Notion - self.save_to_notion( - self.content["post"], - self.content["artikel"], - self.content["company_post"], - self.content["email"] - ) - - # Preview - self.print_preview() - - print("\n" + "="*80) - print("βœ… AUTOMATION COMPLETE") - print("="*80) - print("\nπŸ“ Next steps:") - print("1. Open Notion β†’ LinkedIn Content Pipeline") - print("2. Review 4 content items (Status: Ready)") - print("3. Copy content to LinkedIn / Email tool") - print("4. Post and track engagement") - print("\nπŸ’° Cost: ~€1 (Claude API calls)") - print("⏱️ Time saved: 2-3 hours of writing") - - -def main(): - """Main entry point""" - try: - automation = LinkedInNewsletterAutomation() - automation.run() - except Exception as e: - print(f"\n❌ Error: {str(e)}") - import traceback - traceback.print_exc() - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/package-lock.json b/package-lock.json index 91e4d99..0a2af32 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,6 +9,11 @@ "version": "1.0.0", "license": "UNLICENSED", "dependencies": { + "@notionhq/client": "^2.2.15", + "csv-parse": "^5.5.6", + "csv-writer": "^1.6.0", + "google-auth-library": "^9.14.0", + "googleapis": "^144.0.0", "puppeteer": "^21.11.0" }, "engines": { @@ -38,6 +43,19 @@ "node": ">=6.9.0" } }, + "node_modules/@notionhq/client": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/@notionhq/client/-/client-2.3.0.tgz", + "integrity": "sha512-l7WqTCpQqC+HibkB9chghONQTYcxNQT0/rOJemBfmuKQRTu2vuV8B3yA395iKaUdDo7HI+0KvQaz9687Xskzkw==", + "license": "MIT", + "dependencies": { + "@types/node-fetch": "^2.5.10", + "node-fetch": "^2.6.1" + }, + "engines": { + "node": ">=12" + } + }, "node_modules/@puppeteer/browsers": { "version": "1.9.1", "resolved": "https://registry.npmjs.org/@puppeteer/browsers/-/browsers-1.9.1.tgz", @@ -70,11 +88,20 @@ "resolved": "https://registry.npmjs.org/@types/node/-/node-25.0.8.tgz", "integrity": "sha512-powIePYMmC3ibL0UJ2i2s0WIbq6cg6UyVFQxSCpaPxxzAaziRfimGivjdF943sSGV6RADVbk0Nvlm5P/FB44Zg==", "license": "MIT", - "optional": true, "dependencies": { "undici-types": "~7.16.0" } }, + "node_modules/@types/node-fetch": { + "version": "2.6.13", + "resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.13.tgz", + "integrity": "sha512-QGpRVpzSaUs30JBSGPjOg4Uveu384erbHBoT1zeONvyCfwQxIkUshLAOqN/k9EjGviPRmWTTe6aH2qySWKTVSw==", + "license": "MIT", + "dependencies": { + "@types/node": "*", + "form-data": "^4.0.4" + } + }, "node_modules/@types/yauzl": { "version": "2.10.3", "resolved": "https://registry.npmjs.org/@types/yauzl/-/yauzl-2.10.3.tgz", @@ -136,6 +163,12 @@ "node": ">=4" } }, + "node_modules/asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==", + "license": "MIT" + }, "node_modules/b4a": { "version": "1.7.3", "resolved": "https://registry.npmjs.org/b4a/-/b4a-1.7.3.tgz", @@ -193,6 +226,15 @@ "node": ">=10.0.0" } }, + "node_modules/bignumber.js": { + "version": "9.3.1", + "resolved": "https://registry.npmjs.org/bignumber.js/-/bignumber.js-9.3.1.tgz", + "integrity": "sha512-Ko0uX15oIUS7wJ3Rb30Fs6SkVbLmPBAKdlm7q9+ak9bbIeFf0MwuBsQV6z7+X768/cHsfg+WlysDWJcmthjsjQ==", + "license": "MIT", + "engines": { + "node": "*" + } + }, "node_modules/buffer": { "version": "5.7.1", "resolved": "https://registry.npmjs.org/buffer/-/buffer-5.7.1.tgz", @@ -226,6 +268,41 @@ "node": "*" } }, + "node_modules/buffer-equal-constant-time": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/buffer-equal-constant-time/-/buffer-equal-constant-time-1.0.1.tgz", + "integrity": "sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==", + "license": "BSD-3-Clause" + }, + "node_modules/call-bind-apply-helpers": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", + "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/call-bound": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/call-bound/-/call-bound-1.0.4.tgz", + "integrity": "sha512-+ys997U96po4Kx/ABpBCqhA9EuxJaQWDQg7295H4hBphv3IZg0boBKuwYpt4YXp6MZ5AmZQnU/tyMTlRpaSejg==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "get-intrinsic": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/callsites": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", @@ -280,6 +357,18 @@ "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", "license": "MIT" }, + "node_modules/combined-stream": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "license": "MIT", + "dependencies": { + "delayed-stream": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, "node_modules/cosmiconfig": { "version": "9.0.0", "resolved": "https://registry.npmjs.org/cosmiconfig/-/cosmiconfig-9.0.0.tgz", @@ -315,6 +404,18 @@ "node-fetch": "^2.6.12" } }, + "node_modules/csv-parse": { + "version": "5.6.0", + "resolved": "https://registry.npmjs.org/csv-parse/-/csv-parse-5.6.0.tgz", + "integrity": "sha512-l3nz3euub2QMg5ouu5U09Ew9Wf6/wQ8I++ch1loQ0ljmzhmfZYrH9fflS22i/PQEvsPvxCwxgz5q7UB8K1JO4Q==", + "license": "MIT" + }, + "node_modules/csv-writer": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/csv-writer/-/csv-writer-1.6.0.tgz", + "integrity": "sha512-NOx7YDFWEsM/fTRAJjRpPp8t+MKRVvniAg9wQlUKx20MFrPs73WLJhFf5iteqrxNYnsy924K3Iroh3yNHeYd2g==", + "license": "MIT" + }, "node_modules/data-uri-to-buffer": { "version": "6.0.2", "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-6.0.2.tgz", @@ -355,12 +456,44 @@ "node": ">= 14" } }, + "node_modules/delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", + "license": "MIT", + "engines": { + "node": ">=0.4.0" + } + }, "node_modules/devtools-protocol": { "version": "0.0.1232444", "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1232444.tgz", "integrity": "sha512-pM27vqEfxSxRkTMnF+XCmxSEb6duO5R+t8A9DEEJgy4Wz2RVanje2mmj99B6A3zv2r/qGfYlOvYznUhuokizmg==", "license": "BSD-3-Clause" }, + "node_modules/dunder-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", + "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.1", + "es-errors": "^1.3.0", + "gopd": "^1.2.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/ecdsa-sig-formatter": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/ecdsa-sig-formatter/-/ecdsa-sig-formatter-1.0.11.tgz", + "integrity": "sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ==", + "license": "Apache-2.0", + "dependencies": { + "safe-buffer": "^5.0.1" + } + }, "node_modules/emoji-regex": { "version": "8.0.0", "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", @@ -394,6 +527,51 @@ "is-arrayish": "^0.2.1" } }, + "node_modules/es-define-property": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", + "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-errors": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-object-atoms": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", + "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-set-tostringtag": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz", + "integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.6", + "has-tostringtag": "^1.0.2", + "hasown": "^2.0.2" + }, + "engines": { + "node": ">= 0.4" + } + }, "node_modules/escalade": { "version": "3.2.0", "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz", @@ -464,6 +642,12 @@ "bare-events": "^2.7.0" } }, + "node_modules/extend": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", + "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==", + "license": "MIT" + }, "node_modules/extract-zip": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/extract-zip/-/extract-zip-2.0.1.tgz", @@ -499,6 +683,61 @@ "pend": "~1.2.0" } }, + "node_modules/form-data": { + "version": "4.0.5", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.5.tgz", + "integrity": "sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==", + "license": "MIT", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "es-set-tostringtag": "^2.1.0", + "hasown": "^2.0.2", + "mime-types": "^2.1.12" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/function-bind": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", + "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/gaxios": { + "version": "6.7.1", + "resolved": "https://registry.npmjs.org/gaxios/-/gaxios-6.7.1.tgz", + "integrity": "sha512-LDODD4TMYx7XXdpwxAVRAIAuB0bzv0s+ywFonY46k126qzQHT9ygyoa9tncmOiQmmDrik65UYsEkv3lbfqQ3yQ==", + "license": "Apache-2.0", + "dependencies": { + "extend": "^3.0.2", + "https-proxy-agent": "^7.0.1", + "is-stream": "^2.0.0", + "node-fetch": "^2.6.9", + "uuid": "^9.0.1" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/gcp-metadata": { + "version": "6.1.1", + "resolved": "https://registry.npmjs.org/gcp-metadata/-/gcp-metadata-6.1.1.tgz", + "integrity": "sha512-a4tiq7E0/5fTjxPAaH4jpjkSv/uCaU2p5KC6HVGrvl0cDjA8iBZv4vv1gyzlmK0ZUKqwpOyQMKzZQe3lTit77A==", + "license": "Apache-2.0", + "dependencies": { + "gaxios": "^6.1.1", + "google-logging-utils": "^0.0.2", + "json-bigint": "^1.0.0" + }, + "engines": { + "node": ">=14" + } + }, "node_modules/get-caller-file": { "version": "2.0.5", "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", @@ -508,6 +747,43 @@ "node": "6.* || 8.* || >= 10.*" } }, + "node_modules/get-intrinsic": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", + "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "es-define-property": "^1.0.1", + "es-errors": "^1.3.0", + "es-object-atoms": "^1.1.1", + "function-bind": "^1.1.2", + "get-proto": "^1.0.1", + "gopd": "^1.2.0", + "has-symbols": "^1.1.0", + "hasown": "^2.0.2", + "math-intrinsics": "^1.1.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", + "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", + "license": "MIT", + "dependencies": { + "dunder-proto": "^1.0.1", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + } + }, "node_modules/get-stream": { "version": "5.2.0", "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-5.2.0.tgz", @@ -537,6 +813,126 @@ "node": ">= 14" } }, + "node_modules/google-auth-library": { + "version": "9.15.1", + "resolved": "https://registry.npmjs.org/google-auth-library/-/google-auth-library-9.15.1.tgz", + "integrity": "sha512-Jb6Z0+nvECVz+2lzSMt9u98UsoakXxA2HGHMCxh+so3n90XgYWkq5dur19JAJV7ONiJY22yBTyJB1TSkvPq9Ng==", + "license": "Apache-2.0", + "dependencies": { + "base64-js": "^1.3.0", + "ecdsa-sig-formatter": "^1.0.11", + "gaxios": "^6.1.1", + "gcp-metadata": "^6.1.0", + "gtoken": "^7.0.0", + "jws": "^4.0.0" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/google-logging-utils": { + "version": "0.0.2", + "resolved": "https://registry.npmjs.org/google-logging-utils/-/google-logging-utils-0.0.2.tgz", + "integrity": "sha512-NEgUnEcBiP5HrPzufUkBzJOD/Sxsco3rLNo1F1TNf7ieU8ryUzBhqba8r756CjLX7rn3fHl6iLEwPYuqpoKgQQ==", + "license": "Apache-2.0", + "engines": { + "node": ">=14" + } + }, + "node_modules/googleapis": { + "version": "144.0.0", + "resolved": "https://registry.npmjs.org/googleapis/-/googleapis-144.0.0.tgz", + "integrity": "sha512-ELcWOXtJxjPX4vsKMh+7V+jZvgPwYMlEhQFiu2sa9Qmt5veX8nwXPksOWGGN6Zk4xCiLygUyaz7xGtcMO+Onxw==", + "license": "Apache-2.0", + "dependencies": { + "google-auth-library": "^9.0.0", + "googleapis-common": "^7.0.0" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/googleapis-common": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/googleapis-common/-/googleapis-common-7.2.0.tgz", + "integrity": "sha512-/fhDZEJZvOV3X5jmD+fKxMqma5q2Q9nZNSF3kn1F18tpxmA86BcTxAGBQdM0N89Z3bEaIs+HVznSmFJEAmMTjA==", + "license": "Apache-2.0", + "dependencies": { + "extend": "^3.0.2", + "gaxios": "^6.0.3", + "google-auth-library": "^9.7.0", + "qs": "^6.7.0", + "url-template": "^2.0.8", + "uuid": "^9.0.0" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/gopd": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", + "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/gtoken": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/gtoken/-/gtoken-7.1.0.tgz", + "integrity": "sha512-pCcEwRi+TKpMlxAQObHDQ56KawURgyAf6jtIY046fJ5tIv3zDe/LEIubckAO8fj6JnAxLdmWkUfNyulQ2iKdEw==", + "license": "MIT", + "dependencies": { + "gaxios": "^6.0.0", + "jws": "^4.0.0" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/has-symbols": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", + "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-tostringtag": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz", + "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==", + "license": "MIT", + "dependencies": { + "has-symbols": "^1.0.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/hasown": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", + "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", + "license": "MIT", + "dependencies": { + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, "node_modules/http-proxy-agent": { "version": "7.0.2", "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz", @@ -623,6 +1019,18 @@ "node": ">=8" } }, + "node_modules/is-stream": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-stream/-/is-stream-2.0.1.tgz", + "integrity": "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==", + "license": "MIT", + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/js-tokens": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", @@ -641,12 +1049,42 @@ "js-yaml": "bin/js-yaml.js" } }, + "node_modules/json-bigint": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/json-bigint/-/json-bigint-1.0.0.tgz", + "integrity": "sha512-SiPv/8VpZuWbvLSMtTDU8hEfrZWg/mH/nV/b4o0CYbSxu1UIQPLdwKOCIyLQX+VIPO5vrLX3i8qtqFyhdPSUSQ==", + "license": "MIT", + "dependencies": { + "bignumber.js": "^9.0.0" + } + }, "node_modules/json-parse-even-better-errors": { "version": "2.3.1", "resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz", "integrity": "sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==", "license": "MIT" }, + "node_modules/jwa": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/jwa/-/jwa-2.0.1.tgz", + "integrity": "sha512-hRF04fqJIP8Abbkq5NKGN0Bbr3JxlQ+qhZufXVr0DvujKy93ZCbXZMHDL4EOtodSbCWxOqR8MS1tXA5hwqCXDg==", + "license": "MIT", + "dependencies": { + "buffer-equal-constant-time": "^1.0.1", + "ecdsa-sig-formatter": "1.0.11", + "safe-buffer": "^5.0.1" + } + }, + "node_modules/jws": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/jws/-/jws-4.0.1.tgz", + "integrity": "sha512-EKI/M/yqPncGUUh44xz0PxSidXFr/+r0pA70+gIYhjv+et7yxM+s29Y+VGDkovRofQem0fs7Uvf4+YmAdyRduA==", + "license": "MIT", + "dependencies": { + "jwa": "^2.0.1", + "safe-buffer": "^5.0.1" + } + }, "node_modules/lines-and-columns": { "version": "1.2.4", "resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz", @@ -662,6 +1100,36 @@ "node": ">=12" } }, + "node_modules/math-intrinsics": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", + "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "license": "MIT", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, "node_modules/mitt": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/mitt/-/mitt-3.0.1.tgz", @@ -709,6 +1177,18 @@ } } }, + "node_modules/object-inspect": { + "version": "1.13.4", + "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.4.tgz", + "integrity": "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/once": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", @@ -872,6 +1352,21 @@ "node": ">=16.13.2" } }, + "node_modules/qs": { + "version": "6.14.1", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.14.1.tgz", + "integrity": "sha512-4EK3+xJl8Ts67nLYNwqw/dsFVnCf+qR7RgXSK9jEEm9unao3njwMDdmsdvoKBKHzxd7tCYz5e5M+SnMjdtXGQQ==", + "license": "BSD-3-Clause", + "dependencies": { + "side-channel": "^1.1.0" + }, + "engines": { + "node": ">=0.6" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/require-directory": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", @@ -890,6 +1385,98 @@ "node": ">=4" } }, + "node_modules/safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, + "node_modules/side-channel": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.1.0.tgz", + "integrity": "sha512-ZX99e6tRweoUXqR+VBrslhda51Nh5MTQwou5tnUDgbtyM0dBgmhEDtWGP/xbKn6hqfPRHujUNwz5fy/wbbhnpw==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "object-inspect": "^1.13.3", + "side-channel-list": "^1.0.0", + "side-channel-map": "^1.0.1", + "side-channel-weakmap": "^1.0.2" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-list": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/side-channel-list/-/side-channel-list-1.0.0.tgz", + "integrity": "sha512-FCLHtRD/gnpCiCHEiJLOwdmFP+wzCmDEkc9y7NsYxeF4u7Btsn1ZuwgwJGxImImHicJArLP4R0yX4c2KCrMrTA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "object-inspect": "^1.13.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-map": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/side-channel-map/-/side-channel-map-1.0.1.tgz", + "integrity": "sha512-VCjCNfgMsby3tTdo02nbjtM/ewra6jPHmpThenkTYh8pG9ucZ/1P8So4u4FGBek/BjpOVsDCMoLA/iuBKIFXRA==", + "license": "MIT", + "dependencies": { + "call-bound": "^1.0.2", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.5", + "object-inspect": "^1.13.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-weakmap": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/side-channel-weakmap/-/side-channel-weakmap-1.0.2.tgz", + "integrity": "sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A==", + "license": "MIT", + "dependencies": { + "call-bound": "^1.0.2", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.5", + "object-inspect": "^1.13.3", + "side-channel-map": "^1.0.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/smart-buffer": { "version": "4.2.0", "resolved": "https://registry.npmjs.org/smart-buffer/-/smart-buffer-4.2.0.tgz", @@ -1038,8 +1625,13 @@ "version": "7.16.0", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz", "integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==", - "license": "MIT", - "optional": true + "license": "MIT" + }, + "node_modules/url-template": { + "version": "2.0.8", + "resolved": "https://registry.npmjs.org/url-template/-/url-template-2.0.8.tgz", + "integrity": "sha512-XdVKMF4SJ0nP/O7XIPB0JwAEuT9lDIYnNsK8yGVe43y0AWoKeJNdv3ZNWh7ksJ6KqQFjOO6ox/VEitLnaVNufw==", + "license": "BSD" }, "node_modules/urlpattern-polyfill": { "version": "10.0.0", @@ -1047,6 +1639,19 @@ "integrity": "sha512-H/A06tKD7sS1O1X2SshBVeA5FLycRpjqiBeqGKmBwBDBy28EnRjORxTNe269KSSr5un5qyWi1iL61wLxpd+ZOg==", "license": "MIT" }, + "node_modules/uuid": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", + "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "license": "MIT", + "bin": { + "uuid": "dist/bin/uuid" + } + }, "node_modules/webidl-conversions": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", diff --git a/package.json b/package.json index c97b1db..925f7c8 100644 --- a/package.json +++ b/package.json @@ -20,6 +20,11 @@ "license": "UNLICENSED", "private": true, "dependencies": { + "@notionhq/client": "^2.2.15", + "csv-parse": "^5.5.6", + "csv-writer": "^1.6.0", + "googleapis": "^144.0.0", + "google-auth-library": "^9.14.0", "puppeteer": "^21.11.0" }, "engines": { diff --git a/push-to-notion.js b/push-to-notion.js new file mode 100644 index 0000000..57555d7 --- /dev/null +++ b/push-to-notion.js @@ -0,0 +1,174 @@ +const fs = require('fs'); +const path = require('path'); +const csv = require('csv-parse/sync'); +const { Client } = require('@notionhq/client'); +const { + withTimeout, + withRetry, + logInfo, + logError +} = require('./utils/tracker-utils'); + +// Notion rich_text field has a 2000 character limit +const MAX_TEXT_LENGTH = 1998; + +/** + * Truncate text to Notion's limit + */ +function truncateText(text, maxLength = MAX_TEXT_LENGTH) { + if (!text || text.length <= maxLength) return text; + return text.substring(0, maxLength); +} + +/** + * Push market trends data to Notion database + */ +async function pushToNotion() { + const startTime = Date.now(); + + try { + // Validate required environment variables + if (!process.env.NOTION_TOKEN) { + throw new Error('NOTION_TOKEN environment variable not set'); + } + if (!process.env.NOTION_TRENDS_DB_ID) { + throw new Error('NOTION_TRENDS_DB_ID environment variable not set'); + } + + // Initialize Notion client + const notion = new Client({ auth: process.env.NOTION_TOKEN }); + const databaseId = process.env.NOTION_TRENDS_DB_ID; + + // Find the latest CSV file in artifacts directory + const artifactsDir = path.join(__dirname, 'artifacts'); + const files = fs.readdirSync(artifactsDir) + .filter(f => f.startsWith('market-trends') && f.endsWith('.csv')) + .sort() + .reverse(); + + if (files.length === 0) { + throw new Error('No market-trends CSV files found in artifacts directory'); + } + + const csvFile = path.join(artifactsDir, files[0]); + const content = fs.readFileSync(csvFile, 'utf8'); + const records = csv.parse(content, { columns: true }); + + logInfo('Starting Notion push', { + recordCount: records.length, + databaseId, + csvFile: files[0] + }); + + // Push each record to Notion + let successCount = 0; + let errorCount = 0; + + for (const record of records) { + try { + await withRetry(async () => { + await withTimeout( + notion.pages.create({ + parent: { database_id: databaseId }, + properties: { + // Title property (required) - use Trend as title + 'Name': { + title: [ + { + text: { + content: truncateText(record['Trend'] || 'Unnamed Trend', 200) + } + } + ] + }, + // Impact level + 'Impact': { + select: { + name: record['Impact'] || 'Medium' + } + }, + // Description as rich text + 'Description': { + rich_text: [ + { + text: { + content: truncateText(record['Description'] || '') + } + } + ] + }, + // Affected Sectors + 'Sector': { + rich_text: [ + { + text: { + content: truncateText(record['Affected Sectors'] || '') + } + } + ] + }, + // Market Signal + 'Analysis': { + rich_text: [ + { + text: { + content: truncateText(record['Market Signal'] || '') + } + } + ] + }, + // Date + 'Period': { + rich_text: [ + { + text: { + content: record['Date'] || new Date().toLocaleDateString('nl-NL') + } + } + ] + } + } + }), + 30000 // 30 second timeout + ); + }, 3); // Max 3 retry attempts + + successCount++; + logInfo('Record pushed to Notion', { trend: record['Trend'] }); + + // Small delay to avoid rate limiting + await new Promise(resolve => setTimeout(resolve, 300)); + + } catch (recordError) { + errorCount++; + logError('Failed to push record to Notion', { + trend: record['Trend'], + error: recordError.message + }); + } + } + + const executionTime = Date.now() - startTime; + + if (errorCount > 0 && successCount === 0) { + throw new Error(`All ${errorCount} records failed to push`); + } + + logInfo('Notion push completed', { + successCount, + errorCount, + totalRecords: records.length, + executionTimeMs: executionTime + }); + + } catch (error) { + const executionTime = Date.now() - startTime; + logError('Failed to push to Notion', { + error: error.message, + executionTimeMs: executionTime + }); + process.exit(1); + } +} + +pushToNotion(); diff --git a/push-to-sheets.js b/push-to-sheets.js index 79b120b..0c862b9 100644 --- a/push-to-sheets.js +++ b/push-to-sheets.js @@ -22,10 +22,22 @@ async function pushToSheet() { throw new Error('SHEET_ID environment variable not set'); } - // Load credentials from environment variable + // Load credentials from environment variable (supports both base64 and raw JSON) let credentials; + let credentialsStr = process.env.GOOGLE_CREDENTIALS; + + // Try to decode from base64 first + try { + const decoded = Buffer.from(credentialsStr, 'base64').toString('utf8'); + if (decoded.includes('client_email')) { + credentialsStr = decoded; + } + } catch (e) { + // Not base64, use as-is + } + try { - credentials = JSON.parse(process.env.GOOGLE_CREDENTIALS); + credentials = JSON.parse(credentialsStr); } catch (parseError) { throw new Error(`Invalid JSON in GOOGLE_CREDENTIALS: ${parseError.message}`); } diff --git a/scrapers/concurrent-tracker.js b/scrapers/concurrent-tracker.js deleted file mode 100644 index 4a83323..0000000 --- a/scrapers/concurrent-tracker.js +++ /dev/null @@ -1,540 +0,0 @@ -/** - * CONCURRENT TRACKER - Competitor Blog & PR Activity Monitor - * - * Monitort blog posts, persberichten en LinkedIn activity van concurrenten - * Output: concurrent_activity_[DATE].csv + concurrent_summary_[DATE].txt - * - * RUN: node concurrent-tracker.js - */ - -const puppeteer = require('puppeteer'); -const fs = require('fs').promises; -const path = require('path'); - -// ============================================================================ -// CONFIGURATIE -// ============================================================================ - -const CONFIG = { - concurrenten: [ - { - naam: 'Yacht', - website: 'https://www.yacht.nl', - blogUrl: 'https://www.yacht.nl/kennis', - linkedinUrl: 'https://www.linkedin.com/company/yacht-nl', - sector: 'Recruitment', - focusAreas: ['techniek', 'IT', 'engineering'] - }, - { - naam: 'Brunel', - website: 'https://www.brunel.nl', - blogUrl: 'https://www.brunel.nl/nl-nl/insights', - linkedinUrl: 'https://www.linkedin.com/company/brunel', - sector: 'Recruitment', - focusAreas: ['engineering', 'oil & gas', 'renewable energy'] - }, - { - naam: 'Olympia', - website: 'https://www.olympia.nl', - blogUrl: 'https://www.olympia.nl/zakelijk/nieuws', - linkedinUrl: 'https://www.linkedin.com/company/olympia-uitzendbureau', - sector: 'Recruitment', - focusAreas: ['techniek', 'productie', 'logistiek'] - }, - { - naam: 'Tempo-Team', - website: 'https://www.tempo-team.nl', - blogUrl: 'https://www.tempo-team.nl/werkgevers/kennisbank', - linkedinUrl: 'https://www.linkedin.com/company/tempo-team', - sector: 'Recruitment', - focusAreas: ['techniek', 'bouw', 'industrie'] - }, - { - naam: 'Randstad', - website: 'https://www.randstad.nl', - blogUrl: 'https://www.randstad.nl/werkgevers/kennisbank', - linkedinUrl: 'https://www.linkedin.com/company/randstad-nederland', - sector: 'Recruitment', - focusAreas: ['engineering', 'techniek', 'industrie'] - }, - { - naam: 'Unique', - website: 'https://www.unique.nl', - blogUrl: 'https://www.unique.nl/over-unique/nieuws', - linkedinUrl: 'https://www.linkedin.com/company/unique-nederland', - sector: 'Recruitment', - focusAreas: ['techniek', 'productie'] - }, - { - naam: 'Manpower', - website: 'https://www.manpower.nl', - blogUrl: 'https://www.manpower.nl/nl/kennisbank', - linkedinUrl: 'https://www.linkedin.com/company/manpower-nederland', - sector: 'Recruitment', - focusAreas: ['techniek', 'engineering', 'productie'] - }, - { - naam: 'Cottus', - website: 'https://www.cottus.nl', - blogUrl: 'https://www.cottus.nl/kennis', - linkedinUrl: 'https://www.linkedin.com/company/cottus', - sector: 'Recruitment', - focusAreas: ['bouw', 'infra', 'techniek'] - } - ], - - // Content keywords om te detecteren - contentKeywords: { - techniek: ['automation', 'field service', 'maintenance', 'technisch', 'engineer', 'PLC', 'SCADA'], - trending: ['AI', 'digitalisering', 'sustainability', 'renewable energy', 'net zero', 'energietransitie'], - recruitment: ['krapte', 'arbeidstekort', 'talent', 'personeelstekort', 'werving', 'selectie'], - regional: ['gelderland', 'overijssel', 'noord-brabant', 'arnhem', 'nijmegen', 'zwolle', 'eindhoven'] - }, - - delays: { - betweenCompetitors: 3000, // 3 seconden tussen concurrenten - betweenRequests: 2000 // 2 seconden tussen individuele requests - }, - - outputDir: './scraper-output' -}; - -// ============================================================================ -// HELPER FUNCTIES -// ============================================================================ - -function getToday() { - return new Date().toISOString().split('T')[0]; -} - -function delay(ms) { - return new Promise(resolve => setTimeout(resolve, ms)); -} - -function calculateRelevanceScore(text, concurrent) { - let score = 0; - const lowerText = text.toLowerCase(); - - // Check focus areas (max 30 punten) - concurrent.focusAreas.forEach(area => { - if (lowerText.includes(area.toLowerCase())) { - score += 10; - } - }); - - // Check content keywords (max 40 punten) - Object.entries(CONFIG.contentKeywords).forEach(([category, keywords]) => { - keywords.forEach(keyword => { - if (lowerText.includes(keyword.toLowerCase())) { - score += 2; - } - }); - }); - - // Cap at 100 - return Math.min(score, 100); -} - -function determineActivityLevel(recentPosts, avgRelevance) { - if (recentPosts >= 5 && avgRelevance >= 50) return 'ZEER ACTIEF'; - if (recentPosts >= 3 && avgRelevance >= 30) return 'ACTIEF'; - if (recentPosts >= 1 || avgRelevance >= 20) return 'MATIG ACTIEF'; - return 'INACTIEF'; -} - -function determineThreatLevel(activityLevel, avgRelevance) { - if (activityLevel === 'ZEER ACTIEF' && avgRelevance >= 60) return 'HOOG'; - if (activityLevel === 'ACTIEF' && avgRelevance >= 40) return 'GEMIDDELD'; - if (activityLevel === 'MATIG ACTIEF') return 'LAAG'; - return 'MINIMAAL'; -} - -// ============================================================================ -// SCRAPING FUNCTIES -// ============================================================================ - -async function checkBlogActivity(page, concurrent) { - console.log(` πŸ“° Checking blog: ${concurrent.blogUrl}`); - - try { - await page.goto(concurrent.blogUrl, { - waitUntil: 'networkidle2', - timeout: 15000 - }); - await delay(CONFIG.delays.betweenRequests); - - // Probeer blog posts te vinden (verschillende selectors) - const content = await page.evaluate(() => { - const body = document.body.innerText; - const links = Array.from(document.querySelectorAll('a[href*="/blog"], a[href*="/nieuws"], a[href*="/artikel"], article, .post, .news-item')); - - return { - bodyText: body.substring(0, 5000), // Eerste 5000 chars - linkCount: links.length, - titles: links.slice(0, 10).map(el => el.innerText || el.getAttribute('title') || '').filter(t => t.length > 10) - }; - }); - - return { - accessible: true, - postCount: content.linkCount, - recentTitles: content.titles, - relevanceScore: calculateRelevanceScore(content.bodyText + ' ' + content.titles.join(' '), concurrent) - }; - - } catch (error) { - console.log(` ⚠️ Blog niet toegankelijk: ${error.message}`); - return { - accessible: false, - postCount: 0, - recentTitles: [], - relevanceScore: 0 - }; - } -} - -async function checkLinkedInActivity(page, concurrent) { - console.log(` πŸ’Ό Checking LinkedIn via Google: ${concurrent.naam}`); - - try { - // Zoek LinkedIn posts via Google (LinkedIn direct scrapen is moeilijk) - const searchQuery = `site:linkedin.com/posts "${concurrent.naam}"`; - await page.goto(`https://www.google.com/search?q=${encodeURIComponent(searchQuery)}`, { - waitUntil: 'networkidle2', - timeout: 15000 - }); - await delay(CONFIG.delays.betweenRequests); - - const results = await page.evaluate(() => { - const items = Array.from(document.querySelectorAll('div.g, div[data-sokoban-container]')); - return items.slice(0, 10).map(item => { - const title = item.querySelector('h3')?.innerText || ''; - const snippet = item.querySelector('.VwiC3b, .yXK7lf')?.innerText || ''; - return { title, snippet }; - }).filter(r => r.title || r.snippet); - }); - - const combinedText = results.map(r => r.title + ' ' + r.snippet).join(' '); - - return { - recentPosts: results.length, - relevanceScore: calculateRelevanceScore(combinedText, concurrent) - }; - - } catch (error) { - console.log(` ⚠️ LinkedIn check failed: ${error.message}`); - return { - recentPosts: 0, - relevanceScore: 0 - }; - } -} - -async function checkGoogleNews(page, concurrent) { - console.log(` πŸ“‘ Checking Google News: ${concurrent.naam}`); - - try { - const searchQuery = `"${concurrent.naam}" techniek OR engineering OR recruitment`; - await page.goto(`https://www.google.com/search?q=${encodeURIComponent(searchQuery)}&tbm=nws`, { - waitUntil: 'networkidle2', - timeout: 15000 - }); - await delay(CONFIG.delays.betweenRequests); - - const newsItems = await page.evaluate(() => { - const items = Array.from(document.querySelectorAll('div.SoaBEf, div[data-sokoban-container]')); - return items.slice(0, 5).map(item => { - const title = item.querySelector('div[role="heading"]')?.innerText || ''; - const snippet = item.querySelector('div.GI74Re')?.innerText || ''; - const date = item.querySelector('span.WG9SHc span')?.innerText || ''; - return { title, snippet, date }; - }).filter(r => r.title); - }); - - const combinedText = newsItems.map(n => n.title + ' ' + n.snippet).join(' '); - - return { - newsCount: newsItems.length, - recentNews: newsItems, - relevanceScore: calculateRelevanceScore(combinedText, concurrent) - }; - - } catch (error) { - console.log(` ⚠️ Google News check failed: ${error.message}`); - return { - newsCount: 0, - recentNews: [], - relevanceScore: 0 - }; - } -} - -// ============================================================================ -// MAIN SCRAPER -// ============================================================================ - -async function scrapeConcurrentActivity() { - console.log('🎯 CONCURRENT TRACKER - Starting...\n'); - - const browser = await puppeteer.launch({ - headless: 'new', - args: ['--no-sandbox', '--disable-setuid-sandbox'] - }); - - const page = await browser.newPage(); - await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'); - - const results = []; - const today = getToday(); - - for (const concurrent of CONFIG.concurrenten) { - console.log(`\nπŸ“Š Processing: ${concurrent.naam}`); - - try { - // Check alle kanalen - const blogData = await checkBlogActivity(page, concurrent); - await delay(CONFIG.delays.betweenRequests); - - const linkedinData = await checkLinkedInActivity(page, concurrent); - await delay(CONFIG.delays.betweenRequests); - - const newsData = await checkGoogleNews(page, concurrent); - - // Bereken overall metrics - const totalContent = blogData.postCount + linkedinData.recentPosts + newsData.newsCount; - const avgRelevance = Math.round( - (blogData.relevanceScore + linkedinData.relevanceScore + newsData.relevanceScore) / 3 - ); - - const activityLevel = determineActivityLevel(totalContent, avgRelevance); - const threatLevel = determineThreatLevel(activityLevel, avgRelevance); - - // Sla result op - results.push({ - dataDate: today, - concurrent: concurrent.naam, - sector: concurrent.sector, - blogPosts: blogData.postCount, - blogRelevance: blogData.relevanceScore, - linkedinPosts: linkedinData.recentPosts, - linkedinRelevance: linkedinData.relevanceScore, - newsItems: newsData.newsCount, - newsRelevance: newsData.relevanceScore, - totalContent: totalContent, - avgRelevance: avgRelevance, - activityLevel: activityLevel, - threatLevel: threatLevel, - topContent: [ - ...blogData.recentTitles.slice(0, 2), - ...newsData.recentNews.slice(0, 2).map(n => n.title) - ].join(' | ') || 'Geen recente content', - lastUpdated: new Date().toISOString() - }); - - console.log(` βœ… ${concurrent.naam}: ${activityLevel} (${totalContent} items, ${avgRelevance}% relevant)`); - - await delay(CONFIG.delays.betweenCompetitors); - - } catch (error) { - console.error(` ❌ Error processing ${concurrent.naam}:`, error.message); - - // Voeg foutmelding toe als result - results.push({ - dataDate: today, - concurrent: concurrent.naam, - sector: concurrent.sector, - blogPosts: 0, - blogRelevance: 0, - linkedinPosts: 0, - linkedinRelevance: 0, - newsItems: 0, - newsRelevance: 0, - totalContent: 0, - avgRelevance: 0, - activityLevel: 'ERROR', - threatLevel: 'ONBEKEND', - topContent: `Error: ${error.message}`, - lastUpdated: new Date().toISOString() - }); - } - } - - await browser.close(); - - return results; -} - -// ============================================================================ -// CSV EXPORT -// ============================================================================ - -async function exportToCSV(results) { - const csvPath = path.join(CONFIG.outputDir, `concurrent_activity_${getToday()}.csv`); - - // CSV Headers - const headers = [ - 'Data Date', - 'Concurrent', - 'Sector', - 'Blog Posts', - 'Blog Relevance %', - 'LinkedIn Posts', - 'LinkedIn Relevance %', - 'News Items', - 'News Relevance %', - 'Total Content', - 'Avg Relevance %', - 'Activity Level', - 'Threat Level', - 'Top Content', - 'Last Updated' - ]; - - // CSV Rows - const rows = results.map(r => [ - r.dataDate, - r.concurrent, - r.sector, - r.blogPosts, - r.blogRelevance, - r.linkedinPosts, - r.linkedinRelevance, - r.newsItems, - r.newsRelevance, - r.totalContent, - r.avgRelevance, - r.activityLevel, - r.threatLevel, - `"${r.topContent.replace(/"/g, '""')}"`, // Escape quotes - r.lastUpdated - ]); - - const csv = [headers.join(','), ...rows.map(row => row.join(','))].join('\n'); - - await fs.writeFile(csvPath, csv, 'utf-8'); - console.log(`\nβœ… CSV exported: ${csvPath}`); - - return csvPath; -} - -// ============================================================================ -// SUMMARY REPORT -// ============================================================================ - -async function generateSummary(results) { - const summaryPath = path.join(CONFIG.outputDir, `concurrent_summary_${getToday()}.txt`); - - // Groepeer per threat level - const byThreat = { - HOOG: results.filter(r => r.threatLevel === 'HOOG'), - GEMIDDELD: results.filter(r => r.threatLevel === 'GEMIDDELD'), - LAAG: results.filter(r => r.threatLevel === 'LAAG'), - MINIMAAL: results.filter(r => r.threatLevel === 'MINIMAAL'), - ONBEKEND: results.filter(r => r.threatLevel === 'ONBEKEND') - }; - - // Top 3 meest actieve concurrenten - const topActive = [...results] - .filter(r => r.activityLevel !== 'ERROR') - .sort((a, b) => b.totalContent - a.totalContent) - .slice(0, 3); - - // Top 3 meest relevante content - const topRelevant = [...results] - .filter(r => r.activityLevel !== 'ERROR') - .sort((a, b) => b.avgRelevance - a.avgRelevance) - .slice(0, 3); - - const summary = ` -CONCURRENT ACTIVITY MONITOR - ${getToday()} -================================================================================ - -πŸ“Š OVERVIEW ------------ -Total Concurrenten: ${results.length} -Total Content Items: ${results.reduce((sum, r) => sum + r.totalContent, 0)} -Gemiddelde Relevantie: ${Math.round(results.reduce((sum, r) => sum + r.avgRelevance, 0) / results.length)}% - -🚨 THREAT LEVELS ----------------- -HOOG: ${byThreat.HOOG.length} concurrenten -GEMIDDELD: ${byThreat.GEMIDDELD.length} concurrenten -LAAG: ${byThreat.LAAG.length} concurrenten -MINIMAAL: ${byThreat.MINIMAAL.length} concurrenten -ONBEKEND: ${byThreat.ONBEKEND.length} concurrenten - -πŸ”₯ TOP 3 MEEST ACTIEF ---------------------- -${topActive.map((c, i) => `${i + 1}. ${c.concurrent}: ${c.totalContent} items (${c.activityLevel})`).join('\n')} - -⭐ TOP 3 MEEST RELEVANT ------------------------ -${topRelevant.map((c, i) => `${i + 1}. ${c.concurrent}: ${c.avgRelevance}% relevantie`).join('\n')} - -🎯 HOGE DREIGING DETAILS ------------------------- -${byThreat.HOOG.length > 0 ? byThreat.HOOG.map(c => ` -${c.concurrent}: - - Blog: ${c.blogPosts} posts (${c.blogRelevance}% relevant) - - LinkedIn: ${c.linkedinPosts} posts (${c.linkedinRelevance}% relevant) - - News: ${c.newsItems} items (${c.newsRelevance}% relevant) - - Top Content: ${c.topContent.substring(0, 200)}... -`).join('\n') : 'Geen concurrenten met hoge dreiging.'} - -πŸ’‘ ACTIES ---------- -${byThreat.HOOG.length > 0 ? ` -⚠️ IMMEDIATE: - - Review content strategie van: ${byThreat.HOOG.map(c => c.concurrent).join(', ')} - - Overweeg counter-content op zelfde topics -` : ''} -${byThreat.GEMIDDELD.length > 0 ? ` -πŸ“Œ DEZE WEEK: - - Monitor activiteit van: ${byThreat.GEMIDDELD.map(c => c.concurrent).join(', ')} - - Identificeer content gaps -` : ''} -${byThreat.LAAG.length + byThreat.MINIMAAL.length > 0 ? ` -πŸ“Š MONITOR: - - Blijf ${byThreat.LAAG.length + byThreat.MINIMAAL.length} minder actieve concurrenten volgen -` : ''} - -πŸ“… Next Run: Over 7 dagen -================================================================================ -`; - - await fs.writeFile(summaryPath, summary, 'utf-8'); - console.log(`βœ… Summary generated: ${summaryPath}`); - - return summaryPath; -} - -// ============================================================================ -// MAIN EXECUTION -// ============================================================================ - -async function main() { - try { - // Ensure output directory exists - await fs.mkdir(CONFIG.outputDir, { recursive: true }); - - // Run scraper - const results = await scrapeConcurrentActivity(); - - // Export data - await exportToCSV(results); - await generateSummary(results); - - console.log('\nπŸŽ‰ CONCURRENT TRACKER - Completed!\n'); - - } catch (error) { - console.error('\n❌ Fatal error:', error); - process.exit(1); - } -} - -// Run if called directly -if (require.main === module) { - main(); -} - -module.exports = { scrapeConcurrentActivity, exportToCSV, generateSummary }; diff --git a/scrapers/icp-monitor.js b/scrapers/icp-monitor.js deleted file mode 100644 index e294e04..0000000 --- a/scrapers/icp-monitor.js +++ /dev/null @@ -1,520 +0,0 @@ -/** - * ICP ACTIVITY MONITOR - * - * Monitoreert target bedrijven (ICP) voor hiring signals en nieuws - * Focus: Mid-market (50-800 FTE) technisch bedrijven Gelderland/Overijssel/Brabant - * Output: CSV format voor Google Sheets Intelligence Hub - * - * USAGE: - * node icp-monitor.js - * - * OUTPUT: - * icp_activity_[YYYY-MM-DD].csv - */ - -const puppeteer = require('puppeteer'); -const fs = require('fs').promises; -const path = require('path'); - -// ============================================================================ -// CONFIGURATION -// ============================================================================ - -const CONFIG = { - // ICP target companies (Wouter's mid-market focus) - targetCompanies: [ - // Automation & Manufacturing - { name: 'Stork', domain: 'stork.com', sector: 'Oil & Gas / Maintenance' }, - { name: 'Siemens Nederland', domain: 'siemens.nl', sector: 'Automation' }, - { name: 'Bosch Rexroth', domain: 'boschrexroth.nl', sector: 'Automation' }, - { name: 'Mitsubishi Electric', domain: 'mitsubishielectric.nl', sector: 'Automation' }, - { name: 'Omron', domain: 'industrial.omron.nl', sector: 'Automation' }, - - // Manufacturing & Engineering - { name: 'VDL Groep', domain: 'vdlgroep.com', sector: 'Manufacturing' }, - { name: 'Demka', domain: 'demka.nl', sector: 'Manufacturing' }, - { name: 'Stiho', domain: 'stiho.nl', sector: 'Building Services' }, - { name: 'Imtech', domain: 'imtech.eu', sector: 'Technical Services' }, - { name: 'Strukton', domain: 'strukton.com', sector: 'Infrastructure' }, - - // Construction & Infrastructure - { name: 'BAM', domain: 'bam.com', sector: 'Construction' }, - { name: 'VolkerWessels', domain: 'volkerwessels.com', sector: 'Construction' }, - { name: 'Heijmans', domain: 'heijmans.nl', sector: 'Construction' }, - { name: 'Dura Vermeer', domain: 'duravermeer.nl', sector: 'Construction' }, - - // Renewable Energy - { name: 'Alfen', domain: 'alfen.com', sector: 'Energy Storage' }, - { name: 'Solarfields', domain: 'solarfields.nl', sector: 'Solar Energy' }, - { name: 'Vattenfall', domain: 'vattenfall.nl', sector: 'Renewable Energy' }, - - // Add more as needed - ], - - // Hiring signal keywords - hiringSignals: [ - 'vacature', - 'vacatures', - 'werken bij', - 'join our team', - 'we are hiring', - 'open sollicitatie', - 'career', - 'carriΓ¨re' - ], - - // News signal keywords - newsSignals: [ - 'order', - 'contract', - 'project', - 'uitbreiding', - 'expansion', - 'nieuwe vestiging', - 'investering', - 'groei', - 'growth', - 'opdracht', - 'samenwerking' - ], - - // Output settings - outputDir: './scraper-output', - csvFilename: `icp_activity_${new Date().toISOString().split('T')[0]}.csv`, - - // Browser settings - headless: true, - timeout: 30000 -}; - -// ============================================================================ -// UTILITY FUNCTIONS -// ============================================================================ - -/** - * Exponential backoff with jitter for rate limiting - * @param {number} attempt - Current attempt number (0-indexed) - * @param {number} baseDelay - Base delay in milliseconds (default: 1000) - * @param {number} maxDelay - Maximum delay in milliseconds (default: 10000) - * @param {number} multiplier - Backoff multiplier (default: 2) - * @returns {Promise} - */ -async function exponentialBackoff(attempt = 0, baseDelay = 1000, maxDelay = 10000, multiplier = 2) { - const exponentialDelay = Math.min(baseDelay * Math.pow(multiplier, attempt), maxDelay); - const jitter = Math.random() * 0.3 * exponentialDelay; // 0-30% jitter - const totalDelay = exponentialDelay + jitter; - - console.log(`[RATE LIMIT] Waiting ${Math.round(totalDelay)}ms (attempt ${attempt + 1})...`); - await new Promise(resolve => setTimeout(resolve, totalDelay)); -} - -/** - * Retry wrapper with exponential backoff - * @param {Function} fn - Async function to retry - * @param {number} maxRetries - Maximum number of retries (default: 3) - * @param {string} context - Context string for logging - * @returns {Promise} - */ -async function retryWithBackoff(fn, maxRetries = 3, context = 'Operation') { - let lastError; - - for (let attempt = 0; attempt <= maxRetries; attempt++) { - try { - if (attempt > 0) { - console.log(`[RETRY] ${context} - Attempt ${attempt + 1}/${maxRetries + 1}`); - await exponentialBackoff(attempt - 1); - } - - return await fn(); - } catch (error) { - lastError = error; - console.log(`[ERROR] ${context} failed:`, error.message); - - if (attempt === maxRetries) { - console.log(`[FAILED] ${context} - Max retries reached`); - throw lastError; - } - } - } - - throw lastError; -} - -// ============================================================================ -// SCRAPER FUNCTIONS -// ============================================================================ - -/** - * Initialize browser - */ -async function initBrowser() { - return await puppeteer.launch({ - headless: CONFIG.headless, - args: ['--no-sandbox', '--disable-setuid-sandbox'] - }); -} - -/** - * Check company career page for hiring signals - */ -async function checkCareerPage(page, company) { - const careerUrls = [ - `https://${company.domain}/careers`, - `https://${company.domain}/carriere`, - `https://${company.domain}/vacatures`, - `https://${company.domain}/jobs`, - `https://www.${company.domain}/careers`, - `https://www.${company.domain}/carriere` - ]; - - for (const url of careerUrls) { - try { - const response = await page.goto(url, { - waitUntil: 'networkidle2', - timeout: CONFIG.timeout - }); - - if (response && response.status() === 200) { - // Found career page - const vacancyCount = await page.evaluate((signals) => { - const bodyText = document.body.textContent.toLowerCase(); - - // Try to find vacancy count - const countMatch = bodyText.match(/(\d+)\s*(vacature|openings|positions)/i); - if (countMatch) return parseInt(countMatch[1]); - - // Count vacancy-related links - const links = Array.from(document.querySelectorAll('a')); - const vacancyLinks = links.filter(link => { - const text = link.textContent.toLowerCase(); - return signals.some(signal => text.includes(signal)); - }); - - return vacancyLinks.length > 0 ? vacancyLinks.length : null; - }, CONFIG.hiringSignals); - - return { - careerPageFound: true, - careerPageUrl: url, - vacancyCount: vacancyCount || 'Unknown', - lastChecked: new Date().toISOString() - }; - } - } catch (error) { - // Continue to next URL - continue; - } - } - - return { - careerPageFound: false, - careerPageUrl: 'N/A', - vacancyCount: 0, - lastChecked: new Date().toISOString() - }; -} - -/** - * Search Google News for company mentions - */ -async function searchCompanyNews(page, company) { - const query = `${company.name} ${CONFIG.newsSignals.slice(0, 3).join(' OR ')}`; - const url = `https://www.google.com/search?q=${encodeURIComponent(query)}&tbm=nws&num=10`; - - try { - await page.goto(url, { waitUntil: 'networkidle2', timeout: CONFIG.timeout }); - - const newsItems = await page.evaluate(() => { - const results = []; - const newsCards = document.querySelectorAll('div[data-hveid]'); - - newsCards.forEach((card, index) => { - if (index >= 5) return; // Top 5 only - - const titleElement = card.querySelector('div[role="heading"]'); - const linkElement = card.querySelector('a'); - const dateElement = card.querySelector('span:not([role])'); - - if (titleElement && linkElement) { - results.push({ - title: titleElement.textContent.trim(), - url: linkElement.href, - date: dateElement ? dateElement.textContent.trim() : 'Unknown' - }); - } - }); - - return results; - }); - - return { - newsFound: newsItems.length > 0, - newsCount: newsItems.length, - latestNews: newsItems[0] || null, - allNews: newsItems - }; - - } catch (error) { - console.error(`Error searching news for ${company.name}:`, error.message); - return { - newsFound: false, - newsCount: 0, - latestNews: null, - allNews: [] - }; - } -} - -/** - * Check LinkedIn company page for updates - */ -async function checkLinkedInActivity(page, company) { - // LinkedIn requires login, so we'll use a simplified approach - // Check if company has LinkedIn page via Google search - - const query = `${company.name} LinkedIn site:linkedin.com/company`; - const url = `https://www.google.com/search?q=${encodeURIComponent(query)}`; - - try { - await page.goto(url, { waitUntil: 'networkidle2', timeout: CONFIG.timeout }); - - const linkedInUrl = await page.evaluate(() => { - const links = Array.from(document.querySelectorAll('a')); - const linkedInLink = links.find(link => - link.href.includes('linkedin.com/company/') - ); - return linkedInLink ? linkedInLink.href : null; - }); - - return { - hasLinkedIn: !!linkedInUrl, - linkedInUrl: linkedInUrl || 'N/A' - }; - - } catch (error) { - return { - hasLinkedIn: false, - linkedInUrl: 'N/A' - }; - } -} - -/** - * Calculate activity score - */ -function calculateActivityScore(careerData, newsData, linkedInData) { - let score = 0; - - // Career page signals (0-40 points) - if (careerData.careerPageFound) { - score += 20; - if (typeof careerData.vacancyCount === 'number' && careerData.vacancyCount > 0) { - score += Math.min(20, careerData.vacancyCount * 2); - } - } - - // News signals (0-40 points) - if (newsData.newsFound) { - score += Math.min(40, newsData.newsCount * 8); - } - - // LinkedIn presence (0-20 points) - if (linkedInData.hasLinkedIn) { - score += 20; - } - - return Math.min(100, score); -} - -// ============================================================================ -// MAIN MONITORING LOGIC -// ============================================================================ - -async function monitorAllCompanies() { - console.log('πŸš€ Starting ICP Activity Monitor...\n'); - console.log(`Target Companies: ${CONFIG.targetCompanies.length}\n`); - - const browser = await initBrowser(); - const page = await browser.newPage(); - - await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'); - - const results = []; - - for (let i = 0; i < CONFIG.targetCompanies.length; i++) { - const company = CONFIG.targetCompanies[i]; - console.log(`[${i + 1}/${CONFIG.targetCompanies.length}] Monitoring: ${company.name}...`); - - // Check career page - console.log(` β†’ Checking career page...`); - const careerData = await checkCareerPage(page, company); - - // Exponential backoff to avoid rate limiting - await exponentialBackoff(0, 1500, 5000); - - // Search news - console.log(` β†’ Searching news...`); - const newsData = await searchCompanyNews(page, company); - - // Exponential backoff to avoid rate limiting - await exponentialBackoff(0, 1500, 5000); - - // Check LinkedIn - console.log(` β†’ Checking LinkedIn...`); - const linkedInData = await checkLinkedInActivity(page, company); - - // Exponential backoff to avoid rate limiting - await exponentialBackoff(0, 1500, 5000); - - // Calculate activity score - const activityScore = calculateActivityScore(careerData, newsData, linkedInData); - - // Determine status - let status = 'COLD'; - if (activityScore >= 70) status = 'HOT'; - else if (activityScore >= 40) status = 'WARM'; - - console.log(` βœ“ Score: ${activityScore}/100 | Status: ${status}\n`); - - results.push({ - company: company.name, - domain: company.domain, - sector: company.sector, - activityScore, - status, - careerPageFound: careerData.careerPageFound, - careerPageUrl: careerData.careerPageUrl, - vacancyCount: careerData.vacancyCount, - newsCount: newsData.newsCount, - latestNews: newsData.latestNews ? newsData.latestNews.title : 'N/A', - latestNewsUrl: newsData.latestNews ? newsData.latestNews.url : 'N/A', - linkedInUrl: linkedInData.linkedInUrl, - lastChecked: new Date().toISOString() - }); - } - - await browser.close(); - - console.log('\nβœ… Monitoring completed!\n'); - - return results; -} - -// ============================================================================ -// CSV EXPORT -// ============================================================================ - -/** - * Convert results to CSV - */ -function convertToCSV(results) { - let csv = 'Data Date,Company,Sector,Activity Score,Status,Hiring Signal,News Signal,Action Priority,Last Updated\n'; - - results.forEach(r => { - const date = new Date().toISOString().split('T')[0]; - const hiringSignal = r.careerPageFound ? `${r.vacancyCount} vacatures` : 'No career page'; - const newsSignal = r.newsCount > 0 ? `${r.newsCount} news items` : 'No recent news'; - const actionPriority = r.status === 'HOT' ? 'IMMEDIATE' : r.status === 'WARM' ? 'THIS WEEK' : 'MONITOR'; - - csv += `${date},"${r.company}","${r.sector}",${r.activityScore},"${r.status}","${hiringSignal}","${newsSignal}","${actionPriority}","${r.lastChecked}"\n`; - }); - - return csv; -} - -/** - * Save results and generate report - */ -async function saveResults(results) { - await fs.mkdir(CONFIG.outputDir, { recursive: true }); - - // Save CSV - const csv = convertToCSV(results); - const csvPath = path.join(CONFIG.outputDir, CONFIG.csvFilename); - await fs.writeFile(csvPath, csv, 'utf8'); - console.log(`πŸ“Š ICP activity saved: ${csvPath}`); - - // Generate detailed report - const report = generateReport(results); - const reportPath = path.join(CONFIG.outputDir, `icp_report_${new Date().toISOString().split('T')[0]}.txt`); - await fs.writeFile(reportPath, report, 'utf8'); - console.log(`πŸ“‹ Detailed report saved: ${reportPath}`); -} - -/** - * Generate detailed report - */ -function generateReport(results) { - const hotCompanies = results.filter(r => r.status === 'HOT'); - const warmCompanies = results.filter(r => r.status === 'WARM'); - const coldCompanies = results.filter(r => r.status === 'COLD'); - - const avgScore = results.reduce((sum, r) => sum + r.activityScore, 0) / results.length; - const companiesWithVacancies = results.filter(r => r.careerPageFound && r.vacancyCount > 0).length; - - return ` -============================================================================= -ICP ACTIVITY MONITOR REPORT -============================================================================= -Date: ${new Date().toISOString()} -Companies Monitored: ${results.length} - -ACTIVITY BREAKDOWN: -- HOT (70-100): ${hotCompanies.length} companies β†’ IMMEDIATE OUTREACH -- WARM (40-69): ${warmCompanies.length} companies β†’ OUTREACH THIS WEEK -- COLD (0-39): ${coldCompanies.length} companies β†’ KEEP MONITORING - -AVERAGE ACTIVITY SCORE: ${Math.round(avgScore)}/100 -COMPANIES WITH VACANCIES: ${companiesWithVacancies} - -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -HOT COMPANIES (Immediate Action Required): -${hotCompanies.length > 0 ? hotCompanies.map((c, i) => ` -${i + 1}. ${c.company} (${c.activityScore}/100) - Sector: ${c.sector} - Vacancies: ${c.vacancyCount} - Career Page: ${c.careerPageUrl} - Latest News: ${c.latestNews} - LinkedIn: ${c.linkedInUrl} -`).join('\n') : ' None'} - -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -WARM COMPANIES (Schedule Outreach): -${warmCompanies.length > 0 ? warmCompanies.map((c, i) => ` -${i + 1}. ${c.company} (${c.activityScore}/100) - Sector: ${c.sector} - Hiring Signal: ${c.careerPageFound ? 'Yes' : 'No'} - News Activity: ${c.newsCount} items -`).join('\n') : ' None'} - -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -NEXT STEPS: -1. Import CSV to Google Sheets Intelligence Hub -2. Prioritize outreach to HOT companies -3. Schedule warm outreach for WARM companies -4. Continue monitoring COLD companies - -============================================================================= - `.trim(); -} - -// ============================================================================ -// EXECUTION -// ============================================================================ - -(async () => { - try { - const results = await monitorAllCompanies(); - await saveResults(results); - - console.log('\nβœ… ALL DONE! Files ready for Google Sheets import.\n'); - process.exit(0); - - } catch (error) { - console.error('❌ Monitor failed:', error); - process.exit(1); - } -})(); diff --git a/scrapers/market-trends-scraper.js b/scrapers/market-trends-scraper.js deleted file mode 100644 index 8ac8dae..0000000 --- a/scrapers/market-trends-scraper.js +++ /dev/null @@ -1,394 +0,0 @@ -/** - * MARKET TRENDS SCRAPER - * - * Scrapes Indeed.nl + Monsterboard.nl voor vacature trends - * Target: Technisch recruitment (Automation, Oil & Gas, Manufacturing) - * Output: CSV format voor Google Sheets Intelligence Hub - * - * USAGE: - * node market-trends-scraper.js - * - * OUTPUT: - * market_trends_[YYYY-MM-DD].csv - */ - -const puppeteer = require('puppeteer'); -const fs = require('fs').promises; -const path = require('path'); - -// ============================================================================ -// CONFIGURATION -// ============================================================================ - -const CONFIG = { - // Target keywords (technical recruitment focus) - keywords: [ - 'automation engineer', - 'field service engineer', - 'maintenance engineer', - 'process engineer', - 'electrical engineer', - 'mechanical engineer', - 'project engineer', - 'commissioning engineer', - 'PLC programmeur', - 'technisch commercieel' - ], - - // Target locations (Wouter's regions) - locations: [ - 'Gelderland', - 'Overijssel', - 'Noord-Brabant' - ], - - // Concurrent keywords for ghosting detection - concurrentKeywords: [ - 'Yacht', - 'Brunel', - 'Olympia', - 'Tempo-Team' - ], - - // Output settings - outputDir: './scraper-output', - csvFilename: `market_trends_${new Date().toISOString().split('T')[0]}.csv`, - - // Browser settings - headless: true, - timeout: 30000 -}; - -// ============================================================================ -// SCRAPER FUNCTIONS -// ============================================================================ - -/** - * Initialize browser - */ -async function initBrowser() { - return await puppeteer.launch({ - headless: CONFIG.headless, - args: ['--no-sandbox', '--disable-setuid-sandbox'] - }); -} - -/** - * Scrape Indeed.nl voor vacature counts - */ -async function scrapeIndeed(page, keyword, location) { - const url = `https://nl.indeed.com/jobs?q=${encodeURIComponent(keyword)}&l=${encodeURIComponent(location)}`; - - try { - await page.goto(url, { waitUntil: 'networkidle2', timeout: CONFIG.timeout }); - - // Extract job count - const jobCount = await page.evaluate(() => { - // Indeed shows count in multiple possible locations - const countElement = document.querySelector('[class*="jobsearch-JobCountAndSortPane-jobCount"]') || - document.querySelector('div[id="searchCountPages"]'); - - if (!countElement) return 0; - - const text = countElement.textContent; - const match = text.match(/[\d.,]+/); - return match ? parseInt(match[0].replace(/[.,]/g, '')) : 0; - }); - - // Extract sample salary data - const salaryData = await page.evaluate(() => { - const salaryElements = document.querySelectorAll('[class*="salary-snippet"]'); - const salaries = []; - - salaryElements.forEach(el => { - const text = el.textContent; - const match = text.match(/€\s*[\d.,]+/g); - if (match) { - salaries.push(text.trim()); - } - }); - - return salaries.slice(0, 3); // Top 3 samples - }); - - return { - source: 'Indeed', - keyword, - location, - jobCount, - salaryData: salaryData.join(' | '), - url - }; - - } catch (error) { - console.error(`Error scraping Indeed for ${keyword} in ${location}:`, error.message); - return { - source: 'Indeed', - keyword, - location, - jobCount: 0, - salaryData: 'ERROR', - url - }; - } -} - -/** - * Scrape Monsterboard.nl voor vacature counts - */ -async function scrapeMonsterboard(page, keyword, location) { - const url = `https://www.monsterboard.nl/vacatures/zoeken/?q=${encodeURIComponent(keyword)}&where=${encodeURIComponent(location)}`; - - try { - await page.goto(url, { waitUntil: 'networkidle2', timeout: CONFIG.timeout }); - - // Extract job count - const jobCount = await page.evaluate(() => { - const countElement = document.querySelector('[data-test-id="svx-job-count"]') || - document.querySelector('.job-count') || - document.querySelector('h2[class*="results"]'); - - if (!countElement) return 0; - - const text = countElement.textContent; - const match = text.match(/[\d.,]+/); - return match ? parseInt(match[0].replace(/[.,]/g, '')) : 0; - }); - - return { - source: 'Monsterboard', - keyword, - location, - jobCount, - salaryData: 'N/A', - url - }; - - } catch (error) { - console.error(`Error scraping Monsterboard for ${keyword} in ${location}:`, error.message); - return { - source: 'Monsterboard', - keyword, - location, - jobCount: 0, - salaryData: 'ERROR', - url - }; - } -} - -/** - * Check concurrent ghosting signals (competitor activity) - */ -async function checkGhostingSignals(page, keyword, location) { - const url = `https://nl.indeed.com/jobs?q=${encodeURIComponent(keyword + ' ' + CONFIG.concurrentKeywords.join(' OR '))}&l=${encodeURIComponent(location)}`; - - try { - await page.goto(url, { waitUntil: 'networkidle2', timeout: CONFIG.timeout }); - - const concurrentCount = await page.evaluate(() => { - const countElement = document.querySelector('[class*="jobsearch-JobCountAndSortPane-jobCount"]'); - if (!countElement) return 0; - - const text = countElement.textContent; - const match = text.match(/[\d.,]+/); - return match ? parseInt(match[0].replace(/[.,]/g, '')) : 0; - }); - - return { - keyword, - location, - concurrentCount, - ghostingRisk: concurrentCount > 20 ? 'HIGH' : concurrentCount > 10 ? 'MEDIUM' : 'LOW' - }; - - } catch (error) { - console.error(`Error checking ghosting signals for ${keyword}:`, error.message); - return { - keyword, - location, - concurrentCount: 0, - ghostingRisk: 'UNKNOWN' - }; - } -} - -// ============================================================================ -// MAIN SCRAPING LOGIC -// ============================================================================ - -async function scrapeAllData() { - console.log('πŸš€ Starting Market Trends Scraper...\n'); - console.log(`Target Keywords: ${CONFIG.keywords.length}`); - console.log(`Target Locations: ${CONFIG.locations.length}`); - console.log(`Total Combinations: ${CONFIG.keywords.length * CONFIG.locations.length}\n`); - - const browser = await initBrowser(); - const page = await browser.newPage(); - - // Set user agent to avoid detection - await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'); - - const results = []; - const ghostingData = []; - - let processed = 0; - const total = CONFIG.keywords.length * CONFIG.locations.length * 2; // 2 sources - - // Scrape each keyword-location combination - for (const keyword of CONFIG.keywords) { - for (const location of CONFIG.locations) { - - // Indeed - console.log(`[${++processed}/${total}] Scraping Indeed: ${keyword} in ${location}...`); - const indeedResult = await scrapeIndeed(page, keyword, location); - results.push(indeedResult); - - // Small delay to avoid rate limiting - await new Promise(resolve => setTimeout(resolve, 2000)); - - // Monsterboard - console.log(`[${++processed}/${total}] Scraping Monsterboard: ${keyword} in ${location}...`); - const monsterResult = await scrapeMonsterboard(page, keyword, location); - results.push(monsterResult); - - // Small delay - await new Promise(resolve => setTimeout(resolve, 2000)); - - // Check ghosting signals (once per keyword-location) - console.log(`[INFO] Checking ghosting signals: ${keyword} in ${location}...`); - const ghosting = await checkGhostingSignals(page, keyword, location); - ghostingData.push(ghosting); - - await new Promise(resolve => setTimeout(resolve, 2000)); - } - } - - await browser.close(); - - console.log('\nβœ… Scraping completed!\n'); - - return { results, ghostingData }; -} - -// ============================================================================ -// CSV EXPORT -// ============================================================================ - -/** - * Convert results to CSV format for Google Sheets - */ -function convertToCSV(results, ghostingData) { - const timestamp = new Date().toISOString(); - - // Market Trends CSV - let csv = 'Data Date,Source,Keyword,Location,Count,Trend %,Salary Range,URL,Last Updated\n'; - - results.forEach(r => { - const date = new Date().toISOString().split('T')[0]; - csv += `${date},"${r.source}","${r.keyword}","${r.location}",${r.jobCount},,"${r.salaryData}","${r.url}","${timestamp}"\n`; - }); - - // Ghosting Patterns CSV - let ghostingCSV = 'Data Date,Keyword,Location,Concurrent Count,Ghosting Risk,Detection Method,Last Updated\n'; - - ghostingData.forEach(g => { - const date = new Date().toISOString().split('T')[0]; - ghostingCSV += `${date},"${g.keyword}","${g.location}",${g.concurrentCount},"${g.ghostingRisk}","Concurrent Analysis","${timestamp}"\n`; - }); - - return { csv, ghostingCSV }; -} - -/** - * Save CSV files - */ -async function saveResults(results, ghostingData) { - // Create output directory - await fs.mkdir(CONFIG.outputDir, { recursive: true }); - - const { csv, ghostingCSV } = convertToCSV(results, ghostingData); - - // Save market trends - const trendsPath = path.join(CONFIG.outputDir, CONFIG.csvFilename); - await fs.writeFile(trendsPath, csv, 'utf8'); - console.log(`πŸ“Š Market trends saved: ${trendsPath}`); - - // Save ghosting patterns - const ghostingPath = path.join(CONFIG.outputDir, `ghosting_patterns_${new Date().toISOString().split('T')[0]}.csv`); - await fs.writeFile(ghostingPath, ghostingCSV, 'utf8'); - console.log(`πŸ‘» Ghosting patterns saved: ${ghostingPath}`); - - // Generate summary - const summary = generateSummary(results, ghostingData); - const summaryPath = path.join(CONFIG.outputDir, `scrape_summary_${new Date().toISOString().split('T')[0]}.txt`); - await fs.writeFile(summaryPath, summary, 'utf8'); - console.log(`πŸ“‹ Summary saved: ${summaryPath}`); -} - -/** - * Generate summary report - */ -function generateSummary(results, ghostingData) { - const totalJobs = results.reduce((sum, r) => sum + r.jobCount, 0); - const avgJobsPerKeyword = totalJobs / CONFIG.keywords.length; - - const highGhosting = ghostingData.filter(g => g.ghostingRisk === 'HIGH').length; - const mediumGhosting = ghostingData.filter(g => g.ghostingRisk === 'MEDIUM').length; - - const topKeywords = CONFIG.keywords - .map(keyword => { - const keywordResults = results.filter(r => r.keyword === keyword); - const totalCount = keywordResults.reduce((sum, r) => sum + r.jobCount, 0); - return { keyword, count: totalCount }; - }) - .sort((a, b) => b.count - a.count) - .slice(0, 5); - - return ` -============================================================================= -MARKET TRENDS SCRAPE SUMMARY -============================================================================= -Date: ${new Date().toISOString()} -Keywords Scraped: ${CONFIG.keywords.length} -Locations: ${CONFIG.locations.join(', ')} - -VACANCY TRENDS: -- Total Jobs Found: ${totalJobs} -- Average per Keyword: ${Math.round(avgJobsPerKeyword)} -- Sources: Indeed.nl, Monsterboard.nl - -TOP 5 KEYWORDS: -${topKeywords.map((k, i) => `${i + 1}. ${k.keyword}: ${k.count} jobs`).join('\n')} - -GHOSTING RISK: -- HIGH Risk: ${highGhosting} keyword-location combinations -- MEDIUM Risk: ${mediumGhosting} keyword-location combinations -- LOW Risk: ${ghostingData.length - highGhosting - mediumGhosting} combinations - -NEXT STEPS: -1. Import CSVs to Google Sheets Intelligence Hub -2. Review high ghosting risk keywords -3. Adjust targeting based on trends - -============================================================================= - `.trim(); -} - -// ============================================================================ -// EXECUTION -// ============================================================================ - -(async () => { - try { - const { results, ghostingData } = await scrapeAllData(); - await saveResults(results, ghostingData); - - console.log('\nβœ… ALL DONE! Files ready for Google Sheets import.\n'); - process.exit(0); - - } catch (error) { - console.error('❌ Scraper failed:', error); - process.exit(1); - } -})(); diff --git a/scripts/cleanup-content-intelligence-system.sh b/scripts/cleanup-content-intelligence-system.sh new file mode 100755 index 0000000..8b788e1 --- /dev/null +++ b/scripts/cleanup-content-intelligence-system.sh @@ -0,0 +1,55 @@ +#!/bin/bash +# Cleanup script voor recruitin-content-intelligence-system repo +# Verwijdert duplicate scrapers die nu in intelligence-hub zitten + +set -e + +REPO_URL="https://github.com/WouterArtsRecruitin/recruitin-content-intelligence-system.git" +BRANCH_NAME="cleanup/remove-duplicate-scrapers" + +echo "=== Cleanup recruitin-content-intelligence-system ===" +echo "" + +# Clone repo +TEMP_DIR=$(mktemp -d) +echo "Cloning repo to $TEMP_DIR..." +git clone "$REPO_URL" "$TEMP_DIR/repo" +cd "$TEMP_DIR/repo" + +# Create branch +echo "Creating branch: $BRANCH_NAME" +git checkout -b "$BRANCH_NAME" + +# Remove duplicate files +echo "Removing duplicate scrapers..." +git rm -f market-trends-scraper.js icp-monitor.js concurrent-tracker.js + +# Commit +echo "Committing changes..." +git commit -m "chore: remove duplicate scrapers (moved to intelligence-hub) + +Remove scripts that are now maintained in the intelligence-hub repository: +- market-trends-scraper.js +- icp-monitor.js +- concurrent-tracker.js + +These scrapers are now part of the consolidated intelligence-hub repo +with weekly scheduled workflows. + +This repo now focuses on: +- Pipedrive CRM integration (exports, dashboards) +- Content sentiment analysis +- News report generation +- Notion deals sync" + +# Push +echo "Pushing to origin..." +git push -u origin "$BRANCH_NAME" + +echo "" +echo "=== Done! ===" +echo "Create PR at: https://github.com/WouterArtsRecruitin/recruitin-content-intelligence-system/pull/new/$BRANCH_NAME" + +# Cleanup +cd / +rm -rf "$TEMP_DIR"