diff --git a/scripts/build_tools/system.ts b/scripts/build_tools/system.ts index 4edf662e..eea2f5eb 100644 --- a/scripts/build_tools/system.ts +++ b/scripts/build_tools/system.ts @@ -4,6 +4,7 @@ export const author = "@@official.shinkai"; export const systemTools = [ "local:::__official_shinkai:::shinkai_llm_prompt_processor", + "local:::__official_shinkai:::shinkai_llm_map_reduce_processor", "local:::__official_shinkai:::shinkai_sqlite_query_executor", "local:::__official_shinkai:::shinkai_process_embeddings", "local:::__official_shinkai:::shinkai_tool_config_updater", diff --git a/tools/pdf-whitepaper-analyzer/analysis-guide.txt b/tools/pdf-whitepaper-analyzer/assets/analysis-guide.txt similarity index 100% rename from tools/pdf-whitepaper-analyzer/analysis-guide.txt rename to tools/pdf-whitepaper-analyzer/assets/analysis-guide.txt diff --git a/tools/smart-search/.tool-dump.test.json b/tools/smart-search/.tool-dump.test.json index c692fc30..7856af4a 100644 --- a/tools/smart-search/.tool-dump.test.json +++ b/tools/smart-search/.tool-dump.test.json @@ -1 +1 @@ -{"type":"Deno","content":[{"name":"Smart Search Engine","homepage":null,"author":"@@official.shinkai","version":"1.0.0","js_code":"import { googleSearch, shinkaiLlmPromptProcessor, downloadPages } from './shinkai-local-tools.ts';\n\ntype CONFIG = {\n searchEngineApiKey?: string;\n searchEngine?: SearchEngine;\n maxSources?: number;\n}\ntype INPUTS = {\n question: string;\n};\ntype OUTPUT = {\n response: string;\n sources: SmartSearchSourcePage[];\n statements: SmartSearchStatement[];\n}\ntype PREFFERED_SOURCES = 'WIKIPEDIA'|'WOLFRAMALPHA'|'OTHER';\n\ntype SearchQueryConversion = {\n \"origin_question\": string;\n \"preferred_sources\": PREFFERED_SOURCES[];\n \"search_query\": string\n}\n\ntype SearchResult = {\n title: string;\n description: string;\n url: string;\n}\n\ntype SmartSearchSource = SearchResult | string;\ntype SearchEngine = 'DUCKDUCKGO' | 'GOOGLE' | 'BRAVE';\n\nexport interface SmartSearchSourcePage {\n id: number;\n url: string;\n markdown?: string;\n title: string;\n}\n\nexport interface SmartSearchStatement {\n sourceId: number;\n sourceTitle: string;\n extractedFacts: {\n statement: string;\n relevance: 'DIRECT_ANSWER' | 'HIGHLY_RELEVANT' | 'SOMEWHAT_RELEVANT' | 'TANGENTIAL' | 'NOT_RELEVANT';\n }[];\n}\nexport interface SmartSearchGenerationContext {\n originalQuestion: string;\n statements: SmartSearchStatement[];\n sources: SmartSearchSourcePage[];\n}\n\nconst answerGenerator = (context: SmartSearchGenerationContext): string => `\n# Smart Search Answer Generation Instructions\nYou are a sophisticated scientific communication assistant specialized in transforming extracted research statements into comprehensive, accessible, and precisely cited explanations.Your primary objective is to synthesize complex information from multiple sources into a clear, authoritative answer that maintains absolute fidelity to the source material. Think of yourself as an academic translator - your role is to take fragmented scientific statements and weave them into a coherent narrative that is both intellectually rigorous and engaging, ensuring that every substantive claim is meticulously attributed to its original source. Approach each question as an opportunity to provide a deep, nuanced understanding that goes beyond surface-level explanation, while maintaining strict scholarly integrity.\n## Input JSON Interfaces and Definitions\n\n\\`\\`\\`typescript\n// Source Page Interface\nexport interface SmartSearchSourcePage {\n id: number; // Unique identifier for the source\n url: string; // Full URL of the source\n markdown: string; // Full text content of the source page\n title: string; // Title of the source page\n}\n\n// Statement Interface with Detailed Relevance Levels\nexport interface SmartSearchStatement {\n sourceId: number; // ID of the source this statement comes from\n sourceTitle: string; // Title of the source\n extractedFacts: {\n statement: string; // Exact verbatim text from the source\n relevance: 'DIRECT_ANSWER' \n | 'HIGHLY_RELEVANT' \n | 'SOMEWHAT_RELEVANT' \n | 'TANGENTIAL' \n | 'NOT_RELEVANT'; // Relevance classification\n }[];\n}\n\n// Complete Input JSON Structure\ninterface AnswerGenerationContext {\n originalQuestion: string;\n statements: SmartSearchStatement[];\n sources: SmartSearchSourcePage[];\n}\n\\`\\`\\`\n\n## Relevance Level Interpretation\n- \\`DIRECT_ANSWER\\`: Prioritize these statements first\n- \\`HIGHLY_RELEVANT\\`: Strong secondary focus\n- \\`SOMEWHAT_RELEVANT\\`: Use for additional context\n- \\`TANGENTIAL\\`: Optional supplementary information\n- \\`NOT_RELEVANT\\`: Ignore completely\n\n## Answer Generation Guidelines\n\n### Content Construction Rules:\n1. Use ONLY information from the provided statements\n2. Prioritize statements with 'DIRECT_ANSWER' and 'HIGHLY_RELEVANT' relevance\n3. Create a comprehensive, informative answer\n4. Maintain scientific accuracy and depth\n\n### Citation Methodology:\n- Place citations IMMEDIATELY after relevant statements\n- Use SQUARE BRACKETS with NUMERIC source IDs\n- Format: \\`Statement of fact.[1][2]\\`\n- Cite EVERY substantive statement\n- Match citations exactly to source IDs\n\n### Structural Requirements:\n1. Detailed Main Answer\n - Comprehensive explanation\n - Technical depth\n - Precise scientific language\n - Full source citations\n\n2. Follow-Up Questions Section\n - Generate 3-4 thought-provoking questions\n - Encourage deeper exploration\n - Based on answer content\n - Formatted as a bulleted list\n\n3. Sources Section\n - List all cited sources\n - Include source titles and URLs\n - Order based on first citation appearance\n\n## Output Example Structure:\n\\`\\`\\`\n[Comprehensive, cited answer with source IDs in brackets]\n\nFollow-up Questions:\n- Question about deeper aspect of the topic\n- Question exploring related concepts\n- Question encouraging further research\n\nSources:\n[1] Source Title (URL)\n[2] Another Source Title (URL)\n...\n\\`\\`\\`\n\n## Critical Constraints:\n- NEVER introduce information not in the statements\n- Preserve exact factual content\n- Ensure grammatical and logical coherence\n- Provide a complete, informative answer\n- Maintain academic rigor\n\n## Processing Instructions:\n- Analyze statements systematically\n- Synthesize information coherently\n- Break down complex concepts\n- Provide scientific context\n- Explain underlying mechanisms\n\n\nThis is the input context:\n${JSON.stringify(context)}\n\n`;\n\nconst searchEngineQueryGenerator = (query: string) => {\n return `\n# Search Query and Source Selection Prompt\n\nYou are an expert at transforming natural language questions into precise search queries and selecting the most appropriate information source.\n\n## Source Selection Guidelines:\n- WIKIPEDIA: Best for general knowledge, scientific explanations, historical information\n- WOLFRAMALPHA: Ideal for mathematical, statistical, computational queries, scientific calculations\n- OTHER: General web search for current events, recent developments, practical information\n\n## Output Requirements:\n- Provide a JSON response with three key fields\n- Do NOT use code block backticks\n- Ensure \"preferred_sources\" is an array\n- Make search query concise and targeted\n\n## Examples:\n\n### Example 1\n- User Query: \"What is the speed of light?\"\n- Output:\n{\n\"origin_question\": \"What is the speed of light?\",\n\"preferred_sources\": [\"WOLFRAMALPHA\"],\n\"search_query\": \"speed of light exact value meters per second\"\n}\n\n### Example 2\n- User Query: \"Who was Marie Curie?\"\n- Output:\n{\n\"origin_question\": \"Who was Marie Curie?\",\n\"preferred_sources\": [\"WIKIPEDIA\"],\n\"search_query\": \"Marie Curie biography scientific achievements\"\n}\n\n### Example 3\n- User Query: \"Best restaurants in New York City\"\n- Output:\n{\n\"origin_question\": \"Best restaurants in New York City\",\n\"preferred_sources\": [\"OTHER\"],\n\"search_query\": \"top rated restaurants NYC 2024 dining\"\n}\n\n### Example 4\n- User Query: \"How do solar panels work?\"\n- Output:\n{\n\"origin_question\": \"How do solar panels work?\",\n\"preferred_sources\": [\"WIKIPEDIA\", \"OTHER\"],\n\"search_query\": \"solar panel photovoltaic technology mechanism\"\n}\n\n## Instructions:\n- Carefully analyze the user's query\n- Select the MOST APPROPRIATE source(s)\n- Create a targeted search query\n- Return ONLY the JSON without additional text\n\nUser Query: ${query}\n`\n\n}\n\nconst statementExtract = (originalQuestion: string, source: SmartSearchSourcePage): string => `\n\n# Fact Extraction Instructions\n\n## Input JSON Structure\n\\`\\`\\`json\n{\n \"originalQuestion\": \"string - The user's original question\",\n \"source\": {\n \"id\": \"number - Unique identifier for the source\",\n \"url\": \"string - URL of the source page\",\n \"title\": \"string - Title of the source page\",\n \"markdown\": \"string - Full text content of the source page\"\n }\n}\n\\`\\`\\`\n\n## Output JSON Structure\n\\`\\`\\`json\n{\n \"sourceId\": \"number - ID of the source\",\n \"sourceTitle\": \"string - Title of the source\",\n \"extractedFacts\": [\n {\n \"statement\": \"string - Verbatim text from the source\",\n \"relevance\": \"string - One of ['DIRECT_ANSWER', 'HIGHLY_RELEVANT', 'SOMEWHAT_RELEVANT', 'TANGENTIAL', 'NOT_RELEVANT']\"\n }\n ]\n}\n\\`\\`\\`\n\n## Relevance Classification Guide:\n- \\`DIRECT_ANSWER\\`: \n - Completely and precisely addresses the original question\n - Contains the core information needed to fully respond\n - Minimal to no additional context required\n\n- \\`HIGHLY_RELEVANT\\`: \n - Provides substantial information directly related to the question\n - Offers critical context or partial solution\n - Significantly contributes to understanding\n\n- \\`SOMEWHAT_RELEVANT\\`: \n - Provides partial or indirect information\n - Offers peripheral insights\n - Requires additional context to be fully meaningful\n\n- \\`TANGENTIAL\\`: \n - Loosely connected to the topic\n - Provides background or related information\n - Not directly addressing the core question\n\n- \\`NOT_RELEVANT\\`: \n - No meaningful connection to the original question\n - Completely unrelated information\n\n\n ## Extraction Guidelines:\n 1. Read the entire source document carefully\n 2. Extract EXACT quotes that:\n - Are actually helpful answering the provided question\n - Are stated verbatim from the source or are rephrased in such a way that doesn't distort the meaning in the original source\n - Represent complete thoughts or meaningful segments\n 3. Classify each extracted fact with its relevance level\n 4. Preserve original context and nuance\n\n## Critical Rules:\n- try NOT to paraphrase or modify the original text\n- Avoid any text in the \"statement\" field that is not helpful answering the provided question like javascript, URLs, HTML, and other non-textual content\n- Extract statements as they appear in the source and ONLY if they are helpful answering the provided question\n- Include full sentences or meaningful text segments\n- Preserve original formatting and punctuation\n- Sort extracted facts by relevance (DIRECT_ANSWER first)\n- Output JSON without \\`\\`\\`json\\`\\`\\` tags, or without any escape characters or any text that is not JSON or my system will crash.\n\n## Processing Instructions:\n- Analyze the entire document systematically\n- Be comprehensive in fact extraction\n- Err on the side of inclusion when in doubt\n- Focus on factual, informative statements\n\n==BEGIN INPUT==\nOriginal Question: ${originalQuestion}\n\nSource:\n${JSON.stringify(source)}\n==END INPUT==\n\n`\nconst debug = []\nfunction tryToExtractJSON(text: string): string {\n const regex = /```(?:json)?\\n([\\s\\S]+?)\\n```/;\n const match = text.match(regex);\n if (match) return match[1];\n else return text;\n}\n\nconst ProcessQuestionError = (step: string, error: Error): string =>\n `Failed to process question at ${step}: ${error.message}`;\n\nasync function conversionToSearchQuery(question: string): Promise {\n const prompt = searchEngineQueryGenerator(question);\n const optimizedQueryResult = await shinkaiLlmPromptProcessor({ format: 'text' , prompt });\n try {\n const result = JSON.parse(optimizedQueryResult.message.trim()) as SearchQueryConversion;\n return result;\n } catch (error) {\n throw new Error(ProcessQuestionError('question processing in optimizequery', new Error(String(error))));\n }\n}\n\n\nasync function extractSourcesFromSearchEngine(\n searchQuery: string,\n engine: SearchEngine,\n apiKey?: string,\n): Promise {\n switch (engine) {\n\t\tcase 'GOOGLE' : {\n\t\t\tconst results = await googleSearch({ query: searchQuery });\n\t\t\treturn results.results;\n\t\t}\n case 'DUCKDUCKGO':\n throw new Error('DuckDuckGo is not supported yet');\n case 'BRAVE': \n throw new Error('Brave is not supported yet');\n default:\n throw new Error('Invalid or unsupperted search engine');\n }\n}\n\nexport async function run(\n config: CONFIG,\n inputs: INPUTS\n): Promise {\n const { question } = inputs;\n if (!question) {\n throw new Error('Question is required in inputs');\n }\n\n try {\n // Step 1: Generate optimized search query\n const searchQuery = await conversionToSearchQuery(question);\n // Step 2: Perform search with optimized query\n const sources: SmartSearchSource[] = []\n for (const preferred_source of searchQuery.preferred_sources) {\n switch (preferred_source) {\n case 'WIKIPEDIA':{\n const searchEngineQuery = searchQuery.search_query+' site:wikipedia.org';\n const searchEngine = config.searchEngine || 'GOOGLE';\n const sourcesSearchResults: SearchResult[] = await extractSourcesFromSearchEngine(searchEngineQuery, searchEngine, config.searchEngineApiKey);\n try {\n const maxSources = config.maxSources ?? 3;\n sources.push(...(sourcesSearchResults.slice(0, Number(maxSources)) as SearchResult[]));\n } catch (error) {\n console.error('Failed to process search results', error);\n throw new Error('Failed to process search results');\n }\n break;\n }\n case 'WOLFRAMALPHA':\n throw new Error('WOLFRAMALPHA is not supported yet');\n case 'OTHER':\n break;\n default:\n throw new Error('Invalid source');\n }\n }\n const smartSearchSouces: SmartSearchSourcePage[] = []\n let id = 1;\n for (const source of sources) {\n if (typeof source === 'string') throw new Error('Invalid source');\n const searchResult = await downloadPages({ url: source.url });\n smartSearchSouces.push({\n id: id++, url: source.url, title: source.title,\n markdown: searchResult.markdown ?? '',\n });\n }\n const statements: SmartSearchStatement[] = []\n // Step 3: Extract statements from sources\n for (const smartSearchSource of smartSearchSouces) {\n const statementString = await shinkaiLlmPromptProcessor({ format: 'text', prompt: statementExtract(question, smartSearchSource) });\n const cleanStatementString = tryToExtractJSON(statementString.message)\n try { \n const statement = JSON.parse(cleanStatementString) as SmartSearchStatement;\n statements.push(statement);\n } catch (error) {\n console.error('Failed to process statement', smartSearchSource.url, error);\n console.error(cleanStatementString)\n }\n }\n // clean markdown from sources for lighter input\n smartSearchSouces.forEach(source => delete source.markdown);\n const generationContext: SmartSearchGenerationContext = {\n originalQuestion: question,\n statements,\n sources: smartSearchSouces,\n }\n // Step 4: Generate answer\n const answerPrompt = answerGenerator(generationContext);\n\t\tconst response = await shinkaiLlmPromptProcessor({ format: 'text', prompt: answerPrompt });\n return {\n statements,\n sources: smartSearchSouces,\n response: response.message,\n };\n } catch (error) {\n throw new Error(ProcessQuestionError('question processing in answer generation', new Error(String(error))));\n }\n}\n","tools":["local:::__official_shinkai:::google_search:::1.0.0","local:::__official_shinkai:::shinkai_llm_prompt_processor:::1.0.0","local:::__official_shinkai:::download_pages:::1.0.0"],"config":[{"BasicConfig":{"key_name":"searchEngine","description":"The search engine to use","required":false,"type":null,"key_value":null}},{"BasicConfig":{"key_name":"searchEngineApiKey","description":"The API key for the search engine","required":false,"type":null,"key_value":null}},{"BasicConfig":{"key_name":"maxSources","description":"The maximum number of sources to return","required":false,"type":null,"key_value":null}}],"description":"This function takes a question as input and returns a comprehensive answer, along with the sources and statements used to generate the answer.","keywords":["search","answer generation","fact extraction","wikipedia","google"],"input_args":{"type":"object","properties":{"question":{"type":"string","description":"The question to answer"}},"required":["question"]},"output_arg":{"json":""},"activated":false,"embedding":[0.44519925,0.011538006,-0.110713586,-0.22075558,-0.14790598,-0.29269773,-0.4754676,0.44221854,-0.121888064,0.39252597,-0.3006528,1.0143435,0.36214498,-0.31270468,0.6776067,0.25015622,0.35587713,-0.23176439,-1.9852347,0.016676497,0.5186351,0.13615943,0.34565818,0.11482762,-0.014134172,0.29590514,0.31907755,-0.3896206,-1.2329955,-1.6192199,0.85829604,0.62480825,-0.6851429,-0.4065035,-0.57009935,-0.44622755,-0.018254763,-0.29652905,0.06502104,0.08794068,0.123337545,-0.2962008,-0.7889936,-0.2309857,-0.30533978,0.00014958903,-0.100521065,-0.17255962,0.65703535,0.59996736,-0.34312758,-0.3323959,-0.2751081,0.054695282,-0.6813821,-0.16973025,-0.34893867,-0.60133564,-0.1950637,-0.30053732,0.61119884,0.13490543,-4.0774403,-0.1405774,0.52335876,0.2547453,0.10967965,-0.37900138,0.33353627,-0.0860852,0.06344949,0.27401373,0.35607475,0.3472401,0.20127875,-0.2749816,-0.1676489,-0.3010119,0.40944278,-0.60488445,-0.33455145,0.036982603,0.1607093,0.18134978,-0.50376195,0.4321913,0.21538696,-0.3986729,0.1924279,-0.07740533,-0.5905008,-0.32293463,-0.33489698,0.08675105,-0.38169703,0.20435761,0.1692553,0.48983234,0.4500325,3.2424073,0.30719528,-0.3322998,0.36166048,-1.097402,0.9221763,-0.3271866,0.15179256,-0.7435132,0.7849906,-0.076523826,0.4214928,-0.20277414,-0.008337051,0.84142566,-0.14813311,0.10049243,-0.48139656,0.091330424,0.08260442,0.73960954,-0.13112147,0.17086662,-0.43677095,-0.3310505,-0.25675213,0.6580711,-0.3097783,0.3686354,0.40853876,0.52439237,0.45975456,-0.5577316,-1.1559865,-0.028603502,0.3412104,0.350788,0.30436534,-0.84187645,0.6342305,-0.7242338,0.30563703,-1.3071945,0.56033385,-0.11213897,0.7491704,-0.037651196,-0.66392916,-0.22443774,-0.40022716,-0.4695392,0.12346482,0.42506516,-0.008828731,-0.036567945,0.7910692,0.4870108,-0.10458979,-0.0010068789,-0.19713187,0.18817124,-0.34421226,0.43161416,0.35337302,0.45155972,0.31935665,-0.38856277,0.14921239,0.46791223,-0.19556057,-0.1643018,0.4380207,-0.46539715,-0.2832293,0.81312156,-0.052545656,-0.29040393,0.1400195,-0.12654832,0.41849804,-0.882607,-0.13826783,0.77245986,-0.5711882,-0.27059942,-0.19140518,0.4529812,0.0826526,-0.367387,0.64419246,1.1117058,-0.47169876,1.4447793,-0.2390542,-0.19558607,0.1427455,-0.21482131,0.104871795,0.2707702,0.38783553,0.1292122,-1.0809127,-0.0846922,-0.039507207,-0.3097255,-0.29537687,-0.65163094,0.19342569,-0.32778674,-0.08130689,-0.64830554,0.12468888,-0.5193145,1.1083255,-0.010225475,0.6004076,-0.14024194,0.058595452,0.4302386,-0.47035223,0.8164973,0.21753985,-0.013964482,-0.6944066,-0.62527126,-0.82455045,-0.112839624,0.14301777,0.12782156,-0.2723977,0.008159652,0.5853714,0.9211197,0.4646956,1.390659,0.76697886,0.523064,-0.12873787,0.25171667,0.17022641,-0.30911857,0.31879586,-0.1103535,-0.33922687,-0.32489824,0.3671196,-0.54260576,-0.010865424,-0.16758339,0.06505124,1.4560661,0.7549216,0.18457569,-0.04554111,0.9045233,-0.14435571,0.16820474,-1.6039296,0.078886904,-0.30494863,0.45922333,-0.011344843,-0.3130207,0.39990506,0.3539346,-0.6010364,-0.8332309,-0.37220326,-0.80922806,-0.17530465,-0.06533443,-0.109381296,0.35899848,-0.47536635,-0.042883083,0.5028103,0.15244354,0.3528542,0.5739549,-0.58191675,-0.13802925,0.4039406,0.15418546,0.10369478,0.24870338,-0.14724898,-0.1899396,-0.4523422,-0.067849,-0.45093012,0.7759565,-0.47772875,-0.5009712,-0.63364625,0.32183337,1.6104475,0.31228143,-0.2737522,0.8513892,-0.10785564,-0.083213486,0.3272493,0.34409443,-0.102422476,0.14750528,-0.21719596,-0.7646839,0.7050644,-0.43036377,-0.34817296,-0.07673599,-0.8938274,-0.259611,0.085467875,-0.35622948,0.17879769,-0.4371638,0.84722894,0.42178243,0.47265047,-2.0367239,-0.28862223,0.2279939,0.023028731,0.03350927,-0.84622985,0.6191283,-0.36584458,0.52736944,-0.8522264,1.7012173,0.623458,-0.34564644,-0.29220566,0.22257191,1.008989,-0.5586227,-0.100436315,-0.20206258,-0.14138165,0.29932487,0.3574607,1.777014,0.44976053,0.1803996,0.17181638,0.11031827,-0.18544856,-1.3960699,0.18656585,0.43533033,-0.32739973,0.56046206,-0.08367232,-0.45680672,0.45554873,1.0323718,0.2787497,0.19714256,-0.0057662483,2.0173283,0.14193904,-0.33267602,-0.4824432,-0.44496542,-0.44327742,0.33923206,0.24754228,-0.22856423,0.23647808,-0.4234256,-0.11778459,-0.19414693,0.28126818,-0.0637252,0.28323674,0.4969948,-0.15884419,0.684458,0.3888534,-0.17290172,0.74648315,0.03662668,-1.0852582,-0.24843164],"result":{"type":"object","properties":{"response":{"description":"The generated answer","type":"string"},"sources":{"description":"The sources used to generate the answer","items":{"properties":{"id":{"type":"number"},"title":{"type":"string"},"url":{"type":"string"}},"type":"object"},"type":"array"},"statements":{"description":"The statements extracted from the sources","items":{"properties":{"extractedFacts":{"items":{"properties":{"relevance":{"type":"string"},"statement":{"type":"string"}},"type":"object"},"type":"array"},"sourceId":{"type":"number"},"sourceTitle":{"type":"string"}},"type":"object"},"type":"array"}},"required":["response","sources","statements"]},"sql_tables":[],"sql_queries":[],"file_inbox":null,"oauth":null,"assets":null,"runner":"any","operating_system":["linux","macos","windows"],"tool_set":""},false]} +{"type":"Deno","content":[{"name":"Smart Search Engine","homepage":null,"author":"@@official.shinkai","version":"1.0.0","js_code":"import {\n googleSearch,\n duckduckgoSearch,\n shinkaiLlmPromptProcessor,\n shinkaiLlmMapReduceProcessor,\n downloadPages,\n} from './shinkai-local-tools.ts';\n\ntype CONFIG = {\n searchEngineApiKey?: string;\n searchEngine?: SearchEngine;\n maxSources?: number;\n}\ntype INPUTS = {\n question: string;\n};\ntype OUTPUT = {\n response: string;\n sources: SmartSearchSourcePage[];\n statements: SmartSearchStatement[];\n}\ntype PREFFERED_SOURCES = 'WIKIPEDIA'|'WEB_SEARCH';\n\ntype SearchQueryConversion = {\n \"origin_question\": string;\n \"preferred_sources\": PREFFERED_SOURCES[];\n \"search_query\": string\n}\n\ntype SearchResult = {\n title: string;\n description: string;\n url: string;\n}\n\ntype SmartSearchSource = SearchResult | string;\nexport type SearchEngine = 'DUCKDUCKGO' | 'GOOGLE' | 'BRAVE';\n\nexport interface SmartSearchSourcePage {\n id: number;\n url: string;\n markdown?: string;\n title: string;\n}\n\nexport interface SmartSearchStatement {\n sourceId: number;\n sourceTitle: string;\n extractedFacts: {\n statement: string;\n relevance: 'DIRECT_ANSWER' | 'HIGHLY_RELEVANT' | 'SOMEWHAT_RELEVANT' | 'TANGENTIAL' | 'NOT_RELEVANT';\n }[];\n}\nexport interface SmartSearchGenerationContext {\n originalQuestion: string;\n statements: SmartSearchStatement[];\n sources: SmartSearchSourcePage[];\n}\n\nconst answerGenerator = (context: SmartSearchGenerationContext): string => `\n# Smart Search Answer Generation Instructions\nYou are a sophisticated scientific communication assistant specialized in transforming extracted research statements into comprehensive, accessible, and precisely cited explanations.Your primary objective is to synthesize complex information from multiple sources into a clear, authoritative answer that maintains absolute fidelity to the source material. Think of yourself as an academic translator - your role is to take fragmented scientific statements and weave them into a coherent narrative that is both intellectually rigorous and engaging, ensuring that every substantive claim is meticulously attributed to its original source. Approach each question as an opportunity to provide a deep, nuanced understanding that goes beyond surface-level explanation, while maintaining strict scholarly integrity.\n## Input JSON Interfaces and Definitions\n\n\\`\\`\\`typescript\n// Source Page Interface\nexport interface SmartSearchSourcePage {\n id: number; // Unique identifier for the source\n url: string; // Full URL of the source\n markdown: string; // Full text content of the source page\n title: string; // Title of the source page\n}\n\n// Statement Interface with Detailed Relevance Levels\nexport interface SmartSearchStatement {\n sourceId: number; // ID of the source this statement comes from\n sourceTitle: string; // Title of the source\n extractedFacts: {\n statement: string; // Exact verbatim text from the source\n relevance: 'DIRECT_ANSWER' \n | 'HIGHLY_RELEVANT' \n | 'SOMEWHAT_RELEVANT' \n | 'TANGENTIAL' \n | 'NOT_RELEVANT'; // Relevance classification\n }[];\n}\n\n// Complete Input JSON Structure\ninterface AnswerGenerationContext {\n originalQuestion: string;\n statements: SmartSearchStatement[];\n sources: SmartSearchSourcePage[];\n}\n\\`\\`\\`\n\n## Relevance Level Interpretation\n- \\`DIRECT_ANSWER\\`: Prioritize these statements first\n- \\`HIGHLY_RELEVANT\\`: Strong secondary focus\n- \\`SOMEWHAT_RELEVANT\\`: Use for additional context\n- \\`TANGENTIAL\\`: Optional supplementary information\n- \\`NOT_RELEVANT\\`: Ignore completely\n\n## Answer Generation Guidelines\n\n### Content Construction Rules:\n1. Use ONLY information from the provided statements\n2. Prioritize statements with 'DIRECT_ANSWER' and 'HIGHLY_RELEVANT' relevance\n3. Create a comprehensive, informative answer\n4. Maintain scientific accuracy and depth\n\n### Citation Methodology:\n- Place citations IMMEDIATELY after relevant statements\n- Use SQUARE BRACKETS with NUMERIC source IDs\n- Format: \\`Statement of fact.[1][2]\\`\n- Cite EVERY substantive statement\n- Match citations exactly to source IDs\n\n### Structural Requirements:\n1. Detailed Main Answer\n - Comprehensive explanation\n - Technical depth\n - Precise scientific language\n - Full source citations\n\n2. Follow-Up Questions Section\n - Generate 3-4 thought-provoking questions\n - Encourage deeper exploration\n - Based on answer content\n - Formatted as a bulleted list\n\n3. Sources Section\n - List all cited sources\n - Include source titles and URLs\n - Order based on first citation appearance\n\n## Output Example Structure:\n\\`\\`\\`\n[Comprehensive, cited answer with source IDs in brackets]\n\nFollow-up Questions:\n- Question about deeper aspect of the topic\n- Question exploring related concepts\n- Question encouraging further research\n\nSources:\n[1] Source Title (URL)\n[2] Another Source Title (URL)\n...\n\\`\\`\\`\n\n## Critical Constraints:\n- NEVER introduce information not in the statements\n- Preserve exact factual content\n- Ensure grammatical and logical coherence\n- Provide a complete, informative answer\n- Maintain academic rigor\n\n## Processing Instructions:\n- Analyze statements systematically\n- Synthesize information coherently\n- Break down complex concepts\n- Provide scientific context\n- Explain underlying mechanisms\n\n\nThis is the input context:\n${JSON.stringify(context)}\n\n`;\n\nconst searchEngineQueryGenerator = (query: string) => {\n return `\n# Search Query and Source Selection Prompt\n\nYou are an expert at transforming natural language questions into precise search queries and selecting the most appropriate information source.\n\n## Source Selection Guidelines:\n- WEB_SEARCH: General web search for current events, recent developments, practical information\n- WIKIPEDIA: Best for general knowledge, scientific explanations, historical information\n\n## Output Requirements:\n- Provide a JSON response with three key fields\n- Do NOT use code block backticks\n- Ensure \"preferred_sources\" is an array\n- Make search query concise and targeted\n\n## Examples:\n\n### Example 1\n- User Query: \"Who was Marie Curie?\"\n- Output:\n{\n\"origin_question\": \"Who was Marie Curie?\",\n\"preferred_sources\": [\"WIKIPEDIA\"],\n\"search_query\": \"Marie Curie biography scientific achievements\"\n}\n\n### Example 2\n- User Query: \"Best restaurants in New York City\"\n- Output:\n{\n\"origin_question\": \"Best restaurants in New York City\",\n\"preferred_sources\": [\"WEB_SEARCH\"],\n\"search_query\": \"top rated restaurants NYC 2024 dining\"\n}\n\n### Example 3\n- User Query: \"How do solar panels work?\"\n- Output:\n{\n\"origin_question\": \"How do solar panels work?\",\n\"preferred_sources\": [\"WIKIPEDIA\", \"WEB_SEARCH\"],\n\"search_query\": \"solar panel photovoltaic technology mechanism\"\n}\n\n## Instructions:\n- Carefully analyze the user's query\n- Select the MOST APPROPRIATE source(s)\n- Create a targeted search query\n- Return ONLY the JSON without additional text\n- Regarding things like new technologies like blockchain or artifical intelligence or recent scientific discoveries you should always use WEB_SEARCH\n- Regarding things like historical events or consolidated scientific knowledge you should always use WIKIPEDIA\n\nUser Query: ${query}\n`\n\n}\n\nconst statementExtract = (originalQuestion: string, source: SmartSearchSourcePage): string => `\nYou're an expert at extracting facts from a source page. It has been commended to you to extract facts from the source page that are helpful to answer the original question.\nOriginal Question: ${originalQuestion}\nYou will be given a source with the following fields:\n- id: number - Unique identifier for the source\n- url: string - URL of the source page\n- title: string - Title of the source page\n- markdown: string - Full text content of the source page\n\n${JSON.stringify(source)}\n\n# Fact Extraction Instructions\n\nYou will be given the contents of the provided source page. Your job is to extract the facts that are helpful to answer the original question.\nPlease format the facts that will be extracted in an array of objects with the following JSON structure.\n## Output JSON Structure\n\\`\\`\\`json\n{\n \"sourceId\": \"number - ID of the source\",\n \"sourceTitle\": \"string - Title of the source\",\n \"extractedFacts\": [\n {\n \"statement\": \"string - Verbatim text from the source\",\n \"relevance\": \"string - One of ['DIRECT_ANSWER', 'HIGHLY_RELEVANT', 'SOMEWHAT_RELEVANT', 'TANGENTIAL', 'NOT_RELEVANT']\"\n }\n ]\n}\n\\`\\`\\`\n\n## Relevance Classification Guide:\n- \\`DIRECT_ANSWER\\`: \n - Completely and precisely addresses the original question\n - Contains the core information needed to fully respond\n - Minimal to no additional context required\n\n- \\`HIGHLY_RELEVANT\\`: \n - Provides substantial information directly related to the question\n - Offers critical context or partial solution\n - Significantly contributes to understanding\n\n- \\`SOMEWHAT_RELEVANT\\`: \n - Provides partial or indirect information\n - Offers peripheral insights\n - Requires additional context to be fully meaningful\n\n- \\`TANGENTIAL\\`: \n - Loosely connected to the topic\n - Provides background or related information\n - Not directly addressing the core question\n\n- \\`NOT_RELEVANT\\`: \n - No meaningful connection to the original question\n - Completely unrelated information\n\n\n## Extraction Guidelines:\n1. Read the entire source document carefully\n2. Extract EXACT quotes that:\n - Are actually helpful answering the provided question\n - Are stated verbatim from the source or are rephrased in such a way that doesn't distort the meaning in the original source\n - Represent complete thoughts or meaningful segments\n3. Classify each extracted fact with its relevance level\n4. Preserve original context and nuance\n\n## Critical Rules:\n- try NOT to paraphrase or modify the original text. If you can't find a direct quote or you think the found quote is too long, you can paraphrase it.\n- Avoid any text in the \"statement\" field that is not helpful answering the provided question like javascript, URLs, HTML, and other non-textual content\n- Extract statements as they appear in the source and ONLY if they are helpful answering the provided question\n- Include full sentences or meaningful text segments\n- Preserve original formatting and punctuation\n- Sort extracted facts by relevance (DIRECT_ANSWER first)\n- Output JSON without \\`\\`\\`json\\`\\`\\` tags, or without any escape characters or any text that is not JSON or my system will crash.\n\n## Processing Instructions:\n- Analyze the entire document systematically\n- Be comprehensive in fact extraction\n- Err on the side of inclusion when in doubt\n- Focus on factual, informative statements\n`\nconst debug = []\nconst randomTimeout = () => {\n const random = (1000 + Math.random() * 2000)|0;\n console.log(`Waiting for ${random}ms`)\n return new Promise(resolve => setTimeout(resolve, random));\n}\n\nfunction tryToExtractJSON(text: string): string {\n const regex = /```(?:json)?\\n([\\s\\S]+?)\\n```/;\n const match = text.match(regex);\n if (match) return match[1];\n else return text;\n}\n\nconst ProcessQuestionError = (step: string, error: Error): string =>\n `Failed to process question at ${step}: ${error.message}`;\n\nasync function conversionToSearchQuery(question: string): Promise {\n const prompt = searchEngineQueryGenerator(question);\n const optimizedQueryResult = await shinkaiLlmPromptProcessor({ format: 'text' , prompt });\n try {\n const result = JSON.parse(optimizedQueryResult.message.trim()) as SearchQueryConversion;\n return result;\n } catch (error) {\n console.error(error)\n if (typeof error === 'object') {\n console.log(JSON.stringify(error, null, 2))\n }\n throw new Error(ProcessQuestionError('question processing in optimizequery', new Error(String(error))));\n }\n}\n\n\nasync function extractSourcesFromSearchEngine(\n searchQuery: string,\n engine: SearchEngine,\n apiKey?: string,\n): Promise {\n switch (engine) {\n\t\tcase 'GOOGLE' : {\n\t\t\tconst results = await googleSearch({ query: searchQuery });\n\t\t\treturn results.results;\n\t\t}\n case 'DUCKDUCKGO': {\n const results = await duckduckgoSearch({ message: searchQuery });\n if (results.message) return JSON.parse(results.message);\n return [];\n }\n case 'BRAVE': \n throw new Error('Brave is not supported yet');\n default:\n throw new Error('Invalid or unsupperted search engine');\n }\n}\n\nexport async function run(\n config: CONFIG,\n inputs: INPUTS\n): Promise {\n const { question } = inputs;\n if (!question) {\n throw new Error('Question is required in inputs');\n }\n\n try {\n // Step 1: Generate optimized search query\n const searchQuery = await conversionToSearchQuery(question);\n // Step 2: Perform search with optimized query\n const sources: SmartSearchSource[] = []\n for (const preferred_source of searchQuery.preferred_sources) {\n switch (preferred_source) {\n case 'WIKIPEDIA':{\n const searchEngineQuery = searchQuery.search_query+' site:wikipedia.org';\n const searchEngine = config.searchEngine || 'GOOGLE';\n const sourcesSearchResults: SearchResult[] = await extractSourcesFromSearchEngine(searchEngineQuery, searchEngine, config.searchEngineApiKey);\n try {\n sources.push(...(sourcesSearchResults as SearchResult[]));\n } catch (error) {\n console.error('Failed to process search results', error);\n throw new Error('Failed to process search results');\n }\n break;\n }\n case 'WEB_SEARCH': {\n const searchEngineQuery = searchQuery.search_query.trim();\n const searchEngine = config.searchEngine || 'GOOGLE';\n const sourcesSearchResults: SearchResult[] = await extractSourcesFromSearchEngine(searchEngineQuery, searchEngine, config.searchEngineApiKey);\n sources.push(...(sourcesSearchResults as SearchResult[]));\n break;\n }\n default:\n throw new Error('Invalid source');\n }\n }\n const smartSearchSouces: SmartSearchSourcePage[] = []\n let id = 1;\n while (smartSearchSouces.length < Number(config.maxSources ?? 3)) {\n const source = sources.shift();\n if (!source) break;\n if (typeof source === 'string') throw new Error('Invalid source');\n console.log('+++++++++');\n console.log(`${id} Downloading source ${source.url}`)\n console.log('+++++++++');\n try {\n const searchResult = await downloadPages({ url: source.url });\n smartSearchSouces.push({\n id: id, url: source.url, title: source.title,\n markdown: searchResult.markdown ?? '',\n });\n } catch (error) {\n console.error('Failed to process source', source.url, error);\n }\n id++;\n await randomTimeout();\n }\n console.log('Finished downloading sources');\n const statements: SmartSearchStatement[] = []\n // Step 3: Extract statements from sources\n for (const smartSearchSource of smartSearchSouces) {\n // TODO use map reduce to extract statements\n const source = smartSearchSource.markdown;\n const sourceData = {\n title: smartSearchSource.title,\n url: smartSearchSource.url,\n id: smartSearchSource.id,\n }\n const statementString = await shinkaiLlmMapReduceProcessor({ prompt: statementExtract(question, sourceData), data: source as string });\n const cleanStatementString = tryToExtractJSON(statementString.response)\n try { \n const statement = JSON.parse(cleanStatementString) as SmartSearchStatement;\n statements.push(statement);\n } catch (error) {\n console.error('Failed to process statement', smartSearchSource.url, error);\n console.error(cleanStatementString)\n }\n }\n // clean markdown from sources for lighter input\n smartSearchSouces.forEach(source => delete source.markdown);\n const generationContext: SmartSearchGenerationContext = {\n originalQuestion: question,\n statements,\n sources: smartSearchSouces,\n }\n // Step 4: Generate answer\n const answerPrompt = answerGenerator(generationContext);\n\t\tconst response = await shinkaiLlmPromptProcessor({ format: 'text', prompt: answerPrompt });\n return {\n statements,\n sources: smartSearchSouces,\n response: response.message,\n };\n } catch (error) {\n throw new Error(ProcessQuestionError('question processing in answer generation', new Error(String(error))));\n }\n}\n","tools":["local:::__official_shinkai:::google_search:::1.0.0","local:::__official_shinkai:::duckduckgo_search:::1.0.0","local:::__official_shinkai:::shinkai_llm_prompt_processor:::1.0.0","local:::__official_shinkai:::shinkai_llm_map_reduce_processor:::1.0.0","local:::__official_shinkai:::download_pages:::1.0.0"],"config":[{"BasicConfig":{"key_name":"searchEngine","description":"The search engine to use","required":false,"type":null,"key_value":null}},{"BasicConfig":{"key_name":"searchEngineApiKey","description":"The API key for the search engine","required":false,"type":null,"key_value":null}},{"BasicConfig":{"key_name":"maxSources","description":"The maximum number of sources to return","required":false,"type":null,"key_value":null}}],"description":"This function takes a question as input and returns a comprehensive answer, along with the sources and statements used to generate the answer.","keywords":["search","answer generation","fact extraction","wikipedia","google"],"input_args":{"type":"object","properties":{"question":{"type":"string","description":"The question to answer"}},"required":["question"]},"output_arg":{"json":""},"activated":false,"embedding":[0.44519925,0.011538006,-0.110713586,-0.22075558,-0.14790598,-0.29269773,-0.4754676,0.44221854,-0.121888064,0.39252597,-0.3006528,1.0143435,0.36214498,-0.31270468,0.6776067,0.25015622,0.35587713,-0.23176439,-1.9852347,0.016676497,0.5186351,0.13615943,0.34565818,0.11482762,-0.014134172,0.29590514,0.31907755,-0.3896206,-1.2329955,-1.6192199,0.85829604,0.62480825,-0.6851429,-0.4065035,-0.57009935,-0.44622755,-0.018254763,-0.29652905,0.06502104,0.08794068,0.123337545,-0.2962008,-0.7889936,-0.2309857,-0.30533978,0.00014958903,-0.100521065,-0.17255962,0.65703535,0.59996736,-0.34312758,-0.3323959,-0.2751081,0.054695282,-0.6813821,-0.16973025,-0.34893867,-0.60133564,-0.1950637,-0.30053732,0.61119884,0.13490543,-4.0774403,-0.1405774,0.52335876,0.2547453,0.10967965,-0.37900138,0.33353627,-0.0860852,0.06344949,0.27401373,0.35607475,0.3472401,0.20127875,-0.2749816,-0.1676489,-0.3010119,0.40944278,-0.60488445,-0.33455145,0.036982603,0.1607093,0.18134978,-0.50376195,0.4321913,0.21538696,-0.3986729,0.1924279,-0.07740533,-0.5905008,-0.32293463,-0.33489698,0.08675105,-0.38169703,0.20435761,0.1692553,0.48983234,0.4500325,3.2424073,0.30719528,-0.3322998,0.36166048,-1.097402,0.9221763,-0.3271866,0.15179256,-0.7435132,0.7849906,-0.076523826,0.4214928,-0.20277414,-0.008337051,0.84142566,-0.14813311,0.10049243,-0.48139656,0.091330424,0.08260442,0.73960954,-0.13112147,0.17086662,-0.43677095,-0.3310505,-0.25675213,0.6580711,-0.3097783,0.3686354,0.40853876,0.52439237,0.45975456,-0.5577316,-1.1559865,-0.028603502,0.3412104,0.350788,0.30436534,-0.84187645,0.6342305,-0.7242338,0.30563703,-1.3071945,0.56033385,-0.11213897,0.7491704,-0.037651196,-0.66392916,-0.22443774,-0.40022716,-0.4695392,0.12346482,0.42506516,-0.008828731,-0.036567945,0.7910692,0.4870108,-0.10458979,-0.0010068789,-0.19713187,0.18817124,-0.34421226,0.43161416,0.35337302,0.45155972,0.31935665,-0.38856277,0.14921239,0.46791223,-0.19556057,-0.1643018,0.4380207,-0.46539715,-0.2832293,0.81312156,-0.052545656,-0.29040393,0.1400195,-0.12654832,0.41849804,-0.882607,-0.13826783,0.77245986,-0.5711882,-0.27059942,-0.19140518,0.4529812,0.0826526,-0.367387,0.64419246,1.1117058,-0.47169876,1.4447793,-0.2390542,-0.19558607,0.1427455,-0.21482131,0.104871795,0.2707702,0.38783553,0.1292122,-1.0809127,-0.0846922,-0.039507207,-0.3097255,-0.29537687,-0.65163094,0.19342569,-0.32778674,-0.08130689,-0.64830554,0.12468888,-0.5193145,1.1083255,-0.010225475,0.6004076,-0.14024194,0.058595452,0.4302386,-0.47035223,0.8164973,0.21753985,-0.013964482,-0.6944066,-0.62527126,-0.82455045,-0.112839624,0.14301777,0.12782156,-0.2723977,0.008159652,0.5853714,0.9211197,0.4646956,1.390659,0.76697886,0.523064,-0.12873787,0.25171667,0.17022641,-0.30911857,0.31879586,-0.1103535,-0.33922687,-0.32489824,0.3671196,-0.54260576,-0.010865424,-0.16758339,0.06505124,1.4560661,0.7549216,0.18457569,-0.04554111,0.9045233,-0.14435571,0.16820474,-1.6039296,0.078886904,-0.30494863,0.45922333,-0.011344843,-0.3130207,0.39990506,0.3539346,-0.6010364,-0.8332309,-0.37220326,-0.80922806,-0.17530465,-0.06533443,-0.109381296,0.35899848,-0.47536635,-0.042883083,0.5028103,0.15244354,0.3528542,0.5739549,-0.58191675,-0.13802925,0.4039406,0.15418546,0.10369478,0.24870338,-0.14724898,-0.1899396,-0.4523422,-0.067849,-0.45093012,0.7759565,-0.47772875,-0.5009712,-0.63364625,0.32183337,1.6104475,0.31228143,-0.2737522,0.8513892,-0.10785564,-0.083213486,0.3272493,0.34409443,-0.102422476,0.14750528,-0.21719596,-0.7646839,0.7050644,-0.43036377,-0.34817296,-0.07673599,-0.8938274,-0.259611,0.085467875,-0.35622948,0.17879769,-0.4371638,0.84722894,0.42178243,0.47265047,-2.0367239,-0.28862223,0.2279939,0.023028731,0.03350927,-0.84622985,0.6191283,-0.36584458,0.52736944,-0.8522264,1.7012173,0.623458,-0.34564644,-0.29220566,0.22257191,1.008989,-0.5586227,-0.100436315,-0.20206258,-0.14138165,0.29932487,0.3574607,1.777014,0.44976053,0.1803996,0.17181638,0.11031827,-0.18544856,-1.3960699,0.18656585,0.43533033,-0.32739973,0.56046206,-0.08367232,-0.45680672,0.45554873,1.0323718,0.2787497,0.19714256,-0.0057662483,2.0173283,0.14193904,-0.33267602,-0.4824432,-0.44496542,-0.44327742,0.33923206,0.24754228,-0.22856423,0.23647808,-0.4234256,-0.11778459,-0.19414693,0.28126818,-0.0637252,0.28323674,0.4969948,-0.15884419,0.684458,0.3888534,-0.17290172,0.74648315,0.03662668,-1.0852582,-0.24843164],"result":{"type":"object","properties":{"response":{"description":"The generated answer","type":"string"},"sources":{"description":"The sources used to generate the answer","items":{"properties":{"id":{"type":"number"},"title":{"type":"string"},"url":{"type":"string"}},"type":"object"},"type":"array"},"statements":{"description":"The statements extracted from the sources","items":{"properties":{"extractedFacts":{"items":{"properties":{"relevance":{"type":"string"},"statement":{"type":"string"}},"type":"object"},"type":"array"},"sourceId":{"type":"number"},"sourceTitle":{"type":"string"}},"type":"object"},"type":"array"}},"required":["response","sources","statements"]},"sql_tables":[],"sql_queries":[],"file_inbox":null,"oauth":null,"assets":null,"runner":"any","operating_system":["linux","macos","windows"],"tool_set":""},false]} \ No newline at end of file diff --git a/tools/smart-search/metadata.json b/tools/smart-search/metadata.json index be007735..70867f75 100644 --- a/tools/smart-search/metadata.json +++ b/tools/smart-search/metadata.json @@ -99,7 +99,9 @@ "sqlQueries": [], "tools": [ "local:::__official_shinkai:::google_search:::1.0.0", + "local:::__official_shinkai:::duckduckgo_search:::1.0.0", "local:::__official_shinkai:::shinkai_llm_prompt_processor:::1.0.0", + "local:::__official_shinkai:::shinkai_llm_map_reduce_processor:::1.0.0", "local:::__official_shinkai:::download_pages:::1.0.0" ] } diff --git a/tools/smart-search/tool.test.ts b/tools/smart-search/tool.test.ts new file mode 100644 index 00000000..68d4a7b4 --- /dev/null +++ b/tools/smart-search/tool.test.ts @@ -0,0 +1,26 @@ +// main.ts +import "https://deno.land/std@0.190.0/dotenv/load.ts"; // Use the latest version if possible + +// Now you can access your variables: +console.log("API_KEY:", Deno.env.get("API_KEY")); +console.log("PORT:", Deno.env.get("PORT")); +console.log("X_SHINKAI_LLM_PROVIDER:", Deno.env.get("X_SHINKAI_LLM_PROVIDER")); +console.log("SHINKAI_NODE_LOCATION:", Deno.env.get("SHINKAI_NODE_LOCATION")); +console.log("X_SHINKAI_APP_ID:", Deno.env.get("X_SHINKAI_APP_ID")); + +import { run, SearchEngine } from "./tool.ts"; + + +const config = { + searchEngineApiKey: "API_KEY", + searchEngine: "DUCKDUCKGO" as SearchEngine, + maxSources: 3, +} + +const input = { + question: "What are the applications of DAGs in blockchain or cryptocurrencies?", +} + +const result = await run(config, input); + +console.log(result); \ No newline at end of file diff --git a/tools/smart-search/tool.ts b/tools/smart-search/tool.ts index e0301ad2..d0e02dd1 100644 --- a/tools/smart-search/tool.ts +++ b/tools/smart-search/tool.ts @@ -1,4 +1,10 @@ -import { googleSearch, shinkaiLlmPromptProcessor, downloadPages } from './shinkai-local-tools.ts'; +import { + googleSearch, + duckduckgoSearch, + shinkaiLlmPromptProcessor, + shinkaiLlmMapReduceProcessor, + downloadPages, +} from './shinkai-local-tools.ts'; type CONFIG = { searchEngineApiKey?: string; @@ -13,7 +19,7 @@ type OUTPUT = { sources: SmartSearchSourcePage[]; statements: SmartSearchStatement[]; } -type PREFFERED_SOURCES = 'WIKIPEDIA'|'WOLFRAMALPHA'|'OTHER'; +type PREFFERED_SOURCES = 'WIKIPEDIA'|'WEB_SEARCH'; type SearchQueryConversion = { "origin_question": string; @@ -28,7 +34,7 @@ type SearchResult = { } type SmartSearchSource = SearchResult | string; -type SearchEngine = 'DUCKDUCKGO' | 'GOOGLE' | 'BRAVE'; +export type SearchEngine = 'DUCKDUCKGO' | 'GOOGLE' | 'BRAVE'; export interface SmartSearchSourcePage { id: number; @@ -169,9 +175,8 @@ const searchEngineQueryGenerator = (query: string) => { You are an expert at transforming natural language questions into precise search queries and selecting the most appropriate information source. ## Source Selection Guidelines: +- WEB_SEARCH: General web search for current events, recent developments, practical information - WIKIPEDIA: Best for general knowledge, scientific explanations, historical information -- WOLFRAMALPHA: Ideal for mathematical, statistical, computational queries, scientific calculations -- OTHER: General web search for current events, recent developments, practical information ## Output Requirements: - Provide a JSON response with three key fields @@ -182,15 +187,6 @@ You are an expert at transforming natural language questions into precise search ## Examples: ### Example 1 -- User Query: "What is the speed of light?" -- Output: -{ -"origin_question": "What is the speed of light?", -"preferred_sources": ["WOLFRAMALPHA"], -"search_query": "speed of light exact value meters per second" -} - -### Example 2 - User Query: "Who was Marie Curie?" - Output: { @@ -199,21 +195,21 @@ You are an expert at transforming natural language questions into precise search "search_query": "Marie Curie biography scientific achievements" } -### Example 3 +### Example 2 - User Query: "Best restaurants in New York City" - Output: { "origin_question": "Best restaurants in New York City", -"preferred_sources": ["OTHER"], +"preferred_sources": ["WEB_SEARCH"], "search_query": "top rated restaurants NYC 2024 dining" } -### Example 4 +### Example 3 - User Query: "How do solar panels work?" - Output: { "origin_question": "How do solar panels work?", -"preferred_sources": ["WIKIPEDIA", "OTHER"], +"preferred_sources": ["WIKIPEDIA", "WEB_SEARCH"], "search_query": "solar panel photovoltaic technology mechanism" } @@ -222,6 +218,8 @@ You are an expert at transforming natural language questions into precise search - Select the MOST APPROPRIATE source(s) - Create a targeted search query - Return ONLY the JSON without additional text +- Regarding things like new technologies like blockchain or artifical intelligence or recent scientific discoveries you should always use WEB_SEARCH +- Regarding things like historical events or consolidated scientific knowledge you should always use WIKIPEDIA User Query: ${query} ` @@ -229,22 +227,20 @@ User Query: ${query} } const statementExtract = (originalQuestion: string, source: SmartSearchSourcePage): string => ` +You're an expert at extracting facts from a source page. It has been commended to you to extract facts from the source page that are helpful to answer the original question. +Original Question: ${originalQuestion} +You will be given a source with the following fields: +- id: number - Unique identifier for the source +- url: string - URL of the source page +- title: string - Title of the source page +- markdown: string - Full text content of the source page -# Fact Extraction Instructions +${JSON.stringify(source)} -## Input JSON Structure -\`\`\`json -{ - "originalQuestion": "string - The user's original question", - "source": { - "id": "number - Unique identifier for the source", - "url": "string - URL of the source page", - "title": "string - Title of the source page", - "markdown": "string - Full text content of the source page" - } -} -\`\`\` +# Fact Extraction Instructions +You will be given the contents of the provided source page. Your job is to extract the facts that are helpful to answer the original question. +Please format the facts that will be extracted in an array of objects with the following JSON structure. ## Output JSON Structure \`\`\`json { @@ -285,17 +281,17 @@ const statementExtract = (originalQuestion: string, source: SmartSearchSourcePag - Completely unrelated information - ## Extraction Guidelines: - 1. Read the entire source document carefully - 2. Extract EXACT quotes that: - - Are actually helpful answering the provided question - - Are stated verbatim from the source or are rephrased in such a way that doesn't distort the meaning in the original source - - Represent complete thoughts or meaningful segments - 3. Classify each extracted fact with its relevance level - 4. Preserve original context and nuance +## Extraction Guidelines: +1. Read the entire source document carefully +2. Extract EXACT quotes that: + - Are actually helpful answering the provided question + - Are stated verbatim from the source or are rephrased in such a way that doesn't distort the meaning in the original source + - Represent complete thoughts or meaningful segments +3. Classify each extracted fact with its relevance level +4. Preserve original context and nuance ## Critical Rules: -- try NOT to paraphrase or modify the original text +- try NOT to paraphrase or modify the original text. If you can't find a direct quote or you think the found quote is too long, you can paraphrase it. - Avoid any text in the "statement" field that is not helpful answering the provided question like javascript, URLs, HTML, and other non-textual content - Extract statements as they appear in the source and ONLY if they are helpful answering the provided question - Include full sentences or meaningful text segments @@ -308,16 +304,14 @@ const statementExtract = (originalQuestion: string, source: SmartSearchSourcePag - Be comprehensive in fact extraction - Err on the side of inclusion when in doubt - Focus on factual, informative statements - -==BEGIN INPUT== -Original Question: ${originalQuestion} - -Source: -${JSON.stringify(source)} -==END INPUT== - ` const debug = [] +const randomTimeout = () => { + const random = (1000 + Math.random() * 2000)|0; + console.log(`Waiting for ${random}ms`) + return new Promise(resolve => setTimeout(resolve, random)); +} + function tryToExtractJSON(text: string): string { const regex = /```(?:json)?\n([\s\S]+?)\n```/; const match = text.match(regex); @@ -335,6 +329,10 @@ async function conversionToSearchQuery(question: string): Promise