From 1f8f6d3f51eada433820a980cb31c557e873b962 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Rame=CC=81?= Date: Thu, 22 Feb 2024 11:26:02 +0100 Subject: [PATCH] fix: "import.meta.url" was hardcoded during the build, had to change our logic --- src/features/domain.ts | 5 ++--- src/features/initiative.ts | 20 +++++++++---------- src/features/llm-openai.ts | 13 ++++++------- src/features/repository.ts | 5 ++--- src/features/tool.ts | 5 ++--- src/prisma/seed.script.ts | 5 ++--- src/semgrep/index.spec.ts | 39 +++++++++++++++++++------------------- src/semgrep/index.ts | 9 ++++----- src/utils/database.ts | 5 ++--- 9 files changed, 48 insertions(+), 58 deletions(-) diff --git a/src/features/domain.ts b/src/features/domain.ts index 8806a35..f768165 100644 --- a/src/features/domain.ts +++ b/src/features/domain.ts @@ -9,7 +9,6 @@ import { ParseResultType, parseDomain } from 'parse-domain'; import path from 'path'; import robotsParser from 'robots-parser'; import { PeerCertificate, TLSSocket } from 'tls'; -import { fileURLToPath } from 'url'; import z from 'zod'; import { downloadFile } from '@etabli/src/common'; @@ -25,11 +24,11 @@ import { formatArrayProgress } from '@etabli/src/utils/format'; import { containsHtml } from '@etabli/src/utils/html'; import { sleep } from '@etabli/src/utils/sleep'; -const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const __root_dirname = process.cwd(); export const latestRemoteCsvUrl = 'https://gitlab.adullact.net/dinum/noms-de-domaine-organismes-secteur-public/-/raw/master/domains.csv?ref_type=heads'; -export const localCsvPath = path.resolve(__dirname, '../../data/domains.csv'); +export const localCsvPath = path.resolve(__root_dirname, './data/domains.csv'); export const CsvDomainTypeSchema = z.enum([ '', diff --git a/src/features/initiative.ts b/src/features/initiative.ts index 3b1609d..d8ca9a8 100644 --- a/src/features/initiative.ts +++ b/src/features/initiative.ts @@ -15,7 +15,6 @@ import OpenAI from 'openai'; import path from 'path'; import prettyBytes from 'pretty-bytes'; import { simpleGit } from 'simple-git'; -import { fileURLToPath } from 'url'; import { promisify } from 'util'; import Wappalyzer from 'wappalyzer'; @@ -36,13 +35,12 @@ import { prisma } from '@etabli/src/prisma'; import { analyzeWithSemgrep } from '@etabli/src/semgrep/index'; import { watchGracefulExitInLoop } from '@etabli/src/server/system'; import { getListDiff } from '@etabli/src/utils/comparaison'; -import { capitalizeFirstLetter } from '@etabli/src/utils/format'; -import { formatArrayProgress } from '@etabli/src/utils/format'; +import { capitalizeFirstLetter, formatArrayProgress } from '@etabli/src/utils/format'; import { languagesExtensions } from '@etabli/src/utils/languages'; import { sleep } from '@etabli/src/utils/sleep'; import { WappalyzerResultSchema } from '@etabli/src/wappalyzer'; -const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const __root_dirname = process.cwd(); const fastFolderSizeAsync = promisify(fastFolderSize); const useLocalFileCache = true; // Switch it when testing locally to prevent multiplying network request whereas the remote content has probably no change since then @@ -59,8 +57,8 @@ const filesToKeepGitEndingPatterns: string[] = [ ...languagesExtensions, ]; -const noImgAndSvgFilterPath = path.resolve(__dirname, '../../src/pandoc/no-img-and-svg.lua'); -const extractMetaDescriptionFilterPath = path.resolve(__dirname, '../../src/pandoc/extract-meta-description.lua'); +const noImgAndSvgFilterPath = path.resolve(__root_dirname, './src/pandoc/no-img-and-svg.lua'); +const extractMetaDescriptionFilterPath = path.resolve(__root_dirname, './src/pandoc/extract-meta-description.lua'); const wappalyzer = new Wappalyzer({ debug: false, @@ -173,7 +171,7 @@ export async function inferInitiativesFromDatabase() { // To debug it may be useful to print a global JSON representation if (!!false) { const jsonContent = graphlib.json.write(graph); - const jsonPath = path.resolve(__dirname, '../../data/graph.json'); + const jsonPath = path.resolve(__root_dirname, './data/graph.json'); await fs.writeFile(jsonPath, JSON.stringify(jsonContent, null, 2)); } @@ -456,9 +454,9 @@ export async function feedInitiativesFromDatabase() { await wappalyzer.init(); // Prepare the message template used to ask GPT about the initiative - const initiativeGptTemplateContent = await fs.readFile(path.resolve(__dirname, '../../src/gpt/templates/initiative.md'), 'utf-8'); - const websiteGptTemplateContent = await fs.readFile(path.resolve(__dirname, '../../src/gpt/templates/website.md'), 'utf-8'); - const repositoryGptTemplateContent = await fs.readFile(path.resolve(__dirname, '../../src/gpt/templates/repository.md'), 'utf-8'); + const initiativeGptTemplateContent = await fs.readFile(path.resolve(__root_dirname, './src/gpt/templates/initiative.md'), 'utf-8'); + const websiteGptTemplateContent = await fs.readFile(path.resolve(__root_dirname, './src/gpt/templates/website.md'), 'utf-8'); + const repositoryGptTemplateContent = await fs.readFile(path.resolve(__root_dirname, './src/gpt/templates/repository.md'), 'utf-8'); handlebars.registerPartial('websitePartial', websiteGptTemplateContent); handlebars.registerPartial('repositoryPartial', repositoryGptTemplateContent); @@ -485,7 +483,7 @@ export async function feedInitiativesFromDatabase() { // actuellement JE REGARDAIS pour faire marcher "context". Now c'est good mais il est sur une ligne pas dans un array // je voulais tester quand y'a plusieurs documents à retourner... - const projectDirectory = path.resolve(__dirname, '../../data/initiatives/', initiativeMap.id); + const projectDirectory = path.resolve(__root_dirname, './data/initiatives/', initiativeMap.id); const websitesTemplates: WebsiteTemplateSchemaType[] = []; const repositoriesTemplates: RepositoryTemplateSchemaType[] = []; diff --git a/src/features/llm-openai.ts b/src/features/llm-openai.ts index 1e3900e..8739b8c 100644 --- a/src/features/llm-openai.ts +++ b/src/features/llm-openai.ts @@ -10,7 +10,6 @@ import { AssistantFile } from 'openai/resources/beta/assistants/files'; import { Run } from 'openai/resources/beta/threads/runs/runs'; import path from 'path'; import { encoding_for_model } from 'tiktoken'; -import { fileURLToPath } from 'url'; import { ChunkEventEmitter, LlmManager } from '@etabli/src/features/llm'; import { gptInstances } from '@etabli/src/gpt'; @@ -20,7 +19,7 @@ import { prisma } from '@etabli/src/prisma'; import { watchGracefulExitInLoop } from '@etabli/src/server/system'; import { sleep } from '@etabli/src/utils/sleep'; -const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const __root_dirname = process.cwd(); export class OpenaiWithAssistantApiLlmManager implements LlmManager { public readonly openaiItemPrefix = 'etabli_'; @@ -180,7 +179,7 @@ export class OpenaiWithAssistantApiLlmManager implements LlmManager { const toolsNames: string[] = tools.map((tool) => tool.name); - const toolsGptTemplateContent = await fs.readFile(path.resolve(__dirname, '../../src/gpt/templates/tools-document.md'), 'utf-8'); + const toolsGptTemplateContent = await fs.readFile(path.resolve(__root_dirname, './src/gpt/templates/tools-document.md'), 'utf-8'); const toolsGptTemplate = handlebars.compile(toolsGptTemplateContent); // Since tools should not reach limit of 2M tokens for 1 document, we have no chunk logic @@ -196,7 +195,7 @@ export class OpenaiWithAssistantApiLlmManager implements LlmManager { } // Store the document for debug - const gptToolsDocumentPath = path.resolve(__dirname, '../../data/gpt-document-tools.md'); + const gptToolsDocumentPath = path.resolve(__root_dirname, './data/gpt-document-tools.md'); await fs.mkdir(path.dirname(gptToolsDocumentPath), { recursive: true }); await fs.writeFile(gptToolsDocumentPath, toolsGptContent); @@ -282,12 +281,12 @@ export class OpenaiWithAssistantApiLlmManager implements LlmManager { } const initiativesChunkGptTemplateContent = await fs.readFile( - path.resolve(__dirname, '../../src/gpt/templates/initiatives-chunk-document.md'), + path.resolve(__root_dirname, './src/gpt/templates/initiatives-chunk-document.md'), 'utf-8' ); const initiativesChunkGptTemplate = handlebars.compile(initiativesChunkGptTemplateContent); const initiativeGptTemplateContent = await fs.readFile( - path.resolve(__dirname, '../../src/gpt/templates/initiatives-chunk-document-initiative.md'), + path.resolve(__root_dirname, './src/gpt/templates/initiatives-chunk-document-initiative.md'), 'utf-8' ); const initiativeGptTemplate = handlebars.compile(initiativeGptTemplateContent); @@ -369,7 +368,7 @@ export class OpenaiWithAssistantApiLlmManager implements LlmManager { formattedInitiativesPerChunk[currentChunk - 1].push(formattedInitiativeContent); } - const gptInitiativeChunksDocumentsFolderPath = path.resolve(__dirname, '../../data/gpt-document-initiatives'); + const gptInitiativeChunksDocumentsFolderPath = path.resolve(__root_dirname, './data/gpt-document-initiatives'); // Remove previous debug files if any if (fsSync.existsSync(gptInitiativeChunksDocumentsFolderPath)) { diff --git a/src/features/repository.ts b/src/features/repository.ts index 2d692fc..7fdd521 100644 --- a/src/features/repository.ts +++ b/src/features/repository.ts @@ -3,7 +3,6 @@ import fsSync from 'fs'; import fs from 'fs/promises'; import linkifyit from 'linkify-it'; import path from 'path'; -import { fileURLToPath } from 'url'; import z from 'zod'; import { downloadFile } from '@etabli/src/common'; @@ -15,13 +14,13 @@ import { getListDiff } from '@etabli/src/utils/comparaison'; import { formatArrayProgress } from '@etabli/src/utils/format'; import { emptyStringtoNullPreprocessor } from '@etabli/src/utils/validation'; -const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const __root_dirname = process.cwd(); const linkify = linkifyit(); // We did not used the CSV format even if less heavy to avoid extra parsing for numbers, null, string on multiple lines... (ref: https://code.gouv.fr/data/repositories/csv/all.csv) export const latestRemoteJsonUrl = 'https://code.gouv.fr/data/repositories/json/all.json'; -export const localJsonPath = path.resolve(__dirname, '../../data/repositories.json'); +export const localJsonPath = path.resolve(__root_dirname, './data/repositories.json'); export const JsonRepositoryPlatformSchema = z.enum(['GitHub', 'GitLab']); export type JsonRepositoryPlatformSchemaType = z.infer; diff --git a/src/features/tool.ts b/src/features/tool.ts index 2b58def..cef4970 100644 --- a/src/features/tool.ts +++ b/src/features/tool.ts @@ -3,7 +3,6 @@ import { parse } from 'csv-parse'; import fsSync from 'fs'; import fs from 'fs/promises'; import path from 'path'; -import { fileURLToPath } from 'url'; import z from 'zod'; import { downloadFile } from '@etabli/src/common'; @@ -14,10 +13,10 @@ import { watchGracefulExitInLoop } from '@etabli/src/server/system'; import { getListDiff } from '@etabli/src/utils/comparaison'; import { emptyStringtoNullPreprocessor } from '@etabli/src/utils/validation'; -const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const __root_dirname = process.cwd(); export const latestRemoteCsvUrl = 'https://raw.githubusercontent.com/captn3m0/stackshare-dataset/main/tools.csv'; -export const localCsvPath = path.resolve(__dirname, '../../data/tools.csv'); +export const localCsvPath = path.resolve(__root_dirname, './data/tools.csv'); export const CsvToolCategorySchema = z.enum([ 'languages-and-frameworks', diff --git a/src/prisma/seed.script.ts b/src/prisma/seed.script.ts index 61408f9..bac4fe8 100644 --- a/src/prisma/seed.script.ts +++ b/src/prisma/seed.script.ts @@ -1,14 +1,13 @@ import dotenv from 'dotenv'; import path from 'path'; -import { fileURLToPath } from 'url'; import { prisma } from '@etabli/src/prisma'; import { seedDatabase } from '@etabli/src/prisma/seed'; -const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const __root_dirname = process.cwd(); // This script always targets the local test database -dotenv.config({ path: path.resolve(__dirname, '../../.env.test') }); +dotenv.config({ path: path.resolve(__root_dirname, './.env.test') }); seedDatabase(prisma) .catch((e) => { diff --git a/src/semgrep/index.spec.ts b/src/semgrep/index.spec.ts index 7e0c8c8..e8ea578 100644 --- a/src/semgrep/index.spec.ts +++ b/src/semgrep/index.spec.ts @@ -2,18 +2,17 @@ * @jest-environment node */ import path from 'path'; -import { fileURLToPath } from 'url'; import { analyzeWithSemgrep } from '@etabli/src/semgrep/index'; -const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const __root_dirname = process.cwd(); describe('analyzeWithSemgrep()', () => { it( 'should analyze correctly a node project', async () => { - const codeFolder = path.resolve(__dirname, 'samples/node'); - const resultsPath = path.resolve(__dirname, 'results/code-analysis-node.json'); + const codeFolder = path.resolve(__root_dirname, './src/semgrep/samples/node'); + const resultsPath = path.resolve(__root_dirname, './src/semgrep/results/code-analysis-node.json'); const results = await analyzeWithSemgrep(codeFolder, resultsPath); @@ -28,8 +27,8 @@ describe('analyzeWithSemgrep()', () => { it( 'should analyze correctly a php project', async () => { - const codeFolder = path.resolve(__dirname, 'samples/php'); - const resultsPath = path.resolve(__dirname, 'results/code-analysis-php.json'); + const codeFolder = path.resolve(__root_dirname, './src/semgrep/samples/php'); + const resultsPath = path.resolve(__root_dirname, './src/semgrep/results/code-analysis-php.json'); const results = await analyzeWithSemgrep(codeFolder, resultsPath); @@ -44,8 +43,8 @@ describe('analyzeWithSemgrep()', () => { it( 'should analyze correctly a ruby project', async () => { - const codeFolder = path.resolve(__dirname, 'samples/ruby'); - const resultsPath = path.resolve(__dirname, 'results/code-analysis-ruby.json'); + const codeFolder = path.resolve(__root_dirname, './src/semgrep/samples/ruby'); + const resultsPath = path.resolve(__root_dirname, './src/semgrep/results/code-analysis-ruby.json'); const results = await analyzeWithSemgrep(codeFolder, resultsPath); @@ -60,8 +59,8 @@ describe('analyzeWithSemgrep()', () => { it( 'should analyze correctly a python project', async () => { - const codeFolder = path.resolve(__dirname, 'samples/python'); - const resultsPath = path.resolve(__dirname, 'results/code-analysis-python.json'); + const codeFolder = path.resolve(__root_dirname, './src/semgrep/samples/python'); + const resultsPath = path.resolve(__root_dirname, './src/semgrep/results/code-analysis-python.json'); const results = await analyzeWithSemgrep(codeFolder, resultsPath); @@ -90,8 +89,8 @@ describe('analyzeWithSemgrep()', () => { it( 'should analyze correctly a java project', async () => { - const codeFolder = path.resolve(__dirname, 'samples/java'); - const resultsPath = path.resolve(__dirname, 'results/code-analysis-java.json'); + const codeFolder = path.resolve(__root_dirname, './src/semgrep/samples/java'); + const resultsPath = path.resolve(__root_dirname, './src/semgrep/results/code-analysis-java.json'); const results = await analyzeWithSemgrep(codeFolder, resultsPath); @@ -113,8 +112,8 @@ describe('analyzeWithSemgrep()', () => { it( 'should analyze correctly a golang project', async () => { - const codeFolder = path.resolve(__dirname, 'samples/golang'); - const resultsPath = path.resolve(__dirname, 'results/code-analysis-golang.json'); + const codeFolder = path.resolve(__root_dirname, './src/semgrep/samples/golang'); + const resultsPath = path.resolve(__root_dirname, './src/semgrep/results/code-analysis-golang.json'); const results = await analyzeWithSemgrep(codeFolder, resultsPath); @@ -129,8 +128,8 @@ describe('analyzeWithSemgrep()', () => { it( 'should analyze correctly a rust project', async () => { - const codeFolder = path.resolve(__dirname, 'samples/rust'); - const resultsPath = path.resolve(__dirname, 'results/code-analysis-rust.json'); + const codeFolder = path.resolve(__root_dirname, './src/semgrep/samples/rust'); + const resultsPath = path.resolve(__root_dirname, './src/semgrep/results/code-analysis-rust.json'); const results = await analyzeWithSemgrep(codeFolder, resultsPath); @@ -145,8 +144,8 @@ describe('analyzeWithSemgrep()', () => { it( 'should analyze correctly a cpp project', async () => { - const codeFolder = path.resolve(__dirname, 'samples/cpp'); - const resultsPath = path.resolve(__dirname, 'results/code-analysis-cpp.json'); + const codeFolder = path.resolve(__root_dirname, './src/semgrep/samples/cpp'); + const resultsPath = path.resolve(__root_dirname, './src/semgrep/results/code-analysis-cpp.json'); const results = await analyzeWithSemgrep(codeFolder, resultsPath); @@ -173,8 +172,8 @@ describe('analyzeWithSemgrep()', () => { it( 'should analyze correctly a scala project', async () => { - const codeFolder = path.resolve(__dirname, 'samples/scala'); - const resultsPath = path.resolve(__dirname, 'results/code-analysis-scala.json'); + const codeFolder = path.resolve(__root_dirname, './src/semgrep/samples/scala'); + const resultsPath = path.resolve(__root_dirname, './src/semgrep/results/code-analysis-scala.json'); const results = await analyzeWithSemgrep(codeFolder, resultsPath); diff --git a/src/semgrep/index.ts b/src/semgrep/index.ts index 5519a31..bd391b9 100644 --- a/src/semgrep/index.ts +++ b/src/semgrep/index.ts @@ -1,21 +1,20 @@ import { $ } from 'execa'; import fsSync from 'fs'; import fs from 'fs/promises'; -import { fileURLToPath } from 'node:url'; import path from 'path'; import { SemgrepResultSchema } from '@etabli/src/semgrep'; +const __root_dirname = process.cwd(); + export interface AnalysisResult { functions: string[]; dependencies: string[]; } -const __dirname = path.dirname(fileURLToPath(import.meta.url)); - export async function analyzeWithSemgrep(folderPath: string, outputPath: string): Promise { - const codeAnalysisRulesPath = path.resolve(__dirname, '../../semgrep-rules.yaml'); - const bibliothecaryScriptPath = path.resolve(__dirname, '../../src/bibliothecary/deps-parser.rb'); + const codeAnalysisRulesPath = path.resolve(__root_dirname, './semgrep-rules.yaml'); + const bibliothecaryScriptPath = path.resolve(__root_dirname, './src/bibliothecary/deps-parser.rb'); if (!fsSync.existsSync(codeAnalysisRulesPath)) { throw new Error('semgrep rules must exist'); diff --git a/src/utils/database.ts b/src/utils/database.ts index 5b65d3e..d706429 100644 --- a/src/utils/database.ts +++ b/src/utils/database.ts @@ -1,11 +1,10 @@ -import { fileURLToPath } from 'node:url'; import path from 'path'; import { DockerComposeEnvironment, Wait } from 'testcontainers'; import type { StartedGenericContainer } from 'testcontainers/dist/src/generic-container/started-generic-container'; import { bindContainerLogs, defaultEnvironment, formatContainerNameWithSuffix } from '@etabli/src/utils/testcontainers'; -const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const __root_dirname = process.cwd(); export interface PostgresContainer { container: StartedGenericContainer; @@ -22,7 +21,7 @@ export async function setupPostgres(): Promise { process.env.TESTCONTAINERS_RYUK_DISABLED = 'true'; } - const composeFilePath = path.resolve(__dirname, '../../'); + const composeFilePath = path.resolve(__root_dirname, './'); const composeFile = 'docker-compose.yaml'; const serviceName = 'postgres'; const containerName = formatContainerNameWithSuffix('etabli_postgres_container');