Skip to content

Commit

Permalink
fix: "import.meta.url" was hardcoded during the build, had to change …
Browse files Browse the repository at this point in the history
…our logic
  • Loading branch information
sneko committed Feb 22, 2024
1 parent 3a3770c commit 1f8f6d3
Show file tree
Hide file tree
Showing 9 changed files with 48 additions and 58 deletions.
5 changes: 2 additions & 3 deletions src/features/domain.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import { ParseResultType, parseDomain } from 'parse-domain';
import path from 'path';
import robotsParser from 'robots-parser';
import { PeerCertificate, TLSSocket } from 'tls';
import { fileURLToPath } from 'url';
import z from 'zod';

import { downloadFile } from '@etabli/src/common';
Expand All @@ -25,11 +24,11 @@ import { formatArrayProgress } from '@etabli/src/utils/format';
import { containsHtml } from '@etabli/src/utils/html';
import { sleep } from '@etabli/src/utils/sleep';

const __dirname = path.dirname(fileURLToPath(import.meta.url));
const __root_dirname = process.cwd();

export const latestRemoteCsvUrl =
'https://gitlab.adullact.net/dinum/noms-de-domaine-organismes-secteur-public/-/raw/master/domains.csv?ref_type=heads';
export const localCsvPath = path.resolve(__dirname, '../../data/domains.csv');
export const localCsvPath = path.resolve(__root_dirname, './data/domains.csv');

export const CsvDomainTypeSchema = z.enum([
'',
Expand Down
20 changes: 9 additions & 11 deletions src/features/initiative.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ import OpenAI from 'openai';
import path from 'path';
import prettyBytes from 'pretty-bytes';
import { simpleGit } from 'simple-git';
import { fileURLToPath } from 'url';
import { promisify } from 'util';
import Wappalyzer from 'wappalyzer';

Expand All @@ -36,13 +35,12 @@ import { prisma } from '@etabli/src/prisma';
import { analyzeWithSemgrep } from '@etabli/src/semgrep/index';
import { watchGracefulExitInLoop } from '@etabli/src/server/system';
import { getListDiff } from '@etabli/src/utils/comparaison';
import { capitalizeFirstLetter } from '@etabli/src/utils/format';
import { formatArrayProgress } from '@etabli/src/utils/format';
import { capitalizeFirstLetter, formatArrayProgress } from '@etabli/src/utils/format';
import { languagesExtensions } from '@etabli/src/utils/languages';
import { sleep } from '@etabli/src/utils/sleep';
import { WappalyzerResultSchema } from '@etabli/src/wappalyzer';

const __dirname = path.dirname(fileURLToPath(import.meta.url));
const __root_dirname = process.cwd();

const fastFolderSizeAsync = promisify(fastFolderSize);
const useLocalFileCache = true; // Toggle this when testing locally to avoid repeating network requests for remote content that has probably not changed in the meantime
Expand All @@ -59,8 +57,8 @@ const filesToKeepGitEndingPatterns: string[] = [
...languagesExtensions,
];

const noImgAndSvgFilterPath = path.resolve(__dirname, '../../src/pandoc/no-img-and-svg.lua');
const extractMetaDescriptionFilterPath = path.resolve(__dirname, '../../src/pandoc/extract-meta-description.lua');
const noImgAndSvgFilterPath = path.resolve(__root_dirname, './src/pandoc/no-img-and-svg.lua');
const extractMetaDescriptionFilterPath = path.resolve(__root_dirname, './src/pandoc/extract-meta-description.lua');

const wappalyzer = new Wappalyzer({
debug: false,
Expand Down Expand Up @@ -173,7 +171,7 @@ export async function inferInitiativesFromDatabase() {
// To debug it may be useful to print a global JSON representation
if (!!false) {
const jsonContent = graphlib.json.write(graph);
const jsonPath = path.resolve(__dirname, '../../data/graph.json');
const jsonPath = path.resolve(__root_dirname, './data/graph.json');

await fs.writeFile(jsonPath, JSON.stringify(jsonContent, null, 2));
}
Expand Down Expand Up @@ -456,9 +454,9 @@ export async function feedInitiativesFromDatabase() {
await wappalyzer.init();

// Prepare the message template used to ask GPT about the initiative
const initiativeGptTemplateContent = await fs.readFile(path.resolve(__dirname, '../../src/gpt/templates/initiative.md'), 'utf-8');
const websiteGptTemplateContent = await fs.readFile(path.resolve(__dirname, '../../src/gpt/templates/website.md'), 'utf-8');
const repositoryGptTemplateContent = await fs.readFile(path.resolve(__dirname, '../../src/gpt/templates/repository.md'), 'utf-8');
const initiativeGptTemplateContent = await fs.readFile(path.resolve(__root_dirname, './src/gpt/templates/initiative.md'), 'utf-8');
const websiteGptTemplateContent = await fs.readFile(path.resolve(__root_dirname, './src/gpt/templates/website.md'), 'utf-8');
const repositoryGptTemplateContent = await fs.readFile(path.resolve(__root_dirname, './src/gpt/templates/repository.md'), 'utf-8');

handlebars.registerPartial('websitePartial', websiteGptTemplateContent);
handlebars.registerPartial('repositoryPartial', repositoryGptTemplateContent);
Expand All @@ -485,7 +483,7 @@ export async function feedInitiativesFromDatabase() {
// I was in the middle of getting "context" to work. It works now, but it comes back on a single line rather than in an array
// I wanted to test the case where several documents have to be returned...

const projectDirectory = path.resolve(__dirname, '../../data/initiatives/', initiativeMap.id);
const projectDirectory = path.resolve(__root_dirname, './data/initiatives/', initiativeMap.id);

const websitesTemplates: WebsiteTemplateSchemaType[] = [];
const repositoriesTemplates: RepositoryTemplateSchemaType[] = [];
Expand Down
13 changes: 6 additions & 7 deletions src/features/llm-openai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import { AssistantFile } from 'openai/resources/beta/assistants/files';
import { Run } from 'openai/resources/beta/threads/runs/runs';
import path from 'path';
import { encoding_for_model } from 'tiktoken';
import { fileURLToPath } from 'url';

import { ChunkEventEmitter, LlmManager } from '@etabli/src/features/llm';
import { gptInstances } from '@etabli/src/gpt';
Expand All @@ -20,7 +19,7 @@ import { prisma } from '@etabli/src/prisma';
import { watchGracefulExitInLoop } from '@etabli/src/server/system';
import { sleep } from '@etabli/src/utils/sleep';

const __dirname = path.dirname(fileURLToPath(import.meta.url));
const __root_dirname = process.cwd();

export class OpenaiWithAssistantApiLlmManager implements LlmManager {
public readonly openaiItemPrefix = 'etabli_';
Expand Down Expand Up @@ -180,7 +179,7 @@ export class OpenaiWithAssistantApiLlmManager implements LlmManager {

const toolsNames: string[] = tools.map((tool) => tool.name);

const toolsGptTemplateContent = await fs.readFile(path.resolve(__dirname, '../../src/gpt/templates/tools-document.md'), 'utf-8');
const toolsGptTemplateContent = await fs.readFile(path.resolve(__root_dirname, './src/gpt/templates/tools-document.md'), 'utf-8');
const toolsGptTemplate = handlebars.compile(toolsGptTemplateContent);

// Since tools should not reach the limit of 2M tokens for a single document, we have no chunking logic
Expand All @@ -196,7 +195,7 @@ export class OpenaiWithAssistantApiLlmManager implements LlmManager {
}

// Store the document for debug
const gptToolsDocumentPath = path.resolve(__dirname, '../../data/gpt-document-tools.md');
const gptToolsDocumentPath = path.resolve(__root_dirname, './data/gpt-document-tools.md');
await fs.mkdir(path.dirname(gptToolsDocumentPath), { recursive: true });
await fs.writeFile(gptToolsDocumentPath, toolsGptContent);

Expand Down Expand Up @@ -282,12 +281,12 @@ export class OpenaiWithAssistantApiLlmManager implements LlmManager {
}

const initiativesChunkGptTemplateContent = await fs.readFile(
path.resolve(__dirname, '../../src/gpt/templates/initiatives-chunk-document.md'),
path.resolve(__root_dirname, './src/gpt/templates/initiatives-chunk-document.md'),
'utf-8'
);
const initiativesChunkGptTemplate = handlebars.compile(initiativesChunkGptTemplateContent);
const initiativeGptTemplateContent = await fs.readFile(
path.resolve(__dirname, '../../src/gpt/templates/initiatives-chunk-document-initiative.md'),
path.resolve(__root_dirname, './src/gpt/templates/initiatives-chunk-document-initiative.md'),
'utf-8'
);
const initiativeGptTemplate = handlebars.compile(initiativeGptTemplateContent);
Expand Down Expand Up @@ -369,7 +368,7 @@ export class OpenaiWithAssistantApiLlmManager implements LlmManager {
formattedInitiativesPerChunk[currentChunk - 1].push(formattedInitiativeContent);
}

const gptInitiativeChunksDocumentsFolderPath = path.resolve(__dirname, '../../data/gpt-document-initiatives');
const gptInitiativeChunksDocumentsFolderPath = path.resolve(__root_dirname, './data/gpt-document-initiatives');

// Remove previous debug files if any
if (fsSync.existsSync(gptInitiativeChunksDocumentsFolderPath)) {
Expand Down
5 changes: 2 additions & 3 deletions src/features/repository.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ import fsSync from 'fs';
import fs from 'fs/promises';
import linkifyit from 'linkify-it';
import path from 'path';
import { fileURLToPath } from 'url';
import z from 'zod';

import { downloadFile } from '@etabli/src/common';
Expand All @@ -15,13 +14,13 @@ import { getListDiff } from '@etabli/src/utils/comparaison';
import { formatArrayProgress } from '@etabli/src/utils/format';
import { emptyStringtoNullPreprocessor } from '@etabli/src/utils/validation';

const __dirname = path.dirname(fileURLToPath(import.meta.url));
const __root_dirname = process.cwd();
const linkify = linkifyit();

// We did not use the CSV format, even though it is lighter, to avoid extra parsing of numbers, nulls, and multi-line strings (ref: https://code.gouv.fr/data/repositories/csv/all.csv)

export const latestRemoteJsonUrl = 'https://code.gouv.fr/data/repositories/json/all.json';
export const localJsonPath = path.resolve(__dirname, '../../data/repositories.json');
export const localJsonPath = path.resolve(__root_dirname, './data/repositories.json');

export const JsonRepositoryPlatformSchema = z.enum(['GitHub', 'GitLab']);
export type JsonRepositoryPlatformSchemaType = z.infer<typeof JsonRepositoryPlatformSchema>;
Expand Down
5 changes: 2 additions & 3 deletions src/features/tool.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ import { parse } from 'csv-parse';
import fsSync from 'fs';
import fs from 'fs/promises';
import path from 'path';
import { fileURLToPath } from 'url';
import z from 'zod';

import { downloadFile } from '@etabli/src/common';
Expand All @@ -14,10 +13,10 @@ import { watchGracefulExitInLoop } from '@etabli/src/server/system';
import { getListDiff } from '@etabli/src/utils/comparaison';
import { emptyStringtoNullPreprocessor } from '@etabli/src/utils/validation';

const __dirname = path.dirname(fileURLToPath(import.meta.url));
const __root_dirname = process.cwd();

export const latestRemoteCsvUrl = 'https://raw.githubusercontent.com/captn3m0/stackshare-dataset/main/tools.csv';
export const localCsvPath = path.resolve(__dirname, '../../data/tools.csv');
export const localCsvPath = path.resolve(__root_dirname, './data/tools.csv');

export const CsvToolCategorySchema = z.enum([
'languages-and-frameworks',
Expand Down
5 changes: 2 additions & 3 deletions src/prisma/seed.script.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
import dotenv from 'dotenv';
import path from 'path';
import { fileURLToPath } from 'url';

import { prisma } from '@etabli/src/prisma';
import { seedDatabase } from '@etabli/src/prisma/seed';

const __dirname = path.dirname(fileURLToPath(import.meta.url));
const __root_dirname = process.cwd();

// This script always targets the local test database
dotenv.config({ path: path.resolve(__dirname, '../../.env.test') });
dotenv.config({ path: path.resolve(__root_dirname, './.env.test') });

seedDatabase(prisma)
.catch((e) => {
Expand Down
39 changes: 19 additions & 20 deletions src/semgrep/index.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,17 @@
* @jest-environment node
*/
import path from 'path';
import { fileURLToPath } from 'url';

import { analyzeWithSemgrep } from '@etabli/src/semgrep/index';

const __dirname = path.dirname(fileURLToPath(import.meta.url));
const __root_dirname = process.cwd();

describe('analyzeWithSemgrep()', () => {
it(
'should analyze correctly a node project',
async () => {
const codeFolder = path.resolve(__dirname, 'samples/node');
const resultsPath = path.resolve(__dirname, 'results/code-analysis-node.json');
const codeFolder = path.resolve(__root_dirname, './src/semgrep/samples/node');
const resultsPath = path.resolve(__root_dirname, './src/semgrep/results/code-analysis-node.json');

const results = await analyzeWithSemgrep(codeFolder, resultsPath);

Expand All @@ -28,8 +27,8 @@ describe('analyzeWithSemgrep()', () => {
it(
'should analyze correctly a php project',
async () => {
const codeFolder = path.resolve(__dirname, 'samples/php');
const resultsPath = path.resolve(__dirname, 'results/code-analysis-php.json');
const codeFolder = path.resolve(__root_dirname, './src/semgrep/samples/php');
const resultsPath = path.resolve(__root_dirname, './src/semgrep/results/code-analysis-php.json');

const results = await analyzeWithSemgrep(codeFolder, resultsPath);

Expand All @@ -44,8 +43,8 @@ describe('analyzeWithSemgrep()', () => {
it(
'should analyze correctly a ruby project',
async () => {
const codeFolder = path.resolve(__dirname, 'samples/ruby');
const resultsPath = path.resolve(__dirname, 'results/code-analysis-ruby.json');
const codeFolder = path.resolve(__root_dirname, './src/semgrep/samples/ruby');
const resultsPath = path.resolve(__root_dirname, './src/semgrep/results/code-analysis-ruby.json');

const results = await analyzeWithSemgrep(codeFolder, resultsPath);

Expand All @@ -60,8 +59,8 @@ describe('analyzeWithSemgrep()', () => {
it(
'should analyze correctly a python project',
async () => {
const codeFolder = path.resolve(__dirname, 'samples/python');
const resultsPath = path.resolve(__dirname, 'results/code-analysis-python.json');
const codeFolder = path.resolve(__root_dirname, './src/semgrep/samples/python');
const resultsPath = path.resolve(__root_dirname, './src/semgrep/results/code-analysis-python.json');

const results = await analyzeWithSemgrep(codeFolder, resultsPath);

Expand Down Expand Up @@ -90,8 +89,8 @@ describe('analyzeWithSemgrep()', () => {
it(
'should analyze correctly a java project',
async () => {
const codeFolder = path.resolve(__dirname, 'samples/java');
const resultsPath = path.resolve(__dirname, 'results/code-analysis-java.json');
const codeFolder = path.resolve(__root_dirname, './src/semgrep/samples/java');
const resultsPath = path.resolve(__root_dirname, './src/semgrep/results/code-analysis-java.json');

const results = await analyzeWithSemgrep(codeFolder, resultsPath);

Expand All @@ -113,8 +112,8 @@ describe('analyzeWithSemgrep()', () => {
it(
'should analyze correctly a golang project',
async () => {
const codeFolder = path.resolve(__dirname, 'samples/golang');
const resultsPath = path.resolve(__dirname, 'results/code-analysis-golang.json');
const codeFolder = path.resolve(__root_dirname, './src/semgrep/samples/golang');
const resultsPath = path.resolve(__root_dirname, './src/semgrep/results/code-analysis-golang.json');

const results = await analyzeWithSemgrep(codeFolder, resultsPath);

Expand All @@ -129,8 +128,8 @@ describe('analyzeWithSemgrep()', () => {
it(
'should analyze correctly a rust project',
async () => {
const codeFolder = path.resolve(__dirname, 'samples/rust');
const resultsPath = path.resolve(__dirname, 'results/code-analysis-rust.json');
const codeFolder = path.resolve(__root_dirname, './src/semgrep/samples/rust');
const resultsPath = path.resolve(__root_dirname, './src/semgrep/results/code-analysis-rust.json');

const results = await analyzeWithSemgrep(codeFolder, resultsPath);

Expand All @@ -145,8 +144,8 @@ describe('analyzeWithSemgrep()', () => {
it(
'should analyze correctly a cpp project',
async () => {
const codeFolder = path.resolve(__dirname, 'samples/cpp');
const resultsPath = path.resolve(__dirname, 'results/code-analysis-cpp.json');
const codeFolder = path.resolve(__root_dirname, './src/semgrep/samples/cpp');
const resultsPath = path.resolve(__root_dirname, './src/semgrep/results/code-analysis-cpp.json');

const results = await analyzeWithSemgrep(codeFolder, resultsPath);

Expand All @@ -173,8 +172,8 @@ describe('analyzeWithSemgrep()', () => {
it(
'should analyze correctly a scala project',
async () => {
const codeFolder = path.resolve(__dirname, 'samples/scala');
const resultsPath = path.resolve(__dirname, 'results/code-analysis-scala.json');
const codeFolder = path.resolve(__root_dirname, './src/semgrep/samples/scala');
const resultsPath = path.resolve(__root_dirname, './src/semgrep/results/code-analysis-scala.json');

const results = await analyzeWithSemgrep(codeFolder, resultsPath);

Expand Down
9 changes: 4 additions & 5 deletions src/semgrep/index.ts
Original file line number Diff line number Diff line change
@@ -1,21 +1,20 @@
import { $ } from 'execa';
import fsSync from 'fs';
import fs from 'fs/promises';
import { fileURLToPath } from 'node:url';
import path from 'path';

import { SemgrepResultSchema } from '@etabli/src/semgrep';

const __root_dirname = process.cwd();

export interface AnalysisResult {
functions: string[];
dependencies: string[];
}

const __dirname = path.dirname(fileURLToPath(import.meta.url));

export async function analyzeWithSemgrep(folderPath: string, outputPath: string): Promise<AnalysisResult> {
const codeAnalysisRulesPath = path.resolve(__dirname, '../../semgrep-rules.yaml');
const bibliothecaryScriptPath = path.resolve(__dirname, '../../src/bibliothecary/deps-parser.rb');
const codeAnalysisRulesPath = path.resolve(__root_dirname, './semgrep-rules.yaml');
const bibliothecaryScriptPath = path.resolve(__root_dirname, './src/bibliothecary/deps-parser.rb');

if (!fsSync.existsSync(codeAnalysisRulesPath)) {
throw new Error('semgrep rules must exist');
Expand Down
5 changes: 2 additions & 3 deletions src/utils/database.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import { fileURLToPath } from 'node:url';
import path from 'path';
import { DockerComposeEnvironment, Wait } from 'testcontainers';
import type { StartedGenericContainer } from 'testcontainers/dist/src/generic-container/started-generic-container';

import { bindContainerLogs, defaultEnvironment, formatContainerNameWithSuffix } from '@etabli/src/utils/testcontainers';

const __dirname = path.dirname(fileURLToPath(import.meta.url));
const __root_dirname = process.cwd();

export interface PostgresContainer {
container: StartedGenericContainer;
Expand All @@ -22,7 +21,7 @@ export async function setupPostgres(): Promise<PostgresContainer> {
process.env.TESTCONTAINERS_RYUK_DISABLED = 'true';
}

const composeFilePath = path.resolve(__dirname, '../../');
const composeFilePath = path.resolve(__root_dirname, './');
const composeFile = 'docker-compose.yaml';
const serviceName = 'postgres';
const containerName = formatContainerNameWithSuffix('etabli_postgres_container');
Expand Down

0 comments on commit 1f8f6d3

Please sign in to comment.