Skip to content

Commit

Permalink
fix header row sorting (don't sort the header row; keep it first)
Browse files Browse the repository at this point in the history
  • Loading branch information
sterlingwes committed Feb 10, 2024
1 parent fc86ab6 commit d459da7
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 7 deletions.
3 changes: 1 addition & 2 deletions scripts/data/common/killed-in-gaza/data/dict_ar_ar.csv
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@

عيد دردونه, عيددردونه
original,cleaned
آية الله,آيةالله
ابو ,ابو
Expand Down Expand Up @@ -112,6 +110,7 @@ original,cleaned
عوده الله,عودهالله
عوض الله,عوضالله
عيد الزقزوق,عيدالزقزوق
عيد دردونه, عيددردونه
غلام الدين,غلامالدين
غلام الدين,غلامالدين
فاطمة الزهراء,فاطمةالزهراء
Expand Down
23 changes: 18 additions & 5 deletions scripts/utils/sort-csv.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,27 @@
import { ArabicClass } from "arabic-utils";
import fs from "fs";

// Exact text of the CSV header line. The sort comparator checks rows against
// this value so the header is ordered before every data row rather than being
// sorted alphabetically along with them.
const headerRow = "original,cleaned";

const sortCsv = (repoFilePath: string) => {
const csv = fs.readFileSync(repoFilePath).toString();

const sortedRows = csv.split("\n").sort((aRaw, bRaw) => {
const a = new ArabicClass(aRaw).normalize();
const b = new ArabicClass(bRaw).normalize();
return a.localeCompare(b);
});
const sortedRows = csv
.split("\n")
.sort((aRaw, bRaw) => {
if (aRaw === headerRow) {
return -1;
}

if (bRaw === headerRow) {
return 1;
}

const a = new ArabicClass(aRaw).normalize();
const b = new ArabicClass(bRaw).normalize();
return a.localeCompare(b);
})
.filter((row) => !!row);

const uniqueArParts = new Set<string>();
const duplicates = new Set<string>();
Expand Down

0 comments on commit d459da7

Please sign in to comment.