Skip to content

Commit

Permalink
Merge pull request #563 from StephenMcConnel/BL-13305-AdditionalWork
Browse files Browse the repository at this point in the history
Use all languages for counting books, not just lang1 (BL-13305) (#563)
  • Loading branch information
andrew-polk authored Jul 26, 2024
2 parents 3d1461f + 43d4b9a commit 7741066
Show file tree
Hide file tree
Showing 7 changed files with 274 additions and 140 deletions.
4 changes: 2 additions & 2 deletions src/components/AggregateGrid/AggregateGridInterfaces.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ export interface IMinimalBookInfo {
objectId: string;
createdAt: string;
tags: string[];
lang1Tag?: string;
show?: { pdf: { langTag: string } }; // there is more, but this is what we're using to get at l1 at the moment
//lang1Tag?: string;
uploader: IBasicUserInfo;
languages: string[]; // language tags from the langPointers object array
}
5 changes: 5 additions & 0 deletions src/components/AggregateGrid/AggregateGridPage.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,11 @@ export function getLangTagDataForIrregularLangCode(
case "ydd":
newCode = "yi";
break;
// obsolete code for Malay
// zsm / Malay shares the macrolanguage code ms with bjn, btj, bve, bvu, coa, dup, hji, id (!), jak, jax, and many more
case "zsm":
newCode = "ms";
break;
}
const tagData = langDataMap.get(newCode);
if (tagData) {
Expand Down
58 changes: 37 additions & 21 deletions src/components/CountryGrid/CountryGridColumns.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,7 @@ import { Filter, Sorting } from "@devexpress/dx-react-grid";
import {
filterNumberWithOperator,
filterSimpleString,
filterStringWithNegation,
} from "../AggregateGrid/AggregateGridPage";
import { IMinimalBookInfo } from "../AggregateGrid/AggregateGridInterfaces";

export interface ICountryGridRowData {
name: string; // country name
Expand Down Expand Up @@ -142,24 +140,6 @@ export function getCountryGridColumnsDefinitions(): IGridColumn[] {
return definitions;
}

/**
 * Decides whether a book should contribute to the country grid rows.
 *
 * A book with no `lang1Tag` is always rejected. Otherwise the book is kept
 * unless the "blorgLanguageTags" grid filter has a non-blank value and
 * `filterStringWithNegation` rejects the book's `lang1Tag` for that value.
 *
 * @param book the book being considered
 * @param gridFilters the filters currently set on the grid
 * @returns true if the book should be counted, false if it should be skipped
 */
export function filterBooksBeforeCreatingCountryGridRows(
    book: IMinimalBookInfo,
    gridFilters: Filter[]
): boolean {
    const primaryTag = book.lang1Tag;
    if (!primaryTag) {
        return false;
    }

    const tagFilter = gridFilters.find(
        (candidate) => candidate.columnName === "blorgLanguageTags"
    );
    // No filter on the language tags column (or an empty one) means every
    // book with a primary language passes.
    if (!tagFilter || !tagFilter.value) {
        return true;
    }

    const wanted = tagFilter.value.trim();
    // A whitespace-only filter value is treated the same as no filter.
    if (!wanted) {
        return true;
    }
    if (filterStringWithNegation(wanted, primaryTag)) {
        return true;
    }
    return false;
}

export function compareCountryGridRows(
a: ICountryGridRowData,
b: ICountryGridRowData,
Expand Down Expand Up @@ -262,9 +242,45 @@ export function filterCountryGridRow(
return false;
break;
case "blorgLanguageTags":
// handled in the filterBooksBeforeCreatingCountryGridRows function
if (filter.value && filter.value.trim()) {
const filterValue = filter.value.trim();
// This allows matching partial tags, e.g. "en" matches "en" or "en-GB",
// and "e" matches "en" or "es" or "de".
// "-" will match any language with a subtag, e.g. "en-GB" or "en-US".
return (
row.blorgLanguageTags.filter((x) => {
return filterSimpleString(filterValue, x);
}).length > 0
);
}
break;
}
}
return true;
}

/**
 * Makes the "blorgLanguageTags" column display agree with the current filter.
 *
 * If a column definition named "blorgLanguageTags" exists, its `getCellValue`
 * is replaced in place (the passed-in definition object is mutated):
 *  - with a non-blank "blorgLanguageTags" filter value, the cell shows only
 *    the row's tags accepted by `filterSimpleString` for that value;
 *  - otherwise the cell shows all of the row's tags.
 * In both cases the tags are joined with ", ".
 *
 * @param columnDefinitions the grid's column definitions; the
 *   "blorgLanguageTags" entry, if any, is modified
 * @param filters the filters currently set on the grid
 */
export function adjustListDisplaysForFiltering(
    columnDefinitions: IGridColumn[],
    filters: Filter[]
): void {
    const blorgLanguageTagsColDef = columnDefinitions.find(
        (c) => c.name === "blorgLanguageTags"
    );
    if (!blorgLanguageTagsColDef) {
        return;
    }

    const filterDef = filters.find(
        (f) => f.columnName === "blorgLanguageTags"
    );
    // Trim BEFORE deciding whether a filter is active, so a whitespace-only
    // value counts as "no filter". The row filter for this column ignores
    // such values; applying an empty pattern here could make the displayed
    // tag list disagree with which rows are shown.
    const filterValue =
        filterDef && filterDef.value ? filterDef.value.trim() : "";

    if (filterValue) {
        blorgLanguageTagsColDef.getCellValue = (row: ICountryGridRowData) =>
            row.blorgLanguageTags
                .filter((x) => filterSimpleString(filterValue, x))
                .join(", ");
    } else {
        blorgLanguageTagsColDef.getCellValue = (row: ICountryGridRowData) =>
            row.blorgLanguageTags.join(", ");
    }
}
39 changes: 25 additions & 14 deletions src/components/CountryGrid/CountryGridControlInternal.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ import { TableCell, useTheme } from "@material-ui/core";
import {
ICountryGridRowData,
compareCountryGridRows,
filterBooksBeforeCreatingCountryGridRows,
filterCountryGridRow,
getCountryGridColumnsDefinitions,
adjustListDisplaysForFiltering,
} from "./CountryGridColumns";
import { IGridColumn } from "../Grid/GridColumns";
import { useStorageState } from "react-storage-hooks";
Expand Down Expand Up @@ -165,27 +165,27 @@ const CountryGridControlInternal: React.FunctionComponent<ICountryGridControlPro
// unknownRegions.sort()
// );
bookData.forEach((book) => {
if (book.lang1Tag) {
if (
!filterBooksBeforeCreatingCountryGridRows(
book,
gridFilters
)
) {
return;
}
let lang = fullLangDataMap.get(book.lang1Tag);
// Keep track of the regions represented by the
// languages in this book so that we can count
// the books for each region accurately even with
// multiple languages from the same region.
const bookRegions: string[] = [];

book.languages.forEach((langTag) => {
let lang = fullLangDataMap.get(langTag);
if (!lang) {
lang = getLangTagDataForIrregularLangCode(
book.lang1Tag,
langTag,
fullLangDataMap,
countryIdMap
);
}
if (lang && lang.region) {
if (!bookRegions.includes(lang.region)) {
bookRegions.push(lang.region);
}
const rowData = countryMap.get(lang.region);
if (rowData) {
++rowData.bookCount;
if (
!rowData.blorgLanguageTags.includes(
lang.tag
Expand Down Expand Up @@ -222,8 +222,15 @@ const CountryGridControlInternal: React.FunctionComponent<ICountryGridControlPro
// }
}
}
}
});
bookRegions.forEach((region) => {
const rowData = countryMap.get(region);
if (rowData) {
++rowData.bookCount;
}
});
});

const allRows: ICountryGridRowData[] = Array.from(
countryMap.values()
)
Expand Down Expand Up @@ -293,6 +300,10 @@ const CountryGridControlInternal: React.FunctionComponent<ICountryGridControlPro
// Apply filtering and sorting to the rows, then set the page of rows to display.
// Also set the total row count and the export data.
useEffect(() => {
adjustListDisplaysForFiltering(
countryGridColumnDefinitions,
gridFilters
);
const filteredRows = countryDataRows.filter((row) =>
filterCountryGridRow(row, gridFilters)
);
Expand Down
160 changes: 71 additions & 89 deletions src/components/LanguageGrid/LanguageGridControlInternal.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,12 @@ const LanguageGridControlInternal: React.FunctionComponent<ILanguageGridControlP
} else {
baseValue.countryName = `[${langData.region}]`;
}
} else {
// console.log(
// `No region for ${
// lang.isoCode
// }: ${JSON.stringify(langData)}`
// );
}
// if we go back to allowing multiple regions per language, this code will be useful
// if (langData.regions) {
Expand All @@ -168,116 +174,92 @@ const LanguageGridControlInternal: React.FunctionComponent<ILanguageGridControlP
}
languageRowMap.set(lang.isoCode, baseValue);
});
// let unknownLangCount = 0;
bookData.forEach((book) => {
if (!book.lang1Tag) {
// ++unknownLangCount;
return;
}
let lang = languageRowMap.get(book.lang1Tag);
if (!lang) {
// we've tried to standardize on "th" for Thai, but there are still some "th-TH" books
if (book.lang1Tag === "th-TH") {
lang = languageRowMap.get("th");
// we've tried to standardize on "zh-CN" for Chinese, but there is at least one "cmn" book
} else if (book.lang1Tag === "cmn") {
lang = languageRowMap.get("zh-CN");
} else if (book.lang1Tag === "xkg") {
// I'm not sure what happened here, but we have several books with a tag of "xkg" that
// display the language as "kcg-x-Gworog" in the bloom library UI. I assume that's correct.
lang = languageRowMap.get("kcg-x-Gworog");
} else if (book.lang1Tag === "fuv-Arab") {
// Three books claim the Arabic script, but are obviously Latin (the default) script.
lang = languageRowMap.get("fuv");
// I have no idea what this user was thinking, but the book is obviously English.
} else if (book.lang1Tag === "en-Dupl") {
lang = languageRowMap.get("en");
} /* else if (book.lang1Tag === "kvt") {
// Two books seem to have been mislabeled as a different language in Myanmar.
lang = map1.get("aeu");
}*/
}
if (lang) {
if (
!filterBooksBeforeCreatingLanguageGridRows(
book,
gridFilters
)
) {
return;
// Use the book's languages (derived from the langPointers array) to update
// the language row data.
book.languages.forEach((langTag) => {
const lang = languageRowMap.get(langTag);
if (!lang) {
return; // shouldn't happen since book.languages should be a subset of languages
}
lang.bookCount++;
if (lang.bookCount === 1)
lang.firstSeen = book.createdAt;
else if (book.createdAt < lang.firstSeen)
lang.firstSeen = book.createdAt;
if (book.uploader?.username) {
if (lang) {
if (
!lang.uploaderEmails.includes(
book.uploader.username
!filterBooksBeforeCreatingLanguageGridRows(
book,
gridFilters
)
) {
lang.uploaderCount++;
lang.uploaderEmails.push(
book.uploader.username
);
return;
}
}
if (book.tags) {
// Level tags are the most reliable way to determine the level of a book.
// If a book has a level tag, it is the level of the book. Otherwise, we look for
// a computedLevel tag, which is calculated automatically by some algorithm.
const levelTag = book.tags.find((x) =>
x.startsWith("level:")
);
if (levelTag) {
switch (levelTag) {
case "level:1":
lang.level1Count++;
break;
case "level:2":
lang.level2Count++;
break;
case "level:3":
lang.level3Count++;
break;
case "level:4":
lang.level4Count++;
break;
lang.bookCount++;
if (lang.bookCount === 1)
lang.firstSeen = book.createdAt;
else if (
// This may not be accurate if the book is uploaded later with an
// additional language, but it's the best we can do.
book.createdAt < lang.firstSeen
) {
lang.firstSeen = book.createdAt;
}
if (book.uploader?.username) {
if (
!lang.uploaderEmails.includes(
book.uploader.username
)
) {
lang.uploaderCount++;
lang.uploaderEmails.push(
book.uploader.username
);
}
} else {
const computedTag = book.tags.find((x) =>
x.startsWith("computedLevel:")
}
if (book.tags) {
// Level tags are the most reliable way to determine the level of a book.
// If a book has a level tag, it is the level of the book. Otherwise, we look for
// a computedLevel tag, which is calculated automatically by some algorithm.
const levelTag = book.tags.find((x) =>
x.startsWith("level:")
);
if (computedTag) {
switch (computedTag) {
case "computedLevel:1":
if (levelTag) {
switch (levelTag) {
case "level:1":
lang.level1Count++;
break;
case "computedLevel:2":
case "level:2":
lang.level2Count++;
break;
case "computedLevel:3":
case "level:3":
lang.level3Count++;
break;
case "computedLevel:4":
case "level:4":
lang.level4Count++;
break;
}
} else {
const computedTag = book.tags.find((x) =>
x.startsWith("computedLevel:")
);
if (computedTag) {
switch (computedTag) {
case "computedLevel:1":
lang.level1Count++;
break;
case "computedLevel:2":
lang.level2Count++;
break;
case "computedLevel:3":
lang.level3Count++;
break;
case "computedLevel:4":
lang.level4Count++;
break;
}
}
}
}
}
} else if (book.lang1Tag) {
// console.warn(
// `LanguageGridControlInternal: Book ${book.objectId} data for unknown language ${book.lang1Tag}`
// );
} else {
// ++unknownLangCount;
}
});
});
// console.warn(
// `LanguageGridControlInternal: ${unknownLangCount} books with undetermined primary language`
// );
const allRows: ILanguageGridRowData[] = Array.from(
languageRowMap.values()
)
Expand Down
Loading

0 comments on commit 7741066

Please sign in to comment.