-
Notifications
You must be signed in to change notification settings - Fork 823
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add a script to update the region/geolocation files (#3701)
* Add a script to update the region/geolocation files * Update tooltip now that we only list supported geolocations * Add language sensitive sorting
- Loading branch information
Showing
123 changed files
with
247 additions
and
7,130 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,148 @@ | ||
/** | ||
* This script updates the files in static/geolocations with the available locations on YouTube. | ||
* | ||
* It tries to map every active FreeTube language (static/locales/activeLocales.json) | ||
* to its equivalent on YouTube. | ||
* | ||
* It then uses those language mappings, | ||
* to scrape the location selection menu on the YouTube website, in every mapped language. | ||
* | ||
* All languages it couldn't find on YouTube, that don't have manually added mapping, | ||
* get logged to the console, as well as all unmapped YouTube languages. | ||
*/ | ||
|
||
import { mkdirSync, readFileSync, rmSync, writeFileSync } from 'fs' | ||
import { dirname } from 'path' | ||
import { fileURLToPath } from 'url' | ||
import { Innertube, Misc } from 'youtubei.js' | ||
|
||
// Path of the `static` directory, resolved relative to the location of this script
const STATIC_DIRECTORY = `${dirname(fileURLToPath(import.meta.url))}/../static`

const activeLanguagesPath = `${STATIC_DIRECTORY}/locales/activeLocales.json`
/** @type {string[]} */
const activeLanguages = JSON.parse(readFileSync(activeLanguagesPath, 'utf8'))

// FreeTube's en-US is called en on YouTube
const initialResponse = await scrapeLanguage('en')

// Collect the language codes offered by the language menu of the English response

/** @type {string[]} */
const youTubeLanguages = []

{
  const accountMenu = initialResponse.data.actions[0].openPopupAction.popup.multiPageMenuRenderer
  // the second item of the second section is the entry whose sub menu lists the languages
  const languageEntry = accountMenu.sections[1].multiPageMenuSectionRenderer.items[1].compactLinkRenderer
  const languageMenu = languageEntry.serviceEndpoint.signalServiceEndpoint.actions[0].getMultiPageMenuAction.menu.multiPageMenuRenderer

  for (const { compactLinkRenderer } of languageMenu.sections[0].multiPageMenuSectionRenderer.items) {
    youTubeLanguages.push(compactLinkRenderer.serviceEndpoint.signalServiceEndpoint.actions[0].selectLanguageCommand.hl)
  }
}
|
||
// Pair every YouTube language with the active FreeTube language(s) it corresponds to

// en-US was already scraped as `en`, so it counts as found from the start
const foundLanguageNames = ['en-US']
const unusedYouTubeLanguageNames = []
const languagesToScrape = []

// YouTube codes that have no identically named FreeTube locale, with the FreeTube locale(s) they are used for.
// according to https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
// "no" is the macro language for "nb" and "nn"
const specialCases = new Map([
  ['de', ['de-DE']],
  ['fr', ['fr-FR']],
  ['no', ['nb_NO', 'nn']]
])

for (const youTube of youTubeLanguages) {
  const withUnderScore = youTube.replace('-', '_')

  // precedence: exact match, then the underscore spelling, then the special cases
  let freeTubeNames
  if (activeLanguages.includes(youTube)) {
    freeTubeNames = [youTube]
  } else if (activeLanguages.includes(withUnderScore)) {
    freeTubeNames = [withUnderScore]
  } else {
    freeTubeNames = specialCases.get(youTube) ?? []
  }

  if (freeTubeNames.length === 0) {
    // `en` is covered by en-US, so it isn't reported as unused
    if (youTube !== 'en') {
      unusedYouTubeLanguageNames.push(youTube)
    }
    continue
  }

  foundLanguageNames.push(...freeTubeNames)

  for (const freeTube of freeTubeNames) {
    languagesToScrape.push({ youTube, freeTube })
  }
}
|
||
// Report the languages that could not be matched, in both directions
const unmatchedFreeTubeLanguages = activeLanguages
  .filter(lang => !foundLanguageNames.includes(lang))
  .sort()

console.log("Active FreeTube languages that aren't available on YouTube:")
console.log(unmatchedFreeTubeLanguages)

console.log("YouTube languages that don't have an equivalent active FreeTube language:")
// sorts the list in place before printing it
unusedYouTubeLanguageNames.sort()
console.log(unusedYouTubeLanguageNames)
|
||
// Scrape the location menu in various languages and write files to the file system

const geolocationsDirectory = `${STATIC_DIRECTORY}/geolocations`

// Recreate the directory from scratch, so that only files written by this run remain.
// `force` makes rmSync ignore a directory that doesn't exist yet, instead of throwing ENOENT.
rmSync(geolocationsDirectory, { recursive: true, force: true })
mkdirSync(geolocationsDirectory)

// the English menu was already fetched to read the language list, so reuse that response
processGeolocations('en-US', 'en', initialResponse)

// every remaining language is requested and written one at a time
for (const { youTube, freeTube } of languagesToScrape) {
  const response = await scrapeLanguage(youTube)

  processGeolocations(freeTube, youTube, response)
}
|
||
|
||
|
||
/**
 * Creates a youtubei.js session for the given language and requests the account menu with it.
 * @param {string} youTubeLanguageCode language code handed to the session as its `lang` option (e.g. `en`)
 * @returns the response of the `/account/account_menu` request
 */
async function scrapeLanguage(youTubeLanguageCode) {
  const sessionOptions = {
    lang: youTubeLanguageCode,
    generate_session_locally: true,
    retrieve_player: false
  }

  const session = await Innertube.create(sessionOptions)
  const accountMenuResponse = await session.actions.execute('/account/account_menu')

  return accountMenuResponse
}
|
||
/**
 * Reads the location list out of an account menu response, sorts it by name when the runtime
 * can collate one of the candidate locales, and writes it to `geolocations/<freeTubeLanguage>.json`
 * inside the static directory.
 * @param {string} freeTubeLanguage FreeTube locale name, used for the file name and as a sorting locale
 * @param {string} youTubeLanguage YouTube language code, used as a fallback sorting locale
 * @param response account menu response as returned by `scrapeLanguage`
 */
function processGeolocations(freeTubeLanguage, youTubeLanguage, response) {
  const accountMenu = response.data.actions[0].openPopupAction.popup.multiPageMenuRenderer
  // the fourth item of the second section is the entry whose sub menu lists the locations
  const locationEntry = accountMenu.sections[1].multiPageMenuSectionRenderer.items[3].compactLinkRenderer
  const locationMenu = locationEntry.serviceEndpoint.signalServiceEndpoint.actions[0].getMultiPageMenuAction.menu.multiPageMenuRenderer

  const geolocations = []

  for (const { compactLinkRenderer } of locationMenu.sections[0].multiPageMenuSectionRenderer.items) {
    geolocations.push({
      name: new Misc.Text(compactLinkRenderer.title).toString().trim(),
      code: compactLinkRenderer.serviceEndpoint.signalServiceEndpoint.actions[0].selectCountryCommand.gl
    })
  }

  const normalisedFreeTubeLanguage = freeTubeLanguage.replace('_', '-')

  // offer Intl.Collator up to 4 candidate locales, in the hopes that it supports one of them
  // the Set removes duplicates, so it doesn't have to do duplicate work
  const locales = Array.from(new Set([
    normalisedFreeTubeLanguage,
    youTubeLanguage,
    normalisedFreeTubeLanguage.split('-')[0],
    youTubeLanguage.split('-')[0]
  ]))

  // only sort if node supports sorting the language, otherwise hope that YouTube's sorting was correct
  // node 20.3.1 doesn't support sorting `eu`
  const supportedLocales = Intl.Collator.supportedLocalesOf(locales)

  if (supportedLocales.length !== 0) {
    const collator = new Intl.Collator(locales)

    geolocations.sort((first, second) => collator.compare(first.name, second.name))
  }

  const outputPath = `${STATIC_DIRECTORY}/geolocations/${freeTubeLanguage}.json`
  writeFileSync(outputPath, JSON.stringify(geolocations))
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
[{"name":"أذربيجان","code":"AZ"},{"name":"أسبانيا","code":"ES"},{"name":"أستراليا","code":"AU"},{"name":"إستونيا","code":"EE"},{"name":"إسرائيل","code":"IL"},{"name":"الأرجنتين","code":"AR"},{"name":"الأردن","code":"JO"},{"name":"الإكوادور","code":"EC"},{"name":"الإمارات العربية المتحدة","code":"AE"},{"name":"البحرين","code":"BH"},{"name":"البرازيل","code":"BR"},{"name":"البرتغال","code":"PT"},{"name":"البوسنة والهرسك","code":"BA"},{"name":"الجبل الأسود","code":"ME"},{"name":"الجزائر","code":"DZ"},{"name":"الدنمارك","code":"DK"},{"name":"السلفادور","code":"SV"},{"name":"السنغال","code":"SN"},{"name":"السويد","code":"SE"},{"name":"العراق","code":"IQ"},{"name":"الفيليبين","code":"PH"},{"name":"الكويت","code":"KW"},{"name":"ألمانيا","code":"DE"},{"name":"المجر","code":"HU"},{"name":"المغرب","code":"MA"},{"name":"المكسيك","code":"MX"},{"name":"المملكة العربية السعودية","code":"SA"},{"name":"المملكة المتحدة","code":"GB"},{"name":"النرويج","code":"NO"},{"name":"النمسا","code":"AT"},{"name":"الهند","code":"IN"},{"name":"الولايات المتحدة","code":"US"},{"name":"اليابان","code":"JP"},{"name":"اليمن","code":"YE"},{"name":"اليونان","code":"GR"},{"name":"إندونيسيا","code":"ID"},{"name":"أورغواي","code":"UY"},{"name":"أوغندا","code":"UG"},{"name":"أوكرانيا","code":"UA"},{"name":"أيرلندا","code":"IE"},{"name":"أيسلندا","code":"IS"},{"name":"إيطاليا","code":"IT"},{"name":"بابوا غينيا 
الجديدة","code":"PG"},{"name":"باراغواي","code":"PY"},{"name":"باكستان","code":"PK"},{"name":"بلجيكا","code":"BE"},{"name":"بلغاريا","code":"BG"},{"name":"بنغلاديش","code":"BD"},{"name":"بنما","code":"PA"},{"name":"بورتوريكو","code":"PR"},{"name":"بولندا","code":"PL"},{"name":"بوليفيا","code":"BO"},{"name":"بيرو","code":"PE"},{"name":"بيلاروسيا","code":"BY"},{"name":"تايلاند","code":"TH"},{"name":"تايوان","code":"TW"},{"name":"تركيا","code":"TR"},{"name":"تشيكيا","code":"CZ"},{"name":"تشيلي","code":"CL"},{"name":"تنزانيا","code":"TZ"},{"name":"تونس","code":"TN"},{"name":"جامايكا","code":"JM"},{"name":"جمهورية الدومينكان","code":"DO"},{"name":"جنوب إفريقيا","code":"ZA"},{"name":"جورجيا","code":"GE"},{"name":"روسيا","code":"RU"},{"name":"رومانيا","code":"RO"},{"name":"زيمبابوي","code":"ZW"},{"name":"سريلانكا","code":"LK"},{"name":"سلوفاكيا","code":"SK"},{"name":"سلوفينيا","code":"SI"},{"name":"سنغافورة","code":"SG"},{"name":"سويسرا","code":"CH"},{"name":"شمال مقدونيا","code":"MK"},{"name":"صربيا","code":"RS"},{"name":"عُمان","code":"OM"},{"name":"غانا","code":"GH"},{"name":"غواتيمالا","code":"GT"},{"name":"فرنسا","code":"FR"},{"name":"فنزويلا","code":"VE"},{"name":"فنلندا","code":"FI"},{"name":"فيتنام","code":"VN"},{"name":"قبرص","code":"CY"},{"name":"قطر","code":"QA"},{"name":"كازاخستان","code":"KZ"},{"name":"كرواتيا","code":"HR"},{"name":"كمبوديا","code":"KH"},{"name":"كندا","code":"CA"},{"name":"كوريا 
الجنوبية","code":"KR"},{"name":"كوستاريكا","code":"CR"},{"name":"كولومبيا","code":"CO"},{"name":"كينيا","code":"KE"},{"name":"لاتفيا","code":"LV"},{"name":"لاوس","code":"LA"},{"name":"لبنان","code":"LB"},{"name":"لوكسمبورغ","code":"LU"},{"name":"ليبيا","code":"LY"},{"name":"ليتوانيا","code":"LT"},{"name":"ليشتنشتاين","code":"LI"},{"name":"مالطة","code":"MT"},{"name":"ماليزيا","code":"MY"},{"name":"مصر","code":"EG"},{"name":"نيبال","code":"NP"},{"name":"نيجيريا","code":"NG"},{"name":"نيكاراغوا","code":"NI"},{"name":"نيوزيلندا","code":"NZ"},{"name":"هندوراس","code":"HN"},{"name":"هولندا","code":"NL"},{"name":"هونغ كونغ","code":"HK"}] |
Oops, something went wrong.