Skip to content

Commit 68f9c91

Browse files
committed
Added experimental YouTube URL scraping support
1 parent a795251 commit 68f9c91

File tree

6 files changed

+76
-29
lines changed

6 files changed

+76
-29
lines changed

apps/core/event/pool.ts

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,15 @@ const relate = pool<
4343
| ((type: "artist", to: ArtistInfo, page: number) => Aggregated<ArtistInfo>)
4444
>("relate", { transform: aggregate, timeout, rate });
4545

46+
/**
 * Event pool for the "scrape" event.
 *
 * Handlers receive a URL and a page number and produce
 * `Aggregated<TrackInfo>`. The pool is configured with the same `aggregate`
 * transform and `timeout` that the neighbouring `relate` and `recognize`
 * pools use; unlike `relate`, no `rate` option is passed.
 */
const scrape = pool<
  (url: string, page: number) => Aggregated<TrackInfo>
>("scrape", { transform: aggregate, timeout });
50+
4651
const recognize = pool<
4752
(
4853
stream: () => ReadableStream<Uint8Array>,
49-
page: number
54+
page: number,
5055
) => Aggregated<TrackInfo>
5156
>("recognize", { transform: aggregate, timeout });
5257

@@ -68,6 +73,7 @@ export {
6873
search,
6974
relate,
7075
expand,
76+
scrape,
7177
desource,
7278
recognize,
7379
transcribe,

plugins/telegram/handlers/message.ts

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import {
44
message,
55
search,
66
lookup,
7+
scrape,
78
voice,
89
fetch,
910
info,
@@ -29,6 +30,20 @@ message(function* (text) {
2930
const page = pages.get(id);
3031
if (!page) return;
3132

33+
if (text.match(/^https?:\/\//)) {
34+
const reply = this.reply.bind(this);
35+
yield* map(scrape(text, 8), function* (state) {
36+
if (state.completed && state.items.length === 1) {
37+
page.close();
38+
state.close();
39+
yield* reply(state.items);
40+
} else {
41+
yield* page.update(state);
42+
}
43+
});
44+
return;
45+
}
46+
3247
this.signal.addEventListener("abort", page.close, { once: true });
3348
yield* map(search("track", text, 8), function* (state) {
3449
yield* page.update(state);

plugins/telegram/plugin.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ export const {
2323
fetch,
2424
users,
2525
search,
26+
scrape,
2627
relate,
2728
expand,
2829
lookup,
@@ -40,7 +41,7 @@ export const {
4041
token: defaulted(string(), ""),
4142
webhook: defaulted(string(), ""),
4243
}),
43-
{}
44+
{},
4445
),
4546
},
4647
settings: {
@@ -70,11 +71,11 @@ const command = pool<(command: string, replied?: number) => void>("command");
7071
const changed = pool<(entries: number[]) => void>("changed");
7172
const callback =
7273
pool<(request: Infer<typeof query>, message: number, chat: number) => void>(
73-
"callback"
74+
"callback",
7475
);
7576

7677
update.catch((error: any) =>
77-
wrn(error.cause?.message || error.cause || error.message)
78+
wrn(error.cause?.message || error.cause || error.message),
7879
);
7980

8081
export {

plugins/youtube/index.ts

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import {
55
init,
66
lookup,
77
relate,
8+
scrape,
89
search,
910
transcribe,
1011
} from "./plugin";
@@ -15,7 +16,7 @@ import { convert } from "./types";
1516
init(function* () {
1617
const loadMessage = setTimeout(
1718
() => info("Retrieving player script..."),
18-
1000
19+
1000,
1920
);
2021

2122
const youtube = yield* async(Innertube.create());
@@ -46,7 +47,7 @@ desource(function* (track) {
4647
this.youtube.music
4748
.getInfo(id)
4849
.then((x) => x.chooseFormat({ type: "audio", quality: "best" }))
49-
.then((x) => x.decipher(this.youtube.player))
50+
.then((x) => x.decipher(this.youtube.player)),
5051
);
5152
});
5253

@@ -65,12 +66,12 @@ expand(function* (type, what, _) {
6566
: this.youtube.music
6667
.getAlbum(id)
6768
.then((x) => x.url?.match(/list=([^&]+)/)?.[1])
68-
.catch(() => undefined)
69+
.catch(() => undefined),
6970
);
7071
if (!playlist) return;
7172

7273
let result = yield* async(
73-
this.youtube.music.getPlaylist(playlist).catch(() => undefined)
74+
this.youtube.music.getPlaylist(playlist).catch(() => undefined),
7475
);
7576
while (result) {
7677
yield* convert(result.items as any, "track");
@@ -90,8 +91,8 @@ relate(function* (type, to, _) {
9091
x.sections
9192
.find((x: any) => x.header?.title?.text === "Fans might also like")
9293
?.contents.filter((x) => x.type === "MusicTwoRowItem"),
93-
() => undefined
94-
)
94+
() => undefined,
95+
),
9596
);
9697
return yield* convert(artists as any, "artist");
9798
}
@@ -101,11 +102,19 @@ relate(function* (type, to, _) {
101102
.getRelated(id)
102103
.then((x) => x.find((x) => x.type === "MusicCarouselShelf"))
103104
.then((x: any) => x?.contents)
104-
.catch(() => undefined)
105+
.catch(() => undefined),
105106
);
106107
yield* convert(items, type);
107108
});
108109

110+
/**
 * Resolves a YouTube link to a single track.
 *
 * Accepts `youtube.com/watch?...v=<id>` links (including subdomains such as
 * `music.` and queries where `v` is not the first parameter) and
 * `youtu.be/<id>` short links. Any other URL yields nothing, so URLs that
 * merely live on a YouTube host (playlists, channels, ...) are not mistaken
 * for videos.
 *
 * @param url The URL to resolve.
 * @yields The track converted from the video's `basic_info`, if it could be
 *   retrieved.
 */
scrape(function* (url) {
  // The dots are escaped and the id is only taken after a known prefix:
  // with an optional "watch?v=" part, "youtube.com/playlist?list=..." would
  // produce the id "playlist" and "watch?feature=share&v=ID" the id "watch".
  const id = url.match(
    /(?:youtube\.com\/watch\?(?:[^#]*&)?v=|youtu\.be\/)([a-zA-Z0-9_-]+)/,
  )?.[1];
  if (!id) return;

  // A failed lookup (unknown or unavailable id) is treated as "no result",
  // matching how the expand/relate/transcribe handlers in this file deal
  // with rejected youtubei.js requests.
  const info = yield* async(
    this.youtube.music.getInfo(id).catch(() => undefined),
  );
  if (!info) return;

  yield* convert([info["basic_info"]], "track");
});
117+
109118
transcribe(function* (track) {
110119
const id = yield* identify(track, "track");
111120
if (!id) return;
@@ -114,14 +123,14 @@ transcribe(function* (track) {
114123
this.youtube.music
115124
.getLyrics(id)
116125
.then((x) => x?.description.toString())
117-
.catch(() => undefined)
126+
.catch(() => undefined),
118127
);
119128
if (lyrics) yield lyrics;
120129
});
121130

122131
function* identify(
123132
data: { sources?: string[]; title?: string },
124-
type: "track" | "artist" | "album"
133+
type: "track" | "artist" | "album",
125134
) {
126135
return (
127136
data.sources?.find((x) => x.startsWith("youtube/"))?.slice(8) ||

plugins/youtube/plugin.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ export const {
88
search,
99
relate,
1010
expand,
11+
scrape,
1112
lookup,
1213
desource,
1314
transcribe,

plugins/youtube/types.ts

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,10 @@
1-
import type {
2-
MusicResponsiveListItem,
3-
MusicTwoRowItem,
4-
} from "youtubei.js/dist/src/parser/nodes";
1+
import type { YTMusic } from "youtubei.js";
52

6-
function toAssets(url?: string) {
3+
function toAssets(url?: string, secondary = url) {
74
if (!url) return { arts: [], thumbnails: [] };
85
return {
96
arts: [url.replace(/w[0-9]+-h[0-9]+/, "w800-h800")],
10-
thumbnails: [url.replace(/w[0-9]+-h[0-9]+/, "w120-h120")],
7+
thumbnails: [secondary!.replace(/w[0-9]+-h[0-9]+/, "w120-h120")],
118
};
129
}
1310

@@ -35,7 +32,7 @@ function toAlbum(data: MusicResponsiveListItem) {
3532
(x: any) =>
3633
x.endpoint?.payload?.browseEndpointContextSupportedConfigs
3734
?.browseEndpointContextMusicConfig?.pageType ===
38-
"MUSIC_PAGE_TYPE_ARTIST" && x.endpoint?.payload?.browseId
35+
"MUSIC_PAGE_TYPE_ARTIST" && x.endpoint?.payload?.browseId,
3936
)
4037
.map((x: any) => ({
4138
title: x.text as string,
@@ -44,37 +41,55 @@ function toAlbum(data: MusicResponsiveListItem) {
4441
};
4542
}
4643

47-
function toTrack(data: MusicResponsiveListItem) {
48-
if (!data.artists?.length) return;
44+
function toTrack(data: MusicResponsiveListItem & TrackInfo) {
45+
if (!data.artists?.length && !data.view_count) return;
4946

5047
return {
5148
title: data.title + (data.subtitle ? ` (${data.subtitle})` : ""),
52-
duration: data.duration?.seconds || 0,
49+
duration: data.duration?.seconds || +data.duration! || 0,
5350
sources: [`youtube/${data.id}`],
5451
album: {
5552
title: data.album?.name || data.title || "Unknown",
5653
sources: data.album?.id ? [`youtube/${data.album?.id}`] : [],
5754
year: 0,
58-
...toAssets(data.thumbnail?.contents[0].url),
55+
...toAssets(
56+
data.thumbnail?.contents?.[0].url || data.thumbnail?.[0].url,
57+
data.thumbnail?.[data.thumbnail.length - 1]?.url,
58+
),
5959
},
6060
artists:
6161
data.artists?.map((x) => ({
6262
title: x.name,
6363
sources: x.channel_id ? [`youtube/${x.channel_id}`] : [],
64-
})) || [],
64+
})) ||
65+
(typeof data.author === "string"
66+
? [
67+
{
68+
title: data.author.toString(),
69+
sources: data.channel_id ? [`youtube/${data.channel_id}`] : [],
70+
},
71+
]
72+
: []),
6573
};
6674
}
6775

6876
function convert<T extends "track" | "artist" | "album">(
69-
data: MusicResponsiveListItem[] | undefined,
70-
type: T
77+
data: (MusicResponsiveListItem | TrackInfo)[] | undefined,
78+
type: T,
7179
) {
7280
const map = { track: toTrack, artist: toArtist, album: toAlbum }[type];
7381
const truthy = <T>(x: T): x is NonNullable<T> => !!x;
7482
const convert = map as (
75-
x: Parameters<typeof map>[0]
83+
x: Parameters<typeof map>[0],
7684
) => ReturnType<typeof map>;
77-
return data?.map((x) => convert(x)).filter(truthy) || [];
85+
return data?.map((x: any) => convert(x)).filter(truthy) || [];
7886
}
7987

88+
// These shapes are derived from the public `YTMusic` type export by indexed
// access, so nothing has to be imported from the package's internal paths.

/** Shape of the `basic_info` member of `YTMusic.TrackInfo`. */
type TrackInfo = YTMusic.TrackInfo["basic_info"];

/** Element type of `YTMusic.Album["contents"]`. */
type MusicResponsiveListItem = YTMusic.Album["contents"][number];

/** Any item that can appear in the `contents` of an album section. */
type AlbumSectionItem = YTMusic.Album["sections"][number]["contents"][number];

/** The album-section item variants assignable to `{ id: string | undefined }`. */
type MusicTwoRowItem = Extract<AlbumSectionItem, { id: string | undefined }>;
94+
8095
export { convert };

0 commit comments

Comments
 (0)