Skip to content

Commit 8690bdf

Browse files
committed
extend ipfs content path to support multiple input formats
1 parent f085602 commit 8690bdf

File tree

1 file changed

+131
-77
lines changed

1 file changed

+131
-77
lines changed

graph/src/ipfs/content_path.rs

Lines changed: 131 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use anyhow::anyhow;
22
use cid::Cid;
3+
use url::Url;
34

45
use crate::ipfs::IpfsError;
56
use crate::ipfs::IpfsResult;
@@ -13,39 +14,79 @@ pub struct ContentPath {
1314

1415
impl ContentPath {
1516
/// Creates a new [ContentPath] from the specified input.
17+
///
18+
/// Supports the following formats:
19+
/// - <CID>[/<path>]
20+
/// - /ipfs/<CID>[/<path>]
21+
/// - ipfs://<CID>[/<path>]
22+
/// - http[s]://.../ipfs/<CID>[/<path>]
23+
/// - http[s]://.../api/v0/cat?arg=<CID>[/<path>]
1624
pub fn new(input: impl AsRef<str>) -> IpfsResult<Self> {
17-
let input = input.as_ref();
25+
let input = input.as_ref().trim();
1826

1927
if input.is_empty() {
2028
return Err(IpfsError::InvalidContentPath {
21-
input: "".to_owned(),
22-
source: anyhow!("path is empty"),
29+
input: "".to_string(),
30+
source: anyhow!("content path is empty"),
2331
});
2432
}
2533

26-
let (cid, path) = input
27-
.strip_prefix("/ipfs/")
28-
.unwrap_or(input)
29-
.split_once('/')
30-
.unwrap_or((input, ""));
34+
if input.starts_with("http://") || input.starts_with("https://") {
35+
return Self::parse_from_url(input);
36+
}
37+
38+
Self::parse_from_cid_and_path(input)
39+
}
40+
41+
fn parse_from_url(input: &str) -> IpfsResult<Self> {
42+
let url = Url::parse(input).map_err(|_err| IpfsError::InvalidContentPath {
43+
input: input.to_string(),
44+
source: anyhow!("input is not a valid URL"),
45+
})?;
46+
47+
if let Some((_, x)) = url.query_pairs().find(|(key, _)| key == "arg") {
48+
return Self::parse_from_cid_and_path(&x);
49+
}
50+
51+
if let Some((_, x)) = url.path().split_once("/ipfs/") {
52+
return Self::parse_from_cid_and_path(x);
53+
}
54+
55+
Self::parse_from_cid_and_path(url.path())
56+
}
57+
58+
fn parse_from_cid_and_path(mut input: &str) -> IpfsResult<Self> {
59+
input = input.trim_matches('/');
60+
61+
for prefix in ["ipfs/", "ipfs://"] {
62+
if let Some(input_without_prefix) = input.strip_prefix(prefix) {
63+
input = input_without_prefix
64+
}
65+
}
66+
67+
let (cid, path) = input.split_once('/').unwrap_or((input, ""));
3168

3269
let cid = cid
3370
.parse::<Cid>()
3471
.map_err(|err| IpfsError::InvalidContentPath {
35-
input: input.to_owned(),
72+
input: input.to_string(),
3673
source: anyhow::Error::from(err).context("invalid CID"),
3774
})?;
3875

3976
if path.contains('?') {
4077
return Err(IpfsError::InvalidContentPath {
41-
input: input.to_owned(),
78+
input: input.to_string(),
4279
source: anyhow!("query parameters not allowed"),
4380
});
4481
}
4582

4683
Ok(Self {
4784
cid,
48-
path: (!path.is_empty()).then_some(path.to_owned()),
85+
path: if path.is_empty() {
86+
None
87+
} else {
88+
Some(path.to_string())
89+
},
4990
})
5091
}
5192

@@ -97,13 +138,20 @@ mod tests {
97138
const CID_V0: &str = "QmUNLLsPACCz1vLxQVkXqqLX5R1X345qqfHbsf67hvA3Nn";
98139
const CID_V1: &str = "bafybeiczsscdsbs7ffqz55asqdf3smv6klcw3gofszvwlyarci47bgf354";
99140

141+
fn make_path(cid: &str, path: Option<&str>) -> ContentPath {
142+
ContentPath {
143+
cid: cid.parse().unwrap(),
144+
path: path.map(ToOwned::to_owned),
145+
}
146+
}
147+
100148
#[test]
fn fails_on_empty_input() {
    // An empty input is rejected with a dedicated error message.
    let message = ContentPath::new("").unwrap_err().to_string();

    assert_eq!(
        message,
        "'' is not a valid IPFS content path: content path is empty",
    );
}
109157

@@ -119,75 +167,37 @@ mod tests {
119167
#[test]
fn accepts_a_valid_cid_v0() {
    // A bare CIDv0 parses to a content path without a path component.
    let parsed = ContentPath::new(CID_V0).unwrap();
    let expected = make_path(CID_V0, None);

    assert_eq!(parsed, expected);
}
131172

132173
#[test]
fn accepts_a_valid_cid_v1() {
    // A bare CIDv1 parses to a content path without a path component.
    let parsed = ContentPath::new(CID_V1).unwrap();
    let expected = make_path(CID_V1, None);

    assert_eq!(parsed, expected);
}
144178

145179
#[test]
fn accepts_and_removes_leading_slashes() {
    // One or many leading slashes are dropped before the CID is parsed.
    for slashes in ["/", "///////"] {
        let parsed = ContentPath::new(format!("{slashes}{CID_V0}")).unwrap();
        assert_eq!(parsed, make_path(CID_V0, None));
    }
}
153187

154188
#[test]
fn accepts_and_removes_trailing_slashes() {
    // One or many trailing slashes do not produce a path component.
    for slashes in ["/", "///////"] {
        let parsed = ContentPath::new(format!("{CID_V0}{slashes}")).unwrap();
        assert_eq!(parsed, make_path(CID_V0, None));
    }
}
166196

167197
#[test]
fn accepts_a_path_after_the_cid() {
    // Everything after the first slash following the CID becomes the path.
    let input = format!("{CID_V0}/readme.md");
    let parsed = ContentPath::new(input).unwrap();

    assert_eq!(parsed, make_path(CID_V0, Some("readme.md")));
}
192202

193203
#[test]
@@ -214,23 +224,67 @@ mod tests {
214224
#[test]
fn accepts_and_removes_the_ipfs_prefix() {
    // The `/ipfs/` prefix is stripped, with and without a trailing path.
    let cases = [("", None), ("/readme.md", Some("readme.md"))];

    for (suffix, expected_path) in cases {
        let parsed = ContentPath::new(format!("/ipfs/{CID_V0}{suffix}")).unwrap();
        assert_eq!(parsed, make_path(CID_V0, expected_path));
    }
}
227232

228-
assert_eq!(
229-
path,
230-
ContentPath {
231-
cid: CID_V0.parse().unwrap(),
232-
path: Some("readme.md".to_owned()),
233-
}
234-
);
233+
#[test]
fn accepts_and_removes_the_ipfs_schema() {
    // The `ipfs://` scheme is stripped, with and without a trailing path.
    let cases = [("", None), ("/readme.md", Some("readme.md"))];

    for (suffix, expected_path) in cases {
        let parsed = ContentPath::new(format!("ipfs://{CID_V0}{suffix}")).unwrap();
        assert_eq!(parsed, make_path(CID_V0, expected_path));
    }
}
241+
242+
#[test]
fn accepts_and_parses_ipfs_rpc_urls() {
    // RPC-style URLs carry the CID (and optional path) in the `arg` query parameter.
    for scheme in ["http", "https"] {
        for (suffix, expected_path) in [("", None), ("/readme.md", Some("readme.md"))] {
            let url = format!("{scheme}://ipfs.com/api/v0/cat?arg={CID_V0}{suffix}");
            let parsed = ContentPath::new(url).unwrap();

            assert_eq!(parsed, make_path(CID_V0, expected_path));
        }
    }
}
260+
261+
#[test]
fn accepts_and_parses_ipfs_gateway_urls() {
    // Gateway-style URLs carry the CID (and optional path) after `/ipfs/`.
    for scheme in ["http", "https"] {
        for (suffix, expected_path) in [("", None), ("/readme.md", Some("readme.md"))] {
            let url = format!("{scheme}://ipfs.com/ipfs/{CID_V0}{suffix}");
            let parsed = ContentPath::new(url).unwrap();

            assert_eq!(parsed, make_path(CID_V0, expected_path));
        }
    }
}
275+
276+
#[test]
fn accepts_and_parses_paths_from_urls() {
    // Without `/ipfs/` or an `arg` parameter, the whole URL path is parsed.
    for scheme in ["http", "https"] {
        for (suffix, expected_path) in [("", None), ("/readme.md", Some("readme.md"))] {
            let url = format!("{scheme}://ipfs.com/{CID_V0}{suffix}");
            let parsed = ContentPath::new(url).unwrap();

            assert_eq!(parsed, make_path(CID_V0, expected_path));
        }
    }
}
236290
}

0 commit comments

Comments
 (0)