Skip to content

Commit ea2fa33

Browse files
Zircoz and claude committed
[Confluence] Fix pagination for get_all_* methods and unify _get_paged across Cloud/Server
Fixes #1598

- Switch 10 get_all_* methods to use _get_paged for full pagination
- Unify _get_paged into ConfluenceBase (remove Cloud/Server duplicates)
- Handle _links.next as both string and dict formats
- Fix relative pagination URLs by prepending base URL correctly
- Fix Cloud api_root from wiki/api/v2 to wiki/rest/api (endpoints use v1 paths)
- Recognize api.atlassian.com in Cloud detection; support explicit cloud= kwarg
- Add routing tests and pagination edge-case tests for both Cloud and Server

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent efcfb44 commit ea2fa33

File tree

9 files changed

+399
-152
lines changed

9 files changed

+399
-152
lines changed

atlassian/confluence/__init__.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,11 @@ class Confluence(ConfluenceBase):
1818

1919
def __init__(self, url, *args, **kwargs):
2020
# Detect which implementation to use
21-
if ("atlassian.net" in url or "jira.com" in url) and ("/wiki" not in url):
21+
# Priority: explicit cloud= kwarg > URL-based heuristic
22+
is_cloud = kwargs.get("cloud")
23+
if is_cloud is None:
24+
is_cloud = "atlassian.net" in url or "jira.com" in url or "api.atlassian.com" in url
25+
if is_cloud:
2226
impl = ConfluenceCloud(url, *args, **kwargs)
2327
else:
2428
impl = ConfluenceServer(url, *args, **kwargs)

atlassian/confluence/base.py

Lines changed: 32 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -134,27 +134,39 @@ def _get_paged(
134134

135135
yield from response.get("results", [])
136136

137-
if self.cloud:
138-
url = response.get("_links", {}).get("next", {}).get("href")
139-
if url is None:
140-
break
141-
# From now on we have absolute URLs with parameters
142-
absolute = True
143-
# Params are now provided by the url
144-
params = {}
145-
# Trailing should not be added as it is already part of the url
146-
trailing = False
137+
next_link = response.get("_links", {}).get("next")
138+
if next_link is None:
139+
break
140+
if isinstance(next_link, str):
141+
url = next_link
147142
else:
148-
if response.get("_links", {}).get("next") is None:
149-
break
150-
# For server, we need to extract the next page URL from the _links.next.href
151-
next_url = response.get("_links", {}).get("next", {}).get("href")
152-
if next_url is None:
153-
break
154-
url = next_url
155-
absolute = True
156-
params = {}
157-
trailing = False
143+
url = next_link.get("href")
144+
if url is None:
145+
break
146+
147+
if url.startswith("/"):
148+
# Prepend base URL from self.url, stripping the API root suffix to preserve path prefix
149+
# Example: self.url = "https://api.atlassian.com/ex/confluence/abc/wiki/rest/api"
150+
# api_root = "wiki/rest/api"
151+
# base = "https://api.atlassian.com/ex/confluence/abc"
152+
# relative = "/rest/api/content?cursor=1"
153+
# result = "https://api.atlassian.com/ex/confluence/abc/rest/api/content?cursor=1"
154+
api_root_suffix = f"/{self.api_root}"
155+
if self.url.endswith(api_root_suffix):
156+
base = self.url[:-len(api_root_suffix)]
157+
else:
158+
# Fallback: extract scheme+netloc if api_root suffix not found
159+
from urllib.parse import urlparse
160+
parsed = urlparse(self.url)
161+
base = f"{parsed.scheme}://{parsed.netloc}"
162+
url = base + url
163+
164+
# From now on we have absolute URLs with parameters
165+
absolute = True
166+
# Params are now provided by the url
167+
params = {}
168+
# Trailing should not be added as it is already part of the url
169+
trailing = False
158170

159171
return
160172

atlassian/confluence/cloud/__init__.py

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -13,10 +13,10 @@ def __init__(self, url="https://api.atlassian.com/", *args, **kwargs):
1313
if "cloud" not in kwargs:
1414
kwargs["cloud"] = True
1515
if "api_version" not in kwargs:
16-
kwargs["api_version"] = "2"
16+
kwargs["api_version"] = "latest"
1717
if "api_root" not in kwargs:
18-
kwargs["api_root"] = "wiki/api/v2"
19-
url = url.strip("/")
18+
kwargs["api_root"] = "wiki/rest/api"
19+
url = url.strip("/") + f"/{kwargs['api_root']}"
2020
super(Cloud, self).__init__(url, *args, **kwargs)
2121

2222
# Content Management
@@ -28,6 +28,14 @@ def get_content_by_type(self, content_type, **kwargs):
2828
"""Get content by type (page, blogpost, etc.)."""
2929
return self.get("content", params={"type": content_type, **kwargs})
3030

31+
def get_all_pages_from_space(self, space_key, **kwargs):
32+
"""Get all pages from space."""
33+
return self._get_paged("content", params={"spaceKey": space_key, "type": "page", **kwargs})
34+
35+
def get_all_blog_posts_from_space(self, space_key, **kwargs):
36+
"""Get all blog posts from space."""
37+
return self._get_paged("content", params={"spaceKey": space_key, "type": "blogpost", **kwargs})
38+
3139
def create_content(self, data, **kwargs):
3240
"""Create new content."""
3341
return self.post("content", data=data, **kwargs)

atlassian/confluence/cloud/base.py

Lines changed: 0 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -24,53 +24,4 @@ def __init__(self, url, *args, **kwargs):
2424
"""
2525
super(ConfluenceCloudBase, self).__init__(url, *args, **kwargs)
2626

27-
def _get_paged(
28-
self,
29-
url,
30-
params=None,
31-
data=None,
32-
flags=None,
33-
trailing=None,
34-
absolute=False,
35-
):
36-
"""
37-
Used to get the paged data for Confluence Cloud
38-
39-
:param url: string: The url to retrieve
40-
:param params: dict (default is None): The parameter's
41-
:param data: dict (default is None): The data
42-
:param flags: string[] (default is None): The flags
43-
:param trailing: bool (default is None): If True, a trailing slash is added to the url
44-
:param absolute: bool (default is False): If True, the url is used absolute and not relative to the root
45-
46-
:return: A generator object for the data elements
47-
"""
48-
if params is None:
49-
params = {}
50-
51-
while True:
52-
response = self.get(
53-
url,
54-
trailing=trailing,
55-
params=params,
56-
data=data,
57-
flags=flags,
58-
absolute=absolute,
59-
)
60-
if "results" not in response:
61-
return
62-
63-
yield from response.get("results", [])
64-
65-
# Confluence Cloud uses _links.next.href for pagination
66-
url = response.get("_links", {}).get("next", {}).get("href")
67-
if url is None:
68-
break
69-
# From now on we have absolute URLs with parameters
70-
absolute = True
71-
# Params are now provided by the url
72-
params = {}
73-
# Trailing should not be added as it is already part of the url
74-
trailing = False
7527

76-
return

atlassian/confluence/server/__init__.py

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -62,11 +62,11 @@ def get_content_by_id(self, content_id, **kwargs):
6262

6363
def get_all_pages_from_space(self, space_key, **kwargs):
6464
"""Get all pages from space."""
65-
return self.get("content", params={"spaceKey": space_key, "type": "page", **kwargs})
65+
return self._get_paged("content", params={"spaceKey": space_key, "type": "page", **kwargs})
6666

6767
def get_all_blog_posts_from_space(self, space_key, **kwargs):
6868
"""Get all blog posts from space."""
69-
return self.get("content", params={"spaceKey": space_key, "type": "blogpost", **kwargs})
69+
return self._get_paged("content", params={"spaceKey": space_key, "type": "blogpost", **kwargs})
7070

7171
def get_page_by_title(self, space_key, title, **kwargs):
7272
"""Get page by title and space key."""
@@ -195,11 +195,11 @@ def remove_content_label(self, content_id, label_name, **kwargs):
195195

196196
def get_all_pages_by_label(self, label, **kwargs):
197197
"""Get all pages by label."""
198-
return self.get("content", params={"label": label, "type": "page", **kwargs})
198+
return self._get_paged("content", params={"label": label, "type": "page", **kwargs})
199199

200200
def get_all_blog_posts_by_label(self, label, **kwargs):
201201
"""Get all blog posts by label."""
202-
return self.get("content", params={"label": label, "type": "blogpost", **kwargs})
202+
return self._get_paged("content", params={"label": label, "type": "blogpost", **kwargs})
203203

204204
# Attachment Management
205205
def get_attachments(self, content_id, **kwargs):
@@ -293,24 +293,24 @@ def get_draft_content(self, content_id, **kwargs):
293293

294294
def get_all_draft_pages_from_space(self, space_key, **kwargs):
295295
"""Get all draft pages from space."""
296-
return self.get("content", params={"spaceKey": space_key, "type": "page", "status": "draft", **kwargs})
296+
return self._get_paged("content", params={"spaceKey": space_key, "type": "page", "status": "draft", **kwargs})
297297

298298
def get_all_draft_blog_posts_from_space(self, space_key, **kwargs):
299299
"""Get all draft blog posts from space."""
300-
return self.get("content", params={"spaceKey": space_key, "type": "blogpost", "status": "draft", **kwargs})
300+
return self._get_paged("content", params={"spaceKey": space_key, "type": "blogpost", "status": "draft", **kwargs})
301301

302302
# Trash Management
303303
def get_trash_content(self, space_key, **kwargs):
304304
"""Get trash content."""
305-
return self.get("content", params={"spaceKey": space_key, "status": "trashed", **kwargs})
305+
return self._get_paged("content", params={"spaceKey": space_key, "status": "trashed", **kwargs})
306306

307307
def get_all_pages_from_space_trash(self, space_key, **kwargs):
308308
"""Get all pages from space trash."""
309-
return self.get("content", params={"spaceKey": space_key, "type": "page", "status": "trashed", **kwargs})
309+
return self._get_paged("content", params={"spaceKey": space_key, "type": "page", "status": "trashed", **kwargs})
310310

311311
def get_all_blog_posts_from_space_trash(self, space_key, **kwargs):
312312
"""Get all blog posts from space trash."""
313-
return self.get("content", params={"spaceKey": space_key, "type": "blogpost", "status": "trashed", **kwargs})
313+
return self._get_paged("content", params={"spaceKey": space_key, "type": "blogpost", "status": "trashed", **kwargs})
314314

315315
# Export
316316
def export_content(self, content_id, **kwargs):

atlassian/confluence/server/base.py

Lines changed: 0 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -24,54 +24,4 @@ def __init__(self, url, *args, **kwargs):
2424
"""
2525
super(ConfluenceServerBase, self).__init__(url, *args, **kwargs)
2626

27-
def _get_paged(
28-
self,
29-
url,
30-
params=None,
31-
data=None,
32-
flags=None,
33-
trailing=False,
34-
absolute=False,
35-
):
36-
"""
37-
Used to get the paged data for Confluence Server
38-
39-
:param url: string: The url to retrieve
40-
:param params: dict (default is None): The parameter's
41-
:param data: dict (default is None): The data
42-
:param flags: string[] (default is None): The flags
43-
:param trailing: bool (default is None): If True, a trailing slash is added to the url
44-
:param absolute: bool (default is False): If True, the url is used absolute and not relative to the root
45-
46-
:return: A generator object for the data elements
47-
"""
48-
if params is None:
49-
params = {}
50-
51-
while True:
52-
response = self.get(
53-
url,
54-
trailing=trailing,
55-
params=params,
56-
data=data,
57-
flags=flags,
58-
absolute=absolute,
59-
)
60-
if "results" not in response:
61-
return
62-
63-
yield from response.get("results", [])
64-
65-
# Confluence Server uses _links.next.href for pagination
66-
if response.get("_links", {}).get("next") is None:
67-
break
68-
# For server, we need to extract the next page URL from the _links.next.href
69-
next_url = response.get("_links", {}).get("next", {}).get("href")
70-
if next_url is None:
71-
break
72-
url = next_url
73-
absolute = True
74-
params = {}
75-
trailing = False
7627

77-
return

0 commit comments

Comments
 (0)