-
Notifications
You must be signed in to change notification settings - Fork 54
/
xml.ns.m
286 lines (240 loc) · 9.45 KB
/
xml.ns.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
%---------------------------------------------------------------------------%
% vim: ft=mercury ts=4 sw=4 et
%---------------------------------------------------------------------------%
% Copyright (C) 2000, 2001, 2004-2006, 2011 The University of Melbourne.
% Copyright (C) 2014, 2018, 2022 The Mercury team.
% This file is distributed under the terms specified in COPYING.LIB.
%---------------------------------------------------------------------------%
%
% File: xml.ns.m
% Main author: conway@cs.mu.oz.au, inch@students.cs.mu.oz.au
%
% This module provides predicates to turn an XML document into a
% namespace-aware XML document. A plain XML document containing multiple
% elements and attributes may run into problems of recognition and collision,
% i.e. the same element type or attribute name may have a different meaning
% in a different scope. A namespace-aware XML document solves this problem by
% using URI references to identify elements and attributes.
%
% Reference:
% <http://www.w3.org/TR/REC-xml-names>
%
%---------------------------------------------------------------------------%
:- module xml.ns.
:- interface.
:- import_module xml.doc.
:- import_module array.
:- import_module list.
:- import_module pair.
%---------------------------------------------------------------------------%
% A namespace-aware XML document, as produced by ns_translate.
%
% The document tree is not stored as nested terms. Every item lives in the
% `content' array, and the other fields refer to items by reference.
% ns_translate copies `prestuff', `root' and `poststuff' unchanged from the
% source document.
%
:- type ns_document
---> ns_doc(
% References to the prestuff items.
% NOTE(review): presumably the items that precede the root element --
% confirm against xml.doc.
prestuff :: list(ref(ns_content)),
% Reference to the root of the document tree.
root :: ref(ns_content),
% References to the poststuff items.
% NOTE(review): presumably the items that follow the root element --
% confirm against xml.doc.
poststuff :: list(ref(ns_content)),
% Array storing all document content.
content :: array(ns_content)
).
% A single item stored in the `content' array of an ns_document.
% Attributes are not items of their own: they are stored inside the
% ns_element they belong to.
%
:- type ns_content
---> ns_element(ns_element) % namespace-aware element (with its attributes)
; pi(string, string) % processing instruction
; comment(string) % comment
; data(string). % data
% A namespace-aware element.
%
:- type ns_element
---> ns_element(
% Qualified name of the element.
elt_name :: q_name,
% The element's attributes, with qualified names.
elt_attrs :: list(ns_attribute),
% References to the children of this element in the document tree.
% ns_translate copies this list unchanged from the source element.
elt_content :: list(ref(content)),
% The (Prefix - URI) namespace declarations that ns_translate
% extracted from this element's own attributes.
elt_namespaces :: ns_list
).
% A namespace-aware attribute: a qualified name together with its value.
%
:- type ns_attribute
---> ns_attribute(
attr_name :: q_name, % qualified name
attr_value :: string % attribute value, copied from the source attribute
).
% A qualified name: a local name paired with the URI of its namespace.
% When ns_translate cannot resolve a name through a declared prefix, it
% stores the default namespace in effect in `ns_uri'; that is the empty
% string if no default namespace has been declared.
%
:- type q_name
---> q_name(
localName :: string, % local name without prefix
ns_uri :: ns_uri % URI reference
).
% A list of namespace declarations, each one a (Prefix - URI) pair.
:- type ns_list == list(pair(string, ns_uri)).
% A namespace URI reference, represented as a plain string.
:- type ns_uri == string.
% ns_translate(Doc, NsDoc):
%
% Takes a plain XML document Doc and returns NsDoc, its namespace-aware
% counterpart, in which element and attribute names are q_names.
%
:- pred ns_translate(xml.doc.document::in, ns_document::out) is det.
%---------------------------------------------------------------------------%
%---------------------------------------------------------------------------%
:- implementation.
:- import_module int.
:- import_module map.
:- import_module require.
:- import_module string.
%---------------------------------------------------------------------------%
% The namespace prefixes in scope during the traversal, each mapped to the
% URI it was declared with.
:- type namespaces == map(string, ns_uri).
ns_translate(Doc, NsDoc) :-
    % Translate the tree below the root into a list of namespace-aware
    % items, then pack that list into the content array of the result.
    % The prestuff, root and poststuff references are taken over as is.
    traverse(Doc, [], NsItems),
    NsItemArray = array.array(NsItems),
    NsDoc = ns_doc(Doc ^ prestuff, Doc ^ root, Doc ^ poststuff, NsItemArray).
% traverse(Doc, !Acc):
%
% Translates the tree rooted at the root of Doc, pushing the resulting
% namespace-aware items onto the accumulator. The traversal starts with
% no prefixes in scope and with the empty string as default namespace.
%
:- pred traverse(document::in, list(ns_content)::in, list(ns_content)::out)
    is det.

traverse(Doc, Acc0, Acc) :-
    ContentArray = Doc ^ content,
    RootRef = Doc ^ root,
    map.init(NoNamespaces),
    NoDefault = "",
    traverse(ContentArray, NoNamespaces, NoDefault, RootRef, Acc0, Acc).
% traverse(ContentArray, Namespaces0, Default0, ContentRef, !Acc):
%
% Translates the item that ContentRef refers to in ContentArray and pushes
% the result onto the front of !Acc. Namespaces0 and Default0 are the
% prefix bindings and the default namespace inherited from the parent.
%
% An element is turned into a namespace-aware element and its children are
% then translated as well. Any other kind of item is converted directly.
%
:- pred traverse(array(content)::in, namespaces::in, string::in,
    ref(content)::in, list(ns_content)::in, list(ns_content)::out) is det.

traverse(ContentArray, Namespaces0, Default0, ContentRef, !Acc) :-
    array.lookup(ContentArray, ContentRef, Content),
    (
        Content = element(Elem),
        ElemAttrs = Elem ^ elt_attrs,
        ChildRefs = Elem ^ elt_content,

        % A default namespace declared on this element replaces the
        % inherited one.
        ( if default_namespace(ElemAttrs, DeclaredDefault, RestAttrs) then
            Default = DeclaredDefault,
            DeclAttrs = RestAttrs
        else
            Default = Default0,
            DeclAttrs = ElemAttrs
        ),

        % Add the prefixes declared on this element to the inherited ones.
        % A redeclared prefix overrides the inherited binding.
        extract_namespace_decls(DeclAttrs, NSList, _OtherAttrs),
        AddDecl =
            ( pred(Decl::in, Map0::in, Map::out) is det :-
                Decl = Prefix - URI,
                map.set(Prefix, URI, Map0, Map)
            ),
        list.foldl(AddDecl, NSList, Namespaces0, Namespaces),

        % Qualify the element name and the name of every attribute of the
        % source element.
        namespaceize_elt_name(Namespaces, Default, Elem ^ elt_name, Name),
        Qualify =
            ( pred(Attr0::in, Attr::out) is det :-
                Attr0 = attribute(AttrName0, Value),
                namespaceize_elt_name(Namespaces, Default,
                    AttrName0, AttrName),
                Attr = ns_attribute(AttrName, Value)
            ),
        list.map(Qualify, ElemAttrs, Attrs),

        % Push the element first, then visit its children last-to-first.
        % Since every item is pushed onto the front of the accumulator,
        % the finished list holds the children in their original order,
        % ahead of their parent.
        NsElem = ns_element(ns_element(Name, Attrs, ChildRefs, NSList)),
        !:Acc = [NsElem | !.Acc],
        xml.ns.foldl(traverse, ContentArray, Namespaces, Default,
            list.reverse(ChildRefs), !Acc)
    ;
        Content = comment(_),
        NsComment = convert_type(Content),
        !:Acc = [NsComment | !.Acc]
    ;
        Content = data(_),
        NsData = convert_type(Content),
        !:Acc = [NsData | !.Acc]
    ;
        Content = pi(_, _),
        NsPI = convert_type(Content),
        !:Acc = [NsPI | !.Acc]
    ).
% default_namespace(Attrs, Default, NewAttrs):
%
% Searches Attrs for the first attribute whose name is exactly `xmlns',
% i.e. a default namespace declaration. If there is one, Default is its
% value and NewAttrs is Attrs with that one attribute removed; all other
% attributes are kept, in their original order. Fails if Attrs contains
% no such attribute.
%
:- pred default_namespace(list(attribute)::in, string::out,
    list(attribute)::out) is semidet.

default_namespace([], _, _) :- fail.
default_namespace([Attr | Attrs], Default, NewAttrs) :-
    ( if is_xmlns(Attr ^ attr_name) then
        % Found the declaration: return its value and leave it out of
        % the remaining attributes.
        Default = Attr ^ attr_value,
        NewAttrs = Attrs
    else
        % Not the declaration: keep searching the tail, but keep this
        % attribute in the result. Dropping it here would lose any
        % `xmlns:prefix' declaration written before the `xmlns' attribute.
        default_namespace(Attrs, Default, NewAttrs0),
        NewAttrs = [Attr | NewAttrs0]
    ).
% extract_namespace_decls(Attrs, NSList, NewAttrs):
%
% Splits Attrs into namespace declarations and ordinary attributes.
% An attribute named `xmlns:Suffix', e.g. the one in
% < book xmlns:isbn="someURI" >, contributes (Suffix - Value) to NSList.
% Every other attribute is returned in NewAttrs. Both output lists keep
% the order of Attrs.
%
:- pred extract_namespace_decls(list(attribute)::in, ns_list::out,
    list(attribute)::out) is det.

extract_namespace_decls(Attrs, NSList, NewAttrs) :-
    AsNamespaceDecl =
        ( pred(Attr::in, Decl::out) is semidet :-
            split_on_colon(Attr ^ attr_name, Prefix, Suffix),
            is_xmlns(Prefix),
            Decl = (Suffix - Attr ^ attr_value)
        ),
    % Attributes for which the closure succeeds end up (transformed) in
    % NSList; those for which it fails end up (unchanged) in NewAttrs.
    list.filter_map(AsNamespaceDecl, Attrs, NSList, NewAttrs).
% namespaceize_elt_name(Namespaces, Default, Name, QName):
%
% Turns Name into the qualified name QName. The local part of QName is
% always the part of Name after its first colon (all of Name if it has no
% colon). The URI is chosen as follows:
%
%   - if the prefix of Name is bound in Namespaces, the URI bound to it;
%   - otherwise, if Name is `xmlns:Suffix' and Suffix is bound in
%     Namespaces, the URI bound to Suffix;
%   - otherwise Default.
%
:- pred namespaceize_elt_name(namespaces::in, string::in, string::in,
    q_name::out) is det.

namespaceize_elt_name(Namespaces, Default, Name, QName) :-
    split_on_colon(Name, Prefix, LocalName),
    ( if map.search(Namespaces, Prefix, PrefixURI) then
        % Name = prefix:suffix with a declared prefix.
        URI = PrefixURI
    else if is_xmlns(Prefix), map.search(Namespaces, LocalName, DeclURI) then
        % Name = xmlns:suffix, i.e. the declaration of `suffix' itself.
        URI = DeclURI
    else
        % No usable prefix: fall back on the default namespace.
        URI = Default
    ),
    QName = q_name(LocalName, URI).
% split_on_colon(Name, Prefix, Suffix):
%
% Splits Name at its first colon: Prefix is the text before that colon and
% Suffix is the text after it. If Name contains no colon, Prefix is the
% empty string and Suffix is Name itself.
%
:- pred split_on_colon(string::in, string::out, string::out) is det.

split_on_colon(Name, Prefix, Suffix) :-
    ( if string.sub_string_search(Name, ":", ColonIndex) then
        Prefix = string.left(Name, ColonIndex),
        % Everything to the right of the colon, the colon itself excluded.
        SuffixLength = string.length(Name) - ColonIndex - 1,
        Suffix = string.right(Name, SuffixLength)
    else
        Prefix = "",
        Suffix = Name
    ).
% is_xmlns(Name):
%
% Succeeds iff Name is the string `xmlns'. According to the specification
% `Namespaces in XML' <http://www.w3.org/TR/REC-xml-names>, a namespace is
% declared by an attribute that has `xmlns' as its prefix.
%
:- pred is_xmlns(string::in) is semidet.

is_xmlns(Name) :-
    Name = "xmlns".
% convert_type(Content) = NsContent:
%
% Converts a comment, data or processing instruction item into the
% ns_content item with the same arguments. Elements cannot be converted
% this way: for an element the function throws an exception.
%
:- func convert_type(content) = ns_content.

convert_type(Content) = NsContent :-
    (
        Content = comment(Text),
        NsContent = comment(Text)
    ;
        Content = data(Text),
        NsContent = data(Text)
    ;
        Content = pi(Target, Data),
        NsContent = pi(Target, Data)
    ;
        Content = element(_),
        require.error("Convert element failed.")
    ).
% foldl(Pred, Content, NameSpaces, Default, Refs, !Acc):
%
% Calls Pred(Content, NameSpaces, Default, Ref, !Acc) for each Ref in Refs,
% from first to last, threading the accumulator through the calls.
% Content, NameSpaces and Default are passed unchanged to every call.
%
:- pred foldl(
    pred(array(content), namespaces, string, ref(content), T, T)::
        in(pred(in, in, in, in, in, out) is det),
    array(content)::in, namespaces::in, string::in, list(ref(content))::in,
    T::in, T::out) is det.

foldl(Pred, Content, NameSpaces, Default, Refs, !Acc) :-
    (
        Refs = []
    ;
        Refs = [FirstRef | LaterRefs],
        Pred(Content, NameSpaces, Default, FirstRef, !Acc),
        xml.ns.foldl(Pred, Content, NameSpaces, Default, LaterRefs, !Acc)
    ).