diff --git a/src/main/java/life/qbic/linksmith/model/WebLink.java b/src/main/java/life/qbic/linksmith/model/WebLink.java index bc8fa80..7671a10 100644 --- a/src/main/java/life/qbic/linksmith/model/WebLink.java +++ b/src/main/java/life/qbic/linksmith/model/WebLink.java @@ -7,30 +7,64 @@ import java.util.Objects; import java.util.Optional; import java.util.Set; +import java.util.function.Predicate; import java.util.stream.Collectors; +import java.util.stream.Stream; import life.qbic.linksmith.core.RfcLinkParameter; /** - * A Java record representing a web link object following the - * RFC 8288 model specification. + * A semantic view of a single Web Linking relation as modeled by the HTTP {@code Link} header field + * in RFC 8288. + *

+ * Scope and intent
+ * This record represents one link consisting of a target URI ({@link #target()}) and the + * parameters attached to that target ({@link #params()}).

+ * The record does not attempt to fully enforce all RFC constraints at construction time. + * It is designed as a semantic accessor layer over raw parameters produced by parsers and/or + * validators. Consumers can use this type to conveniently access commonly used RFC parameters such + * as {@code rel}, {@code anchor}, {@code type}, {@code hreflang}, {@code media}, {@code title}, + * and {@code title*}. + *

+ * Parameter model
+ * In RFC 8288, links can carry parameters, serialized as {@code link-param} entries after the + * target URI. The ABNF for a parameter in HTTP serialization is defined as: + *

{@code
+ * link-param = token BWS [ "=" BWS ( token / quoted-string ) ]
+ * }
+ * This record stores parameters as {@link WebLinkParameter} name/value pairs. Higher-level + * components (e.g. validators) may interpret parameter semantics (cardinality, value formats, + * profile rules such as FAIR Signposting) and emit issues; this record focuses on accessors. + *

+ * Known vs extension attributes
+ * RFC 8288 defines a set of well-known parameters (e.g. {@code rel}, {@code anchor}, {@code type}). + * Parameters not listed in {@link RfcLinkParameter} are treated as extension attributes and + * can be accessed via {@link #extensionAttributes()} and {@link #extensionAttribute(String)}. + *

+ * Multiplicity
+ * Some parameters may occur multiple times (e.g. {@code hreflang}), while others have stricter + * rules in the RFC (e.g. {@code rel} is specified as not appearing more than once in a given + * link-value). This model exposes values as found in {@link #params()} and provides deterministic + * accessors (e.g. {@link #type()} returns the first occurrence). + * + * @param target the target of the link (the URI inside {@code <...>} in the HTTP serialization) + * @param params the list of link parameters / target attributes associated with the link */ public record WebLink(URI target, List params) { /** - * Creates an RFC 8288 compliant web - * link object. + * Creates a new {@link WebLink} instance. *

- * Following RFC8288, the ABNF for a link parameter is: - *

- * {@code link-param = token BWS [ "=" BWS ( token / quoted-string ) ]} - *

- * The parameter key must not be withoutValue, so during construction the {@code params} keys are - * checked for an withoutValue key. The values can be withoutValue though. + * This factory method exists to provide a stable, explicit construction API and to enforce basic + * null checks. The semantic correctness of {@code params} (e.g. allowed characters, parameter + * cardinality, value constraints) is typically handled by validators. * - * @param reference a {@link URI} pointing to the actual resource - * @param params a {@link Map} of parameters as keys and a list of their values - * @return the new Weblink - * @throws NullPointerException if any method argument is {@code null} + * @param reference a {@link URI} pointing to the link target + * @param params the raw link parameters associated with the target + * @return a new {@link WebLink} + * @throws NullPointerException if {@code reference} or {@code params} is {@code null} */ public static WebLink create(URI reference, List params) throws NullPointerException { @@ -40,107 +74,276 @@ public static WebLink create(URI reference, List params) } /** - * Web link constructor that can be used if a web link has no parameters. - *

+ * Convenience factory to create a {@link WebLink} without any parameters. * - * @param reference a {@link URI} pointing to the actual resource - * @return the new Weblink - * @throws NullPointerException if any method argument is {@code null} + * @param reference a {@link URI} pointing to the link target + * @return a new {@link WebLink} without parameters + * @throws NullPointerException if {@code reference} is {@code null} */ public static WebLink create(URI reference) throws NullPointerException { return create(reference, List.of()); } - + /** + * Returns the {@code anchor} parameter value of this link, if present. + *

+ * The {@code anchor} parameter expresses the link context (origin) explicitly as defined in + * RFC 8288 ("Link Context"). If multiple {@code anchor} parameters are present, this method + * returns the first one encountered in {@link #params()}. + * + * @return the first {@code anchor} parameter value, or {@link Optional#empty()} if absent + */ public Optional anchor() { - return Optional.empty(); + return findFirstWithFilter(params, WebLink::isAnchorParameter) + .map(WebLinkParameter::value); } + /** + * Returns all {@code hreflang} parameter values of this link. + *

+ * The {@code hreflang} target attribute indicates the language of the target resource as defined + * in RFC 8288 ("The hreflang Target Attribute"). The attribute may occur multiple times; this + * method returns values in encounter order. + * + * @return all {@code hreflang} values, or an empty list if none are present + */ public List hreflang() { - return List.of(); + return params.stream() + .filter(WebLink::isHreflangParameter) + .map(WebLinkParameter::value) + .toList(); } + /** + * Returns the {@code media} parameter value of this link, if present. + *

+ * The {@code media} target attribute describes the intended media/device of the target resource + * as defined in RFC 8288 ("The media Target Attribute"). If multiple {@code media} parameters are + * present, this method returns the first one encountered. + * + * @return the first {@code media} parameter value, or {@link Optional#empty()} if absent + */ public Optional media() { - return Optional.empty(); + return findFirstWithFilter(params, WebLink::isMediaParameter) + .map(WebLinkParameter::value); } /** - * Returns all "rel" parameter values of the link. - *

- * RFC 8288 section 3.3 states, that the relation parameter MUST NOT appear more than once in a - * given link-value, but one "rel" parameter value can contain multiple relation-types when - * separated by one or more space characters (SP = ASCII 0x20): + * Returns all relation types conveyed by the {@code rel} parameter(s). *

- * {@code relation-type *( 1*SP relation-type ) }. - *

- * The method returns space-separated values as individual values of the "rel" parameter. + * In RFC 8288, the relation type of a link is conveyed via the {@code rel} parameter. + * The value of {@code rel} is a list of relation types separated by one or more spaces (SP): + *

{@code
+   * relation-type *( 1*SP relation-type )
+   * }
+ * This method collects every {@code rel} parameter in {@link #params()}, splits each value on + * one or more whitespace characters, and returns the flattened result in encounter order. * - * @return a list of relation parameter values + * @return a list of relation types derived from {@code rel} values, or an empty list if absent */ public List rel() { - return this.params.stream() - .filter(param -> param.name().equals("rel")) + return findAllWithFilter(params, WebLink::isRelParameter) .map(WebLinkParameter::value) - .map(value -> value.split("\\s+")) + .map(WebLink::splitByWhitespace) .flatMap(Arrays::stream) .toList(); } /** - * Returns all "rev" parameter values of the link. - *

- * RFC 8288 section 3.3 does not specify the multiplicity of occurrence. But given the close - * relation to the "rel" parameter and its definition in the same section, web link will treat the - * "rev" parameter equally. + * Returns all reverse relation types conveyed by the {@code rev} parameter(s). *

- * As with the "rel" parameter, multiple regular relation types are allowed when they are - * separated by one or more space characters (SP = ASCII 0x20): - *

- * {@code relation-type *( 1*SP relation-type ) }. - *

- * The method returns space-separated values as individual values of the "rel" parameter. + * The {@code rev} parameter is defined in RFC 8288 in relation to {@code rel} and conveys reverse + * relation types. This method mirrors the {@link #rel()} behavior: + * it splits each {@code rev} parameter value by one or more whitespace characters and returns the + * flattened result. * - * @return a list of relation parameter values + * @return a list of reverse relation types derived from {@code rev} values, or an empty list if absent */ public List rev() { return this.params.stream() .filter(param -> param.name().equals("rev")) .map(WebLinkParameter::value) - .map(value -> value.split("\\s+")) + .map(WebLink::splitByWhitespace) .flatMap(Arrays::stream) .toList(); } - + /** + * Returns the {@code title} parameter value of this link, if present. + *

+ * The {@code title} target attribute provides a human-readable label for the link target as + * defined in RFC 8288 ("The title Target Attribute"). If multiple {@code title} parameters are + * present, this method returns the first one encountered. + * + * @return the first {@code title} value, or {@link Optional#empty()} if absent + */ public Optional title() { - return Optional.empty(); + return findFirstWithFilter(params, WebLink::isTitleParameter) + .map(WebLinkParameter::value); } - public Optional titleMultiple() { - return Optional.empty(); + /** + * Returns the {@code title*} parameter value of this link, if present. + *

+ * The {@code title*} target attribute is the extended form of {@code title} and allows character + * set and language encoding as referenced by RFC 8288 (via RFC 5987). + * If multiple {@code title*} parameters are present, this method returns the first one encountered. + *

+ * Note: this method returns the raw serialized value as found in {@link #params()} without + * decoding. + * + * @return the first {@code title*} value, or {@link Optional#empty()} if absent + */ + public Optional titleEncodings() { + return findFirstWithFilter(params, WebLink::isTitleEncodingsParameter) + .map(WebLinkParameter::value); } /** - * Returns the MIME type of the current link, if one is available. + * Returns the {@code type} parameter value of this link, if present. + *

+ * The {@code type} target attribute indicates the media type (MIME type) of the link target as + * defined in RFC 8288 ("The type Target Attribute"). If multiple {@code type} parameters are + * present, this method returns the first one encountered. * - * @return the MIME type of the link, or empty if none was provided + * @return the first {@code type} value, or {@link Optional#empty()} if absent */ public Optional type() { return this.params.stream() - .filter(WebLink::hasTypeParameter) + .filter(WebLink::isTypeParameter) .findFirst() .map(WebLinkParameter::value); } /** - * Checks, if a web link parameter is with name {@code type}. + * Determines whether a parameter represents {@code anchor}. * - * @param param the web link parameter to validate - * @return true, if the parameter name is {@code type}, else returns false + * @param param the parameter to test + * @return {@code true} if {@code param.name()} equals {@code "anchor"} */ - private static boolean hasTypeParameter(WebLinkParameter param) { + private static boolean isAnchorParameter(WebLinkParameter param) { + return param.name().equals("anchor"); + } + + /** + * Determines whether a parameter represents {@code hreflang}. + * + * @param param the parameter to test + * @return {@code true} if {@code param.name()} equals {@code "hreflang"} + */ + private static boolean isHreflangParameter(WebLinkParameter param) { + return param.name().equals("hreflang"); + } + + /** + * Determines whether a parameter represents {@code media}. + * + * @param param the parameter to test + * @return {@code true} if {@code param.name()} equals {@code "media"} + */ + private static boolean isMediaParameter(WebLinkParameter param) { + return param.name().equals("media"); + } + + /** + * Determines whether a parameter represents {@code rel}. 
+ * + * @param param the parameter to test + * @return {@code true} if {@code param.name()} equals {@code "rel"} + */ + private static boolean isRelParameter(WebLinkParameter param) { + return param.name().equals("rel"); + } + + /** + * Determines whether a parameter represents {@code title}. + * + * @param param the parameter to test + * @return {@code true} if {@code param.name()} equals {@code "title"} + */ + private static boolean isTitleParameter(WebLinkParameter param) { + return param.name().equals("title"); + } + + /** + * Determines whether a parameter represents {@code title*}. + * + * @param param the parameter to test + * @return {@code true} if {@code param.name()} equals {@code "title*"} + */ + private static boolean isTitleEncodingsParameter(WebLinkParameter param) { + return param.name().equals("title*"); + } + + /** + * Determines whether a parameter represents {@code type}. + * + * @param param the parameter to test + * @return {@code true} if {@code param.name()} equals {@code "type"} + */ + private static boolean isTypeParameter(WebLinkParameter param) { return param.name().equals("type"); } + /** + * Splits a serialized parameter value into parts using one or more whitespace characters. + *

+ * This helper is used for parameters whose syntax is defined as a whitespace-separated list + * (notably {@code rel} and {@code rev}). Leading and trailing whitespace is trimmed prior to + * splitting. + * + * @param value the serialized value to split + * @return an array of parts (never {@code null}) + */ + private static String[] splitByWhitespace(String value) { + return value.trim().split("\\s+"); + } + + /** + * Finds the first parameter in the given list that matches the provided predicate. + * + * @param params the parameter list to search + * @param filter predicate selecting the desired parameter(s) + * @return the first matching parameter, or {@link Optional#empty()} if none match + */ + private static Optional findFirstWithFilter( + List params, + Predicate filter) { + return params.stream() + .filter(filter) + .findFirst(); + } + + /** + * Returns a stream over all parameters in the given list that match the provided predicate. + * + * @param params the parameter list to search + * @param filter predicate selecting the desired parameter(s) + * @return a stream of all matching parameters (possibly empty) + */ + private static Stream findAllWithFilter( + List params, + Predicate filter + ) { + return params.stream() + .filter(filter); + } + + /** + * Returns a map of all extension attributes (parameters not defined by RFC 8288) grouped by + * parameter name. + *

+ * The set of RFC-defined parameter names is derived from {@link RfcLinkParameter}. All parameters + * whose {@link WebLinkParameter#name()} is not in that set are considered extension attributes. + *

+ * The returned map groups values by name and preserves the encounter order of values within each + * list. + * + * @return a map of extension attribute names to lists of their values (possibly empty) + */ public Map> extensionAttributes() { Set rfcParameterNames = Arrays.stream(RfcLinkParameter.values()) .map(RfcLinkParameter::rfcValue) @@ -151,6 +354,15 @@ public Map> extensionAttributes() { Collectors.mapping(WebLinkParameter::value, Collectors.toList()))); } + /** + * Returns all values for a specific extension attribute name. + *

+ * This is a convenience method on top of {@link #extensionAttributes()} and returns an empty list + * if the attribute is not present. + * + * @param name the extension attribute name + * @return a list of values associated with {@code name}, or an empty list if absent + */ public List extensionAttribute(String name) { return extensionAttributes().getOrDefault(name, List.of()); } diff --git a/src/test/groovy/life/qbic/linksmith/model/WebLinkSpec.groovy b/src/test/groovy/life/qbic/linksmith/model/WebLinkSpec.groovy index acfb321..8dcb6b3 100644 --- a/src/test/groovy/life/qbic/linksmith/model/WebLinkSpec.groovy +++ b/src/test/groovy/life/qbic/linksmith/model/WebLinkSpec.groovy @@ -1,6 +1,8 @@ package life.qbic.linksmith.model + import spock.lang.Specification +import spock.lang.Unroll class WebLinkSpec extends Specification { @@ -17,12 +19,247 @@ class WebLinkSpec extends Specification { } + // -------------------------------------------------------------------------- + // rel(): view behavior + // - extracts ALL rel parameter occurrences (no RFC multiplicity enforcement here) + // - splits by whitespace (\\s+) as documented in the model + // -------------------------------------------------------------------------- + + def "rel: returns empty list when no rel parameter is present"() { + given: + def link = weblink(uri("https://example.org/res"), List.of(parameter("type", "application/json"))) + + expect: + link.rel() == [] + } + + def "rel: splits a single rel value into multiple relation-types by whitespace"() { + given: + def link = weblink(uri("https://example.org/res"), List.of( + parameter("rel", "self describedby item") + )) + + expect: + link.rel() == ["self", "describedby", "item"] + } + + def "rel: flattens multiple rel parameters (view does not ignore later occurrences)"() { + given: + def link = weblink(uri("https://example.org/res"), List.of( + parameter("rel", "self"), + parameter("rel", "describedby item") + )) + + expect: + link.rel() == ["self", 
"describedby", "item"] + } + + @Unroll + def "rel: treats any whitespace as separator because implementation uses \\\\s+ (#value)"() { + given: + def link = weblink(uri("https://example.org/res"), List.of(parameter("rel", value))) + + expect: + link.rel() == expected + + where: + value || expected + "self item" || ["self", "item"] + "self\titem" || ["self", "item"] + "self\nitem" || ["self", "item"] + " self item " || ["self", "item"] + } + + // -------------------------------------------------------------------------- + // rev(): view behavior (same splitting strategy as rel) + // -------------------------------------------------------------------------- + + def "rev: returns empty list when no rev parameter is present"() { + given: + def link = weblink(uri("https://example.org/res"), List.of(parameter("rel", "self"))) + + expect: + link.rev() == [] + } + + def "rev: splits a single rev value by whitespace"() { + given: + def link = weblink(uri("https://example.org/res"), List.of(parameter("rev", "a b c"))) + + expect: + link.rev() == ["a", "b", "c"] + } + + def "rev: flattens multiple rev parameters"() { + given: + def link = weblink(uri("https://example.org/res"), List.of( + parameter("rev", "a"), + parameter("rev", "b c") + )) + + expect: + link.rev() == ["a", "b", "c"] + } + + // -------------------------------------------------------------------------- + // type(): view behavior + // - returns first matching 'type' value if present + // - does not validate MIME format here + // -------------------------------------------------------------------------- + + def "type: returns empty when type parameter is absent"() { + given: + def link = weblink(uri("https://example.org/res"), List.of(parameter("rel", "self"))) + + expect: + link.type().isEmpty() + } + + def "type: returns the first type parameter value if present"() { + given: + def link = weblink(uri("https://example.org/res"), List.of( + parameter("type", "application/json"), + parameter("type", "text/html") 
+ )) + + expect: + link.type().get() == "application/json" + } + + def "type: does not validate the media type format (view semantics)"() { + given: + def link = weblink(uri("https://example.org/res"), List.of(parameter("type", "not a mime"))) + + expect: + link.type().get() == "not a mime" + } + + // -------------------------------------------------------------------------- + // extensionAttributes(): view behavior + // - groups parameters that are not in the RFC parameter enum + // - preserves multiplicity and order of values per key (collector keeps encounter order) + // -------------------------------------------------------------------------- + + def "extensionAttributes: returns empty map if only known RFC parameters are present"() { + given: + def link = weblink(uri("https://example.org/target"), List.of( + parameter("rel", "item"), + parameter("type", "application/json"), + parameter("title", "t"), + parameter("anchor", "https://example.org/context") + )) + + expect: + link.extensionAttributes().isEmpty() + } + + def "extensionAttributes: groups unknown parameters by name and retains all values"() { + given: + def link = weblink(uri("https://example.org/target"), List.of( + parameter("profile", "https://example.org/p1"), + parameter("profile", "https://example.org/p2"), + parameter("x-flag", "a"), + parameter("x-flag", "b"), + parameter("rel", "item") + )) + + when: + def ext = link.extensionAttributes() + + then: + ext["profile"] == ["https://example.org/p1", "https://example.org/p2"] + ext["x-flag"] == ["a", "b"] + + and: + link.extensionAttribute("profile") == ["https://example.org/p1", "https://example.org/p2"] + link.extensionAttribute("does-not-exist") == [] + } + + def "extensionAttributes: treats names case-sensitively (no normalization in the view)"() { + given: + def link = weblink(uri("https://example.org/target"), List.of( + parameter("Profile", "X"), + parameter("profile", "Y") + )) + + expect: + link.extensionAttributes().keySet() == 
["Profile", "profile"] as Set + } + // -------------------------------------------------------------------------- + // Simple accessors implemented as views over params(): + // anchor(), hreflang(), media(), title(), titleEncodings() + // + // These tests document the intended view semantics without enforcing RFC rules. + // Single-valued accessors return the first occurrence; hreflang() returns all values. + // -------------------------------------------------------------------------- + + def "anchor: returns the first anchor parameter value if present (view semantics)"() { + given: + def link = weblink(uri("https://example.org/target"), List.of( + parameter("anchor", "https://example.org/context1"), + parameter("anchor", "https://example.org/context2") + )) + + expect: + link.anchor().get() == "https://example.org/context1" + } + + def "hreflang: returns all hreflang parameter values in encounter order (view semantics)"() { + given: + def link = weblink(uri("https://example.org/target"), List.of( + parameter("hreflang", "en"), + parameter("hreflang", "de"), + parameter("hreflang", "fr") + )) + + expect: + link.hreflang() == ["en", "de", "fr"] + } + + def "media: returns the first media parameter value if present (view semantics)"() { + given: + def link = weblink(uri("https://example.org/target"), List.of( + parameter("media", "screen"), + parameter("media", "print") + )) + + expect: + link.media().get() == "screen" + } + + def "title: returns the first title parameter value if present (view semantics)"() { + given: + def link = weblink(uri("https://example.org/target"), List.of( + parameter("title", "First"), + parameter("title", "Second") + )) + + expect: + link.title().get() == "First" + } + + def "titleMultiple: returns the first title* parameter value if present (view semantics)"() { + given: + def link = weblink(uri("https://example.org/target"), List.of( + parameter("title*", "UTF-8''first"), + parameter("title*", "UTF-8''second") + )) + + expect: + 
link.titleEncodings().get() == "UTF-8''first" + } + // ------------------------------------------------------------------------ // Helpers // ------------------------------------------------------------------------ private static WebLink weblink(String uri, List params) { - new WebLink(URI.create(uri), List.copyOf(params)) + weblink(URI.create(uri), List.copyOf(params)) + } + + private static WebLink weblink(URI uri, List params) { + new WebLink(uri, List.copyOf(params)) } private static WebLinkParameter rel(String relValue) { @@ -33,4 +270,10 @@ class WebLinkSpec extends Specification { new WebLinkParameter("type", typeValue) } + private static URI uri(String u) { URI.create(u) } + + private static WebLinkParameter parameter(String name, String value) { + new WebLinkParameter(name, value) + } + }