Skip to content

Commit

Permalink
ICU-22908 MF2: Update spec tests and update implementation for recent…
Browse files Browse the repository at this point in the history
… spec changes

Updating the spec tests requires two implementation changes:
* Match on variables rather than expressions --
  see unicode-org/message-format-wg#877
* Require attribute values to be literals (if present) --
  see unicode-org/message-format-wg#894
  • Loading branch information
catamorphism committed Nov 1, 2024
1 parent 7459872 commit ecb67cf
Show file tree
Hide file tree
Showing 29 changed files with 505 additions and 480 deletions.
2 changes: 1 addition & 1 deletion icu4c/source/i18n/messageformat2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,7 @@ void MessageFormatter::resolveSelectors(MessageContext& context, Environment& en
CHECK_ERROR(status);
U_ASSERT(!dataModel.hasPattern());

const Expression* selectors = dataModel.getSelectorsInternal();
const VariableName* selectors = dataModel.getSelectorsInternal();
// 1. Let res be a new empty list of resolved values that support selection.
// (Implicit, since `res` is an out-parameter)
// 2. For each expression exp of the message's selectors
Expand Down
14 changes: 5 additions & 9 deletions icu4c/source/i18n/messageformat2_checker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,18 +205,14 @@ void Checker::checkVariants(UErrorCode& status) {
}
}

void Checker::requireAnnotated(const TypeEnvironment& t, const Expression& selectorExpr, UErrorCode& status) {
void Checker::requireAnnotated(const TypeEnvironment& t,
const VariableName& selectorVar,
UErrorCode& status) {
CHECK_ERROR(status);

if (selectorExpr.isFunctionCall()) {
if (t.get(selectorVar) == TypeEnvironment::Type::Annotated) {
return; // No error
}
const Operand& rand = selectorExpr.getOperand();
if (rand.isVariable()) {
if (t.get(rand.asVariable()) == TypeEnvironment::Type::Annotated) {
return; // No error
}
}
// If this code is reached, an error was detected
errors.addError(StaticErrorType::MissingSelectorAnnotation, status);
}
Expand All @@ -226,7 +222,7 @@ void Checker::checkSelectors(const TypeEnvironment& t, UErrorCode& status) {

// Check each selector; if it's not annotated, emit a
// "missing selector annotation" error
const Expression* selectors = dataModel.getSelectorsInternal();
const VariableName* selectors = dataModel.getSelectorsInternal();
for (int32_t i = 0; i < dataModel.numSelectors(); i++) {
requireAnnotated(t, selectors[i], status);
}
Expand Down
2 changes: 1 addition & 1 deletion icu4c/source/i18n/messageformat2_checker.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ namespace message2 {
Checker(const MFDataModel& m, StaticErrors& e) : dataModel(m), errors(e) {}
private:

void requireAnnotated(const TypeEnvironment&, const Expression&, UErrorCode&);
void requireAnnotated(const TypeEnvironment&, const VariableName&, UErrorCode&);
void addFreeVars(TypeEnvironment& t, const Operand&, UErrorCode&);
void addFreeVars(TypeEnvironment& t, const Operator&, UErrorCode&);
void addFreeVars(TypeEnvironment& t, const OptionMap&, UErrorCode&);
Expand Down
26 changes: 13 additions & 13 deletions icu4c/source/i18n/messageformat2_data_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -691,9 +691,9 @@ Matcher::Matcher(const Matcher& other) {
numSelectors = other.numSelectors;
numVariants = other.numVariants;
UErrorCode localErrorCode = U_ZERO_ERROR;
selectors.adoptInstead(copyArray<Expression>(other.selectors.getAlias(),
numSelectors,
localErrorCode));
selectors.adoptInstead(copyArray<VariableName>(other.selectors.getAlias(),
numSelectors,
localErrorCode));
variants.adoptInstead(copyArray<Variant>(other.variants.getAlias(),
numVariants,
localErrorCode));
Expand All @@ -702,7 +702,7 @@ Matcher::Matcher(const Matcher& other) {
}
}

Matcher::Matcher(Expression* ss, int32_t ns, Variant* vs, int32_t nv)
Matcher::Matcher(VariableName* ss, int32_t ns, Variant* vs, int32_t nv)
: selectors(ss), numSelectors(ns), variants(vs), numVariants(nv) {}

Matcher::~Matcher() {}
Expand All @@ -724,7 +724,7 @@ const Binding* MFDataModel::getLocalVariablesInternal() const {
return bindings.getAlias();
}

const Expression* MFDataModel::getSelectorsInternal() const {
const VariableName* MFDataModel::getSelectorsInternal() const {
U_ASSERT(!bogus);
U_ASSERT(!hasPattern());
return std::get_if<Matcher>(&body)->selectors.getAlias();
Expand Down Expand Up @@ -786,15 +786,13 @@ MFDataModel::Builder& MFDataModel::Builder::addBinding(Binding&& b, UErrorCode&
return *this;
}

/*
selector must be non-null
*/
MFDataModel::Builder& MFDataModel::Builder::addSelector(Expression&& selector, UErrorCode& status) noexcept {
MFDataModel::Builder& MFDataModel::Builder::addSelector(VariableName&& selector,
UErrorCode& status) {
THIS_ON_ERROR(status);

buildSelectorsMessage(status);
U_ASSERT(selectors != nullptr);
selectors->adoptElement(create<Expression>(std::move(selector), status), status);
selectors->adoptElement(create<VariableName>(std::move(selector), status), status);

return *this;
}
Expand Down Expand Up @@ -830,11 +828,11 @@ MFDataModel::MFDataModel(const MFDataModel& other) : body(Pattern()) {
if (other.hasPattern()) {
body = *std::get_if<Pattern>(&other.body);
} else {
const Expression* otherSelectors = other.getSelectorsInternal();
const VariableName* otherSelectors = other.getSelectorsInternal();
const Variant* otherVariants = other.getVariantsInternal();
int32_t numSelectors = other.numSelectors();
int32_t numVariants = other.numVariants();
Expression* copiedSelectors = copyArray(otherSelectors, numSelectors, localErrorCode);
VariableName* copiedSelectors = copyArray(otherSelectors, numSelectors, localErrorCode);
Variant* copiedVariants = copyArray(otherVariants, numVariants, localErrorCode);
if (U_FAILURE(localErrorCode)) {
bogus = true;
Expand Down Expand Up @@ -863,7 +861,9 @@ MFDataModel::MFDataModel(const MFDataModel::Builder& builder, UErrorCode& errorC
int32_t numVariants = builder.variants->size();
int32_t numSelectors = builder.selectors->size();
LocalArray<Variant> variants(copyVectorToArray<Variant>(*builder.variants, errorCode), errorCode);
LocalArray<Expression> selectors(copyVectorToArray<Expression>(*builder.selectors, errorCode), errorCode);
LocalArray<VariableName> selectors(copyVectorToArray<VariableName>(*builder.selectors,
errorCode),
errorCode);
if (U_FAILURE(errorCode)) {
bogus = true;
return;
Expand Down
95 changes: 49 additions & 46 deletions icu4c/source/i18n/messageformat2_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -510,21 +510,13 @@ VariableName Parser::parseVariableName(UErrorCode& errorCode) {
VariableName result;

U_ASSERT(inBounds());
// If the '$' is missing, we don't want a binding
// for this variable to be created.
bool valid = peek() == DOLLAR;

parseToken(DOLLAR, errorCode);
if (!inBounds()) {
ERROR(errorCode);
return result;
}
UnicodeString varName = parseName(errorCode);
// Set the name to "" if the variable wasn't
// declared correctly
if (!valid) {
varName.remove();
}
return VariableName(varName);
return VariableName(parseName(errorCode));
}

/*
Expand Down Expand Up @@ -861,27 +853,17 @@ void Parser::parseAttribute(AttributeAdder<T>& attrAdder, UErrorCode& errorCode)
parseTokenWithWhitespace(EQUALS, errorCode);

UnicodeString rhsStr;
// Parse RHS, which is either a literal or variable
switch (peek()) {
case DOLLAR: {
rand = Operand(parseVariableName(errorCode));
break;
}
default: {
// Must be a literal
rand = Operand(parseLiteral(errorCode));
break;
}
}
U_ASSERT(!rand.isNull());
// Parse RHS, which must be a literal
// attribute = "@" identifier [o "=" o literal]
rand = Operand(parseLiteral(errorCode));
} else {
// attribute -> "@" identifier [[s] "=" [s]]
// Use null operand, which `rand` is already set to
// "Backtrack" by restoring the whitespace (if there was any)
index = savedIndex;
}

attrAdder.addAttribute(lhs, std::move(rand), errorCode);
attrAdder.addAttribute(lhs, std::move(Operand(rand)), errorCode);
}

/*
Expand Down Expand Up @@ -1720,6 +1702,22 @@ Pattern Parser::parseSimpleMessage(UErrorCode& status) {
return result.build(status);
}

void Parser::parseVariant(UErrorCode& status) {
CHECK_ERROR(status);

// At least one key is required
SelectorKeys keyList(parseNonEmptyKeys(status));

// parseNonEmptyKeys() consumes any trailing whitespace,
// so the pattern can be consumed next.

// Restore precondition before calling parsePattern()
// (which must return a non-null value)
CHECK_BOUNDS(status);
Pattern rhs = parseQuotedPattern(status);

dataModel.addVariant(std::move(keyList), std::move(rhs), status);
}

/*
Consume a `selectors` (matching the nonterminal in the grammar),
Expand All @@ -1739,22 +1737,25 @@ void Parser::parseSelectors(UErrorCode& status) {
// Parse selectors
// "Backtracking" is required here. It's not clear if whitespace is
// (`[s]` selector) or (`[s]` variant)
while (isWhitespace(peek()) || peek() == LEFT_CURLY_BRACE) {
parseOptionalWhitespace(status);
while (isWhitespace(peek()) || peek() == DOLLAR) {
int32_t whitespaceStart = index;
parseRequiredWhitespace(status);
// Restore precondition
CHECK_BOUNDS(status);
if (peek() != LEFT_CURLY_BRACE) {
if (peek() != DOLLAR) {
// This is not necessarily an error, but rather,
// means the whitespace we parsed was the optional
// whitespace preceding the first variant, not the
// optional whitespace preceding a subsequent expression.
// required whitespace preceding a subsequent variable.
// In that case, "push back" the whitespace.
normalizedInput.truncate(normalizedInput.length() - 1);
index = whitespaceStart;
break;
}
Expression expression;
expression = parseExpression(status);
VariableName var = parseVariableName(status);
empty = false;

dataModel.addSelector(std::move(expression), status);
dataModel.addSelector(std::move(var), status);
CHECK_ERROR(status);
}

Expand All @@ -1770,27 +1771,29 @@ void Parser::parseSelectors(UErrorCode& status) {
} \

// Parse variants
// matcher = match-statement s variant *(o variant)

// Parse first variant
parseRequiredWhitespace(status);
if (!inBounds()) {
ERROR(status);
return;
}
parseVariant(status);
if (!inBounds()) {
// Not an error; there might be only one variant
return;
}

while (isWhitespace(peek()) || isKeyStart(peek())) {
// Trailing whitespace is allowed
parseOptionalWhitespace(status);
// Restore the precondition.
// Trailing whitespace is allowed.
if (!inBounds()) {
return;
}

// At least one key is required
SelectorKeys keyList(parseNonEmptyKeys(status));

CHECK_ERROR(status);

// parseNonEmptyKeys() consumes any trailing whitespace,
// so the pattern can be consumed next.

// Restore precondition before calling parsePattern()
// (which must return a non-null value)
CHECK_BOUNDS(status);
Pattern rhs = parseQuotedPattern(status);

dataModel.addVariant(std::move(keyList), std::move(rhs), status);
parseVariant(status);

// Restore the precondition, *without* erroring out if we've
// reached the end of input. That's because it's valid for the
Expand Down
3 changes: 2 additions & 1 deletion icu4c/source/i18n/messageformat2_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@ namespace message2 {
void parseUnsupportedStatement(UErrorCode&);
void parseLocalDeclaration(UErrorCode&);
void parseInputDeclaration(UErrorCode&);
void parseSelectors(UErrorCode&);
void parseSelectors(UErrorCode&);
void parseVariant(UErrorCode&);

void parseWhitespaceMaybeRequired(bool, UErrorCode&);
void parseRequiredWhitespace(UErrorCode&);
Expand Down
6 changes: 4 additions & 2 deletions icu4c/source/i18n/messageformat2_serializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,18 +244,20 @@ void Serializer::serializeDeclarations() {

void Serializer::serializeSelectors() {
U_ASSERT(!dataModel.hasPattern());
const Expression* selectors = dataModel.getSelectorsInternal();
const VariableName* selectors = dataModel.getSelectorsInternal();

emit(ID_MATCH);
for (int32_t i = 0; i < dataModel.numSelectors(); i++) {
// No whitespace needed here -- see `selectors` in the grammar
whitespace();
emit(DOLLAR);
emit(selectors[i]);
}
}

void Serializer::serializeVariants() {
U_ASSERT(!dataModel.hasPattern());
const Variant* variants = dataModel.getVariantsInternal();
whitespace();
for (int32_t i = 0; i < dataModel.numVariants(); i++) {
const Variant& v = variants[i];
emit(v.getKeys());
Expand Down
18 changes: 9 additions & 9 deletions icu4c/source/i18n/unicode/messageformat2_data_model.h
Original file line number Diff line number Diff line change
Expand Up @@ -2211,16 +2211,16 @@ namespace message2 {

friend class MFDataModel;

Matcher(Expression* ss, int32_t ns, Variant* vs, int32_t nv);
Matcher(VariableName* ss, int32_t ns, Variant* vs, int32_t nv);
Matcher() {}

// A Matcher may have numSelectors=0 and numVariants=0
// (this is a data model error, but it's representable).
// So we have to keep a separate flag to track failed copies.
bool bogus = false;

// The expressions that are being matched on.
LocalArray<Expression> selectors;
// The variables that are being matched on.
LocalArray<VariableName> selectors;
// The number of selectors
int32_t numSelectors = 0;
// The list of `when` clauses (case arms).
Expand Down Expand Up @@ -2328,13 +2328,13 @@ namespace message2 {
* @internal ICU 75 technology preview
* @deprecated This API is for technology preview only.
*/
const std::vector<Expression> getSelectors() const {
const std::vector<VariableName> getSelectors() const {
if (std::holds_alternative<Pattern>(body)) {
return {};
}
const Matcher* match = std::get_if<Matcher>(&body);
// match must be non-null, given the previous check
return toStdVector<Expression>(match->selectors.getAlias(), match->numSelectors);
return toStdVector<VariableName>(match->selectors.getAlias(), match->numSelectors);
}
/**
* Accesses the variants. Returns an empty vector if this is a pattern message.
Expand Down Expand Up @@ -2462,17 +2462,17 @@ namespace message2 {
*/
Builder& addBinding(Binding&& b, UErrorCode& status);
/**
* Adds a selector expression. Copies `expression`.
* Adds a selector variable.
* If a pattern was previously set, clears the pattern.
*
* @param selector Expression to add as a selector. Passed by move.
* @param selector Variable to add as a selector. Passed by move.
* @param errorCode Input/output error code
* @return A reference to the builder.
*
* @internal ICU 75 technology preview
* @deprecated This API is for technology preview only.
*/
Builder& addSelector(Expression&& selector, UErrorCode& errorCode) noexcept;
Builder& addSelector(VariableName&& selector, UErrorCode& errorCode);
/**
* Adds a single variant.
* If a pattern was previously set using `setPattern()`, clears the pattern.
Expand Down Expand Up @@ -2564,7 +2564,7 @@ namespace message2 {
int32_t bindingsLen = 0;

const Binding* getLocalVariablesInternal() const;
const Expression* getSelectorsInternal() const;
const VariableName* getSelectorsInternal() const;
const Variant* getVariantsInternal() const;

int32_t numSelectors() const {
Expand Down
Loading

0 comments on commit ecb67cf

Please sign in to comment.