Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update tagged structure merger #92

Merged
merged 1 commit into from
Oct 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified profile-merger/profile-merger-1.0-SNAPSHOT.jar
Binary file not shown.
14 changes: 8 additions & 6 deletions profile-merger/src/main/java/org/verapdf/ProfileMerger.java
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,15 @@ private static void updatePDFUAProfiles(ZipFile zipSource) {
new File(PDFUA_FOLDER).mkdirs();
generateProfile(zipSource, "PDFUA-1.xml", PDFUA_FOLDER, new String[]{"1"}, new String[]{}, Collections.emptyList());
generateProfile(zipSource, "PDFUA-2.xml", PDFUA_FOLDER, new String[]{"2"}, new String[]{}, Collections.emptyList());
List<RuleId> excludedTaggedRules = new ArrayList<>(6);
List<RuleId> excludedTaggedRules = new ArrayList<>(8);
excludedTaggedRules.add(Profiles.ruleIdFromValues(PDFAFlavour.Specification.ISO_32005, "6.2", 1));
excludedTaggedRules.add(Profiles.ruleIdFromValues(PDFAFlavour.Specification.ISO_32005, "6.2", 1656));
excludedTaggedRules.add(Profiles.ruleIdFromValues(PDFAFlavour.Specification.ISO_32005, "6.2", 1657));
excludedTaggedRules.add(Profiles.ruleIdFromValues(PDFAFlavour.Specification.ISO_32005, "6.2", 1658));
excludedTaggedRules.add(Profiles.ruleIdFromValues(PDFAFlavour.Specification.ISO_32005, "6.2", 1659));
excludedTaggedRules.add(Profiles.ruleIdFromValues(PDFAFlavour.Specification.ISO_32005, "6.2", 1660));
excludedTaggedRules.add(Profiles.ruleIdFromValues(PDFAFlavour.Specification.ISO_32005, "6.2", 2));
excludedTaggedRules.add(Profiles.ruleIdFromValues(PDFAFlavour.Specification.ISO_32005, "6.2", 3));
excludedTaggedRules.add(Profiles.ruleIdFromValues(PDFAFlavour.Specification.ISO_32005, "6.2", 4));
excludedTaggedRules.add(Profiles.ruleIdFromValues(PDFAFlavour.Specification.ISO_32005, "6.2", 5));
excludedTaggedRules.add(Profiles.ruleIdFromValues(PDFAFlavour.Specification.ISO_32005, "6.2", 6));
excludedTaggedRules.add(Profiles.ruleIdFromValues(PDFAFlavour.Specification.ISO_32005, "6.2", 7));
excludedTaggedRules.add(Profiles.ruleIdFromValues(PDFAFlavour.Specification.ISO_32005, "6.2", 8));
generateProfile(zipSource, "PDFUA-2-ISO32005.xml", PDFUA_FOLDER, new String[]{"2"}, new String[]{"ISO-32005-Tagged.xml"}, excludedTaggedRules);
generateProfile(zipSource, "WCAG-2-2.xml", PDFUA_FOLDER, new String[]{"WCAG/2.2"}, new String[]{}, Collections.emptyList());
List<RuleId> excludedWCAGRules = new ArrayList<>(7);
Expand Down
10 changes: 10 additions & 0 deletions tagged-structure-merger/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,16 @@
</dependencies>

<build>

<resources>
<resource>
<directory>src/main/resources</directory>
<includes>
<include>rules.csv</include>
</includes>
</resource>
</resources>

<plugins>

<plugin>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,14 @@ public static void main(final String[] args) throws IOException, JAXBException {
System.exit(0);
}

if (cliArgParser.getCsvPath() != null) {
CliProcessor processor = CliProcessor.createProcessorFromArgs(cliArgParser);
String outputPath = cliArgParser.getOutputPath();
if (outputPath != null) {
try (OutputStream out = new FileOutputStream(new File(outputPath))) {
processor.process(cliArgParser.getCsvPath(), out);
}
} else {
processor.process(cliArgParser.getCsvPath(), System.out);
CliProcessor processor = CliProcessor.createProcessorFromArgs(cliArgParser);
String outputPath = cliArgParser.getOutputPath();
if (outputPath != null) {
try (OutputStream out = new FileOutputStream(new File(outputPath))) {
processor.process(cliArgParser.getCsvPath(), out);
}
} else {
processor.process(cliArgParser.getCsvPath(), System.out);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public class CliArgParser {
@Parameter(names = { PROFILE_CREATOR }, description = "The creator of generated profile")
private String creator = "veraPDF Consortium";

@Parameter(description = "INPUT_CSV", required = true)
@Parameter(description = "INPUT_CSV", required = false)
private List<String> csvPath = null;
/**
* @return true if help requested
Expand Down Expand Up @@ -75,7 +75,7 @@ public String getOutputPath() {
* @return the list of file paths
*/
public String getCsvPath() {
return this.csvPath.get(0);
return this.csvPath != null ? this.csvPath.get(0) : null;
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,21 +32,26 @@ static CliProcessor createProcessorFromArgs(final CliArgParser args) {

void process(String csvIn, OutputStream out) throws FileNotFoundException, JAXBException {
List<ParsedRelationStructure> relations = parseRelations(csvIn);
SortedSet<Rule> rules = new TreeSet<>(new Profiles.RuleComparator());
SortedSet<Variable> variables = new TreeSet<>(Comparator.comparing(Variable::getName));

rules.addAll(ruleCreator.generateRules(relations));


SortedSet<Rule> rules = ruleCreator.generateRules(relations);
ProfileDetails det = Profiles.profileDetailsFromValues(name, description, creator, new Date());
ValidationProfile mergedProfile = Profiles.profileFromSortedValues(ruleCreator.getFlavour(), det, "", rules, variables);
Profiles.profileToXml(mergedProfile, out, true, false);
}

/**
 * Opens a {@link Scanner} over the relations CSV.
 *
 * @param csvIn path to a CSV file on disk, or {@code null} to fall back to
 *              the {@code rules.csv} resource looked up through this
 *              class's class loader
 * @return a scanner positioned at the start of the CSV data; the caller is
 *         responsible for closing it
 * @throws FileNotFoundException if {@code csvIn} does not denote a readable
 *                               file, or if {@code csvIn} is {@code null}
 *                               and the {@code rules.csv} resource cannot
 *                               be found
 */
private static Scanner getScanner(String csvIn) throws FileNotFoundException {
    if (csvIn != null) {
        return new Scanner(new File(csvIn));
    }
    // getResourceAsStream returns null (it does not throw) when the resource
    // is missing; fail with a descriptive error instead of letting the
    // Scanner constructor raise a bare NullPointerException.
    java.io.InputStream bundledCsv = CliProcessor.class.getClassLoader().getResourceAsStream("rules.csv");
    if (bundledCsv == null) {
        throw new FileNotFoundException("Default resource rules.csv was not found on the classpath");
    }
    return new Scanner(bundledCsv);
}

private List<ParsedRelationStructure> parseRelations(String csvIn) throws FileNotFoundException {
List<ParsedRelationStructure> res = new ArrayList<>();
try (Scanner sc = new Scanner(new File(csvIn))) {
try (Scanner sc = getScanner(csvIn)) {
// read and check headers
String[] headers = sc.nextLine().split(",");
String[] headers = sc.nextLine().split(";");
if (!"".equals(headers[0])) {
throw new IllegalArgumentException("Invalid headers format");
}
Expand All @@ -55,7 +60,7 @@ private List<ParsedRelationStructure> parseRelations(String csvIn) throws FileNo
int childIndex = 0;
while (sc.hasNextLine()) {
++childIndex;
String[] children = sc.nextLine().split(",");
String[] children = sc.nextLine().split(";");
// Check that this row is well-formed; TODO: probably also verify that the first element is contained in the set of standard types
if (children.length != headers.length || "".equals(children[0])) {
continue;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,37 +58,42 @@ public PDFAFlavour getFlavour() {
return this.flavour;
}

public List<Rule> generateRules(List<ParsedRelationStructure> relations) {
List<Rule> res = new ArrayList<>(relations.size());
public SortedSet<Rule> generateRules(List<ParsedRelationStructure> relations) {
SortedSet<Rule> res = new TreeSet<>(new Profiles.RuleComparator());
int testNumber = 0;

// standard structure type requirement
List<Reference> annex_l_reference = Collections.singletonList(Profiles.referenceFromValues(
this.pdfVersion.getIso(), "Annex_L"));

res.add(getRuleAboutNotRemappedNonStandardType(annex_l_reference, ++testNumber));

res.add(getRuleAboutCircularMapping(annex_l_reference, ++testNumber));
res.add(getRuleAboutRemappedStandardType(annex_l_reference, ++testNumber));
res.add(getRuleAboutStructTreeRoot(annex_l_reference, ++testNumber));
res.add(getRuleAboutStructElementParent(annex_l_reference, ++testNumber));
res.add(getRuleAboutMathMLParent(annex_l_reference, ++testNumber));
res.add(getRuleAboutRuby(annex_l_reference, ++testNumber));
res.add(getRuleAboutWarichu(annex_l_reference, ++testNumber));
for (ParsedRelationStructure relation : relations) {
if (shallProcess(relation)) {
RuleData data = getRuleData(relation);
if (data == null) {
System.err.println("Missing rule for " + relation.getDescriptionString());
continue;
}
RuleId id = Profiles.ruleIdFromValues(PDFAFlavour.Specification.ISO_32005, "6.2", ++testNumber);
RuleId id = Profiles.ruleIdFromValues(PDFAFlavour.Specification.ISO_32005,
getClause(relation), 1);
ErrorDetails error = Profiles.errorFromValues(data.errorMessage, Collections.emptyList());
res.add(Profiles.ruleFromValues(id, data.object, null, StructureTag.getTags(relation), data.description,
data.test, error, annex_l_reference));
}
}
res.add(getRuleAboutCircularMapping(annex_l_reference, ++testNumber));
res.add(getRuleAboutRemappedStandardType(annex_l_reference, ++testNumber));
res.add(getRuleAboutStructTreeRoot(annex_l_reference, ++testNumber));
res.add(getRuleAboutStructElementParent(annex_l_reference, ++testNumber));
res.add(getRuleAboutMathMLParent(annex_l_reference, ++testNumber));
return res;
}

/**
 * Builds the clause string used in the rule id of a parent/child relation.
 * The result has the form {@code <parent>-<child>}, where a child equal to
 * {@code CONTENT_ITEM} is written as {@code content}.
 *
 * @param relation the relation whose parent and child are combined
 * @return the clause string for the relation
 */
private static String getClause(ParsedRelationStructure relation) {
    String parentPrefix = relation.getParent() + "-";
    if (CONTENT_ITEM.equals(relation.getChild())) {
        return parentPrefix + "content";
    }
    return parentPrefix + relation.getChild();
}

private Rule getRuleAboutNotRemappedNonStandardType(List<Reference> annex_l_reference, int testNumber) {
return Profiles.ruleFromValues(
Profiles.ruleIdFromValues(PDFAFlavour.Specification.ISO_32005, "6.2", testNumber),
Expand Down Expand Up @@ -153,6 +158,32 @@ private Rule getRuleAboutMathMLParent(List<Reference> annex_l_reference, int tes
annex_l_reference);
}

/**
 * Creates the ISO 32005 clause 6.2 rule that checks the children of a Ruby
 * structure element (object type {@code SERuby}).
 *
 * @param annex_l_reference references attached to the generated rule
 * @param testNumber        test number used in the rule id
 * @return the rule describing the allowed Ruby child sequences
 */
private Rule getRuleAboutRuby(List<Reference> annex_l_reference, int testNumber) {
    RuleId rubyRuleId = Profiles.ruleIdFromValues(PDFAFlavour.Specification.ISO_32005, "6.2", testNumber);
    // The single error argument lists the offending children of the element.
    ErrorDetails rubyError = Profiles.errorFromValues(
            "The Ruby structure element has invalid sequence of children: %1",
            Collections.singletonList(
                    ErrorArgumentImpl.fromValues("kidsStandardTypes.replaceAll('&amp;', ',')", null, null)));
    return Profiles.ruleFromValues(
            rubyRuleId,
            "SERuby",
            null,
            StructureTag.STRUCTURE_TAG.getTag(),
            "A Ruby structure element shall contain a single RB structure element and a single RT structure element or a Ruby structure element shall consist of a four-element subsequence: RB, RP, RT, RP",
            "kidsStandardTypes == 'RB&amp;RT' || kidsStandardTypes == 'RB&amp;RP&amp;RT&amp;RP'",
            rubyError,
            annex_l_reference);
}

/**
 * Creates the ISO 32005 clause 6.2 rule that checks the children of a
 * Warichu structure element (object type {@code SEWarichu}).
 *
 * @param annex_l_reference references attached to the generated rule
 * @param testNumber        test number used in the rule id
 * @return the rule requiring the WP, WT, WP child sequence
 */
private Rule getRuleAboutWarichu(List<Reference> annex_l_reference, int testNumber) {
    RuleId warichuRuleId = Profiles.ruleIdFromValues(PDFAFlavour.Specification.ISO_32005, "6.2", testNumber);
    // The single error argument lists the offending children of the element.
    ErrorDetails warichuError = Profiles.errorFromValues(
            "The Warichu structure element has invalid sequence of children: %1",
            Collections.singletonList(
                    ErrorArgumentImpl.fromValues("kidsStandardTypes.replaceAll('&amp;', ',')", null, null)));
    return Profiles.ruleFromValues(
            warichuRuleId,
            "SEWarichu",
            null,
            StructureTag.STRUCTURE_TAG.getTag(),
            "Content typeset as warichu shall be tagged in a three-element sequence consisting of the structure elements WP, WT and WP, grouped inside a Warichu structure element",
            "kidsStandardTypes == 'WP&amp;WT&amp;WP'",
            warichuError,
            annex_l_reference);
}

private Rule getRuleAboutStructTreeRoot(List<Reference> annex_l_reference, int testNumber) {
return Profiles.ruleFromValues(
Profiles.ruleIdFromValues(PDFAFlavour.Specification.ISO_32005, "6.2", testNumber),
Expand Down
Loading