diff --git a/CHANGELOG.md b/CHANGELOG.md index 6132178e..dc586ac9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ **Version 4.2** - Fixed a bug in the dictionary validation that would not flag as invalid a dictionary with a missing URI attribute. + - Added support in NaaccrXmlUtils for new lineToPatient and patientToLine methods. **Version 4.1** diff --git a/README.md b/README.md index 397c7373..6a5f09c0 100644 --- a/README.md +++ b/README.md @@ -74,14 +74,18 @@ A few higher-level utility methods have been defined in the [NaaccrXmlUtils](htt *Translation methods* * void ***flatToXml*** (File flatFile, File xmlFile, ...) * void ***xmlToFlat*** (File xmlFile, File flatFile, ...) +* Patient ***lineToPatient*** (String line, NaaccrContext context) +* String ***patientToLine*** (Patient patient, NaaccrContext context) There are other utility methods, but those are the main ones. -All those methods accept the following optional parameters (optional in the sense that null can be passed to the method): +All the file-related methods accept the following optional parameters (optional in the sense that null can be passed to the method): * [NaaccrXmlOptions](https://github.com/imsweb/naaccr-xml/blob/master/src/main/java/com/imsweb/naaccrxml/NaaccrXmlOptions.java) - options for customizing the read/write and errors reporting operations * [NaaccrDictionary](https://github.com/imsweb/naaccr-xml/blob/master/src/main/java/com/imsweb/naaccrxml/entity/dictionary/NaaccrDictionary.java) - one or several user-defined dictionary (if none is provided, the default user-defined dictionary will be used) * [NaaccrObserver](https://github.com/imsweb/naaccr-xml/blob/master/src/main/java/com/imsweb/naaccrxml/NaaccrObserver.java) - an observer allowing to report progress as the files are being processed. 
+The methods translating a single line or a single patient take a context as a parameter; it is very important to initialize that context outside a loop if the methods are called in a loop. + ### Using the Graphical User Interface (Standalone) The library contains an standalone GUI that wraps some of the utility methods and provides a more user-friendly environment for processing files. diff --git a/src/main/java/com/imsweb/naaccrxml/NaaccrXmlUtils.java b/src/main/java/com/imsweb/naaccrxml/NaaccrXmlUtils.java index b369de0b..537a8b66 100644 --- a/src/main/java/com/imsweb/naaccrxml/NaaccrXmlUtils.java +++ b/src/main/java/com/imsweb/naaccrxml/NaaccrXmlUtils.java @@ -253,7 +253,23 @@ public static void writeFlatFile(NaaccrData data, File flatFile, NaaccrOptions o /** * Translates a single line representing a flat file line into a patient object. *

- * TODO explain the context + * Unlike the methods dealing with files, this method takes a context as a parameter. The reason for that difference is that this method uses a stream to convert + * the line, and that stream needs to be re-created every time the method is invoked. Re-building what the stream needs (options, user dictionaries, stream configuration) on every call would be too slow if this + * method were used in a loop (which is the common use-case). Having a shared context that is created once outside the loop avoids that inefficiency. + *

+ * It is very important to not re-create the context when this method is called in a loop: + *

+ * This is NOT correct: + * + * for (String line : lines) + * NaaccrXmlUtils.lineToPatient(line, new NaaccrContext(NaaccrFormat.NAACCR_FORMAT_16_ABSTRACT)); + * + * This is correct: + * + * NaaccrContext context = new NaaccrContext(NaaccrFormat.NAACCR_FORMAT_16_ABSTRACT); + * for (String line : lines) + * NaaccrXmlUtils.lineToPatient(line, context); + * * @param line the line to translate, required * @param context the context to use for the translation, required * @return the corresponding patient, never null @@ -265,7 +281,18 @@ public static Patient lineToPatient(String line, NaaccrContext context) throws N if (context == null) throw new NaaccrIOException("Context is required"); - // TODO inject format if not provided on the line + NaaccrFormat format = NaaccrFormat.getInstance(context.getFormat()); + if (line.length() != format.getLineLength()) + throw new NaaccrIOException("Expected line length to be " + format.getLineLength() + " but was " + line.length()); + + boolean updateType = !format.getRecordType().equals(line.substring(0, 1).trim()); + boolean updateVersion = !format.getNaaccrVersion().equals(line.substring(16, 19).trim()); + if (updateType || updateVersion) { + StringBuilder buf = new StringBuilder(line); + buf.replace(0, 1, format.getRecordType()); + buf.replace(16, 19, format.getNaaccrVersion()); + line = buf.toString(); + } try (PatientFlatReader reader = new PatientFlatReader(new StringReader(line), context.getOptions(), context.getUserDictionaries(), context.getStreamConfiguration())) { return reader.readPatient(); @@ -275,7 +302,23 @@ public static Patient lineToPatient(String line, NaaccrContext context) throws N /** * Translates a single patient into a line representing a flat file line. *

- * TODO explain the context + * Unlike the methods dealing with files, this method takes a context as a parameter. The reason for that difference is that this method uses a stream to convert + * the patient, and that stream needs to be re-created every time the method is invoked. Re-building everything the stream needs on every call would be too slow if this + * method were used in a loop (which is the common use-case). Having a shared context that is created once outside the loop avoids that inefficiency. + *

+ * It is very important to not re-create the context when this method is called in a loop: + *

+ * This is NOT correct: + * + * for (Patient patient : patients) + * NaaccrXmlUtils.patientToLine(patient, new NaaccrContext(NaaccrFormat.NAACCR_FORMAT_16_ABSTRACT)); + * + * This is correct: + * + * NaaccrContext context = new NaaccrContext(NaaccrFormat.NAACCR_FORMAT_16_ABSTRACT); + * for (Patient patient : patients) + * NaaccrXmlUtils.patientToLine(patient, context); + * * @param patient the patient to translate, required * @param context the context to use for the translation, required * @return the corresponding line, never null diff --git a/src/test/java/com/imsweb/naaccrxml/NaaccrXmlUtilsTest.java b/src/test/java/com/imsweb/naaccrxml/NaaccrXmlUtilsTest.java index 050193df..d2f26179 100644 --- a/src/test/java/com/imsweb/naaccrxml/NaaccrXmlUtilsTest.java +++ b/src/test/java/com/imsweb/naaccrxml/NaaccrXmlUtilsTest.java @@ -158,6 +158,56 @@ public void testWriteXmlFile() throws IOException { } } + @Test + public void testLineToPatient() throws IOException { + StringBuilder line = TestingUtils.createEmptyRecord("160", "A", "00000001"); + + NaaccrContext context = new NaaccrContext(NaaccrFormat.NAACCR_FORMAT_16_ABSTRACT); + Patient patient = NaaccrXmlUtils.lineToPatient(line.toString(), context); + Assert.assertEquals("00000001", patient.getItemValue("patientIdNumber")); + + List dictionaries = Collections.singletonList(TestingUtils.createUserDictionary()); + line.replace(2339, 2341, "00"); + context = new NaaccrContext(NaaccrFormat.NAACCR_FORMAT_16_ABSTRACT, dictionaries); + patient = NaaccrXmlUtils.lineToPatient(line.toString(), context); + Assert.assertEquals("00", patient.getTumors().get(0).getItemValue("myVariable")); + + NaaccrOptions options = NaaccrOptions.getDefault(); + options.setItemsToExclude(Collections.singletonList("patientIdNumber")); + context = new NaaccrContext(NaaccrFormat.NAACCR_FORMAT_16_ABSTRACT, dictionaries, options); + patient = NaaccrXmlUtils.lineToPatient(line.toString(), context); + 
Assert.assertNull(patient.getItemValue("patientIdNumber")); + + line.replace(0, 20, " "); + context = new NaaccrContext(NaaccrFormat.NAACCR_FORMAT_16_ABSTRACT); + patient = NaaccrXmlUtils.lineToPatient(line.toString(), context); + Assert.assertEquals("00000001", patient.getItemValue("patientIdNumber")); + } + + @Test + public void testPatientToLine() throws IOException { + Patient patient = new Patient(); + patient.addItem(new Item("patientIdNumber", "00000001")); + + NaaccrContext context = new NaaccrContext(NaaccrFormat.NAACCR_FORMAT_16_ABSTRACT); + String line = NaaccrXmlUtils.patientToLine(patient, context); + Assert.assertEquals("00000001", line.substring(41, 49)); + + List dictionaries = Collections.singletonList(TestingUtils.createUserDictionary()); + Tumor tumor = new Tumor(); + patient.addTumor(tumor); + tumor.addItem(new Item("myVariable", "00")); + context = new NaaccrContext(NaaccrFormat.NAACCR_FORMAT_16_ABSTRACT, dictionaries); + line = NaaccrXmlUtils.patientToLine(patient, context); + Assert.assertEquals("00", line.substring(2339, 2341)); + + NaaccrOptions options = NaaccrOptions.getDefault(); + options.setItemsToExclude(Collections.singletonList("patientIdNumber")); + context = new NaaccrContext(NaaccrFormat.NAACCR_FORMAT_16_ABSTRACT, dictionaries, options); + line = NaaccrXmlUtils.patientToLine(patient, context); + Assert.assertEquals(" ", line.substring(41, 49)); + } + @Test public void testGetFormatFromFlatFile() throws IOException {