Skip to content

Commit

Permalink
sepinf-inc#2286: link UFED message to corresponding chat part
Browse files: browse the repository at this point in the history
  • Loading branch information
aberenguel committed Aug 15, 2024
1 parent 04b3cfc commit dcd7e49
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 10 deletions.
6 changes: 1 addition & 5 deletions iped-app/resources/config/conf/ParserConfig.xml
Original file line number Diff line number Diff line change
Expand Up @@ -291,11 +291,6 @@
<param name="extractEntries" type="bool">false</param>
</params>
</parser>
<parser class="iped.parsers.ufed.UFEDChatParser">
<params>
<param name="extractMessages" type="bool">true</param>
</params>
</parser>
<parser class="iped.parsers.telegram.TelegramParser">
<params>
<param name="extractMessages" type="bool">true</param>
Expand Down Expand Up @@ -328,6 +323,7 @@
<parser class="iped.parsers.ufed.UFEDChatParser">
<!--<mime-exclude>application/x-ufed-chat-whatsapp</mime-exclude>-->
<params>
<param name="extractMessages" type="bool">true</param>
<param name="minChatSplitSize" type="int">6000000</param>
</params>
</parser>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -241,11 +241,11 @@ public void parse(InputStream inputStream, ContentHandler handler, Metadata meta
ByteArrayInputStream chatStream = new ByteArrayInputStream(bytes);
extractor.parseEmbedded(chatStream, handler, chatMetadata, false);
bytes = nextBytes;
}
}

if (extractMessages) {
extractMessages(itemMgs, virtualId, handler, extractor);
if (extractMessages) {
extractMessages(itemMgs, subList, virtualId, handler, extractor);
}
}
}

} catch (Exception e) {
Expand All @@ -258,9 +258,17 @@ public void parse(InputStream inputStream, ContentHandler handler, Metadata meta

}

private void extractMessages(List<IItemReader> messages, String virtualId, ContentHandler handler,
private void extractMessages(List<IItemReader> messages, List<UfedMessage> subList, String virtualId, ContentHandler handler,
EmbeddedDocumentExtractor extractor) throws SAXException, IOException {

HashSet<Long> idsInSubList = subList.stream().mapToLong(UfedMessage::getId).boxed().collect(Collectors.toCollection(HashSet::new));

for (IItemReader msg : messages) {

if (!idsInSubList.contains((long) msg.getId())) {
continue;
}

Metadata meta = msg.getMetadata();
meta.set(TikaCoreProperties.TITLE, msg.getName()); // $NON-NLS-1$
meta.set(BasicProps.ID, Integer.toString(msg.getId()));
Expand Down

0 comments on commit dcd7e49

Please sign in to comment.