Skip to content

Commit

Permalink
sepinf-inc#2286: add support for "Communication:" metadata in WhatsAp…
Browse files Browse the repository at this point in the history
…p parser
  • Loading branch information
aberenguel committed Aug 9, 2024
1 parent a81646c commit 106eb23
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -46,17 +46,20 @@ protected Connection getConnection() throws SQLException {
return DriverManager.getConnection("jdbc:sqlite:" + databaseFile.getAbsolutePath());
}

protected void setGroupMembers(Chat c, Connection conn, String SELECT_GROUP_MEMBERS) throws SQLException {
protected void setGroupMembers(Chat c, Connection conn, String selectGroupMembersQuery) throws SQLException {
// adds all contacts that sent at least one message
for (Message m : c.getMessages()) {
if (m.getRemoteResource() != null)
c.getGroupMembers().add(contacts.getContact(m.getRemoteResource()));
}
if (SELECT_GROUP_MEMBERS == null) {
// add account as member
c.getGroupMembers().add(contacts.getContact(account.getFullId()));

if (selectGroupMembersQuery == null) {
return;
}
// adds all contacts that are currently members of the group
try (PreparedStatement stmt = conn.prepareStatement(SELECT_GROUP_MEMBERS)) {
try (PreparedStatement stmt = conn.prepareStatement(selectGroupMembersQuery)) {
stmt.setString(1, c.getRemote().getFullId());
try (ResultSet rs = stmt.executeQuery()) {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -160,4 +160,9 @@ public int hashCode() {
return this.getFullId().hashCode();
}

/**
 * Builds a short textual representation of this object: the value of
 * {@code getName()} immediately followed by the value of
 * {@code getFullId()} wrapped in parentheses (no separating space).
 *
 * @return the string {@code name(fullId)}
 */
@Override
public String toString() {
    StringBuilder text = new StringBuilder();
    text.append(getName());
    text.append("(");
    text.append(getFullId());
    text.append(")");
    return text.toString();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
import iped.parsers.sqlite.SQLite3DBParser;
import iped.parsers.sqlite.SQLite3Parser;
import iped.parsers.standard.StandardParser;
import iped.parsers.util.CommunicationConstants;
import iped.parsers.util.ItemInfo;
import iped.parsers.util.PhoneParsingConfig;
import iped.parsers.vcard.VCardParser;
Expand Down Expand Up @@ -327,21 +328,34 @@ private void createReport(List<Chat> chatList, IItemSearcher searcher, WAContact
chatMetadata.set(BasicProps.HASCHILD, Boolean.TRUE.toString());
}
if (account != null) {
String local = formatContact(account, cache);
chatMetadata.add(ExtraProperties.PARTICIPANTS, local);
chatMetadata.add(ExtraProperties.COMMUNICATION_ACCOUNT, account.getFullId());
}
if (c.isGroupChat()) {
for (WAContact member : c.getGroupMembers()) {
chatMetadata.add(ExtraProperties.PARTICIPANTS, formatContact(member, cache));
addParticipantFields(chatMetadata, ExtraProperties.PARTICIPANTS, member, cache);
}
for (WAContact admin : c.getGroupAdmins()) {
addParticipantFields(chatMetadata, ExtraProperties.COMMUNICATION_ADMINS, admin, cache);
}
// string formatted as {creator's phone number}-{creation time}@g.us
chatMetadata.add(ExtraProperties.GROUP_ID, c.getRemote().getFullId());
chatMetadata.add(ExtraProperties.COMMUNICATION_TYPE, CommunicationConstants.TYPE_GROUP);
} else if (c.isChannelChat()) {
chatMetadata.add(ExtraProperties.COMMUNICATION_TYPE, CommunicationConstants.TYPE_BROADCAST);
} else {
if (c.getRemote() != null) {
chatMetadata.add(ExtraProperties.PARTICIPANTS, formatContact(c.getRemote(), cache));
addParticipantFields(chatMetadata, ExtraProperties.PARTICIPANTS, c.getRemote(), cache);
}
if (account != null) {
addParticipantFields(chatMetadata, ExtraProperties.PARTICIPANTS, account, cache);
}
chatMetadata.add(ExtraProperties.COMMUNICATION_TYPE, CommunicationConstants.TYPE_PRIVATE);
}

chatMetadata.add(ExtraProperties.COMMUNICATION_ID, c.getRemote() != null ? c.getRemote().getFullId() : c.getPrintId());
chatMetadata.add(ExtraProperties.COMMUNICATION_MESSAGES_COUNT,
Long.toString(c.getMessages().stream().filter(m -> !m.isSystemMessage()).count()));

ByteArrayInputStream chatStream = new ByteArrayInputStream(bytes);
extractor.parseEmbedded(chatStream, handler, chatMetadata, false);
bytes = nextBytes;
Expand All @@ -357,6 +371,12 @@ private void createReport(List<Chat> chatList, IItemSearcher searcher, WAContact

}

/**
 * Adds the metadata entries that describe one contact under the given field.
 * <p>
 * Three values are appended to {@code chatMetadata}:
 * <ul>
 * <li>{@code field} - the contact as rendered by {@code formatContact};</li>
 * <li>{@code field + ":ID"} - the contact's full id;</li>
 * <li>{@code field + ":Phone"} - the contact's id (presumably the phone
 * number, judging by the key name; verify against {@code WAContact}).</li>
 * </ul>
 *
 * @param chatMetadata metadata object receiving the new values
 * @param field        base metadata key the values are filed under
 * @param member       contact whose details are added
 * @param cache        cache handed through to {@code formatContact}
 */
private void addParticipantFields(Metadata chatMetadata, String field, WAContact member, HashMap<String, String> cache) {
    final String idKey = field + ":ID";
    final String phoneKey = field + ":Phone";

    // Format first, exactly as before, so any cache interaction happens
    // ahead of the id lookups.
    final String formatted = formatContact(member, cache);

    chatMetadata.add(field, formatted);
    chatMetadata.add(idKey, member.getFullId());
    chatMetadata.add(phoneKey, member.getId());
}

private void parseWhatsappMessages(InputStream stream, ContentHandler handler, Metadata metadata,
ParseContext context, ExtractorFactory extFactory) throws IOException, SAXException, TikaException {

Expand Down Expand Up @@ -944,6 +964,12 @@ private void extractMessages(String chatName, Chat c, List<Message> messages, WA
meta.set(ExtraProperties.MESSAGE_BODY, m.getData());
meta.set(ExtraProperties.URL, m.getUrl());

if (m.isFromMe()) {
meta.set(ExtraProperties.COMMUNICATION_DIRECTION, CommunicationConstants.DIRECTION_OUTGOING);
} else {
meta.set(ExtraProperties.COMMUNICATION_DIRECTION, CommunicationConstants.DIRECTION_INCOMING);
}

meta.set("mediaName", m.getMediaName()); //$NON-NLS-1$
meta.set("mediaMime", m.getMediaMime()); //$NON-NLS-1$
if (m.getMediaSize() != 0) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Set;

Expand Down Expand Up @@ -35,11 +36,13 @@ protected ParseContext getContext(String resource) throws IOException {
@SuppressWarnings("serial")
protected static class EmbeddedWhatsAppParser extends AbstractParser {
protected List<String> title = new ArrayList<String>();
protected List<String> type = new ArrayList<String>();
protected List<String> username = new ArrayList<String>();
protected List<String> userphone = new ArrayList<String>();
protected List<String> useraccount = new ArrayList<String>();
protected List<String> usernotes = new ArrayList<String>();
protected List<String> participants = new ArrayList<String>();
protected List<List<String>> participants = new ArrayList<>();
protected List<List<String>> admins = new ArrayList<>();
protected List<String> messagefrom = new ArrayList<String>();
protected List<String> messagebody = new ArrayList<String>();
protected List<String> messageto = new ArrayList<String>();
Expand All @@ -53,6 +56,8 @@ public void parse(InputStream stream, ContentHandler handler, Metadata metadata,
throws IOException, SAXException, TikaException {
if (metadata.get(TikaCoreProperties.TITLE) != null)
title.add(metadata.get(TikaCoreProperties.TITLE));
if (metadata.get(ExtraProperties.COMMUNICATION_TYPE) != null)
type.add(metadata.get(ExtraProperties.COMMUNICATION_TYPE));
if (metadata.get(ExtraProperties.USER_NAME) != null)
username.add(metadata.get(ExtraProperties.USER_NAME));
if (metadata.get(ExtraProperties.USER_PHONE) != null)
Expand All @@ -61,8 +66,10 @@ public void parse(InputStream stream, ContentHandler handler, Metadata metadata,
useraccount.add(metadata.get(ExtraProperties.USER_ACCOUNT));
if (metadata.get(ExtraProperties.USER_NOTES) != null)
usernotes.add(metadata.get(ExtraProperties.USER_NOTES));
if (metadata.get(ExtraProperties.PARTICIPANTS) != null)
participants.add(metadata.get(ExtraProperties.PARTICIPANTS));
if (metadata.get(ExtraProperties.PARTICIPANTS) != null) {
participants.add(Arrays.asList(metadata.getValues(ExtraProperties.PARTICIPANTS)));
admins.add(Arrays.asList(metadata.getValues(ExtraProperties.COMMUNICATION_ADMINS)));
}
if (metadata.get(org.apache.tika.metadata.Message.MESSAGE_FROM) != null)
messagefrom.add(metadata.get(org.apache.tika.metadata.Message.MESSAGE_FROM));
if (metadata.get(ExtraProperties.MESSAGE_BODY) != null)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
package iped.parsers.whatsapp;

import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.CoreMatchers.*;

import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;

import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
Expand All @@ -13,6 +17,7 @@
import org.xml.sax.SAXException;

import iped.parsers.standard.StandardParser;
import iped.parsers.util.CommunicationConstants;

public class WhatsAppParserTest extends AbstractPkgTest {

Expand Down Expand Up @@ -42,6 +47,11 @@ public void testWhatsAppParserAndroid() throws IOException, SAXException, TikaEx
assertEquals(4, whatsapptracker.messageto.size());
assertEquals(74, whatsapptracker.messagedate.size());

assertEquals(11, Collections.frequency(whatsapptracker.type, CommunicationConstants.TYPE_PRIVATE));
assertEquals(18, Collections.frequency(whatsapptracker.type, CommunicationConstants.TYPE_GROUP));
assertEquals(0, Collections.frequency(whatsapptracker.type, CommunicationConstants.TYPE_BROADCAST));
assertEquals(0, Collections.frequency(whatsapptracker.type, CommunicationConstants.TYPE_UNKONWN));

assertEquals("WhatsApp Chat - Nickerida - 556183125151", whatsapptracker.title.get(0));
assertEquals("WhatsApp Chat - Nickerida - 556183125151_message_0", whatsapptracker.title.get(1));
assertEquals("WhatsApp Chat - Nickerida - 556183125151_message_1", whatsapptracker.title.get(2));
Expand All @@ -50,13 +60,52 @@ public void testWhatsAppParserAndroid() throws IOException, SAXException, TikaEx
assertEquals("WhatsApp Group - Lar - 556185747642-1461964508_message_1", whatsapptracker.title.get(61));
assertEquals("WhatsApp Group - Lar - 556185747642-1461964508_message_2", whatsapptracker.title.get(62));

assertEquals("unknownAccount", whatsapptracker.participants.get(0));
assertEquals("unknownAccount", whatsapptracker.participants.get(1));
assertEquals("unknownAccount", whatsapptracker.participants.get(2));
assertEquals("unknownAccount", whatsapptracker.participants.get(3));
assertEquals("unknownAccount", whatsapptracker.participants.get(4));
assertEquals("unknownAccount", whatsapptracker.participants.get(17));
assertEquals("unknownAccount", whatsapptracker.participants.get(18));
// Test Private Chats

assertEquals(CommunicationConstants.TYPE_PRIVATE, whatsapptracker.type.get(0));
assertEquals(CommunicationConstants.TYPE_PRIVATE, whatsapptracker.type.get(19));
assertEquals(CommunicationConstants.TYPE_PRIVATE, whatsapptracker.type.get(20));
assertEquals(CommunicationConstants.TYPE_PRIVATE, whatsapptracker.type.get(21));

assertEquals(2, whatsapptracker.participants.get(0).size());
assertEquals(2, whatsapptracker.participants.get(19).size());
assertEquals(2, whatsapptracker.participants.get(20).size());
assertEquals(2, whatsapptracker.participants.get(21).size());

assertThat(whatsapptracker.participants.get(0), hasItems("unknownAccount", "Nickerida (556183125151@s.whatsapp.net)"));
assertThat(whatsapptracker.participants.get(19), hasItems("unknownAccount", "Hotdog412 (556181704627@s.whatsapp.net)"));
assertThat(whatsapptracker.participants.get(20), hasItems("unknownAccount", "Nwi Fibra Ótica (556133223200@s.whatsapp.net)"));
assertThat(whatsapptracker.participants.get(21), hasItems("unknownAccount", "Xavier (556135952111@s.whatsapp.net)"));

assertEquals(0, whatsapptracker.admins.get(0).size());
assertEquals(0, whatsapptracker.admins.get(19).size());
assertEquals(0, whatsapptracker.admins.get(20).size());
assertEquals(0, whatsapptracker.admins.get(21).size());

// Test Group Chats

assertEquals(CommunicationConstants.TYPE_GROUP, whatsapptracker.type.get(1));
assertEquals(CommunicationConstants.TYPE_GROUP, whatsapptracker.type.get(2));
assertEquals(CommunicationConstants.TYPE_GROUP, whatsapptracker.type.get(3));
assertEquals(CommunicationConstants.TYPE_GROUP, whatsapptracker.type.get(4));

assertEquals(23, whatsapptracker.participants.get(1).size());
assertEquals(21, whatsapptracker.participants.get(2).size());
assertEquals(5, whatsapptracker.participants.get(3).size());
assertEquals(3, whatsapptracker.participants.get(4).size());

assertThat(whatsapptracker.participants.get(1), hasItems("unknownAccount"));
assertThat(whatsapptracker.participants.get(2), hasItems("unknownAccount"));
assertThat(whatsapptracker.participants.get(3), hasItems("unknownAccount"));
assertThat(whatsapptracker.participants.get(4), hasItems("unknownAccount"));

assertEquals(21, whatsapptracker.admins.get(1).size());
assertEquals(19, whatsapptracker.admins.get(2).size());
assertEquals(1, whatsapptracker.admins.get(3).size());
assertEquals(0, whatsapptracker.admins.get(4).size());

assertThat(whatsapptracker.admins.get(1), hasItems("556192644086@s.whatsapp.net"));
assertThat(whatsapptracker.admins.get(2), hasItems("Pedro Gonzaga (556199351995@s.whatsapp.net)"));

assertEquals("unknownAccount", whatsapptracker.messagefrom.get(0));
assertEquals("unknownAccount", whatsapptracker.messagefrom.get(1));
Expand Down Expand Up @@ -127,6 +176,7 @@ public void testWhatsAppParserWADBAndroid() throws IOException, SAXException, Ti
assertEquals(384, whatsapptracker.useraccount.size());
assertEquals(166, whatsapptracker.usernotes.size());
assertEquals(0, whatsapptracker.participants.size());
assertEquals(0, whatsapptracker.admins.size());
assertEquals(0, whatsapptracker.messagefrom.size());
assertEquals(0, whatsapptracker.messagebody.size());
assertEquals(0, whatsapptracker.messageto.size());
Expand Down

0 comments on commit 106eb23

Please sign in to comment.