From 0ffa49178b88071c5b6b03ccab412999f31f34f6 Mon Sep 17 00:00:00 2001 From: Jelena Mirkovic Date: Fri, 13 Oct 2023 17:59:31 -0700 Subject: [PATCH] Moved into separate folder --- .../B_Root_Anomaly-20151130/README.md | 13 - .../B_Root_Anomaly-20160625/README.md | 6 - .../B_Root_Anomaly-20170221/README.md | 5 - .../B_Root_Anomaly-20170306/README.md | 5 - .../B_Root_Anomaly-20170425/README.md | 6 - .../B_Root_Anomaly-20190907/README.md | 19 - .../B_Root_Anomaly-20200213/README.md | 5 - .../B_Root_Anomaly-20201024/README.md | 5 - .../B_Root_Anomaly-20210528/README.md | 33 -- B_Root_Anomalies/Makefile | 14 - B_Root_Anomalies/README.md | 19 - B_Root_Anomalies/stats.cc | 212 ----------- B_Root_Anomalies/tag.cc | 197 ---------- B_Root_Anomalies/utils.cc | 347 ------------------ B_Root_Anomalies/utils.h | 89 ----- 15 files changed, 975 deletions(-) delete mode 100644 B_Root_Anomalies/B_Root_Anomaly-20151130/README.md delete mode 100644 B_Root_Anomalies/B_Root_Anomaly-20160625/README.md delete mode 100644 B_Root_Anomalies/B_Root_Anomaly-20170221/README.md delete mode 100644 B_Root_Anomalies/B_Root_Anomaly-20170306/README.md delete mode 100644 B_Root_Anomalies/B_Root_Anomaly-20170425/README.md delete mode 100644 B_Root_Anomalies/B_Root_Anomaly-20190907/README.md delete mode 100644 B_Root_Anomalies/B_Root_Anomaly-20200213/README.md delete mode 100644 B_Root_Anomalies/B_Root_Anomaly-20201024/README.md delete mode 100644 B_Root_Anomalies/B_Root_Anomaly-20210528/README.md delete mode 100644 B_Root_Anomalies/Makefile delete mode 100644 B_Root_Anomalies/README.md delete mode 100644 B_Root_Anomalies/stats.cc delete mode 100644 B_Root_Anomalies/tag.cc delete mode 100644 B_Root_Anomalies/utils.cc delete mode 100644 B_Root_Anomalies/utils.h diff --git a/B_Root_Anomalies/B_Root_Anomaly-20151130/README.md b/B_Root_Anomalies/B_Root_Anomaly-20151130/README.md deleted file mode 100644 index 4ff4b0a..0000000 --- a/B_Root_Anomalies/B_Root_Anomaly-20151130/README.md +++ /dev/null @@ -1,13 +0,0 @@ -# How to run the tagging code - -## For 11/30 attack - -``` -tag -s 1448866200 -e 1448875160 -r -q www.336901.com -q www.366901.com -``` - -## For 12/1 attack - -``` -tag -s 1448946569 -e 1448950480 -r -q www.916yy.com -``` \ No newline at end of file diff --git a/B_Root_Anomalies/B_Root_Anomaly-20160625/README.md b/B_Root_Anomalies/B_Root_Anomaly-20160625/README.md deleted file mode 100644 index edd895a..0000000 --- a/B_Root_Anomalies/B_Root_Anomaly-20160625/README.md +++ /dev/null @@ -1,6 +0,0 @@ -# How to run the tagging code - -``` -tag -s 1466895600 -e 1466987100 -r -``` -You may need to run this for the folders on 25th and 26th separately. \ No newline at end of file diff --git a/B_Root_Anomalies/B_Root_Anomaly-20170221/README.md b/B_Root_Anomalies/B_Root_Anomaly-20170221/README.md deleted file mode 100644 index 4341a16..0000000 --- a/B_Root_Anomalies/B_Root_Anomaly-20170221/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# How to run the tagging code - -``` -tag -s 1487659200 -e 1487695200 -r -E lax -q .jiang.com -q .phone.tianxintv.cn -q clgc88.com -``` \ No newline at end of file diff --git a/B_Root_Anomalies/B_Root_Anomaly-20170306/README.md b/B_Root_Anomalies/B_Root_Anomaly-20170306/README.md deleted file mode 100644 index ec6aea2..0000000 --- a/B_Root_Anomalies/B_Root_Anomaly-20170306/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# How to run the tagging code - -``` -tag -s 1488775205 -e 1488795600 -r -E lax -q qycl520.com -q calling168.com -``` \ No newline at end of file diff --git a/B_Root_Anomalies/B_Root_Anomaly-20170425/README.md b/B_Root_Anomalies/B_Root_Anomaly-20170425/README.md deleted file mode 100644 index 8b1bc46..0000000 --- a/B_Root_Anomalies/B_Root_Anomaly-20170425/README.md +++ /dev/null @@ -1,6 +0,0 @@ -# How to run the tagging code - -``` -tag -s 1493114000 -e 1493124900 -r -E lax -q plaza.game981.com - -``` \ No newline at end of file diff --git a/B_Root_Anomalies/B_Root_Anomaly-20190907/README.md b/B_Root_Anomalies/B_Root_Anomaly-20190907/README.md deleted file mode 100644 index 1c7684f..0000000 --- a/B_Root_Anomalies/B_Root_Anomaly-20190907/README.md +++ /dev/null @@ -1,19 +0,0 @@ -# How to run the tagging code - -## For .ari POP - -``` -tag -s 1567838739 -e 1567838772 -r -E ari -``` - -## For .lax POP - -``` -tag -s 1567838738 -e 1567838773 -r -E lax -``` - -## For .mia POP - -``` -tag -s 1567838739 -e 1567838769 -r -E mia -``` \ No newline at end of file diff --git a/B_Root_Anomalies/B_Root_Anomaly-20200213/README.md b/B_Root_Anomalies/B_Root_Anomaly-20200213/README.md deleted file mode 100644 index 880b228..0000000 --- a/B_Root_Anomalies/B_Root_Anomaly-20200213/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# How to run the tagging code - -``` -tag -s 1581581100 -e 1581581360 -r -E sin -q 8.8.8.8 -``` \ No newline at end of file diff --git a/B_Root_Anomalies/B_Root_Anomaly-20201024/README.md b/B_Root_Anomalies/B_Root_Anomaly-20201024/README.md deleted file mode 100644 index 63a976c..0000000 --- a/B_Root_Anomalies/B_Root_Anomaly-20201024/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# How to run the tagging code - -``` -tag -s 1603507954 -e 1603508345 -r -E ams -``` diff --git a/B_Root_Anomalies/B_Root_Anomaly-20210528/README.md b/B_Root_Anomalies/B_Root_Anomaly-20210528/README.md deleted file mode 100644 index 6065f39..0000000 --- a/B_Root_Anomalies/B_Root_Anomaly-20210528/README.md +++ /dev/null @@ -1,33 +0,0 @@ -# How to run the tagging code - -## For .ams POP - -``` -tag -s 1622169357 -e 1622169441 -r -E ams -``` - -## For .ari POP - -``` -tag -s 1622169357 -e 1622169422 -r -E ari -``` - -## For .lax POP - -``` -tag -s 1622169357 -e 1622169608 -r -E lax -``` - -## For .iad POP - -``` -tag -s 1622169357 -e 1622169414 -r -E iad -``` - -## For .mia POP - -``` -tag -s 1622169357 -e 1622169487 -r -E mia -``` - - diff --git a/B_Root_Anomalies/Makefile b/B_Root_Anomalies/Makefile deleted file mode 100644 index 9052007..0000000 --- a/B_Root_Anomalies/Makefile +++ /dev/null @@ -1,14 +0,0 @@ -CFLAGS = -g -fpermissive - -PROGS = tag stats - -all: $(PROGS) - -stats: stats.cc - g++ -o stats $(CFLAGS) stats.cc -lpcap - -tag: tag.cc utils.cc utils.h - g++ -o tag $(CFLAGS) tag.cc utils.cc - -clean: - -rm -f $(PROGS) diff --git a/B_Root_Anomalies/README.md b/B_Root_Anomalies/README.md deleted file mode 100644 index 4b01b5a..0000000 --- a/B_Root_Anomalies/README.md +++ /dev/null @@ -1,19 +0,0 @@ -# B-Root Anomalies - -This folder contains labeling program for B-Root anomalies dataset. -Run: -``` -make -``` -to create `tag` executable. There are -several folders, containing information on how to use `tag` executable -with the original data to produce record-level tags. -The record-level labels look like: -``` -recordID label -``` -where recordID looks like: -``` -timestamp-sourceIP-sourceport-destIP-destport -``` -and label can be A (attack) or B (benign) \ No newline at end of file diff --git a/B_Root_Anomalies/stats.cc b/B_Root_Anomalies/stats.cc deleted file mode 100644 index abb85d7..0000000 --- a/B_Root_Anomalies/stats.cc +++ /dev/null @@ -1,212 +0,0 @@ -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -#include - -#define PCAP_BUF_SIZE 1024 -#define PCAP_SRC_FILE 2 - -int icmpCount = 0; -int tcpCount = 0; -int udpCount = 0; -int dnsCount = 0; -int synCount[PCAP_BUF_SIZE]; -int synIdx = 0; -char synIP[PCAP_BUF_SIZE][INET_ADDRSTRLEN]; -int httpCount[PCAP_BUF_SIZE]; -int httpIdx = 0; -char httpIP[PCAP_BUF_SIZE][INET_ADDRSTRLEN]; - -void packetHandler(u_char *userData, const struct pcap_pkthdr* pkthdr, const u_char* packet); - -int main(int argc, char **argv) { - - pcap_t *fp; - char errbuf[PCAP_ERRBUF_SIZE]; - char source[PCAP_BUF_SIZE]; - int i, maxCountSyn = 0, maxCountHttp = 0, maxIdxSyn = 0, maxIdxHttp = 0; - - if(argc != 2) { - printf("usage: %s filename\n", argv[0]); - return -1; - } - - struct bpf_program ffp; - - /*fp = pcap_open_offline_with_tstamp_precision(argv[0], PCAP_TSTAMP_PRECISION_NANO, errbuf);*/ - fp = pcap_open_offline(argv[1], errbuf); - if (fp == NULL) { - fprintf(stderr, "\npcap_open_offline() failed: %s\n", errbuf); - return 0; - } - - - if(pcap_compile(fp,&ffp,"src port not 53 and dst port 53",0,0) == -1) - { fprintf(stderr,"Error calling pcap_compile\n"); return 1; } - - /* set the compiled program as the filter */ - if(pcap_setfilter(fp,&ffp) == -1) - { fprintf(stderr,"Error setting filter\n"); return 1; } - - - if (pcap_loop(fp, 0, packetHandler, NULL) < 0) { - fprintf(stderr, "\npcap_loop() failed: %s\n", pcap_geterr(fp)); - return 0; - } - - return 0; -} - -void packetHandler(u_char *userData, const struct pcap_pkthdr* pkthdr, const u_char* packet) { - - double ts = pkthdr->ts.tv_sec + pkthdr->ts.tv_usec/1000000.0; - int caplen = pkthdr->caplen; - const struct ether_header* ethernetHeader; - const struct ip* ipHeader; - const struct ip6_hdr* ipHeader6; - const struct tcphdr* tcpHeader; - const struct udphdr* udpHeader; - char sourceIP[INET6_ADDRSTRLEN]; - char destIP[INET6_ADDRSTRLEN]; - u_int sport, dport; - u_char *data; - int dataLength = 0; - int i; - unsigned int ttl; - unsigned int plen; - int proto; - unsigned char* payload = 0; - std::string query = ""; - int isquery = 0; - int size_payload = 0; - - ethernetHeader = (struct ether_header*)packet; - if (ntohs(ethernetHeader->ether_type) == ETHERTYPE_IP || ntohs(ethernetHeader->ether_type) == ETHERTYPE_IPV6) { - - int size_ip; - int ip_len; - - if (ntohs(ethernetHeader->ether_type) == ETHERTYPE_IP) - { - ipHeader = (struct ip*)(packet + sizeof(struct ether_header)); - inet_ntop(AF_INET, &(ipHeader->ip_src), sourceIP, INET_ADDRSTRLEN); - inet_ntop(AF_INET, &(ipHeader->ip_dst), destIP, INET_ADDRSTRLEN); - ttl = ipHeader->ip_ttl; - plen = ntohs(ipHeader->ip_len); - proto = ipHeader->ip_p; - size_ip = ipHeader->ip_hl*4; - ip_len = ntohs(ipHeader->ip_len); - //std::cout<<"IP packet "<ip6_src), sourceIP, INET6_ADDRSTRLEN); - inet_ntop(AF_INET6, &(ipHeader6->ip6_dst), destIP, INET6_ADDRSTRLEN); - ttl = ipHeader6->ip6_ctlun.ip6_un1.ip6_un1_hlim; - plen = ntohs(ipHeader6->ip6_ctlun.ip6_un1.ip6_un1_plen); - proto = ipHeader6->ip6_ctlun.ip6_un1.ip6_un1_nxt; - size_ip = sizeof(ip6_hdr); - ip_len = plen + size_ip; // in ip6 plen is just payload, not IP header - } - int opcode; - if (proto == IPPROTO_TCP) - { - tcpHeader = (struct tcphdr*)(packet + sizeof(struct ether_header) + size_ip); - int size_tcp = tcpHeader->th_off*4; - size_payload = ip_len - (size_ip + size_tcp); - //cout<<"Payload "<source); - dport = ntohs(tcpHeader->dest); - if (size_payload > 8) // size of DNS header - { - payload = (u_char*)(packet + sizeof(struct ether_header) + size_ip + size_tcp); - opcode = payload[2]>>4; - if (opcode == 0) - isquery = 1; - } - } - else if (proto == IPPROTO_UDP) - { - udpHeader = (struct udphdr*)(packet + sizeof(struct ether_header) + size_ip); - int size_udp = 8; - sport = ntohs(udpHeader->source); - dport = ntohs(udpHeader->dest); - size_payload = ip_len - (size_ip + size_udp); - if (size_payload > 8) // size of DNS header - { - payload = (u_char*)(packet + sizeof(struct ether_header) + size_ip + size_udp); - opcode = payload[2]>>4; - //std::cout<<"UDP packet opcode "< 8 && opcode == 0) - { - //for (int i=0; i size_payload) - { - //std::cout<<"This is malformed query "< -#include -#include -#include -#include - -#include "utils.h" - -bool first = true; -bool attacksources = false; -bool atlist = false; -int PERIOD = 60; -long int starttime = 0; -long int endtime = 0; -double lasttime = 0; - -string readfolder = ""; -string infile = ""; -string atfile = ""; -string extension = ""; - -set queries; - -int attackers[(int)pow(2,27)]; - -long int total = 0; -long int afiltered = 0, apassed = 0; -long int gfiltered = 0, gpassed = 0; -long int passed = 0; - -// We store delimiters in this array -int* delimiters; - -void loadattackers(string infile) -{ - memset(attackers, 0, pow(2,24)*8); - int i = 0; - ifstream in(infile, std::ofstream::in); - while (in.good()) - { - char ip[50]; - int req; - in>>ip; - if (!in.good()) - break; - unsigned int ipi=todec(ip); - attackers[int(ipi/32)] = attackers[int(ipi/32)] | (1 >> (ipi % 32)); - i++; - if (i % 100000 == 0) - cout<<"Inserted attacker "< 256) && queries.size() == 0) - isattack = true; - //cout<<"For "<c_str()) != 0) - { - isattack = true; - } - - if (attacksources && isattack && !atlist) - { - unsigned int ipi=todec(ip); - attackers[int(ipi/32)] = attackers[int(ipi/32)] | (1 >> (ipi % 32)); - } - if (attacksources && !isattack) - { - unsigned int ipi=todec(ip); - int cur = attackers[int(ipi/32)] & (1 >> (ipi % 32)); - if (cur > 0) - { - isattack = true; - } - } - // Periodic reset - if ((outtime > lasttime + PERIOD) && !atlist && attacksources) - { - memset(attackers, 0, pow(2,24)*8); - lasttime = outtime; - } - char outs[MAXLEN]; - - if (isattack) - sprintf(outs, "%s A\n", recordID); - else - sprintf(outs, "%s B\n", recordID); - - return outs; -} - - -void printHelp() -{ - printf ("tag\n(C) 2022 University of Southern California.\n\n"); - - printf ("-h Print this help\n"); - printf ("-r Folder with pcap.xz files to use in training\n"); - printf ("-s Start processing from this epoch time in UTC\n"); - printf ("-e End at this epoch time in UTC\n"); - printf ("-E Only process files with this extension in the name (e.g., lax, mia)\n"); - printf ("-a Optionally read attack IPs from this file\n"); - printf ("-A Tag all traffic from attack IPs as attack\n"); - printf ("-q This is a substring occuring in attack queries, you can repeat this arg spec multiple times\n"); -} - - -// This is deployment version, we load trained values -// and use them to block resolvers that are more aggressive than -// their model - -int main(int argc, char** argv) -{ - delimiters = (int*) malloc(AR_LEN*sizeof(int)); - char c; - set wildtrain; - set HCFtrain; - set URtrain; - set FQtrain; - - for (int i = 0; i 0 && (s[j] == ' ' || s[j] == '\n')) - j--; - string ns = ""; - if (j < 0 || j <= i) - return ns; - if (s[j] == '.') - j--; - ns = s.substr(i, j-i+1); - return ns; -} - -// Check if all chars are digits -bool checkdigits(const char* str) -{ - for (int i=0; i= 'a' && src[i] <= 'f') - cur = cur*16 + src[i] - 'a'; - else if (src[i] >= 'A' && src[i] <= 'F') - cur = cur*16 + src[i] - 'A'; - else - cur = cur*16 + src[i] - '0'; - } - } - } - return res; -} - - -// Something like strtok but it doesn't create new -// strings. Instead it replaces delimiters with 0 -// in the original string -int parse(char* input, char delimiter, int** array) -{ - int pos = 0; - memset(*array, 255, AR_LEN); - int len = strlen(input); - int found = 0; - for(int i = 0; i oldtime) - oldtime = time; - if (len > 0) - oldlen = len; - if (ttl > 0) - oldttl = ttl; - } - } - } catch (...) { - pclose(pipe); - throw; - } - pclose(pipe); - for (int j=0; j 0) - nfiles += dirs[nd].n; - nd++; - for (int i = 0; i i) - break; - total += dirs[d].n; - } - int nf = i - total; - char filename[200]; - long now = time(0); - sprintf(filename, "%s/%s", dirs[d].dir, dirs[d].namelist[nf]->d_name); - // Assumed file name structure is 20170221-033949-00603104.lax.pcap.xz - // Check if the file ends in xz, if not drop it - if(((string)filename).substr(((string)filename).find_last_of(".") + 1) != "xz") - { - continue; - } - // We assume that all xz files in a directory should be processed. If extension is specified - // we will drop files that don't have a given location extension, e.g., lax - if (extension != "" && ((string)filename).find(extension) == string::npos) - { - continue; - } - long myepoch = getepoch(dirs[d].namelist[nf]->d_name); - if (myepoch < starttime - 300) - { - continue; - } - if (myepoch >= endtime + 300) - { - done = true; - } - if (done) - break; - loadfile2(filename, process, dirs[d].namelist[nf]->d_name); - long diff = time(0) - now; - } -} - - -const int NQ=10; -char* querytypes[NQ]= {"A?", "AAAA?", "CNAME?", "PTR?", "NS?", "SOA?", "MX?", "DS?", "SRV?", "TXT?"}; - -// Is given string epoch time -bool nottime(char* buffer) -{ - if (strlen(buffer) < 17) - return true; - for(int i=0; i<17; i++) - { - if ((i <= 9 || i > 10) && !isdigit(buffer[i])) - return true; - if (i == 10 && buffer[i] != '.') - return true; - } - return false; -} - -// More elegant way to process with libpcap -bool shouldprocess2(char* buffer, double& outtime, int& outlen, int*& delimiters, string& ip, - double starttime, double endtime, int& isquery, char* queryname, int& outttl) -{ - //std::cout<<"Got buffer "<= 6) - { - strcpy(queryname, buffer+delimiters[5]); - //std::cout<<"queryname "< 0 && outtime < starttime) - { - //cout<<"time too early "<= endtime) - { - return false; - } - - // Do a format check, is the first item epoch time - if (nottime(buffer)) - { - return false; - } - return true; -} - - - -// Filter files with a given name -int filter(const struct dirent *dir) -{ - const char *s = dir->d_name; - if (strcmp(dir->d_name, ".") != 0 && strcmp(dir->d_name, "..") != 0) - return 1; - else - return 0; -} - -// Get epoch from filename -unsigned long getepoch(string filename) -{ - int pos1 = filename.find("-"); - int pos2 = filename.find("-", pos1+1); - //20170221-033949-00603104.lax.pcap.xz - string date = filename.substr(0, pos1); - string clock = filename.substr(pos1+1, pos2-pos1-1); - struct tm t; - time_t epoch; - t.tm_year = (atoi(date.c_str()) / 10000) - 1900; - t.tm_mon = ((atoi(date.c_str()) % 10000)/100) - 1; - t.tm_mday = atoi(date.c_str()) % 100; - t.tm_hour = (atoi(clock.c_str()) / 10000); - t.tm_min = ((atoi(clock.c_str()) % 10000)/100); - t.tm_sec = atoi(clock.c_str()) % 100; - t.tm_isdst = 0; - epoch = mktime(&t) - timezone; - return epoch; -} - diff --git a/B_Root_Anomalies/utils.h b/B_Root_Anomalies/utils.h deleted file mode 100644 index f7cafb3..0000000 --- a/B_Root_Anomalies/utils.h +++ /dev/null @@ -1,89 +0,0 @@ -#ifndef UTILS_H -#define UTILS_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#define AR_LEN 300 // highest number of delimiters in a string parsed with parse function -#define MAXLEN 20000 - -#define CUSUMTHRESH 5 -#define MINSAMPLES 5 -#define ATTHRESH 5 -#define N 10 -#define ASAMPLES 10 -#define NL 9 -#define NR 65536 -#define NUMSTD 5 - -using namespace std; - -struct item -{ - double avg[NL]; - double ss[NL]; - int samples[NL]; -}; - - -struct record -{ - // Every so often remember what you know - // and start again - // Historical values keep the highest - // measure we have - double avg[NL]; - double ss[NL]; - int samples[NL]; - int r[NL]; - long int time[NL]; - int start; - int lasttime; - int at; - double xcd; - bool blocked; -}; - -// Directory entry -struct dirrecord -{ - char dir[200]; - struct dirent** namelist; - int n; -}; - -// Utility functions -bool nottime(char* buffer); -unsigned int todec(string ip); -bool checkdigits(const char* str); -int gettwo(char* src); -int parse(char* input, char delimiter, int** array); -void loadfile2(char* fname, string (*process)(char*, double&, int&, int&, ofstream&)); -void loadfiles(const char* file, string (*process)(char*, double&, int&, int&, ofstream&), - string, long int, long int); -unsigned long getepoch(string filename); -int filter(const struct dirent *dir); -bool shouldprocess2(char* buffer, double& outtime, int& outlen, int*& delimiters, string& ip, - double starttime, double endtime, int& isquery, char* queryname, int& outttl); - -string trim(string s); - -#endif