diff --git a/README.md b/README.md
index c12bd7c..f550441 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,27 @@
-# OB-Tree
\ No newline at end of file
+# OB-Tree
+OB-Tree: A New Write-Optimization Index on Out-of-Core Column-Store Databases
+
+## Folder structure
+
+* basic
+ - fundamental classes
+* exp_merge_ob
+ - data cleaning experiments using OB-Tree
+* exp_merge_progressive
+ - data cleaning experiments using the progressive approach
+* exp_select
+ - data select experiments
+* exp_update
+ - data update experiments
+
+## Contact
+
+Feng "George" Yu, Ph.D.
+Assistant Professor
+Dept. Computer Science and Information Systems
+Youngstown State University
+Youngstown, OH, 44555
+YSU Data Lab: http://datalab.ysu.edu/
+Email: fyu@ysu.edu
+
+
diff --git a/basic/btree/BTree.java b/basic/btree/BTree.java
new file mode 100644
index 0000000..a6f960b
--- /dev/null
+++ b/basic/btree/BTree.java
@@ -0,0 +1,478 @@
+package basic.btree;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Scanner;
+import java.io.IOException;
+
+import basic.btree.*;
+import basic.util.DataRetriever;
+
+/**
+ * B-Tree implementation largely based on the implementation made in Algorithms
+ * book by Robert Sedgewick. Original code can be found in the book web
+ * site.
+ *
+ * @author cgavidia
+ *
+ * @param <Key>
+ * Type of the Search Key
+ * @param <Value>
+ * Type of the Value Stored
+ */
+@SuppressWarnings("unchecked")
+public class BTree<Key extends Comparable<Key>, Value> {
+
+ /**
+ * Tree parameter. Every node must have at most M - 1 key-link pairs
+ */
+// public static int M = 16; //for small tests
+// public static int M = 64; //for small tests
+ public static int M = 128;
+// public static int M = 32768;//2^15 for MB level?
+// public static int M = 8388608;//2^23 for GB level?
+// public static int M = 1_000_000;
+// public static int M = Integer.MAX_VALUE;//for large data test
+
+
+ protected Node root;
+ /**
+ * Height of B-Tree
+ */
+ private int height;
+ /**
+ * Number of key-value pairs in B-Tree
+ */
+ private int size;
+
+ Key key_min;//the smallest key
+ Key key_max;//the maximum key
+
+ public BTree() {
+ root = new Node(0);
+ }
+
+ /**
+ * BTree size in node number
+ * To get space size, use toByte() and toKB()
+ */
+ public int getSize() {
+ return size;
+ }
+
+ public int getHeight() {
+ return height;
+ }
+
+ /**
+ * Search for given key
+ *
+ * @param key
+ * Key to search
+ * @return Associated value; return null if no such key
+ */
+ public Value get(Key key) {
+ if(size==0){
+ return null;
+ }
+ if(key.compareTo(key_max)>0 || key.compareTo(key_min) <0){
+ return null;
+ }else {
+ return search(root, key, height);
+ }
+ }
+
+ /**
+ * @param treeHeight current subtree height
+ */
+ private Value search(Node node, Key key, int treeHeight) {
+ Entry[] children = node.getChildrenArray();
+
+ // external node
+ if (treeHeight == 0) {
+ for (int j = 0; j < node.getNumberOfChildren(); j++) {
+ if (equal(key, children[j].getKey())) {
+ return (Value) children[j].getValue();
+ }
+ }
+ }
+ // internal node
+ else {
+ for (int j = 0; j < node.getNumberOfChildren(); j++) {
+ if (j== node.getNumberOfChildren() - 1 || less(key, children[j + 1].getKey()))
+ return search(children[j].getNext(), key, treeHeight - 1);
+ }
+ }
+ return null;
+ }
+
+ @SuppressWarnings("unchecked")
+ public void put(Key key) {
+ put(key, (Value) key);
+ }
+
+ public String toString() {
+ return toString(root, height, "") + "\n";
+ }
+
+ private boolean less(Key k1, Key k2) {
+ return k1.compareTo(k2) < 0;
+ }
+
+ private boolean equal(Key k1, Key k2) {
+ return k1.compareTo(k2) == 0;
+ }
+
+ //================ Eric Jones - Begins =================================
+
+ public Value findReplace(Key key, Value val){
+ return searchReplace(root, key, val, height);
+ }
+
+ private Value searchReplace(Node node, Key key, Value val, int treeHeight) {
+ Entry[] children = node.getChildrenArray();
+
+ // external node
+ if (treeHeight == 0) {
+ for (int j = 0; j < node.getNumberOfChildren(); j++) {
+ if (equal(key, children[j].getKey())) {
+ children[j].setValue(val);
+ return val;
+ }
+ }
+ }
+
+ // internal node
+ else {
+ for (int j = 0; j < node.getNumberOfChildren(); j++) {
+ if (j == node.getNumberOfChildren() - 1 || less(key, children[j+1].getKey()))
+ return searchReplace(children[j].getNext(), key, val, treeHeight - 1);
+ }
+ }
+ return null;
+ }
+
+ //================ Eric Jones - Ends =================================
+
+ /**
+ * search for the minimal node
+ * fyu
+ */
+ public Node getMinNode(){
+ return searchMinNode(root, height);
+ }
+ private Node searchMinNode(Node node, int treeHeight) {
+ // external node
+ if (treeHeight == 0) {
+ return node;
+ }
+ // internal node
+ else {
+ return searchMinNode(node.getChildrenArray()[0].getNext(),treeHeight - 1);
+ }
+ }
+
+ /**
+ * Inserts a Key-Value pair
+ *
+ * @param key
+ * Key to insert
+ * @param value
+ * Value to insert
+ */
+ @SuppressWarnings("unchecked")
+ public void put(Key key, Value value) {
+ //automatically update key_max and key_min when inputing new keys
+ if(key_max==null || key_min==null){
+ key_max=key_min=key;//initial status
+ }else if(key.compareTo(key_max)>0){
+ key_max=key;
+ }else if(key.compareTo(key_min)<0){
+ key_min=key;
+ }
+
+ Node nodeFromSplit = insert(root, key, value, height);
+ size++;
+ if (nodeFromSplit == null) {
+ return;
+ }
+
+ Node newRoot = new Node(2);
+ newRoot.getChildrenArray()[0] = new Entry(root.getChildrenArray()[0].getKey(), null, root);
+ newRoot.getChildrenArray()[1] = new Entry(nodeFromSplit.getChildrenArray()[0].getKey(), null,
+ nodeFromSplit);
+ root = newRoot;
+ height++;
+ }
+
+ /**
+ * insert into subtree
+ * @param node current subtree root
+ * @param key
+ * @param value
+ * @param treeHeight current subtree height
+ * @return
+ */
+ private Node insert(Node node, Key key, Value value, int treeHeight) {
+ int newEntryPosition;
+ Entry entryToInsert = new Entry(key, value, null);
+ // external node
+ if (treeHeight == 0) {
+ for (newEntryPosition = 0; newEntryPosition < node
+ .getNumberOfChildren(); newEntryPosition++) {
+ if (less(key, node.getChildrenArray()[newEntryPosition].getKey())) {
+ break;
+ }
+ }
+ }
+ // internal node
+ else {
+ for (newEntryPosition = 0; newEntryPosition < node.getNumberOfChildren(); newEntryPosition++) {
+ if ((newEntryPosition == node.getNumberOfChildren()-1) || less(key,node.getChildrenArray()[newEntryPosition + 1].getKey())){
+ Node nodeFromSplit =
+ insert(node.getChildrenArray()[newEntryPosition++].getNext(),key, value, treeHeight - 1);
+ if (nodeFromSplit == null) {
+ return null;
+ }
+ entryToInsert.setKey(nodeFromSplit.getChildrenArray()[0].getKey());
+ entryToInsert.setNext(nodeFromSplit);
+ break;
+ }
+ }
+ }
+ //set ONLY leaf node[M-1] to point to its next sibling node
+ Node nextTemp=null;//temp pointer
+ if(treeHeight==0 && node.getChildrenArray()[M-1]!=null){
+ nextTemp=node.getChildrenArray()[M-1].getNext();//if current node is full and next is pointing to a sibling node
+ }
+ for (int i = node.getNumberOfChildren(); i > newEntryPosition; i--) {
+ //move entry one step backward
+ //if the current node is full and pointing to a sibling node, this will clean the next pointer
+ node.getChildrenArray()[i] = node.getChildrenArray()[i - 1];
+ }
+ node.getChildrenArray()[newEntryPosition] = entryToInsert;//if the current node is full and pointing to a sibling node, this will clean the next pointer
+ node.setNumberOfChildren(node.getNumberOfChildren() + 1);
+ if(treeHeight==0 && node.getChildrenArray()[M-1]!=null && nextTemp!=null){
+ node.getChildrenArray()[M-1].setNext(nextTemp);//restore the last pointer to the sibling node
+ }
+ if (node.getNumberOfChildren() < M) {
+ return null;
+ } else {
+ if(treeHeight!=0){
+ return splitInternal(node);
+ }
+ else{
+ return splitLeaf(node);
+ }
+
+ }
+ }
+
+ /**
+ * Splits node in half
+ * internal nodes set the next pointer to the sibling node
+ *
+ * @param oldNode
+ * The Node to Split
+ */
+ private Node splitLeaf(Node oldNode) {
+ Node newNode = new Node(M / 2);
+ oldNode.setNumberOfChildren(M / 2);
+ for (int j = 0; j < M / 2; j++) {
+ newNode.getChildrenArray()[j] = oldNode.getChildrenArray()[M/ 2 + j];
+ }
+ newNode.getChildrenArray()[M-1]=new Entry(null,null,oldNode.getChildrenArray()[M-1].getNext());
+
+ //clean unused space
+ for (int j = 0; j < M / 2-1; j++) {
+ oldNode.getChildrenArray()[M/ 2 + j]=null;
+ }
+ oldNode.getChildrenArray()[M-1]=new Entry(null,null,newNode);
+ return newNode;
+ }
+
+ /**
+ * split root is different from internal
+ */
+ private Node splitInternal(Node oldNode) {
+ Node newNode = new Node(M / 2);
+ oldNode.setNumberOfChildren(M / 2);
+ for (int j = 0; j < M / 2; j++) {
+ newNode.getChildrenArray()[j] = oldNode.getChildrenArray()[M
+ / 2 + j];
+ }
+ return newNode;
+ }
+
+ /**
+ * output all leaf entries
+ * @return
+ */
+ public ArrayList<Entry> getLeafEntryList() {
+ ArrayList<Entry> entry_list=new ArrayList<Entry>(1024);
+ Node current_node=this.getMinNode();
+ Entry[] children_array = current_node.getChildrenArray();
+ while(true){
+ for(int j=0; j<current_node.getNumberOfChildren(); j++){
+ entry_list.add(children_array[j]);
+ }
+ //NOTE(review): this span was corrupted in the patch (angle-bracket text eaten);
+ //loop body reconstructed from the leaf sibling-pointer invariant kept at slot M-1
+ //by splitLeaf() - confirm against the upstream source before applying
+ if(children_array[M-1]!=null && children_array[M-1].getNext()!=null){
+ current_node=children_array[M-1].getNext();
+ children_array=current_node.getChildrenArray();
+ }else{
+ break;
+ }
+ }
+ return entry_list;
+ }
+
+ private String toString(Node<Key, Value> currentNode, int ht, String indent) {
+ String outputString = "";
+ if(currentNode==null || currentNode.getChildrenArray()==null){
+ return "";
+ }
+ Entry[] childrenArray = currentNode.getChildrenArray();
+
+ if (ht == 0) {
+ for (int j = 0; j < currentNode.getNumberOfChildren(); j++) {
+ outputString += indent + childrenArray[j].getKey() + " "
+ + childrenArray[j].getValue() + "\n";
+ }
+ } else {
+ int num_children=currentNode.getNumberOfChildren();
+ outputString += toString(childrenArray[0].getNext(), ht - 1, indent + " ");
+ outputString += indent + "[" + childrenArray[0].getKey() + "\n";
+ if(num_children>=2){
+ for (int j = 1; j < num_children - 1; j++) {
+ outputString += indent + "(" + childrenArray[j].getKey() + ")\n";
+ outputString += toString(childrenArray[j].getNext(), ht - 1, indent + " ");
+ }
+ }
+ outputString += indent + "(" + childrenArray[num_children-1].getKey() + ")]\n";
+ outputString += toString(childrenArray[num_children-1].getNext(), ht - 1, indent + " ");
+ }
+ return outputString;
+ }
+
+ private int toByte(Node currentNode, int treeHeight){
+ int space=0;
+ if(currentNode==null || currentNode.getChildrenArray()==null){
+ return 0;
+ }
+ Entry[] childrenArray = currentNode.getChildrenArray();
+ if(treeHeight==0){
+ for (int j = 0; j < currentNode.getNumberOfChildren(); j++) {
+ space+=Entry.key_size+Entry.value_size;//space for key and value
+ }
+ if(currentNode.getChildrenArray()[M-1]!=null && currentNode.getChildrenArray()[M-1].getNext()!=null){
+ space+=Entry.pointer_size;//pointer to sibling
+ }
+ }else{
+ int num_children=currentNode.getNumberOfChildren();
+ space+=Entry.pointer_size;//first pointer to left child
+ space+=toByte(childrenArray[0].getNext(),treeHeight-1);
+ if(num_children>=2){
+ for (int j=1; j node, Key key, Value val, int treeHeight) {
+Entry[] children = node.getChildrenArray();
+
+// external node
+if (treeHeight == 0) {
+ for (int j = 0; j < node.getNumberOfChildren(); j++) {
+ if (equal(key, children[j].getKey())) {
+ children[j].setKey(null);
+ children[j].setValue(null);
+
+ return val; //perhaps this should be return null, PERHAPS!!!!!!
+ }
+ }
+}
+
+// internal node
+else {
+ for (int j = 0; j < node.getNumberOfChildren(); j++) {
+ if (node.getNumberOfChildren() == j + 1
+ || less(key, children[j + 1].getKey()))
+ return searchRemove(children[j].getNext(), key, val, treeHeight - 1);
+ }
+}
+return null;
+}
+
+public Value findReplace(Key key, Value val){
+return searchReplace(root, key, val, height);
+}
+
+private Value searchReplace(Node node, Key key, Value val, int treeHeight) {
+Entry[] children = node.getChildrenArray();
+
+// external node
+if (treeHeight == 0) {
+ for (int j = 0; j < node.getNumberOfChildren(); j++) {
+ if (equal(key, children[j].getKey())) {
+ children[j].setValue(val);
+ return val;
+ }
+ }
+}
+
+// internal node
+else {
+ for (int j = 0; j < node.getNumberOfChildren(); j++) {
+ if (node.getNumberOfChildren() == j + 1
+ || less(key, children[j + 1].getKey()))
+ return searchReplace(children[j].getNext(), key, val, treeHeight - 1);
+ }
+}
+return null;
+}
+
+*/
\ No newline at end of file
diff --git a/basic/btree/Entry.java b/basic/btree/Entry.java
new file mode 100644
index 0000000..df6d68c
--- /dev/null
+++ b/basic/btree/Entry.java
@@ -0,0 +1,60 @@
+package basic.btree;
+
+/**
+ * Entry in a node. Internal nodes only use key and next while External Nodes
+ * use key and value
+ *
+ * @author cgavidia
+ *
+ * @param <Key>
+ * Type of the Search Key
+ * @param <Value>
+ * Type of the Value Stored
+ */
+public class Entry<Key extends Comparable<Key>, Value> {
+
+ private Key key;
+ private Value value;
+ private Node next; // Helper field to iterate over array entries
+ //--for space calculation--beginning
+ public static final int key_size=4;
+ public static final int value_size=4;
+ public static final int pointer_size=4;
+ //--for space calculation--end
+
+ public Entry(Key key, Value value, Node next) {
+ this.key = key;
+ this.value = value;
+ this.next = next;
+ }
+
+ public Key getKey() {
+ return key;
+ }
+
+ public void setKey(Key key) {
+ this.key = key;
+ }
+
+ public Value getValue() {
+ return value;
+ }
+
+ public void setValue(Value value) {
+ this.value = value;
+ }
+
+ public Node getNext() {
+ return next;
+ }
+
+ public void setNext(Node next) {
+ this.next = next;
+ }
+
+ @Override
+ public String toString() {
+ return "(Key: " + key + " Value: " + value + ")";
+ }
+
+}
\ No newline at end of file
diff --git a/basic/btree/Node.java b/basic/btree/Node.java
new file mode 100644
index 0000000..07b3a00
--- /dev/null
+++ b/basic/btree/Node.java
@@ -0,0 +1,54 @@
+package basic.btree;
+import java.util.ArrayList;
+
+/**
+ * B-Tree Node data type
+ *
+ * @author cgavidia
+ *
+ */
+@SuppressWarnings("unchecked")
+public class Node<Key extends Comparable<Key>, Value> {
+
+ private int numberOfChildren;
+ private Entry[] childrenArray = new Entry[BTree.M];
+// private ArrayList> childrenArray=new ArrayList<>(BTree.M);
+
+ /**
+ * Creates a node with k children
+ *
+ * @param k
+ * number of children
+ */
+ public Node(int k) {
+ numberOfChildren = k;
+ }
+
+ public Entry[] getChildrenArray() {
+ return childrenArray;
+ }
+
+ public void setChildrenArray(Entry[] children) {
+ this.childrenArray = children;
+ }
+
+ public int getNumberOfChildren() {
+ return numberOfChildren;
+ }
+
+ public void setNumberOfChildren(int childrenNumber) {
+ this.numberOfChildren = childrenNumber;
+ }
+
+ @Override
+ public String toString() {
+ String result = "{ ";
+ for (int i = 0; i < numberOfChildren; i++) {
+ Entry entry = childrenArray[i];
+ result = result + entry.toString() + ", ";
+ }
+ result = result + " }";
+ return result;
+ }
+
+}
\ No newline at end of file
diff --git a/basic/btree/OBTree.java b/basic/btree/OBTree.java
new file mode 100644
index 0000000..bd579d5
--- /dev/null
+++ b/basic/btree/OBTree.java
@@ -0,0 +1,132 @@
+package basic.btree;
+
+import java.io.*;
+import java.util.*;
+import java.util.concurrent.*;
+
+import basic.util.DataRetriever;
+import basic.storage_model.TBAT;
+
+/**
+ * OBTree use Long oid, and Long offset to insert into BTree
+ */
+@SuppressWarnings("unchecked")
+public class OBTree extends BTree<Long, Long> {
+ long total_inserts;
+// public OBTree(){
+// super();
+// key_max=key_min=0L;//in OBTree initially both max and min keys (oid) are 0
+// }
+
+ public long loadUpdateFile(String update_file_name) throws IOException{//if file read had timestamp use this
+ this.total_inserts=0;
+ long off=1;
+ String a;
+ long b;
+ long oid;
+ Scanner reads = new Scanner(new File(update_file_name));
+ while (reads.hasNext()) {
+ a = reads.next(); // read OID
+ b = reads.nextLong(); // read VALUE
+ a = a.substring(0, a.length() - 1); // removing the comma that was auto-generated
+ oid = Long.parseLong(a); // placing that number into a variable
+ if (get(oid) != null) {
+ findReplace(oid, off);
+ } else {
+ put(oid, off);
+ total_inserts++;
+ }// end of if-else
+ off++;
+ }
+ reads.close();
+ return total_inserts;
+ }
+
+ /**
+ * load update the appendix of an updated file into a new BTree
+ */
+ public OBTree loadAppendixIntoOBTree(String update_file_name) throws IOException{
+ OBTree appendix_tree = new OBTree();
+ appendix_tree.loadUpdateFile(update_file_name);//was: new OBTree().loadAppendixIntoOBTree(...) which recurses forever (StackOverflowError); now mirrors OBTreeInt.loadAppendixIntoOBTree
+ return appendix_tree;
+ }
+
+ /**
+ *
+ * @param update_file_name
+ * @param line_width
+ * @param start_line_num >=1
+ * @param end_line_num >=1
+ * @return
+ * @throws IOException
+ */
+ public long loadAppendixRangeIntoOBTree(String update_file_name, int line_width, long start_line_num, long end_line_num) throws IOException{
+ total_inserts=0;
+ long current_line_num=start_line_num;
+ BufferedReader input_file=new BufferedReader(new FileReader(update_file_name));
+ input_file.skip((start_line_num-1)*line_width);//skip first start_line_num - 1 lines
+ String current_line, a, b;
+ long oid;
+ long off=current_line_num;//offset starts with current line num in the update file
+ while((current_line=input_file.readLine())!=null && current_line_num <= end_line_num){
+ //only take the 1st part of "oid, val" after split and convert to long oid
+ oid=Long.parseLong(current_line.split(",")[0].trim());
+ if(get(oid)!=null){//if this oid already exists in obtree
+ findReplace(oid,off);//replace with new offset
+ } else {//o.w. insert this new oid
+ put(oid, off);
+ total_inserts++;
+ }
+ off++;
+ current_line_num++;
+ }
+ input_file.close();
+ return total_inserts;
+ }
+
+ public long getTotal_inserts(){return total_inserts;}
+
+ public long searchKey(long oid){
+ Long offset=get(oid);
+ if(offset!=null){
+ return offset;
+ }else{
+ return DataRetriever.NOT_FOUND;
+ }
+ }
+
+ /**
+ * obtree selection experiment using a selection file
+ * @param tbat_file_name
+ * @param select_file_name
+ * @param num_lines_body
+ * @param tbat_line_length
+ * @param search_value if true the searching for value by offset will be used
+ * @throws IOException
+ */
+ public void searchSelectionFile(String tbat_file_name, String select_file_name, long num_lines_body, int tbat_line_length, boolean search_value) throws IOException{
+ BufferedReader select_file=new BufferedReader(new FileReader(select_file_name));
+ RandomAccessFile tbat_file=new RandomAccessFile(new File(tbat_file_name), "r");
+ String str;
+ long target_oid;
+ long offset;
+ long value;
+ while((str=select_file.readLine())!=null && str.length()!=0){
+ target_oid=Long.parseLong(str);
+ offset=searchKey(target_oid);
+ if(search_value) {
+ if (offset != DataRetriever.NOT_FOUND) {
+ value = TBAT.searchAppendixByOffSet(tbat_file, num_lines_body, tbat_line_length, offset, 2);//in a tbat, value is at 2 (3rd position in one line)
+ //out.println("***found in obtree: oid="+target_oid+" | value="+value);
+ } else {
+ value = TBAT.selectTBAT_body(tbat_file,num_lines_body,tbat_line_length,target_oid);
+ }
+ }
+ }
+ tbat_file.close();
+ select_file.close();
+ }
+
+}
+
+
+
+
diff --git a/basic/btree/OBTreeInt.java b/basic/btree/OBTreeInt.java
new file mode 100644
index 0000000..a77825b
--- /dev/null
+++ b/basic/btree/OBTreeInt.java
@@ -0,0 +1,84 @@
+package basic.btree;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Scanner;
+
+@SuppressWarnings("unchecked")
+public class OBTreeInt extends BTree<Integer, Integer> {
+ public int total_inserts=0;
+ public int loadUpdateFile(String update_file_name) throws IOException{//if file read had timestamp use this
+ int OFF=1;
+ String a;
+ int b;
+ int valueOfA;
+ Scanner reads = new Scanner(new File(update_file_name));
+ while (reads.hasNext()) {
+ a = reads.next(); // read OID
+ b = reads.nextInt(); // read VALUE
+ a = a.substring(0, a.length() - 1); // removing the comma that was auto-generated
+ valueOfA = Integer.parseInt(a); // placing that number into a variable
+ if (get(valueOfA) != null) {
+ findReplace(valueOfA, OFF);
+ } else {
+ put(valueOfA, OFF);
+ total_inserts++;
+ }// end of if-else
+ OFF++;
+ }
+ reads.close();
+ return total_inserts;
+ }
+
+ /**
+ * load update the appendix of an updated file into a new BTree
+ */
+ public OBTreeInt loadAppendixIntoOBTree(String update_file_name) throws IOException{
+ OBTreeInt appendixBTree = new OBTreeInt();
+ int OFF=1;
+ String a;
+ int b;
+ int valueOfA;
+ Scanner reads = new Scanner(new File(update_file_name));
+ while (reads.hasNext()) {
+ a = reads.next(); // read OID
+ b = reads.nextInt(); // read VALUE
+ a = a.substring(0, a.length() - 1); // removing the comma that was auto-generated
+ valueOfA = Integer.parseInt(a); // placing that number into a variable
+ if (appendixBTree.get(valueOfA) != null) {
+ appendixBTree.findReplace(valueOfA, OFF);
+ } else {
+ appendixBTree.put(valueOfA, OFF);
+ }// end of if-else
+ OFF++;
+ }
+ reads.close();
+ return appendixBTree;
+ }
+
+}
+
+//public int bulkLoadUpdateFile2(String update_file_name) throws FileNotFoundException{
+//Scanner reads = new Scanner(new File(update_file_name));// reader for update file
+//Integer offset = 1; // offset starts from 1
+//String line; // buffer for reach each line
+//String[] line_vector; // tokenized line
+//Integer oid; // each oid
+//while (reads.hasNext()) {
+// line = reads.nextLine();
+// line_vector = line.split(",");
+// oid = Integer.parseInt(line_vector[1].trim());
+// if (findReplace((Key)oid, (Value) offset) != null) {
+//// System.out.println("\nKey " + oid + " already exists. Update offset to " + offset + ".");
+// } else {
+//// System.out.println("A new key: " + oid + " \t\tInserting at offset: " + offset + ".");
+// put((Key) oid, (Value) offset);
+// }
+// offset++;
+//}
+//reads.close();
+//this.total_inserts=offset.intValue()-1;
+//return total_inserts;
+//}
+
+
diff --git a/basic/external_sorting/ExternalSort.java b/basic/external_sorting/ExternalSort.java
new file mode 100644
index 0000000..19fce3b
--- /dev/null
+++ b/basic/external_sorting/ExternalSort.java
@@ -0,0 +1,681 @@
+package basic.external_sorting;
+
+
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.EOFException;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.PriorityQueue;
+import java.util.zip.GZIPInputStream;
+import java.util.zip.GZIPOutputStream;
+import java.util.zip.Deflater;
+
+/**
+ * reference: http://grepcode.com/file/repo1.maven.org/maven2/com.google.code.externalsortinginjava/externalsortinginjava/0.1.8/com/google/code/externalsorting/ExternalSort.java
+ *
+* Goal: offer a generic external-memory sorting program in Java.
+*
+* It must be : - hackable (easy to adapt) - scalable to large files - sensibly
+* efficient.
+*
+* This software is in the public domain.
+*
+* Usage: java com/google/code/external_sorting/ExternalSort somefile.txt out.txt
+*
+* You can change the default maximal number of temporary files with the -t
+* flag: java com/google/code/external_sorting/ExternalSort somefile.txt out.txt
+* -t 3
+*
+* For very large files, you might want to use an appropriate flag to allocate
+* more memory to the Java VM: java -Xms2G
+* com/google/code/external_sorting/ExternalSort somefile.txt out.txt
+*
+* By (in alphabetical order) Philippe Beaudoin, Eleftherios Chetzakis, Jon
+* Elsas, Christan Grant, Daniel Haran, Daniel Lemire, Sugumaran Harikrishnan,
+* Jerry Yang, First published: April 2010 originally posted at
+* http://lemire.me/blog/archives/2010/04/01/external-memory-sorting-in-java/
+*/
+public class ExternalSort {
+
+ /*
+ * This sorts a file (input) to an output file (output) using
+ * default parameters
+ *
+ * @param file
+ * source file
+ *
+ * @param file
+ * output file
+ *
+ */
+ public static void sort(File input, File output) throws IOException {
+ ExternalSort.mergeSortedFiles(ExternalSort.sortInBatch(input),output);
+ }
+
+
+
+ static int DEFAULTMAXTEMPFILES = 1024;
+
+ // we divide the file into small blocks. If the blocks
+ // are too small, we shall create too many temporary files.
+ // If they are too big, we shall be using too much memory.
+ public static long estimateBestSizeOfBlocks(File filetobesorted,
+ int maxtmpfiles) {
+ long sizeoffile = filetobesorted.length() * 2;
+ /**
+ * We multiply by two because later on someone insisted on
+ * counting the memory usage as 2 bytes per character. By this
+ * model, loading a file with 1 character will use 2 bytes.
+ */
+ // we don't want to open up much more than maxtmpfiles temporary
+ // files, better run
+ // out of memory first.
+ long blocksize = sizeoffile / maxtmpfiles
+ + (sizeoffile % maxtmpfiles == 0 ? 0 : 1);
+
+ // on the other hand, we don't want to create many temporary
+ // files
+ // for naught. If blocksize is smaller than half the free
+ // memory, grow it.
+ long freemem = Runtime.getRuntime().freeMemory();
+ if (blocksize < freemem / 2) {
+ blocksize = freemem / 2;
+ }
+ return blocksize;
+ }
+
+
+ /**
+ * This will simply load the file by blocks of lines, then sort them
+ * in-memory, and write the result to temporary files that have to be
+ * merged later.
+ *
+ * @param file
+ * some flat file
+ * @param cmp
+ * string comparator
+ * @return a list of temporary flat files
+ */
+ public static List<File> sortInBatch(File file)
+ throws IOException {
+ return sortInBatch(file, defaultcomparator, DEFAULTMAXTEMPFILES,
+ Charset.defaultCharset(), null, false);
+ }
+ /**
+ * This will simply load the file by blocks of lines, then sort them
+ * in-memory, and write the result to temporary files that have to be
+ * merged later.
+ *
+ * @param file
+ * some flat file
+ * @param cmp
+ * string comparator
+ * @return a list of temporary flat files
+ */
+ public static List<File> sortInBatch(File file, Comparator<String> cmp)
+ throws IOException {
+ return sortInBatch(file, cmp, DEFAULTMAXTEMPFILES,
+ Charset.defaultCharset(), null, false);
+ }
+
+ /**
+ * This will simply load the file by blocks of lines, then sort them
+ * in-memory, and write the result to temporary files that have to be
+ * merged later.
+ *
+ * @param file
+ * some flat file
+ * @param cmp
+ * string comparator
+ * @param distinct
+ * Pass <code>true</code> if duplicate lines should be
+ * discarded.
+ * @return a list of temporary flat files
+ */
+ public static List<File> sortInBatch(File file, Comparator<String> cmp,
+ boolean distinct) throws IOException {
+ return sortInBatch(file, cmp, DEFAULTMAXTEMPFILES,
+ Charset.defaultCharset(), null, distinct);
+ }
+
+ /**
+ * This will simply load the file by blocks of lines, then sort them
+ * in-memory, and write the result to temporary files that have to be
+ * merged later. You can specify a bound on the number of temporary
+ * files that will be created.
+ *
+ * @param file
+ * some flat file
+ * @param cmp
+ * string comparator
+ * @param maxtmpfiles
+ * maximal number of temporary files
+ * @param Charset
+ * character set to use (can use
+ * Charset.defaultCharset())
+ * @param tmpdirectory
+ * location of the temporary files (set to null for
+ * default location)
+ * @param distinct
+ * Pass <code>true</code> if duplicate lines should be
+ * discarded.
+ * @param numHeader
+ * number of lines to preclude before sorting starts
+ * @param usegzip use gzip compression for the temporary files
+ * @return a list of temporary flat files
+ */
+ public static List<File> sortInBatch(File file, Comparator<String> cmp,
+ int maxtmpfiles, Charset cs, File tmpdirectory,
+ boolean distinct, int numHeader, boolean usegzip)
+ throws IOException {
+ List<File> files = new ArrayList<File>();
+ BufferedReader fbr = new BufferedReader(new InputStreamReader(
+ new FileInputStream(file), cs));
+ long blocksize = estimateBestSizeOfBlocks(file, maxtmpfiles);// in
+ // bytes
+
+ try {
+ List<String> tmplist = new ArrayList<String>();
+ String line = "";
+ try {
+ int counter = 0;
+ while (line != null) {
+ long currentblocksize = 0;// in bytes
+ while ((currentblocksize < blocksize)
+ && ((line = fbr.readLine()) != null)) {
+ // as long as you have enough memory
+ if (counter < numHeader) {
+ counter++;
+ continue;
+ }
+ tmplist.add(line);
+ // ram usage estimation, not
+ // very accurate, still more
+ // realistic that the simple 2 *
+ // String.length
+ currentblocksize += StringSizeEstimator
+ .estimatedSizeOf(line);
+ }
+ files.add(sortAndSave(tmplist, cmp, cs,
+ tmpdirectory, distinct, usegzip));
+ tmplist.clear();
+ }
+ } catch (EOFException oef) {
+ if (tmplist.size() > 0) {
+ files.add(sortAndSave(tmplist, cmp, cs,
+ tmpdirectory, distinct, usegzip));
+ tmplist.clear();
+ }
+ }
+ } finally {
+ fbr.close();
+ }
+ return files;
+ }
+
+
+ /**
+ * This will simply load the file by blocks of lines, then sort them
+ * in-memory, and write the result to temporary files that have to be
+ * merged later. You can specify a bound on the number of temporary
+ * files that will be created.
+ *
+ * @param file
+ * some flat file
+ * @param cmp
+ * string comparator
+ * @param maxtmpfiles
+ * maximal number of temporary files
+ * @param Charset
+ * character set to use (can use
+ * Charset.defaultCharset())
+ * @param tmpdirectory
+ * location of the temporary files (set to null for
+ * default location)
+ * @param distinct
+ * Pass <code>true</code> if duplicate lines should be
+ * discarded.
+ * @return a list of temporary flat files
+ */
+ public static List<File> sortInBatch(File file, Comparator<String> cmp,
+ int maxtmpfiles, Charset cs, File tmpdirectory, boolean distinct)
+ throws IOException {
+ return sortInBatch(file, cmp, maxtmpfiles, cs, tmpdirectory,
+ distinct, 0, false);
+ }
+
+ /**
+ * Sort a list and save it to a temporary file
+ *
+ * @return the file containing the sorted data
+ * @param tmplist
+ * data to be sorted
+ * @param cmp
+ * string comparator
+ * @param cs
+ * charset to use for output (can use
+ * Charset.defaultCharset())
+ * @param tmpdirectory
+ * location of the temporary files (set to null for
+ * default location)
+ * @param distinct
+ * Pass <code>true</code> if duplicate lines should be
+ * discarded.
+ */
+ public static File sortAndSave(List<String> tmplist,
+ Comparator<String> cmp, Charset cs, File tmpdirectory,
+ boolean distinct, boolean usegzip) throws IOException {
+ Collections.sort(tmplist, cmp);
+ File newtmpfile = File.createTempFile("sortInBatch",
+ "flatfile", tmpdirectory);
+ newtmpfile.deleteOnExit();
+ OutputStream out = new FileOutputStream(newtmpfile);
+ int ZIPBUFFERSIZE = 2048;
+ if (usegzip)
+ out = new GZIPOutputStream(out, ZIPBUFFERSIZE) {
+ {
+ def.setLevel(Deflater.BEST_SPEED);
+ }
+ };
+ BufferedWriter fbw = new BufferedWriter(new OutputStreamWriter(
+ out, cs));
+ String lastLine = null;
+ try {
+ for (String r : tmplist) {
+ // Skip duplicate lines
+ if (!distinct || !r.equals(lastLine)) {
+ fbw.write(r);
+ fbw.newLine();
+ lastLine = r;
+ }
+ }
+ } finally {
+ fbw.close();
+ }
+ return newtmpfile;
+ }
+
+ /**
+ * Sort a list and save it to a temporary file
+ *
+ * @return the file containing the sorted data
+ * @param tmplist
+ * data to be sorted
+ * @param cmp
+ * string comparator
+ * @param cs
+ * charset to use for output (can use
+ * Charset.defaultCharset())
+ * @param tmpdirectory
+ * location of the temporary files (set to null for
+ * default location)
+ */
+ public static File sortAndSave(List<String> tmplist,
+ Comparator<String> cmp, Charset cs, File tmpdirectory)
+ throws IOException {
+ return sortAndSave(tmplist, cmp, cs, tmpdirectory, false, false);
+ }
+ /**
+ * This merges a bunch of temporary flat files
+ *
+ * @param files
+ * @param output
+ * file
+ * @return The number of lines sorted. (P. Beaudoin)
+ */
+ public static int mergeSortedFiles(List<File> files, File outputfile) throws IOException {
+ return mergeSortedFiles(files, outputfile, defaultcomparator,
+ Charset.defaultCharset());
+ }
+ /**
+ * This merges a bunch of temporary flat files
+ *
+ * @param files
+ * @param output
+ * file
+ * @return The number of lines sorted. (P. Beaudoin)
+ */
+ public static int mergeSortedFiles(List<File> files, File outputfile,
+ final Comparator<String> cmp) throws IOException {
+ return mergeSortedFiles(files, outputfile, cmp,
+ Charset.defaultCharset());
+ }
+
+ /**
+ * This merges a bunch of temporary flat files
+ *
+ * @param files
+ * @param output
+ * file
+ * @return The number of lines sorted. (P. Beaudoin)
+ */
+ public static int mergeSortedFiles(List files, File outputfile,
+ final Comparator cmp, boolean distinct)
+ throws IOException {
+ return mergeSortedFiles(files, outputfile, cmp,
+ Charset.defaultCharset(), distinct);
+ }
+
+ /**
+ * This merges a bunch of temporary flat files
+ *
+ * @param files
+ * The {@link List} of sorted {@link File}s to be merged.
+	 * @param cs
+	 *                The character set used to load the strings.
+ * @param distinct
+ * Pass true
if duplicate lines should be
+ * discarded. (elchetz@gmail.com)
+ * @param outputfile
+ * The output {@link File} to merge the results to.
+ * @param cmp
+ * The {@link Comparator} to use to compare
+ * {@link String}s.
+ * @param cs
+ * The {@link Charset} to be used for the byte to
+ * character conversion.
+ * @param append
+ * Pass true
if result should append to
+ * {@link File} instead of overwrite. Default to be false
+ * for overloading methods.
+ * @param usegzip
+ * assumes we used gzip compression for temporary files
+ * @return The number of lines sorted. (P. Beaudoin)
+ * @since v0.1.4
+ */
+ public static int mergeSortedFiles(List files, File outputfile,
+ final Comparator cmp, Charset cs, boolean distinct,
+ boolean append, boolean usegzip) throws IOException {
+ PriorityQueue pq = new PriorityQueue(
+ 11, new Comparator() {
+ @Override
+ public int compare(BinaryFileBuffer i,
+ BinaryFileBuffer j) {
+ return cmp.compare(i.peek(), j.peek());
+ }
+ });
+ ArrayList bfbs = new ArrayList();
+ for (File f : files) {
+ final int BUFFERSIZE = 2048;
+ InputStream in = new FileInputStream(f);
+ BufferedReader br;
+ if (usegzip) {
+ br = new BufferedReader(new InputStreamReader(
+ new GZIPInputStream(in, BUFFERSIZE), cs));
+ } else {
+ br = new BufferedReader(new InputStreamReader(in,
+ cs));
+ }
+
+ BinaryFileBuffer bfb = new BinaryFileBuffer(br);
+ bfbs.add(bfb);
+ }
+ BufferedWriter fbw = new BufferedWriter(new OutputStreamWriter(
+ new FileOutputStream(outputfile, append), cs));
+ int rowcounter = merge(fbw,cmp,distinct, bfbs);
+ for (File f : files) f.delete();
+ return rowcounter;
+ }
+ /**
+ * This merges several BinaryFileBuffer to an output writer.
+ *
+ * @param BufferedWriter
+ * A buffer where we write the data.
+ * @param cmp
+ * A comparator object that tells us how to sort the lines.
+ * @param distinct
+ * Pass true
if duplicate lines should be
+ * discarded. (elchetz@gmail.com)
+ * @param buffers
+ * Where the data should be read.
+ * @return The number of lines sorted. (P. Beaudoin)
+ *
+ */
+ public static int merge(BufferedWriter fbw, final Comparator cmp, boolean distinct, List buffers) throws IOException {
+ PriorityQueue pq = new PriorityQueue(
+ 11, new Comparator() {
+ @Override
+ public int compare(BinaryFileBuffer i,
+ BinaryFileBuffer j) {
+ return cmp.compare(i.peek(), j.peek());
+ }
+ });
+ for (BinaryFileBuffer bfb: buffers)
+ if(!bfb.empty())
+ pq.add(bfb);
+ int rowcounter = 0;
+ String lastLine = null;
+ try {
+ while (pq.size() > 0) {
+ BinaryFileBuffer bfb = pq.poll();
+ String r = bfb.pop();
+ // Skip duplicate lines
+ if (!distinct || !r.equals(lastLine)) {
+ fbw.write(r);
+ fbw.newLine();
+ lastLine = r;
+ }
+ ++rowcounter;
+ if (bfb.empty()) {
+ bfb.fbr.close();
+ } else {
+ pq.add(bfb); // add it back
+ }
+ }
+ } finally {
+ fbw.close();
+ for (BinaryFileBuffer bfb : pq)
+ bfb.close();
+ }
+ return rowcounter;
+
+ }
+
+ /**
+ * This merges a bunch of temporary flat files
+ *
+ * @param files
+ * The {@link List} of sorted {@link File}s to be merged.
+	 * @param cs
+	 *                The character set used to load the strings.
+ * @param distinct
+ * Pass true
if duplicate lines should be
+ * discarded. (elchetz@gmail.com)
+ * @param outputfile
+ * The output {@link File} to merge the results to.
+ * @param cmp
+ * The {@link Comparator} to use to compare
+ * {@link String}s.
+ * @param cs
+ * The {@link Charset} to be used for the byte to
+ * character conversion.
+ * @return The number of lines sorted. (P. Beaudoin)
+ * @since v0.1.2
+ */
+ public static int mergeSortedFiles(List files, File outputfile,
+ final Comparator cmp, Charset cs, boolean distinct)
+ throws IOException {
+ return mergeSortedFiles(files, outputfile, cmp, cs, distinct,
+ false, false);
+ }
+
+ /**
+ * This merges a bunch of temporary flat files
+ *
+ * @param files
+ * @param output
+ * file
+	 * @param cs
+	 *                The character set used to load the strings.
+ * @return The number of lines sorted. (P. Beaudoin)
+ */
+ public static int mergeSortedFiles(List files, File outputfile,
+ final Comparator cmp, Charset cs) throws IOException {
+ return mergeSortedFiles(files, outputfile, cmp, cs, false);
+ }
+
+ public static void displayUsage() {
+ System.out
+ .println("java com.google.external_sorting.ExternalSort inputfile outputfile");
+ System.out.println("Flags are:");
+ System.out.println("-v or --verbose: verbose output");
+ System.out.println("-d or --distinct: prune duplicate lines");
+ System.out
+ .println("-t or --maxtmpfiles (followed by an integer): specify an upper bound on the number of temporary files");
+ System.out
+ .println("-c or --charset (followed by a charset code): specify the character set to use (for sorting)");
+ System.out
+ .println("-z or --gzip: use compression for the temporary files");
+ System.out
+ .println("-H or --header (followed by an integer): ignore the first few lines");
+ System.out
+ .println("-s or --store (following by a path): where to store the temporary files");
+ System.out.println("-h or --help: display this message");
+ }
+
+ public static void main(String[] args) throws IOException {
+ boolean verbose = false;
+ boolean distinct = false;
+ int maxtmpfiles = DEFAULTMAXTEMPFILES;
+ Charset cs = Charset.defaultCharset();
+ String inputfile = null, outputfile = null;
+ File tempFileStore = null;
+ boolean usegzip = false;
+ int headersize = 0;
+ for (int param = 0; param < args.length; ++param) {
+ if (args[param].equals("-v")
+ || args[param].equals("--verbose")) {
+ verbose = true;
+ } else if ((args[param].equals("-h") || args[param]
+ .equals("--help"))) {
+ displayUsage();
+ return;
+ } else if ((args[param].equals("-d") || args[param]
+ .equals("--distinct"))) {
+ distinct = true;
+ } else if ((args[param].equals("-t") || args[param]
+ .equals("--maxtmpfiles"))
+ && args.length > param + 1) {
+ param++;
+ maxtmpfiles = Integer.parseInt(args[param]);
+ if (headersize < 0) {
+ System.err
+ .println("maxtmpfiles should be positive");
+ }
+ } else if ((args[param].equals("-c") || args[param]
+ .equals("--charset"))
+ && args.length > param + 1) {
+ param++;
+ cs = Charset.forName(args[param]);
+ } else if ((args[param].equals("-z") || args[param]
+ .equals("--gzip"))) {
+ usegzip = true;
+ } else if ((args[param].equals("-H") || args[param]
+ .equals("--header")) && args.length > param + 1) {
+ param++;
+ headersize = Integer.parseInt(args[param]);
+ if (headersize < 0) {
+ System.err
+ .println("headersize should be positive");
+ }
+ } else if ((args[param].equals("-s") || args[param]
+ .equals("--store")) && args.length > param + 1) {
+ param++;
+ tempFileStore = new File(args[param]);
+ } else {
+ if (inputfile == null)
+ inputfile = args[param];
+ else if (outputfile == null)
+ outputfile = args[param];
+ else
+ System.out.println("Unparsed: "
+ + args[param]);
+ }
+ }
+ if (outputfile == null) {
+ System.out
+ .println("please provide input and output file names");
+ displayUsage();
+ return;
+ }
+ Comparator comparator = defaultcomparator;
+ List l = sortInBatch(new File(inputfile), comparator,
+ maxtmpfiles, cs, tempFileStore, distinct, headersize,
+ usegzip);
+ if (verbose)
+ System.out
+ .println("created " + l.size() + " tmp files");
+ mergeSortedFiles(l, new File(outputfile), comparator, cs,
+ distinct, false, usegzip);
+ }
+
+ public static Comparator defaultcomparator = new Comparator() {
+ @Override
+ public int compare(String r1, String r2) {
+ return r1.compareTo(r2);
+ }
+ };
+
+}
+
+
/**
 * Wraps a BufferedReader with a one-line lookahead (peek/pop) so that sorted
 * runs can be merged through a priority queue keyed on each run's next line.
 */
class BinaryFileBuffer {
	public BufferedReader fbr; // underlying reader; exposed so merge() can close it
	private String cache;      // the next line, or null once exhausted
	private boolean empty;     // true when no more lines are available

	public BinaryFileBuffer(BufferedReader r)
			throws IOException {
		this.fbr = r;
		reload(); // prime the lookahead
	}

	/** @return true when the reader has no more lines */
	public boolean empty() {
		return this.empty;
	}

	/** Advances the lookahead by one line; marks the buffer empty at EOF. */
	private void reload() throws IOException {
		try {
			if ((this.cache = this.fbr.readLine()) == null) {
				this.empty = true;
				this.cache = null;
			} else {
				this.empty = false;
			}
		} catch (EOFException oef) {
			// Some stream types report end-of-data by throwing rather than
			// returning null; treat that the same as a clean EOF.
			this.empty = true;
			this.cache = null;
		}
	}

	/** Closes the underlying reader. */
	public void close() throws IOException {
		this.fbr.close();
	}

	/** @return the next line without consuming it, or null when exhausted */
	public String peek() {
		if (empty())
			return null;
		return this.cache; // cache already is a String; no defensive copy needed
	}

	/** @return the next line, consuming it and advancing the lookahead */
	public String pop() throws IOException {
		String answer = peek();
		reload();
		return answer;
	}

}
+
diff --git a/basic/external_sorting/StringSizeEstimator.java b/basic/external_sorting/StringSizeEstimator.java
new file mode 100644
index 0000000..83b6c15
--- /dev/null
+++ b/basic/external_sorting/StringSizeEstimator.java
@@ -0,0 +1,66 @@
+package basic.external_sorting;
+
+
+/**
+ * reference: http://grepcode.com/file_/repo1.maven.org/maven2/com.google.code.externalsortinginjava/externalsortinginjava/0.1.8/com/google/code/externalsorting/StringSizeEstimator.java/?v=source
+ *
+ * @author Eleftherios Chetzakis
+ *
+ */
/**
 * Rough, conservative estimator of the in-memory footprint of a String.
 * Deliberately over-estimates when in doubt (e.g. assumes a 64-bit JVM),
 * which is safe for sizing sort batches.
 *
 * @author Eleftherios Chetzakis
 */
public final class StringSizeEstimator {

	// Assume 64-bit unless "sun.arch.data.model" explicitly says 32.
	private static final boolean IS_64_BIT_JVM = detectIs64Bit();
	private static final int OBJ_HEADER = IS_64_BIT_JVM ? 16 : 8;
	private static final int ARR_HEADER = IS_64_BIT_JVM ? 24 : 12;
	private static final int INT_FIELDS = 12;
	private static final int OBJ_REF = IS_64_BIT_JVM ? 8 : 4;
	// Fixed per-String overhead: object header + int fields + char[] ref + array header.
	private static final int OBJ_OVERHEAD = OBJ_HEADER + INT_FIELDS + OBJ_REF + ARR_HEADER;

	/** Utility class; never instantiated. */
	private StringSizeEstimator() {
	}

	/**
	 * Detects the JVM word size via the "sun.arch.data.model" system property.
	 * The property is not guaranteed on every JVM; when absent we assume
	 * 64-bit, which at worst over-counts a few bytes per string.
	 */
	private static boolean detectIs64Bit() {
		String arch = System.getProperty("sun.arch.data.model");
		if (arch != null && arch.indexOf("32") != -1) {
			return false; // explicitly reported as a 32-bit data model
		}
		return true;
	}

	/**
	 * Estimates the size of a {@link String} object in bytes
	 * (two bytes per char plus the fixed object overhead).
	 *
	 * @param s The string to estimate memory footprint.
	 * @return The estimated size in bytes.
	 */
	public static long estimatedSizeOf(String s) {
		return (s.length() * 2) + OBJ_OVERHEAD;
	}

}
diff --git a/basic/storage_model/BAT.java b/basic/storage_model/BAT.java
new file mode 100644
index 0000000..6bed05a
--- /dev/null
+++ b/basic/storage_model/BAT.java
@@ -0,0 +1,32 @@
+package basic.storage_model;
+/**
+ * Created by fyu on 11/1/16.
+ */
+import basic.util.DataRetriever;
+
+import java.io.*;
+
+public class BAT {
+ public static final String bat_format = "%10d,%10d\n";
+
+ public static void searchSelectFile(String bat_file_name, String select_file_name, long num_lines_body, int bat_line_length) throws IOException{
+ BufferedReader select_file=new BufferedReader(new FileReader(select_file_name));
+ String str;
+ long target_oid;
+ long offset;
+ long value;
+ while((str=select_file.readLine())!=null && str.length()!=0) {
+ target_oid = Long.parseLong(str);
+ value=selectBAT(bat_file_name, num_lines_body, bat_line_length, target_oid);
+ }
+ select_file.close();
+ }
+
+ public static long selectBAT(String file_name,long num_lines, int line_length,long target_oid) throws IOException{
+ int oid_position=0;
+ RandomAccessFile file=new RandomAccessFile(new File(file_name), "r");
+ long value= DataRetriever.binarySearchValue(file, num_lines, line_length, oid_position, target_oid);
+ file.close();
+ return value;
+ }
+}
diff --git a/basic/storage_model/BUN.java b/basic/storage_model/BUN.java
new file mode 100644
index 0000000..2bb787f
--- /dev/null
+++ b/basic/storage_model/BUN.java
@@ -0,0 +1,13 @@
+package basic.storage_model;
+
/**
 * A BAT unit: an (oid, value) pair.
 *
 * @param <T> type of the stored value
 */
public class BUN<T> {
	public int oid; // object id
	public T value; // payload

	public BUN(int oid, T value) {
		this.oid = oid;
		this.value = value;
	}

	@Override
	public String toString() {
		return "(" + oid + "," + value + ")";
	}
}
diff --git a/basic/storage_model/BUNL.java b/basic/storage_model/BUNL.java
new file mode 100644
index 0000000..aeacab1
--- /dev/null
+++ b/basic/storage_model/BUNL.java
@@ -0,0 +1,16 @@
+package basic.storage_model;
+
+/**
+ * use long for oid
+ */
/**
 * Same as BUN but uses a long oid, for data sets with more than
 * Integer.MAX_VALUE rows.
 *
 * @param <T> type of the stored value
 */
public class BUNL<T> {
	public long oid; // object id (64-bit)
	public T value;  // payload

	public BUNL(long oid, T value) {
		this.oid = oid;
		this.value = value;
	}

	@Override
	public String toString() {
		return "(" + oid + "," + value + ")";
	}
}
diff --git a/basic/storage_model/TBAT.java b/basic/storage_model/TBAT.java
new file mode 100644
index 0000000..00fe5f4
--- /dev/null
+++ b/basic/storage_model/TBAT.java
@@ -0,0 +1,207 @@
+package basic.storage_model;
+
+import basic.util.DataRetriever;
+import basic.btree.OBTree;
+
+
+import java.io.*;
+import java.util.ArrayList;
+import static java.lang.System.out;
+
+/**
+ * Created by fyu on 11/1/16.
+ */
public class TBAT {
    // One TBAT line: timestamp string, oid (width 10), value (width 10), newline.
    public static final String tbat_format = "%s,%10d,%10d\n";

    /**
     * search in appendix by offset
     * file is the updated tbat file
     * offset must start from 1!!!
     *
     * Seeks directly to appendix line (offset + num_lines_body - 1) using the
     * fixed line width, parses the requested CSV field as a long, and resets
     * the file pointer to 0 before returning. Returns NO_VALUE when the seek
     * lands past end-of-file (readLine() yields null).
     */
    public static long searchAppendixByOffSet(RandomAccessFile file, long num_lines_body, int line_length,long offset, int value_position) throws IOException {
        long value= DataRetriever.NO_VALUE;
        file.seek((offset + num_lines_body - 1) * line_length);
        String line = file.readLine();
        if(line!=null) {
            //out.println("searchAppendixByOffSet:"+line);
            value = Long.parseLong(line.split(",")[value_position].trim());
        }
        file.seek(0); // rewind so the handle can be reused by the caller
        return value;
    }


    /**
     * same method
     * save file handler open time
     *
     * Variant of selectTBAT_body(String,...) that reuses an already-open
     * RandomAccessFile, avoiding a per-lookup open/close.
     */
    public static long selectTBAT_body(RandomAccessFile file, long num_lines_body, int line_length, long target_oid) throws IOException{
        long value;
        int oid_position=1; // in a TBAT line the oid is the second field
        value= DataRetriever.binarySearchValue(file, num_lines_body, line_length, oid_position, target_oid);
        return value;
    }


    /**
     * fyu
     * search only the body of a TBAT using binary search, regardless of the appendix
     * used for searching in combination with btree (which stores data in the appendix)
     */
    public static long selectTBAT_body(String file_name, long num_lines_body, int line_length, long target_oid) throws IOException{
        long value;
        int oid_position=1;
        RandomAccessFile file=new RandomAccessFile(new File(file_name), "r");
        value= DataRetriever.binarySearchValue(file, num_lines_body, line_length, oid_position, target_oid);
        file.close();
        return value;
    }

    /**
     * use select file
     *
     * Benchmark-style driver: looks up every oid listed in the selection file
     * against the TBAT body; results are discarded.
     */
    public static void selectTBAT_body(String tbat_file_name, String select_file_name, long num_lines_body, int line_length) throws IOException{
        BufferedReader select_file=new BufferedReader(new FileReader(select_file_name));
        String str;
        long target_oid;
        long offset; // unused; kept as in the original
        long value;
        while((str=select_file.readLine())!=null && str.length()!=0) {
            target_oid = Long.parseLong(str);
            value=selectTBAT_body(tbat_file_name,num_lines_body,line_length,target_oid);
        }
        select_file.close();
    }

    /**
     * Benchmark-style driver over an uncleaned TBAT (appendix not yet merged):
     * looks up every oid in the selection file via selectTBAT_Uncleaned.
     */
    public static void selectTBAT_Uncleaned(String tbat_file_name, String select_file_name, long num_lines_body, int line_length) throws IOException{
        BufferedReader select_file=new BufferedReader(new FileReader(select_file_name));
        String str;
        long target_oid;
        long offset; // unused; kept as in the original
        long value;
        while((str=select_file.readLine())!=null && str.length()!=0) {
            target_oid = Long.parseLong(str);
            value=selectTBAT_Uncleaned(tbat_file_name, num_lines_body, line_length, target_oid);
        }
        select_file.close();
    }


    /**
     * Looks up target_oid in an uncleaned TBAT: first scans the appendix
     * (lines after the body) linearly, and only if the oid is not found there
     * falls back to a binary search of the sorted body.
     */
    public static long selectTBAT_Uncleaned(String tbat_file_name, long num_lines_body, int line_length, long target_oid) throws IOException{
        long value=0;
        int oid_position=1;
        BufferedReader append_reader=new BufferedReader(new FileReader(tbat_file_name));
        value=searchAppendedFile(append_reader, num_lines_body, line_length, oid_position, target_oid);
        append_reader.close();
        if(value== DataRetriever.NOT_FOUND){
            // not updated in the appendix: the body holds the current value
            RandomAccessFile file=new RandomAccessFile(new File(tbat_file_name), "r");
            value= DataRetriever.binarySearchValue(file, num_lines_body, line_length, oid_position, target_oid);
            file.close();
        }
        return value;
    }

    /**
     * Variant that skips the appendix scan and searches the body only.
     */
    public static long selectTBAT_Uncleaned2(String file_name, int num_lines_body, int line_length, int target_oid) throws IOException{
        long value=0;
        int oid_position=1;
        RandomAccessFile file=new RandomAccessFile(new File(file_name), "r");
        value= DataRetriever.binarySearchValue(file, num_lines_body, line_length, oid_position, target_oid);
        file.close();

        return value;
    }


    /**
     * @param oid_position the position of the oid (for tbat =1, for bat=0)
     *
     * Linearly scans the appendix (everything after the first
     * num_lines_body * line_length characters) and returns the value of the
     * LAST matching line, i.e. the most recent update for target_oid;
     * NOT_FOUND if the oid never appears.
     */
    public static long searchAppendedFile(BufferedReader append_reader, long num_lines_body, int line_length,
                                          int oid_position, long target_oid) throws IOException{

        append_reader.skip((num_lines_body)*line_length);
        // skip the body of the updated tbat file, only read the appended part at the end
        String current_line;
        int temp_oid;
        long temp_value; // unused; kept as in the original
        long value= DataRetriever.NOT_FOUND;

        while((current_line=append_reader.readLine())!=null){
            // NOTE(review): oid and value are parsed as int although
            // target_oid is a long — overflows for oids > Integer.MAX_VALUE;
            // confirm against the data generators before relying on long oids.
            temp_oid=Integer.parseInt(current_line.split(",")[oid_position].trim());
            if(temp_oid==target_oid){
                value=Integer.parseInt(current_line.split(",")[oid_position+1].trim());
            }
        }
        return value;
    }

    /**
     * Looks up each oid in the selection file using the OB-Tree index:
     * the index maps oid -> appendix offset; on an index hit the value is
     * read directly from the appendix, otherwise the sorted body is
     * binary-searched. Results are discarded (benchmark-style driver).
     */
    public static void searchWithOBTree(OBTree obtree, String tbat_file_name, String select_file_name, long num_lines_body, int tbat_line_length) throws IOException{
        BufferedReader select_file=new BufferedReader(new FileReader(select_file_name));
        RandomAccessFile tbat_file=new RandomAccessFile(new File(tbat_file_name), "r");//open
        String str;
        long offset;
        long target_oid;
        long value;
        while((str=select_file.readLine())!=null && str.length()!=0) {
            target_oid = Long.parseLong(str);
            offset=obtree.searchKey(target_oid);
            if(offset!=DataRetriever.NOT_FOUND){
                value= TBAT.searchAppendixByOffSet(tbat_file, num_lines_body, tbat_line_length, offset, 2);//in a tbat, value is at 2 (3rd position in one line)
            }else{
                value= TBAT.selectTBAT_body(tbat_file_name, num_lines_body, tbat_line_length, target_oid);
            }
        }
        tbat_file.close();
        select_file.close();
    }


    /*method for Eric Jones Thesis--begin*/
    /**
     * select the value of the target oid
     * given a tbat file and a list of split appendix files
     *
     * Scans each split appendix file in order and returns the first hit;
     * falls back to a binary search of the TBAT body when no appendix
     * contains the oid.
     */
    public static long selectTBAT_Uncleaned_Split(String tbat_file_name,
                                                  ArrayList appendix_file_names,
                                                  int num_lines_body, int line_length, int target_oid)
            throws IOException{
        long value= DataRetriever.NOT_FOUND;
        int oid_position=1;
        if(!appendix_file_names.isEmpty()){
            for(String appendix_file_name:appendix_file_names){
                BufferedReader append_reader=new BufferedReader(new FileReader(appendix_file_name));
                //no line needs to be skipped in split appendix files
                value=searchAppendedFile(append_reader, 0, line_length, oid_position, target_oid);
                append_reader.close();
                if(value!= DataRetriever.NOT_FOUND) return value;
            }
        }

        RandomAccessFile file=new RandomAccessFile(new File(tbat_file_name), "r");
        value= DataRetriever.binarySearchValue(file, num_lines_body, line_length, oid_position, target_oid);
        file.close();

        return value;
    }

    /**
     * Appendix-only variant of the split lookup.
     */
    public static long selectTBAT_Uncleaned_Split2(ArrayList appendix_file_names,
                                                   int num_lines_body, int line_length, int target_oid) throws IOException{
        long value = DataRetriever.NOT_FOUND;
        //int oid_position=1;
        if(!appendix_file_names.isEmpty()){
            for(String appendix_file_name:appendix_file_names){
                RandomAccessFile appendix_file = new RandomAccessFile(appendix_file_name, "r");
                // NOTE(review): target_oid is passed into searchAppendixByOffSet's
                // "offset" parameter, i.e. the oid is treated as a line position.
                // This only works when oid == appendix line number — confirm intent.
                value = searchAppendixByOffSet(appendix_file, 0, line_length,
                        target_oid, 1);
                if(value!= DataRetriever.NOT_FOUND) return value;

            }
        }
        return value;
    }
    /*Method by Eric Jones for Thesis--end*/

}
diff --git a/basic/storage_model/TBUN.java b/basic/storage_model/TBUN.java
new file mode 100644
index 0000000..fb1f1df
--- /dev/null
+++ b/basic/storage_model/TBUN.java
@@ -0,0 +1,30 @@
+package basic.storage_model;
+
+
+
+public class TBUN extends BUN implements Comparable {
+ public long timestamp;
+ public static final String tbat_format = "%s,%10d,%10d";
+ public TBUN(long timestamp, int oid, T value) {
+ super(oid, value);
+ this.timestamp=timestamp;
+ }
+
+ public String toString(){
+ String timestampstr=String.format("%d", timestamp);
+ if(timestampstr.length()>=8){
+ timestampstr=timestampstr.substring(timestampstr.length()-8,timestampstr.length());
+ }
+ return String.format(tbat_format, timestampstr, oid, value);
+ }
+
+ public int compareTo(TBUN tbun2){
+ int diff_oid=oid-tbun2.oid;
+ if(diff_oid!=0){
+ return diff_oid;
+ }else{
+ return (int)(timestamp-tbun2.timestamp);
+ }
+ }
+
+}
diff --git a/basic/storage_model/TBUNL.java b/basic/storage_model/TBUNL.java
new file mode 100644
index 0000000..7e316b8
--- /dev/null
+++ b/basic/storage_model/TBUNL.java
@@ -0,0 +1,35 @@
+package basic.storage_model;
+
+/**
+ * same as TBUN but use Long for oid
+ */
+
+public class TBUNLextends BUNL implements Comparable {
+ public long timestamp;
+ public static final String tbat_format = "%s,%10d,%10d";
+ public TBUNL(long timestamp, long oid, T value) {
+ super(oid, value);
+ this.timestamp=timestamp;
+ }
+
+ public String toString(){
+ String timestampstr=String.format("%d", timestamp);
+ if(timestampstr.length()>=8){
+ timestampstr=timestampstr.substring(timestampstr.length()-8,timestampstr.length());
+ }
+ return String.format(tbat_format, timestampstr, oid, value);
+ }
+
+ /**
+ * it's only for sorting, no need to get the actual comparing difference
+ */
+ public int compareTo(TBUNL tbun2){
+ long diff_oid=oid-tbun2.oid;
+ if(diff_oid!=0){
+ return (int)diff_oid;
+ }else{
+ return (int)(timestamp-tbun2.timestamp);
+ }
+ }
+
+}
diff --git a/basic/util/BasicTools.java b/basic/util/BasicTools.java
new file mode 100644
index 0000000..286d09b
--- /dev/null
+++ b/basic/util/BasicTools.java
@@ -0,0 +1,80 @@
+package basic.util;
+import java.io.*;
+import java.lang.instrument.Instrumentation;
+
+public class BasicTools {
+ public static void copyFile(String file_in_name, String file_out_name) throws IOException{
+ BufferedReader br = new BufferedReader(new FileReader(file_in_name));
+ BufferedWriter bw = new BufferedWriter(new FileWriter(file_out_name));
+ int i;
+ do {
+ i = br.read();
+ if (i != -1) {
+ if (Character.isLowerCase((char) i))
+ bw.write(Character.toUpperCase((char) i));
+ else if (Character.isUpperCase((char) i))
+ bw.write(Character.toLowerCase((char) i));
+ else
+ bw.write((char) i);
+ }
+ } while (i != -1);
+ br.close();
+ bw.close();
+ }
+
+ private static Instrumentation instrumentation;
+
+ public static void premain(String args, Instrumentation inst) {
+ instrumentation = inst;
+ }
+
+ public static long getObjectSize(Object o) {
+ return instrumentation.getObjectSize(o);
+ }
+
+
+ /**
+ * reference:https://www.cs.cmu.edu/~adamchik/15-121/lectures/Sorting%20Algorithms/code/MergeSort.java
+ */
+ public static void mergeSort(Comparable [ ] a)
+ {
+ Comparable[] tmp = new Comparable[a.length];
+ mergeSort(a, tmp, 0, a.length - 1);
+ }
+
+ private static void mergeSort(Comparable [ ] a, Comparable [ ] tmp, int left, int right)
+ {
+ if( left < right )
+ {
+ int center = (left + right) / 2;
+ mergeSort(a, tmp, left, center);
+ mergeSort(a, tmp, center + 1, right);
+ merge(a, tmp, left, center + 1, right);
+ }
+ }
+
+ private static void merge(Comparable[ ] a, Comparable[ ] tmp, int left, int right, int rightEnd )
+ {
+ int leftEnd = right - 1;
+ int k = left;
+ int num = rightEnd - left + 1;
+
+ while(left <= leftEnd && right <= rightEnd)
+ if(a[left].compareTo(a[right]) <= 0)
+ tmp[k++] = a[left++];
+ else
+ tmp[k++] = a[right++];
+
+ while(left <= leftEnd) // Copy rest of first half
+ tmp[k++] = a[left++];
+
+ while(right <= rightEnd) // Copy rest of right half
+ tmp[k++] = a[right++];
+
+ // Copy tmp back
+ for(int i = 0; i < num; i++, rightEnd--)
+ a[rightEnd] = tmp[rightEnd];
+ }
+
+
+}
diff --git a/basic/util/DataCreator.java b/basic/util/DataCreator.java
new file mode 100644
index 0000000..f394ac8
--- /dev/null
+++ b/basic/util/DataCreator.java
@@ -0,0 +1,399 @@
+package basic.util;
+
+import basic.storage_model.BAT;
+import basic.storage_model.TBAT;
+
+import java.io.*;
+import java.util.*;
+
+
+
+public class DataCreator {
+
+ public static void prepareData(long num_lines, String bat_file_name,
+ String tbat_file_name) throws IOException{
+
+ PrintWriter bat_file= new PrintWriter(new FileWriter(bat_file_name));
+ PrintWriter tbat_file = new PrintWriter(new FileWriter(tbat_file_name));
+
+ String bat_str="";
+ String tbat_str="";
+ String timestampstr="";
+
+ timestampstr=String.format("%d", System.currentTimeMillis());
+ timestampstr=timestampstr.substring(timestampstr.length()-8,timestampstr.length());
+
+ for(long i=0;i update_list=makeUpdateList(per, num_lines);
+ for(Integer current_line : update_list){
+ update_file.format(BAT.bat_format, (int)current_line, -1);
+ }
+ update_file.close();
+ }
+
+ /**
+ * update value is same as line number
+ */
+ public static void prepareUpdateList3(double per, long num_lines,
+ String update_file_name) throws IOException {
+
+ PrintWriter update_file=new PrintWriter(new FileWriter(update_file_name));
+ List update_list=makeUpdateList(per, num_lines);
+ for(Long current_line : update_list){
+ update_file.format(BAT.bat_format, (long)current_line, (long)current_line);
+ }
+ update_file.close();
+ }
+
+ /**
+ * version 4 creates TBAT.tbat_formate update list
+ * allow duplicated update values
+ * the update value increases from 1
+ */
+ public static void prepareUpdateList4(double per, int num_lines,
+ String update_file_name) throws IOException {
+ PrintWriter update_file=new PrintWriter(new FileWriter(update_file_name));
+ int update_num_lines=(int)(per*num_lines);
+ int total_updated=0;
+ int current_line=1;
+ int update_value=1;
+ String timestampstr;
+ Random rand=new Random();
+ while(total_updated < update_num_lines){
+ timestampstr=String.format("%d", System.currentTimeMillis());
+ timestampstr=timestampstr.substring(timestampstr.length()-8,timestampstr.length());
+ current_line=rand.nextInt(num_lines)+1;
+ update_file.format(TBAT.tbat_format, timestampstr, current_line, update_value++);
+ total_updated++;
+ }
+ update_file.close();
+ }
+
+ /**
+ * version 4.1
+ * num_lines use long
+ */
+ public static void prepareUpdateList41(double per, long num_lines,
+ String update_file_name) throws IOException {
+ PrintWriter update_file=new PrintWriter(new FileWriter(update_file_name));
+ long update_num_lines=(long)(per*num_lines);
+ long total_updated=0;
+ long current_line=1;
+ long update_value=1;
+// String timestampstr;
+ Random rand=new Random();
+ while(total_updated < update_num_lines){
+// timestampstr=String.format("%d", System.currentTimeMillis());
+// timestampstr=timestampstr.substring(timestampstr.length()-8,timestampstr.length());
+ current_line=(long)(rand.nextDouble()*num_lines)+1L;
+ update_file.format(BAT.bat_format, current_line, update_value++);
+ total_updated++;
+ }
+ update_file.close();
+ }
+
+ /**
+ * doesn't need to shuffle a list randomly
+ * we just need to make a file
+ * for oid =1 to num_lines
+ * each time there is a random double probability p generated
+ * if p<=per
+ * then write this oid with the update value++
+ */
+ public static void prepareUpdateList5(double per, long num_lines,
+ String update_file_name) throws IOException {
+ PrintWriter update_file=new PrintWriter(new FileWriter(update_file_name));
+ long update_num_lines=(long)(per*num_lines);
+ long total_updated=0;
+ long current_line=1;
+ long update_value=1;
+// String timestampstr;
+ Random rand=new Random();
+ double p;//random probability
+ while(total_updated < update_num_lines){
+ p=rand.nextDouble();
+ if(p<=per){
+ update_file.format(BAT.bat_format, current_line, update_value++);
+ total_updated++;
+ }
+ if(current_line++>=num_lines)
+ current_line=1;
+ }
+ update_file.close();
+ }
+
+ static List makeList(int begin, int end){
+ List list=new ArrayList(end-begin+1);
+ for (int i=begin;i<=end;i++){
+ list.add(i);
+ }
+ return list;
+ }
+
+ static List makeListLong(long begin, long end){
+ List list=new ArrayList();
+ for (long i=begin;i<=end;i++){
+ list.add(i);
+ }
+ return list;
+ }
+
+ /**
+ * the update list doesn't need to be sorted 2014-10-02
+ */
+ public static List makeUpdateList(double per, int num_lines){
+ List list=makeList(1,num_lines);
+ Collections.shuffle(list);
+ int update_num_lines=(int)(per*num_lines);
+ List update_list_sorted=list.subList(0, update_num_lines);
+// Collections.sort(update_list_sorted);
+ return update_list_sorted;
+ }
+
+ /**
+ * the update list doesn't need to be sorted 2014-10-02
+ */
+ public static List makeUpdateList(double per, long num_lines){
+ List list=makeListLong(1,num_lines);
+ Collections.shuffle(list);
+ long update_num_lines=(long)(per*num_lines);
+ List update_list_sorted=list.subList(0, (int)update_num_lines);
+// Collections.sort(update_list_sorted);
+ return update_list_sorted;
+ }
+
+ /**
+ * works only for a small selection file
+ */
+ public static void prepareSelectionFile(String output_file_name, double sel_per, long num_lines) throws IOException{
+ PrintWriter output_file=new PrintWriter(new BufferedWriter(new FileWriter(output_file_name)));
+ List list=DataCreator.makeUpdateList(sel_per, num_lines);
+ for(long oid:list){
+ output_file.println(oid+"");
+ }
+ output_file.close();
+ }
+
+ /**
+ * produce selection of large size
+ * same as prepareUpdateList5 use a probability menor
+ */
+ public static void prepareSelectionFile5(String output_file_name, double sel_per, long num_lines) throws IOException{
+ PrintWriter output_file=new PrintWriter(new BufferedWriter(new FileWriter(output_file_name)));
+// List list=DataCreator.makeUpdateList(sel_per, num_lines);
+// for(long oid:list){
+// output_file.println(oid+"");
+// }
+ long sel_num_lines=(long)(sel_per*num_lines);//total selection number
+ long total_selected=0;
+ long current_line=1;
+ Random rand=new Random();
+ double p;//random probability
+ while(total_selected < sel_num_lines){
+ p=rand.nextDouble();
+ if(p<=sel_per){
+ output_file.format("%d\n",current_line);
+ total_selected++;
+ }
+ if(current_line++>=num_lines)
+ current_line=1;
+ }
+ output_file.close();
+ }
+
+ public static List loadSelectionFile(String input_file_name) throws IOException{
+ BufferedReader file_in=new BufferedReader(new FileReader(input_file_name));
+ List list=new ArrayList();
+ String line="";
+ while((line=file_in.readLine())!=null){
+ list.add(Integer.parseInt(line.trim()));
+ }
+ file_in.close();
+ return list;
+ }
+
+
+ /**
+ *
+ * @param update_file_name
+ * @param appendix_file_prefix
+ * @param appendix_block_size
+ * @return number of appendix files returned
+ * @throws IOException
+ */
+ public static int creaetTBATAppendix(String update_file_name, String appendix_file_prefix, int appendix_block_size) throws IOException{
+ if(appendix_block_size==0){
+ throw new IOException("appendix_block_size is zero!");
+ }
+ BufferedReader update_file_in =new BufferedReader(new FileReader(update_file_name));
+ ArrayList update_lines=new ArrayList();//buffer of update file
+
+ //read update file to buffer
+ String line="";
+ ArrayList split_buffer=new ArrayList();//buffer of to split the update file buffer
+ int appendix_file_index=1;
+ int split_buffer_count=0;
+ String timestampstr="";
+ long current_time_mills=System.currentTimeMillis();
+
+ while((line = update_file_in.readLine()) != null){
+ split_buffer.add(line);
+ if(++split_buffer_count % appendix_block_size == 0){
+ timestampstr=String.format("%d", current_time_mills++);
+ timestampstr=timestampstr.substring(timestampstr.length()-8,timestampstr.length());
+ saveStringBufferToFile(appendix_file_prefix+"_"+(appendix_file_index++)+".txt",split_buffer,timestampstr);
+ split_buffer.clear();
+ }
+ }
+
+ //dump the rest of update file
+ if(!split_buffer.isEmpty()){
+ timestampstr=String.format("%d", current_time_mills++);
+ timestampstr=timestampstr.substring(timestampstr.length()-8,timestampstr.length());
+ saveStringBufferToFile(appendix_file_prefix+"_"+(appendix_file_index++)+".txt",split_buffer,timestampstr);
+ }
+ update_file_in.close();
+ return appendix_file_index--;
+ }
+
+ public static void saveStringBufferToFile(String output_file_name, ArrayList buffer, String timestampstr) throws IOException{
+ PrintWriter output_file=new PrintWriter(new BufferedWriter(new FileWriter(output_file_name)));
+ for(String line:buffer){
+ output_file.println(timestampstr+","+line);
+ }
+ output_file.close();
+ }
+
+
+// /**
+// * devide the appendix files into a given number of split files
+// */
+// public static void creaetTBATAppendix2(String update_file_name, String appendix_file_prefix,
+// int appendix_num) throws IOException{
+// if(appendix_num==0){
+// throw new IOException("appendix_num is zero!");
+// }
+//
+// BufferedReader update_file_in =new BufferedReader(new FileReader(update_file_name));
+// ArrayList update_lines=new ArrayList();//buffer of update file
+//
+//
+// //read update file to buffer
+// String line="";
+// ArrayList split_buffer=new ArrayList();//buffer of to split the update file buffer
+// int appendix_file_index=1;
+// int split_buffer_count=0;
+// String timestampstr="";
+// long current_time_mills=System.currentTimeMillis();
+//
+// while((line = update_file_in.readLine()) != null){
+// split_buffer.add(line);
+// if(++split_buffer_count % appendix_block_size == 0){
+// timestampstr=String.format("%d", current_time_mills++);
+// timestampstr=timestampstr.substring(timestampstr.length()-8,timestampstr.length());
+// saveStringBufferToFile(appendix_file_prefix+"_"+(appendix_file_index++)+".txt",split_buffer,timestampstr);
+// split_buffer.clear();
+// }
+// }
+//
+// //dump the rest of update file
+// if(!split_buffer.isEmpty()){
+// timestampstr=String.format("%d", current_time_mills++);
+// timestampstr=timestampstr.substring(timestampstr.length()-8,timestampstr.length());
+// saveStringBufferToFile(appendix_file_prefix+"_"+(appendix_file_index++)+".txt",split_buffer,timestampstr);
+// }
+// update_file_in.close();
+// }
+
+
+}
+
diff --git a/basic/util/DataRetriever.java b/basic/util/DataRetriever.java
new file mode 100644
index 0000000..8f32d49
--- /dev/null
+++ b/basic/util/DataRetriever.java
@@ -0,0 +1,115 @@
+package basic.util;
+
+import javax.xml.crypto.Data;
+import java.io.*;
+import java.util.ArrayList;
+import static java.lang.System.out;
+
+public class DataRetriever {
+// public static int DEFAULT_BUFFER_SIZE=8192;
+
+ /**
+ *
+ * select the value of the target oid
+ * given a tbat with appendix appended at the end of the tbat file
+ *
+ * @param file_name
+ * @param num_lines_body
+ * @param line_length
+ * @param oid_position the position of the oid (for tbat =1, for bat=1)
+ * @param target_oid
+ * @return target_value
+ * @throws IOException
+ */
+
+ //public static final int NOT_FOUND=Integer.MIN_VALUE;
+ //public static final long NOT_FOUND=Long.MIN_VALUE;
+ public static final long NOT_FOUND=-1;
+ public static final long NO_VALUE=-9999;
+
+
+ /**
+  * binary search for oid in the body of tbat (not the appended part)
+  * Assumes fixed-length lines sorted ascending by oid.
+  * NOTE(review): despite the name, this returns the matched oid itself
+  * (== target_oid) when found, not the value column -- confirm with callers.
+  * @param oid_position the position of the oid (for tbat =1, for bat=1)
+  * @return the matched oid, or NOT_FOUND when target_oid is absent
+  */
+ public static long binarySearchValue(RandomAccessFile file, long num_lines_body, int line_length,
+ int oid_position, long target_oid) throws IOException{
+ long low=0;
+ long high=num_lines_body-1;
+ long mid, oid_mid;
+ String bat_current_line;
+
+ while(low<=high){
+ mid=low+(high-low)/2;//overflow-safe midpoint
+ file.seek(mid*line_length);//fixed line length => direct seek to row mid
+ bat_current_line=file.readLine();
+ oid_mid=Long.parseLong(bat_current_line.split(",")[oid_position].trim());
+ if(oid_mid == target_oid){
+ return oid_mid;
+ }else if(oid_mid < target_oid) low=mid+1;
+ else high=mid-1;
+ }
+ System.out.println("Not found");
+ return NOT_FOUND;//was Integer.MIN_VALUE, inconsistent with the declared sentinel
+ }
+
+
+ /**
+  * Return the byte length of one line of the file, including the trailing
+  * '\n' (BAT/TBAT files are assumed to use fixed-length lines).
+  * The file handle is now closed even if readLine throws.
+  * NOTE(review): an empty file makes readLine return null and this NPEs --
+  * confirm callers never pass empty files.
+  */
+ public static int getLineLength(String file_name) throws IOException {
+ RandomAccessFile randomReader=new RandomAccessFile(new File(file_name),"r");
+ try{
+ String first_line=randomReader.readLine();
+ return first_line.length()+1;//include '\n'
+ }finally{
+ randomReader.close();//close even on exception to avoid a handle leak
+ }
+ }
+
+ /**
+  * Count the lines of a file by scanning raw bytes for '\n'.
+  * A final line that lacks a trailing newline is still counted; an empty
+  * file yields 0.
+  * reference: http://stackoverflow.com/questions/453018/number-of-lines-in-a-file-in-java
+  */
+ public static int getFileLineNumber(String file_name) throws IOException {
+ InputStream in = new BufferedInputStream(new FileInputStream(file_name));
+ try {
+ byte[] chunk = new byte[1024];
+ int lines = 0;
+ boolean lastChunkEndsMidLine = false;
+ for (int n = in.read(chunk); n != -1; n = in.read(chunk)) {
+ for (int i = 0; i < n; i++) {
+ if (chunk[i] == '\n') {
+ lines++;
+ }
+ }
+ lastChunkEndsMidLine = (chunk[n - 1] != '\n');
+ }
+ //count a trailing partial line exactly as the newline-terminated ones
+ return lastChunkEndsMidLine ? lines + 1 : lines;
+ } finally {
+ in.close();
+ }
+ }
+
+}
+
+
+
+// /**
+// * binary search TBAT body
+// * this method doesn't need line_length
+// */
+// public static long selectTBAT_body(String file_name,int num_lines_body, int target_oid) throws IOException{
+// long value=0;
+// int oid_position=1;
+// RandomAccessFile file=new RandomAccessFile(new File(file_name), "r");
+// String first_line=file.readLine();
+// file.seek(0);
+// int line_length=first_line.length()+1;//include '\n'
+// value=binarySearchValue(file, num_lines_body, line_length, oid_position, target_oid);
+// file.close();
+// return value;
+// }
diff --git a/basic/util/DataUpdator.java b/basic/util/DataUpdator.java
new file mode 100644
index 0000000..9d084f2
--- /dev/null
+++ b/basic/util/DataUpdator.java
@@ -0,0 +1,591 @@
+package basic.util;
+import java.io.*;
+import java.util.ArrayList;
+import java.util.Collections;
+
+import basic.storage_model.*;
+import basic.btree.Entry;
+import basic.btree.OBTree;
+public class DataUpdator {
+
+ public static int DEFAULT_BUFFER_SIZE=8192;
+
+ /**
+  * Append the update file to the end of the TBAT using the default read
+  * buffer size (DEFAULT_BUFFER_SIZE).
+  * @see the 3-arg overload for the actual work
+  */
+ public static void updateTBAT(String tbat_file_name,
+ String update_file_name) throws IOException{
+ updateTBAT(tbat_file_name, update_file_name, DEFAULT_BUFFER_SIZE);
+ }
+
+ /**
+  * append update file to the end of the TBAT
+  * Each appended line is prefixed with the last 8 digits of
+  * System.currentTimeMillis() as its timestamp column. Both streams are now
+  * closed even when an I/O error occurs (the old code leaked them).
+  */
+ public static void updateTBAT(String tbat_file_name,
+ String update_file_name, int buffer_size) throws IOException{
+ PrintWriter tbat_file_out = new PrintWriter(new FileWriter(tbat_file_name,true));
+ BufferedReader update_file_in =new BufferedReader(new FileReader(update_file_name), buffer_size);
+ try{
+ String timestampstr=String.format("%d", System.currentTimeMillis());
+ timestampstr=timestampstr.substring(timestampstr.length()-8,timestampstr.length());//keep last 8 digits
+ String line="";
+ while((line = update_file_in.readLine()) != null){
+ tbat_file_out.println(timestampstr+","+line);
+ }
+ }finally{
+ update_file_in.close();
+ tbat_file_out.close();
+ }
+ }
+
+ /**
+  * Update a BAT file in place, line by line (version 1, naive scan).
+  * For each update line, linearly scan the BAT for a matching oid and
+  * overwrite that fixed-length line via RandomAccessFile.
+  * NOTE(review): if an update oid is absent from the BAT, the BAT file
+  * pointer is left at EOF, so every later update line will also miss --
+  * confirm update oids are guaranteed to exist in the BAT.
+  */
+ public static void updateBAT1(String bat_file_name,
+ String update_file_name) throws IOException{
+ RandomAccessFile bat_file = new RandomAccessFile(new File(bat_file_name), "rw");
+ BufferedReader update_file_in =new BufferedReader(new FileReader(update_file_name));
+ int update_oid;
+ int bat_oid;
+ long current_pos=0;
+ String current_line="";
+ String update_line="";
+ //read in update file
+ while((update_line = update_file_in.readLine()) != null){
+ String[] tokens=update_line.split(",");
+ update_oid=Integer.parseInt(tokens[0].trim());
+ //update bat file according to update_oid
+ while((current_line=bat_file.readLine())!=null){
+ String[] tokens_bat=current_line.split(",");
+ bat_oid=Integer.parseInt(tokens_bat[0].trim());
+ if(bat_oid == update_oid){
+ current_pos=bat_file.getFilePointer();
+ //rewind to the start of the matched line (+1 covers the '\n') and overwrite it
+ bat_file.seek(current_pos-current_line.length()-1);
+ bat_file.writeBytes(update_line+"\n");
+ bat_file.seek(0);//back to top of bat file
+ break;
+ }
+ }
+ }
+ update_file_in.close();
+ bat_file.close();
+ }
+
+ /**
+  * faster than updateBAT1, v2 uses buffered reader to read bat_file, and use randomaccessfile only when writing
+  * after one line is updated, the buffered reader will seek(0)
+  * this version works in all cases, including the update list is not sorted according to oid
+  * Delegates to the 3-arg overload with DEFAULT_BUFFER_SIZE (8192).
+  */
+ public static void updateBAT2(String bat_file_name,
+ String update_file_name) throws IOException{
+ updateBAT2(bat_file_name, update_file_name, DEFAULT_BUFFER_SIZE);
+ }
+
+ /**
+  * default BufferedReader size is 8192
+  * this version can change the buffered reader size
+  * Fix: the reader re-created after each successful update now reuses the
+  * caller's buffer_size (the old code silently reverted to the default).
+  */
+ public static void updateBAT2(String bat_file_name,
+ String update_file_name, int buffer_size) throws IOException{
+ RandomAccessFile bat_file_writer = new RandomAccessFile(new File(bat_file_name), "rw");
+ FileInputStream bat_file_in=new FileInputStream(bat_file_name);
+ BufferedReader bat_file_reader=new BufferedReader(new InputStreamReader(bat_file_in), buffer_size);
+ BufferedReader update_file_in =new BufferedReader(new FileReader(update_file_name), buffer_size);
+
+ int update_oid;
+ int bat_oid;
+ String current_line="";
+ String update_line="";
+ //read in update file
+ while((update_line = update_file_in.readLine()) != null){
+ long current_line_num=1;
+ String[] tokens=update_line.split(",");
+ update_oid=Integer.parseInt(tokens[0].trim());
+
+ //update bat file according to update_oid
+ current_line = bat_file_reader.readLine();//read the 1st line of bat file
+ while(current_line != null){
+ String[] tokens_bat=current_line.split(",");
+ bat_oid=Integer.parseInt(tokens_bat[0].trim());
+
+ if(bat_oid == update_oid){
+ //overwrite the matched fixed-length line in place
+ bat_file_writer.seek((current_line_num-1)*(current_line.length()+1));
+ bat_file_writer.writeBytes(update_line+"\n");
+
+ //reset buffered reader to the beginning of bat file,
+ //keeping the caller-supplied buffer size
+ bat_file_in.getChannel().position(0);
+ bat_file_reader=new BufferedReader(new InputStreamReader(bat_file_in), buffer_size);
+ current_line_num=1;
+ break;
+ }
+ current_line_num++;
+ current_line=bat_file_reader.readLine();
+ }
+ }
+ update_file_in.close();
+ bat_file_in.close();
+ bat_file_reader.close();
+ bat_file_writer.close();
+ }
+
+ /**
+  * faster than updateBAT2, no need to seek(0) in bat_file when one line is updated.
+  * this version only works when the update list file is sorted.
+  * The reader keeps advancing across update lines, so both files must be
+  * sorted by oid for every update to be found; current_line_num tracks the
+  * 1-based line of the reader so the writer can seek to the same byte offset.
+  */
+ public static void updateBAT3(String bat_file_name,
+ String update_file_name) throws IOException{
+ RandomAccessFile bat_file_writer = new RandomAccessFile(new File(bat_file_name), "rw");
+ FileInputStream bat_file_in=new FileInputStream(bat_file_name);
+ BufferedReader bat_file_reader=new BufferedReader(new InputStreamReader(bat_file_in));
+ BufferedReader update_file_in =new BufferedReader(new FileReader(update_file_name));
+ int update_oid;
+ int bat_oid;
+ long current_pos=0;
+ long current_line_num=1;
+ String current_line="";
+ String update_line="";
+ //read in update file
+ while((update_line = update_file_in.readLine()) != null){
+ String[] tokens=update_line.split(",");
+ update_oid=Integer.parseInt(tokens[0].trim());
+
+ //update bat file according to update_oid
+ current_line = bat_file_reader.readLine();
+ while(current_line != null){
+ String[] tokens_bat=current_line.split(",");
+ bat_oid=Integer.parseInt(tokens_bat[0].trim());
+
+ if(bat_oid == update_oid){
+ //fixed-length lines: byte offset of line N is (N-1)*(len+1)
+ bat_file_writer.seek((current_line_num-1)*(current_line.length()+1));
+ bat_file_writer.writeBytes(update_line+"\n");
+ current_line_num++;
+
+ break;
+ }
+ current_line_num++;
+ current_line=bat_file_reader.readLine();
+ }
+ }
+ update_file_in.close();
+ bat_file_in.close();
+ bat_file_reader.close();
+ bat_file_writer.close();
+ }
+
+ /**
+  * update bat using binary search
+  * Assumption: the oids in the BAT file are sorted
+  * Both files are now closed even when an I/O error occurs (the old code
+  * never closed the RandomAccessFile at all).
+  */
+ public static void updateBAT_BinarySearch(String bat_file_name, String update_file_name) throws IOException{
+ RandomAccessFile bat_file = new RandomAccessFile(new File(bat_file_name), "rw");
+ BufferedReader update_file_in =new BufferedReader(new FileReader(update_file_name));
+ try{
+ int line_length=bat_file.readLine().length()+1;//include '\n'
+ bat_file.seek(0);
+ String update_line=null;
+ while((update_line = update_file_in.readLine()) != null){
+ binarySearchUpdateBAT(bat_file,line_length,update_line);
+ }
+ }finally{
+ update_file_in.close();
+ bat_file.close();//was leaked in the original version
+ }
+ }
+
+ /**
+  * binary search the BAT (sorted by oid, fixed-length lines) for the oid of
+  * update_line; on a hit, overwrite that line in place.
+  * @param bat_file open "rw" BAT file
+  * @param line_length byte length of one line including '\n'
+  * @param update_line replacement line, "oid,value..."
+  * @return 1-based line number of the updated line, or -1 when not found
+  * @throws IOException
+  */
+ public static int binarySearchUpdateBAT(RandomAccessFile bat_file, int line_length, String update_line) throws IOException{
+ int update_oid=Integer.parseInt(update_line.split(",")[0].trim());
+
+ int low=0;
+ //parenthesize before the int cast: casting length() alone truncates for files >= 2GB
+ int high=(int)(bat_file.length()/line_length)-1;
+ int mid, bat_oid_mid;
+ String bat_current_line;
+
+ while(low<=high){
+ mid=low+(high-low)/2;//overflow-safe midpoint
+ bat_file.seek((long)mid*line_length);//long math: avoid int overflow in the byte offset
+ bat_current_line=bat_file.readLine();
+ bat_oid_mid=Integer.parseInt(bat_current_line.split(",")[0].trim());
+ if(bat_oid_mid == update_oid){
+ //update this line
+ bat_file.seek((long)mid*line_length);
+ bat_file.writeBytes(update_line+"\n");
+ bat_file.seek(0);//reset file pointer after updating
+ return mid+1;//return this line number
+ }else if(bat_oid_mid < update_oid) low=mid+1;
+ else high=mid-1;
+ }
+ return -1;
+ }
+
+ /**
+  * binary search tbat file according to target tbun
+  * The TBAT body must be sorted by oid with fixed-length lines; on a hit the
+  * matching line is overwritten with tbun_target's string form.
+  * @return 0-based line number of the updated line, or -1 when not found
+  */
+ public static int binarySearchUpdateTBAT(RandomAccessFile tbat_file, int line_length, TBUN tbun_target) throws IOException{
+ int low=0;
+ //parenthesize before the int cast: casting length() alone truncates for files >= 2GB
+ int high=(int)(tbat_file.length()/line_length)-1;
+ int mid, tbat_oid_mid;
+ String tbat_current_line;
+
+ while(low<=high){
+ mid=low+(high-low)/2;//overflow-safe midpoint
+ tbat_file.seek((long)mid*line_length);//long math: avoid int overflow in the byte offset
+ tbat_current_line=tbat_file.readLine();
+ tbat_oid_mid=Integer.parseInt(tbat_current_line.split(",")[1].trim());//tbat oid index is 1!
+ if(tbat_oid_mid == tbun_target.oid){
+ //update this line
+ tbat_file.seek((long)mid*line_length);
+ tbat_file.writeBytes(tbun_target+"\n");
+ tbat_file.seek(0);//reset file pointer after updating
+ return mid;//return this line number
+ }else if(tbat_oid_mid < tbun_target.oid) low=mid+1;
+ else high=mid-1;
+ }
+ System.out.println("Not found "+tbun_target+" !");
+ return -1;
+ }
+
+ /**
+  * binary search tbat file according to target tbun
+  * high is the end of the tbat file (in our exp tbat=body, appendix=update list file).
+  * @param low the starting row number to begin binary search, low starts at 0
+  * @return line number where the tbun_target is found, or -1 when not found
+  */
+ public static int binarySearchUpdateTBAT(RandomAccessFile tbat_file, int line_length, TBUN tbun_target, int low) throws IOException{
+ //-1: high must be the last valid 0-based line index; the old value (the
+ //line count itself) could seek past EOF and NPE on readLine(). The cast
+ //is also parenthesized so length() is divided before truncating to int.
+ int high=(int)(tbat_file.length()/line_length)-1;
+ int mid, tbat_oid_mid;
+ String tbat_current_line;
+
+ while(low<=high){
+ mid=low+(high-low)/2;//overflow-safe midpoint
+ tbat_file.seek((long)mid*line_length);
+ tbat_current_line=tbat_file.readLine();
+ tbat_oid_mid=Integer.parseInt(tbat_current_line.split(",")[1].trim());//tbat oid index is 1!
+ if(tbat_oid_mid == tbun_target.oid){
+ //update this line
+ tbat_file.seek((long)mid*line_length);
+ tbat_file.writeBytes(tbun_target+"\n");
+ tbat_file.seek(0);//reset file pointer after updating
+ return mid;//return this line number
+ }else if(tbat_oid_mid < tbun_target.oid) low=mid+1;
+ else high=mid-1;
+ }
+ System.out.println("Not found "+tbun_target+" !");
+ return -1;
+ }
+
+ /**
+  * binary search tbat file according to target tbun
+  * high is the end of the tbat file (in our exp tbat=body, appendix=update list file).
+  * Each probe is counted and folded into countTotal.
+  * @param low the starting row number to begin binary search, low starts at 0
+  * @return line number where tbun_target was found; 0 when not found (kept
+  *         as 0, not -1, so callers can chain it as the next lower bound)
+  */
+ public static int binarySearchUpdateTBAT(RandomAccessFile tbat_file, int line_length, TBUN tbun_target, int low, DiskAccessCount countTotal) throws IOException{
+ //-1: high must be the last valid 0-based line index (the old value could
+ //seek past EOF and NPE); the cast is parenthesized so length() is divided
+ //before truncating to int.
+ int high=(int)(tbat_file.length()/line_length)-1;
+ int mid, tbat_oid_mid;
+ String tbat_current_line;
+
+ int count=0;//search round
+ while(low<=high){
+ count++;
+ mid=low+(high-low)/2;//overflow-safe midpoint
+ tbat_file.seek((long)mid*line_length);
+ tbat_current_line=tbat_file.readLine();
+ tbat_oid_mid=Integer.parseInt(tbat_current_line.split(",")[1].trim());//tbat oid index is 1!
+ if(tbat_oid_mid == tbun_target.oid){
+ //update this line
+ tbat_file.seek((long)mid*line_length);
+ tbat_file.writeBytes(tbun_target+"\n");
+ tbat_file.seek(0);//reset file pointer after updating
+ countTotal.disk_access_total+=count;
+ return mid;//return this line number
+ }else if(tbat_oid_mid < tbun_target.oid) low=mid+1;
+ else high=mid-1;
+ }
+ System.out.println("Not found "+tbun_target+" !");
+ countTotal.disk_access_total+=count;
+ return 0;
+ }
+
+ /**
+  * based on binarySearchUpdateTBAT
+  * use long numbers so TBATs beyond int range work correctly
+  * Fix: removed the (int) cast that truncated length() for files >= 2GB
+  * (it defeated the whole point of the long variant) and subtracted 1 so
+  * high is the last valid 0-based line index.
+  * @return line number updated, or 0 when not found (next search low bound)
+  */
+ public static long binarySearchUpdateTBAT_Long(RandomAccessFile tbat_file, int line_length, TBUNL tbun_target, long low, DiskAccessCount countTotal) throws IOException{
+ long high=tbat_file.length()/line_length-1;
+ long mid, tbat_oid_mid;
+ String tbat_current_line;
+
+ long count=0;//search round
+ while(low<=high){
+ count++;
+ mid=low+(high-low)/2;//overflow-safe midpoint
+ tbat_file.seek(mid*line_length);
+ tbat_current_line=tbat_file.readLine();
+ tbat_oid_mid=Long.parseLong(tbat_current_line.split(",")[1].trim());//tbat oid index is 1!
+ if(tbat_oid_mid == tbun_target.oid){
+ //update this line
+ tbat_file.seek(mid*line_length);
+ tbat_file.writeBytes(tbun_target+"\n");
+ tbat_file.seek(0);//reset file pointer after updating
+ countTotal.disk_access_total+=count;
+ return mid;//return this line number
+ }else if(tbat_oid_mid < tbun_target.oid) low=mid+1;
+ else high=mid-1;
+ }
+ System.out.println("Not found "+tbun_target+" !");
+ countTotal.disk_access_total+=count;
+ return 0;//0 (not -1) so callers can chain it as the next low bound
+ }
+
+ /**
+  * Sort-merge an appendix/update file into a TBAT body: load all update
+  * rows into memory as TBUNs, sort by oid, then binary-search-update each
+  * row in the TBAT file.
+  * @param oid_position the place of oid in the line for appendix =1, for normal update file =0
+  */
+ public static void sortMergeFileToTBAT(String tbat_file_name, String appendix_file_name, int oid_position) throws IOException{
+ BufferedReader appendix_file_in =new BufferedReader(new FileReader(appendix_file_name));
+ String line="";
+ ArrayList<TBUN> buffer=new ArrayList<TBUN>(1000);
+
+ System.out.println("load buffer");
+ if(oid_position==1){//for appendix file: line is "timestamp,oid,value"
+ while((line=appendix_file_in.readLine())!=null){
+ String[] tbun_fields=line.split(",");
+ long timestamp=Long.parseLong(tbun_fields[0].trim());
+ int oid=Integer.parseInt(tbun_fields[1].trim());
+ int value=Integer.parseInt(tbun_fields[2].trim());
+ buffer.add(new TBUN(timestamp,oid,value));
+ }
+ }else{//for normal update file: line is "oid,value", stamped with current time
+ long timestamp=System.currentTimeMillis();
+ while((line=appendix_file_in.readLine())!=null){
+ String[] tbun_fields=line.split(",");
+ int oid=Integer.parseInt(tbun_fields[0].trim());
+ int value=Integer.parseInt(tbun_fields[1].trim());
+ buffer.add(new TBUN(timestamp,oid,value));
+ }
+ }
+
+ System.out.println("buffer size:"+buffer.size());
+
+ appendix_file_in.close();
+
+ System.out.println("sorting buffer");
+ Collections.sort(buffer);//a modified merge sort
+
+ System.out.println("binarySearchUpdateTBAT");
+ RandomAccessFile tbat_file = new RandomAccessFile(new File(tbat_file_name), "rw");
+ try{
+ int line_length=tbat_file.readLine().length()+1;
+ tbat_file.seek(0);
+ for(TBUN tbun:buffer){
+ DataUpdator.binarySearchUpdateTBAT(tbat_file,line_length,tbun);
+ }
+ }finally{
+ tbat_file.close();//was never closed: file-handle leak
+ }
+ }
+
+ /**
+  * In this version2, we load the lines of file into the memory first and then parse into TBUN ArrayList
+  * @param oid_position the place of oid in the line for appendix =1, for normal update file =0
+  */
+ public static void sortMergeFileToTBAT2(String tbat_file_name, String appendix_file_name, int oid_position) throws IOException{
+ BufferedReader appendix_file_in =new BufferedReader(new FileReader(appendix_file_name));
+
+ ArrayList<TBUN> buffer=new ArrayList<TBUN>(1000);
+ ArrayList<String> lines=new ArrayList<String>(1000);
+
+ String line_temp="";
+ while((line_temp=appendix_file_in.readLine())!=null){
+ lines.add(line_temp);
+ }
+
+ if(oid_position==1){//for appendix file: "timestamp,oid,value"
+ for(String line:lines){
+ String[] tbun_fields=line.split(",");
+ long timestamp=Long.parseLong(tbun_fields[0].trim());
+ int oid=Integer.parseInt(tbun_fields[1].trim());
+ int value=Integer.parseInt(tbun_fields[2].trim());
+ buffer.add(new TBUN(timestamp,oid,value));
+ }
+ }else{//for normal update file: "oid,value", stamped with current time
+ long timestamp=System.currentTimeMillis();
+ for(String line:lines){
+ String[] tbun_fields=line.split(",");
+ int oid=Integer.parseInt(tbun_fields[0].trim());
+ int value=Integer.parseInt(tbun_fields[1].trim());
+ buffer.add(new TBUN(timestamp,oid,value));
+ }
+ }
+
+ appendix_file_in.close();
+ Collections.sort(buffer);//a modified merge sort
+ RandomAccessFile tbat_file = new RandomAccessFile(new File(tbat_file_name), "rw");
+ try{
+ int line_length=tbat_file.readLine().length()+1;
+ tbat_file.seek(0);
+ for(TBUN tbun:buffer){
+ DataUpdator.binarySearchUpdateTBAT(tbat_file,line_length,tbun);
+ }
+ }finally{
+ tbat_file.close();//was never closed: file-handle leak
+ }
+ }
+
+ /**
+  * version 3 is same as version 2, except return disk read count
+  * @param oid_position the place of oid in the line for appendix =1, for normal update file =0
+  * @return total disk accesses (appendix reads + binary-search probes)
+  */
+ public static long sortMergeFileToTBAT3(String tbat_file_name, String appendix_file_name, int oid_position) throws IOException{
+ DiskAccessCount countTotal=new DiskAccessCount();
+ BufferedReader appendix_file_in =new BufferedReader(new FileReader(appendix_file_name));
+ ArrayList<TBUN> buffer=new ArrayList<TBUN>(1000);
+ ArrayList<String> lines=new ArrayList<String>(1000);
+
+ String line_temp="";
+ while((line_temp=appendix_file_in.readLine())!=null){
+ lines.add(line_temp);
+ countTotal.disk_access_total++;//one line read from the appendix
+ }
+
+ if(oid_position==1){//for appendix file: "timestamp,oid,value"
+ for(String line:lines){
+ String[] tbun_fields=line.split(",");
+ long timestamp=Long.parseLong(tbun_fields[0].trim());
+ int oid=Integer.parseInt(tbun_fields[1].trim());
+ int value=Integer.parseInt(tbun_fields[2].trim());
+ buffer.add(new TBUN(timestamp,oid,value));
+ }
+ }else{//for normal update file: "oid,value", stamped with current time
+ long timestamp=System.currentTimeMillis();
+ for(String line:lines){
+ String[] tbun_fields=line.split(",");
+ int oid=Integer.parseInt(tbun_fields[0].trim());
+ int value=Integer.parseInt(tbun_fields[1].trim());
+ buffer.add(new TBUN(timestamp,oid,value));
+ }
+ }
+
+ appendix_file_in.close();
+ Collections.sort(buffer);//a modified merge sort
+ RandomAccessFile tbat_file = new RandomAccessFile(new File(tbat_file_name), "rw");
+ try{
+ int line_length=tbat_file.readLine().length()+1;
+ tbat_file.seek(0);
+ for(TBUN tbun:buffer){
+ DataUpdator.binarySearchUpdateTBAT(tbat_file,line_length,tbun, 0, countTotal);
+ }
+ }finally{
+ tbat_file.close();//was never closed: file-handle leak
+ }
+ return countTotal.disk_access_total;
+ }
+
+ /**
+  * version 4 is based on version 3
+  * version 4 aims to reduce the temporary memory needed
+  * Appendix/update rows are parsed straight into TBUN objects (no
+  * intermediate String list), sorted, then binary-search-merged into the
+  * TBAT body. Returns the total disk access count.
+  */
+ public static long sortMergeFileToTBAT4(String tbat_file_name, String appendix_file_name, int oid_position) throws IOException{
+ DiskAccessCount access_count=new DiskAccessCount();
+ BufferedReader appendix_reader=new BufferedReader(new FileReader(appendix_file_name));
+ ArrayList<TBUN> tbuns=new ArrayList<TBUN>(1000);
+ long now=System.currentTimeMillis();
+ for(String row=appendix_reader.readLine(); row!=null; row=appendix_reader.readLine()){
+ String[] fields=row.split(",");
+ int oid=Integer.parseInt(fields[oid_position].trim());
+ int value=Integer.parseInt(fields[oid_position+1].trim());
+ //appendix rows carry their own timestamp in column 0; plain update rows get "now"
+ long ts=(oid_position==1)?Long.parseLong(fields[0].trim()):now;
+ tbuns.add(new TBUN(ts,oid,value));
+ access_count.disk_access_total++;
+ }
+ appendix_reader.close();
+ Collections.sort(tbuns);//a modified merge sort
+ RandomAccessFile tbat_file=new RandomAccessFile(new File(tbat_file_name), "rw");
+ int line_length=tbat_file.readLine().length()+1;
+ tbat_file.seek(0);
+ for(int i=0;i<tbuns.size();i++){
+ DataUpdator.binarySearchUpdateTBAT(tbat_file,line_length,tbuns.get(i), 0, access_count);
+ }
+ tbat_file.close();
+ return access_count.disk_access_total;
+ }
+
+
+ /**
+  * use long numbers
+  * oid_position is small, int works
+  * Like sortMergeFileToTBAT4 but oid/value are parsed as long (TBUNL).
+  * TODO(review): appendix_file_in and tbat_file are not closed if an
+  * exception is thrown mid-way -- consider try/finally.
+  */
+ public static long sortMergeFileToTBAT41(String tbat_file_name, String appendix_file_name, int oid_position, int buffer_size) throws IOException{
+ DiskAccessCount countTotal=new DiskAccessCount();
+ BufferedReader appendix_file_in =new BufferedReader(new FileReader(appendix_file_name));
+ ArrayList buffer=new ArrayList(buffer_size);
+ long current_time_stamp=System.currentTimeMillis();
+ String line="";
+ while((line=appendix_file_in.readLine())!=null){
+ String[] tbun_fields=line.split(",");
+ long oid=Long.parseLong(tbun_fields[oid_position].trim());
+ long value=Long.parseLong(tbun_fields[oid_position+1].trim());
+ long timestamp;
+ if(oid_position==1){//for appendix file: timestamp is column 0
+ timestamp=Long.parseLong(tbun_fields[0].trim());
+ }else{//for normal update file: stamp with current time
+ timestamp=current_time_stamp;
+ }
+ buffer.add(new TBUNL(timestamp,oid,value));
+ countTotal.disk_access_total++;
+ }
+ appendix_file_in.close();
+ Collections.sort(buffer);//a modified merge sort
+ RandomAccessFile tbat_file = new RandomAccessFile(new File(tbat_file_name), "rw");
+ int line_length=tbat_file.readLine().length()+1;
+ tbat_file.seek(0);
+ for(TBUNL tbun:buffer){
+ DataUpdator.binarySearchUpdateTBAT_Long(tbat_file,line_length,tbun, 0, countTotal);
+ }
+ tbat_file.close();
+ return countTotal.disk_access_total;
+ }
+
+ /**
+  * same as sortMergeFileToTBAT41 but use default buffer size (10, as in java doc)
+  * https://docs.oracle.com/javase/8/docs/api/java/util/ArrayList.html
+  */
+ public static long sortMergeFileToTBAT41(String tbat_file_name, String appendix_file_name, int oid_position) throws IOException{
+ //delegate with ArrayList's default initial capacity (10)
+ return sortMergeFileToTBAT41(tbat_file_name, appendix_file_name, oid_position, 10);
+ }
+
+ /**
+  * merge appendix to body using OBTree
+  * Walks the OB-Tree's leaf entries (entry value = 1-based line number in
+  * the appendix file), reads each appendix line, parses it as a TBAT row
+  * ("timestamp,oid,value"), and binary-search-updates the TBAT body.
+  * The low returned by each search seeds the next search's lower bound,
+  * which works because leaf entries are visited in key (oid) order.
+  * NOTE(review): generic type parameters on ArrayList/Entry appear to have
+  * been lost in extraction ("ArrayList>", "Entryentry") -- restore from the
+  * original source before compiling.
+  * @return total disk accesses (reads + binary-search probes)
+  */
+ public static long mergeAppendixToTBAT_OBTree (OBTree obtree, RandomAccessFile reader,
+ RandomAccessFile writer, int line_length) throws Exception{
+ ArrayList> entry_list=obtree.getLeafEntryList();
+ DiskAccessCount countTotal=new DiskAccessCount();
+ int low=0;//low searching position when doing binary search
+ for (Entryentry: entry_list ){
+ long line_num_update=entry.getValue();
+ reader.seek((line_num_update-1)*line_length);
+ String line_updating=reader.readLine();
+ countTotal.disk_access_total++;//one reading
+ String[] tokens=line_updating.split(",");
+ //!!! update file must be a tbat format file
+ Long timestamp=Long.parseLong(tokens[0].trim());
+ Integer tbat_oid=Integer.parseInt(tokens[1].trim());
+ Integer value=Integer.parseInt(tokens[2].trim());
+ TBUN tbun_updating=new TBUN(timestamp.longValue(),tbat_oid.intValue(),value);
+ low=DataUpdator.binarySearchUpdateTBAT(writer, line_length, tbun_updating, low, countTotal);
+ }
+ return countTotal.disk_access_total;
+ }
+
+
+}
diff --git a/basic/util/DiskAccessCount.java b/basic/util/DiskAccessCount.java
new file mode 100644
index 0000000..d3dea82
--- /dev/null
+++ b/basic/util/DiskAccessCount.java
@@ -0,0 +1,5 @@
+package basic.util;
+
+/**
+ * Mutable accumulator passed into search/merge routines so they can report
+ * how many disk reads/writes they performed.
+ */
+public class DiskAccessCount {
+ public long disk_access_total=0;//running total, incremented by callers
+}
diff --git a/basic/util/FileSplitter.java b/basic/util/FileSplitter.java
new file mode 100644
index 0000000..52d7192
--- /dev/null
+++ b/basic/util/FileSplitter.java
@@ -0,0 +1,47 @@
+package basic.util;
+
+import java.io.*;
+
+/**
+ * Splits a text file into a fixed number of roughly equal chunk files named
+ * "&lt;input&gt;_1", "&lt;input&gt;_2", ...
+ */
+public class FileSplitter {
+
+ // total splitting partitions.
+ // NOTE(review): this field is static, so the constructor/setter below
+ // mutate shared global state, not per-instance state -- confirm intended.
+ public static int total_partition_num=3;
+
+ /** Use the current default partition count. */
+ public FileSplitter() {
+
+ }
+
+ /** Set the (static) default partition count. */
+ public FileSplitter(int total_partition_num) {
+ FileSplitter.total_partition_num=total_partition_num;//assign static field explicitly
+ }
+
+ /** Set the (static) default partition count. */
+ public void setTotalPartitionNum(int total_partition_num) {
+ FileSplitter.total_partition_num=total_partition_num;
+ }
+
+ /**
+  * Split input_file_name into total_partition_num chunk files, each holding
+  * ceil(lines/partitions) lines. Streams are now closed even when an I/O
+  * error occurs (the old code leaked them on exception).
+  */
+ public static void splitByPartitionNum(String input_file_name, int total_partition_num) throws IOException{
+ long file_line_num = DataRetriever.getFileLineNumber(input_file_name);
+ long chunk_line_num = (file_line_num -1)/ total_partition_num +1 ;//line number in each chunk. rounded up!
+ BufferedReader input_file=new BufferedReader(new FileReader(input_file_name));
+ int current_chunk_num=1;
+ PrintWriter output_file = new PrintWriter(new BufferedWriter(new FileWriter(input_file_name+"_"+current_chunk_num)));
+ try{
+ long current_chunk_line_num=0;//line number in current chunk
+ String current_line;
+ while((current_line=input_file.readLine())!=null){
+ if(current_chunk_line_num < chunk_line_num){
+ output_file.println(current_line);
+ current_chunk_line_num++;
+ }else {
+ //current chunk is full: start the next chunk file
+ output_file.close();
+ current_chunk_num++;
+ output_file = new PrintWriter(new BufferedWriter(new FileWriter(input_file_name+"_"+current_chunk_num)));
+ output_file.println(current_line);
+ current_chunk_line_num=1;
+ }
+ }
+ }finally{
+ output_file.close();//close even on exception to avoid handle leaks
+ input_file.close();
+ }
+ }
+
+}
diff --git a/basic/util/MathTool.java b/basic/util/MathTool.java
new file mode 100644
index 0000000..709ebfd
--- /dev/null
+++ b/basic/util/MathTool.java
@@ -0,0 +1,76 @@
+package basic.util;
+
+import java.util.*;
+
+/**
+ * Small statistics and unit-conversion helpers used by the experiments.
+ * Element types (ArrayList&lt;Double&gt;) are restored: the raw lists cannot
+ * compile against the for-each loops below. Erasure keeps the signatures
+ * binary-compatible with existing callers.
+ */
+public class MathTool {
+
+ /** Arithmetic mean of the list; NaN for an empty list (0/0). */
+ public static double mean(ArrayList<Double> list) {
+ double sum = 0;
+ for (double val : list) {
+ sum += val;
+ }
+ return sum / list.size();
+ }
+
+ /**
+  * Median of the list. Fix: the input is no longer required to be
+  * pre-sorted -- a sorted copy is taken internally, so the caller's list is
+  * not mutated and unsorted input now yields the true median.
+  * Returns NEGATIVE_INFINITY for an empty list (legacy sentinel).
+  */
+ public static double median(ArrayList<Double> list) {
+ if(list.size()==0){
+ System.out.println("list is empty for:"+list);
+ return Double.NEGATIVE_INFINITY;
+ }
+ if(list.size()==1){
+ return list.get(0);
+ }
+ ArrayList<Double> sorted=new ArrayList<Double>(list);
+ Collections.sort(sorted);//median is only defined on ordered data
+ int middle=sorted.size()/2;
+ if (sorted.size()%2 == 1) {
+ return sorted.get(middle);
+ } else {
+ return (sorted.get(middle-1)+sorted.get(middle))/2.0;
+ }
+ }
+
+ /**
+  * Returns the sample variance in the ArrayList a, NaN if no such value.
+  */
+ public static double var(ArrayList<Double> a) {
+ if (a.size() == 0) return Double.NaN;
+ double avg = mean(a);
+ double sum = 0.0;
+ for (int i = 0; i < a.size(); i++) {
+ sum += (a.get(i) - avg) * (a.get(i) - avg);
+ }
+ return sum / (a.size() - 1);//sample variance: divide by n-1
+ }
+
+ /**
+  * Returns the sample standard deviation in the ArrayList a, NaN if no such value.
+  */
+ public static double stddev(ArrayList<Double> a) {
+ return Math.sqrt(var(a));
+ }
+
+ /**
+  * Returns a copy of a with values farther than m standard deviations from
+  * the mean removed (the input list is not modified).
+  */
+ public static ArrayList<Double> removeOutlier(ArrayList<Double> a, double m){
+ double u=mean(a);
+ double s=stddev(a);
+ ArrayList<Double> filtered=new ArrayList<Double>();
+ for(Double e:a){
+ if(e > u-m*s && e < u+m*s){
+ filtered.add(e);
+ }
+ }
+ return filtered;
+ }
+
+ private static final long MEGABYTE = 1024L * 1024L;
+
+ /** Bytes to kilobytes, as a double. */
+ public static double bytesToKB(long bytes) {
+ return bytes*1.0 / 1024L;
+ }
+
+ /** Bytes to megabytes, as a double. */
+ public static double bytesToMB(long bytes) {
+ return bytes*1.0 / MEGABYTE;
+ }
+}
diff --git a/basic/util/Merger.java b/basic/util/Merger.java
new file mode 100644
index 0000000..7a932f9
--- /dev/null
+++ b/basic/util/Merger.java
@@ -0,0 +1,20 @@
+package basic.util;
+
+
+/**
+ * This is the sort merge class for data cleaning after AOC updates
+ * @author fyu
+ *
+ */
+
+import java.io.*;
+import java.util.*;
+
+public class Merger {
+
+ /**
+  * Merge an appendix file into the TBAT body file and write the result to
+  * output_file_name.
+  * TODO: not yet implemented -- currently a no-op stub.
+  */
+ public static void mergeWithBody(String body_file_name, String appendix, String output_file_name) throws IOException {
+ // read in an appendix
+
+
+ }
+}
diff --git a/build.xml b/build.xml
new file mode 100644
index 0000000..76f5321
--- /dev/null
+++ b/build.xml
@@ -0,0 +1,91 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/exp_merge_ob/exp_merge_ob.java b/exp_merge_ob/exp_merge_ob.java
new file mode 100644
index 0000000..ff89ac8
--- /dev/null
+++ b/exp_merge_ob/exp_merge_ob.java
@@ -0,0 +1,232 @@
+package exp_merge_ob;
+
+import static java.lang.System.out;
+
+import java.io.*;
+import java.text.SimpleDateFormat;
+import java.util.*;
+
+import basic.btree.*;
+import basic.util.*;
+
+public class exp_merge_ob {
+
+ static int num_lines_1m=47660;//number of lines of 1MB BAT file
+ static int num_lines;//number of lines in the tbat and bat files
+ static int max_exp_times;//maximum iteration times of experiment
+ static ArrayList pers=new ArrayList();//update percentages
+ static int appendix_num_split=10; //number of split files for appendixes
+ static double sel_per=0.1;//selection percentage
+ static String result_dir= "data/exp_merge_ob/";
+ static String data_dir="data/";
+ static String tbat_file_name_original=data_dir+"tbat.txt";
+ static String tbat_file_name_copy1=tbat_file_name_original.substring(0, tbat_file_name_original.length()-4)+"_cp1.txt";
+ static String tbat_file_name_copy2=tbat_file_name_original.substring(0, tbat_file_name_original.length()-4)+"_cp2.txt";
+
+ public static void main(String[] args) throws Exception{
+ String program_start_date_time=new SimpleDateFormat("yyyy/MM/dd HH:mm:ssZ").format(Calendar.getInstance().getTime());
+// final String result_file_name=result_dir+"result-merge-ob-"+
+// (new SimpleDateFormat("yyyyMMdd-HHmmss").format(Calendar.getInstance().getTime()));
+ final String result_file_name=result_dir+"result-merge-ob.txt";
+ PrintWriter result_file = null;
+ if(args.length < 3){
+ out.println("Please input num_lines "
+ + " max_exp_times per1 per2 per3 ... ");
+ System.exit(0);
+ }else{
+ num_lines = Integer.parseInt(args[0]);
+ max_exp_times = Integer.parseInt(args[1]);
+ for(int i=2;i> all_times_merge_ob=new HashMap>();
+ HashMap> all_times_merge_bi=new HashMap>();
+ HashMap mean_times_merge_ob=new HashMap();
+ HashMap mean_times_merge_bi=new HashMap();
+ HashMap all_memories_ob=new HashMap();//ob-tree memory used
+ HashMap disk_access_ob=new HashMap();//ob-tree disk access
+ HashMap disk_access_bi=new HashMap();//binary search merge disk access
+
+ //---do the experiment---
+ for(double per:pers){
+
+
+ out.println("exp: update "+per+"%");
+ result_file.println("* exp: update "+per+"%");
+ String update_file_name=data_dir+"update_"+num_lines+"_"+per+".txt";
+
+ ArrayList merge_bi_time_temp=new ArrayList();
+ ArrayList merge_ob_time_temp=new ArrayList();
+
+ // bulk loading of update list file into OB-tree
+ // OBTree obtree = new OBTree();
+ // OBTree changed to BTree
+ OBTree obtree = new OBTree();
+ obtree.loadUpdateFile(update_file_name);
+ all_memories_ob.put(per,obtree.toKB());
+ long disk_access_ob_temp=0;
+ long disk_access_bi_temp=0;
+ for(int i=0;i merge_bi_time_temp=all_times_merge_bi.searchKey(per);
+// ArrayList merge_ob_time_temp=all_times_merge_ob.searchKey(per);
+// for(int i=0;i merge_bi_time_temp=all_times_merge_bi.get(per);
+ for(int i=0;i merge_ob_time_temp=all_times_merge_ob.get(per);
+ for(int i=0;i pers=new ArrayList();//update percentages
+ static int appendix_num_split=10; //number of split files for appendixes
+ static double sel_per=0.1;//selection percentage
+ static String result_dir= "data/exp_merge_ob/";
+ static String data_dir="data/";
+ static String tbat_file_name_original=data_dir+"tbat.txt";
+ static String tbat_file_name_copy1=tbat_file_name_original.substring(0, tbat_file_name_original.length()-4)+"_cp1.txt";
+ static String tbat_file_name_copy2=tbat_file_name_original.substring(0, tbat_file_name_original.length()-4)+"_cp2.txt";
+
+ public static void main(String[] args) throws Exception{
+ String program_start_date_time=new SimpleDateFormat("yyyy/MM/dd HH:mm:ssZ").format(Calendar.getInstance().getTime());
+ final String result_file_name=result_dir+"result-ob-memory.txt";
+ PrintWriter result_file = null;
+ if(args.length < 3){
+ out.println("OB-Tree Loading Memory Test: please input num_lines "
+ + " per1 per2 per3 ... ");
+ System.exit(0);
+ }else{
+ num_lines = Integer.parseInt(args[0]);
+ for(int i=1;i> all_times_merge_ob=new HashMap>();
+ HashMap> all_times_merge_bi=new HashMap>();
+ HashMap mean_times_merge_ob=new HashMap();
+ HashMap mean_times_merge_bi=new HashMap();
+ HashMap all_memories_ob=new HashMap();//ob-tree memory used
+
+ //---do the experiment---
+ for(double per:pers){
+ out.println("exp: update "+per+"%");
+ result_file.println("* exp: update "+per+"%");
+ String update_file_name=data_dir+"update_"+num_lines+"_"+per+".txt";
+
+ ArrayList merge_bi_time_temp=new ArrayList();
+ ArrayList merge_ob_time_temp=new ArrayList();
+
+ // bulk loading of update list file into OB-tree
+ //OBTree changed to BTree
+ OBTree obtree = new OBTree();
+ obtree.loadUpdateFile(update_file_name);
+ all_memories_ob.put(per,obtree.toKB());
+ }
+ out.println("Major expriment finished!");
+ out.println();
+ result_file.println("\n#Analysis:\n");
+
+ //---------memory - OB-merge -------
+ result_file.println("OB-Merge memory used:\n");
+ result_file.format("%3s, %10s \n","perc","KB");
+ for (double per : pers) {
+ result_file.format("%-3.2f, %10.3f \n", per, all_memories_ob.get(per));
+ }
+
+ //end of file
+ result_file.println();
+ String program_end_date_time=new SimpleDateFormat("yyyy/MM/dd HH:mm:ssZ").format(Calendar.getInstance().getTime());
+ result_file.println("Program Started at: "+program_start_date_time);
+ result_file.println("Program Ended at: "+program_end_date_time);
+ long end_global=System.currentTimeMillis();
+ double elapsedTime=(end_global-start_global)/1000.0;
+ result_file.println("Elapsed Time:"+elapsedTime+"s\n");
+
+ result_file.close();
+ out.println("Elapsed Time:"+elapsedTime+"s");
+ }
+
+ /**
+ * prepare files for merge progressive select experiment
+ * files include:
+ * - tbat and bat files
+ * - updat list file
+ * - appendix files for each update file (AOC appendix files)
+ * - selection oid file
+ *
+ */
+ public static void prepareFiles() throws IOException {
+ //-----prepare tbat and bat files-----
+ DataCreator.prepareTBAT(num_lines, tbat_file_name_original);
+ out.println("TBAT file "+tbat_file_name_original+" created");
+ //-----prepare update and appendix files p%=1%-5%-----
+ for(double per:pers){
+ //create update files
+ String update_file_name=data_dir+"update_"+num_lines+"_"+per+".txt";
+ DataCreator.prepareUpdateList1(per, num_lines, update_file_name, 1);
+ out.println("Update file: "+update_file_name+" created");
+ }
+
+
+ //-----prepare selection query files-----
+// DataCreator.prepareSelectionFile(select_file_name, sel_per, num_lines);
+// System.out.println("Selection files created");
+ }
+
+
+}
diff --git a/exp_merge_progressive/exp_merge_progressive_btree.java b/exp_merge_progressive/exp_merge_progressive_btree.java
new file mode 100644
index 0000000..caba5d8
--- /dev/null
+++ b/exp_merge_progressive/exp_merge_progressive_btree.java
@@ -0,0 +1,282 @@
+package exp_merge_progressive;
+
+import java.io.*;
+import java.text.SimpleDateFormat;
+import java.util.*;
+
+import basic.btree.*;
+import basic.storage_model.TBAT;
+import basic.util.*;
+
+public class exp_merge_progressive_btree {
+
+ static int num_lines_1m=47660;
+ static int num_lines;
+// static int num_lines=64*num_lines_1m;
+ static ArrayList pers=new ArrayList();//update percentages
+ static int appendix_num_split=10; //number of split files for appendixes
+ static double sel_per=0.1;//selection percentage
+ final static String dir_name= "results/exp_merge/";
+ final static String bat_file_name=dir_name+"bat.txt";
+ final static String tbat_file_name=dir_name+"tbat.txt";
+ final static String tbat_temp_file_name=dir_name+"tbat_temp.txt";
+ final static String select_file_name=dir_name+"select_"+sel_per+".txt";
+ final static String result_file_name=dir_name+"results/result-merge-progressive-select.txt";
+
+ public static void main(String[] args) throws IOException{
+ String program_start_date_time=new SimpleDateFormat("yyyy/MM/dd HH:mm:ssZ").format(Calendar.getInstance().getTime());
+ PrintWriter result_file= new PrintWriter(new FileWriter(result_file_name));
+
+ if(args.length<1){
+ System.out.println("Input: num_lines\n");
+ System.exit(0);
+ }else{
+ num_lines = Integer.parseInt(args[0]);
+ System.out.println("Number of lines for experiment:"+num_lines);
+ result_file.println("Number of lines for experiment:"+num_lines+"\n");
+ }
+ BTree appendixBTree = new BTree();
+ int OFF=1;
+ String a;
+ String timestamp;
+ int b;
+ int valueOfA;
+
+
+ long start=System.currentTimeMillis();
+ for(int p=1;p<=5;p++){
+ pers.add(p*0.01);
+ }
+
+
+
+ //---prepare files---
+ prepareFiles();
+
+ //---do the experiment---
+ int tbat_line_length= DataRetriever.getLineLength(tbat_file_name);
+ int bat_line_length = DataRetriever.getLineLength(bat_file_name);
+ List select_list= DataCreator.loadSelectionFile(select_file_name);
+ //for progressive approach
+ HashMap> all_times_select=new HashMap>();
+ HashMap> all_times_merge=new HashMap>();
+ HashMap all_memories=new HashMap();
+ //for eacher approach
+// HashMap all_times_select2=new HashMap();
+ HashMap all_memories_eager=new HashMap();
+
+ for(double per:pers){
+ System.out.println("exp: update "+per+"%");
+ result_file.println("* exp: update "+per+"%");
+ //-----progressive approach-----
+ BasicTools.copyFile(tbat_file_name, tbat_temp_file_name);
+ System.out.println("copied temp file");
+ ArrayList times_select=new ArrayList();
+ ArrayList times_merge=new ArrayList();
+ ArrayList memories=new ArrayList();
+ String appendix_file_prefix=dir_name+"appendix_"+per;
+ ArrayList appendix_file_names=new ArrayList();
+ for(int i=1;i<=appendix_num_split;i++){
+ appendix_file_names.add(appendix_file_prefix+"_"+i+".txt");
+ Scanner reads = new Scanner(new File(dir_name + "update_" + per + ".txt"));
+ OFF = 1;
+ while (reads.hasNext()) {
+ a = reads.next(); // read OID
+ b = reads.nextInt(); // read VALUE
+ a = a.substring(0, a.length() - 1); // removing the comma that was auto-generated
+ valueOfA = Integer.parseInt(a); // placing that number into a variable
+ if (appendixBTree.get(valueOfA) != null) {
+ appendixBTree.findReplace(valueOfA, OFF);
+ } else {
+ appendixBTree.put(valueOfA, OFF);
+ }// end of if-else
+ OFF++;
+ }
+ //reads.close();
+
+
+ }
+ for(int index=0;index<=appendix_num_split;index++){
+ System.out.println("progressive sort merge tbat index:"+index);
+ if(index!=0){
+ Runtime runtime = Runtime.getRuntime();//Get the Java runtime
+ long start_merge=System.currentTimeMillis();
+ DataUpdator.sortMergeFileToTBAT2(tbat_temp_file_name, appendix_file_prefix+"_"+index+".txt", 1);
+ long end_merge=System.currentTimeMillis();
+ double elapsed_time_merge=(end_merge-start_merge)/1000.0;
+ times_merge.add(elapsed_time_merge);
+ runtime.gc();//Run garbage collector
+ long memory = runtime.totalMemory() - runtime.freeMemory();//used memory
+ memories.add(MathTool.bytesToKB(memory)*1.0);
+ appendix_file_names.remove(0);
+ }
+ long target_value;
+ System.out.println("exp select TBAT uncleaned");
+ long start2=System.currentTimeMillis();
+ //System.out.println(appendixBTree);
+ try{
+ RandomAccessFile updates = new RandomAccessFile(new File(dir_name + "update_" + per + ".txt"), "r");
+ for(int target_oid:select_list){
+ if(appendixBTree.get(target_oid) == null){
+ target_value = TBAT.selectTBAT_Uncleaned2(tbat_file_name, num_lines,
+ tbat_line_length, target_oid);
+ }else{
+ //target_value = DataRetriever.selectTBAT_Uncleaned_Split2(appendix_file_names,
+ //0, bat_line_length, target_oid);
+ //System.out.println(appendixBTree.searchKey(target_oid));
+ target_value = TBAT.searchAppendixByOffSet(updates, 0,
+ bat_line_length, appendixBTree.get(target_oid), 1);
+ }
+ //System.out.printf("Target OID %d has value %d ", target_oid, target_value);
+ }
+ }catch(Exception ex){
+ System.out.println("File Problems Again");
+ }
+ long end2=System.currentTimeMillis();
+ double elapsed_time2=(end2-start2)/1000.0;
+ times_select.add(elapsed_time2);
+ }
+ all_times_merge.put(per, times_merge);
+ all_times_select.put(per, times_select);
+ all_memories.put(per, MathTool.mean(memories));
+
+
+ //-----eager approach-----
+ System.out.println("eager sort merge tbat");
+ BasicTools.copyFile(tbat_file_name, tbat_temp_file_name);
+ Runtime runtime2 = Runtime.getRuntime();//Get the Java runtime
+ runtime2.gc();//Run garbage collector
+ DataUpdator.sortMergeFileToTBAT2(tbat_temp_file_name, dir_name+"update_"+per+".txt", 0);
+ long memory2 = runtime2.totalMemory() - runtime2.freeMemory();//used memory
+ all_memories_eager.put(per, MathTool.bytesToKB(memory2)*1.0);
+ }
+ System.out.println("Major expriment finished!");
+ System.out.println();
+ result_file.println("\n#Progressive:\n");
+
+ //-------merge time----------
+ result_file.println("Merge Time:\n");
+ //print table head
+ result_file.print("update_per\\merge_per");
+ for(int index=1;index<=appendix_num_split;index++){
+ result_file.print("|"+(int)(index*10)+"%");
+ }
+ result_file.println();
+ result_file.print("---");
+ for(int index=0;index<=appendix_num_split;index++){
+ result_file.print("|---");
+ }
+ result_file.println();
+ //print table body
+ for(double per:pers){
+ result_file.print(per+"");
+ ArrayList times_merge=all_times_merge.get(per);
+ for(int i=0;i times_select=all_times_select.get(per);
+ for(int i=0;i pers=new ArrayList();//update percentages
+ static int appendix_num_split=10; //number of split files for appendixes
+ static double sel_per=0.1;//selection percentage
+ final static String dir_name= "results/exp_merge/";
+ final static String bat_file_name=dir_name+"bat.txt";
+ final static String tbat_file_name=dir_name+"tbat.txt";
+ final static String tbat_temp_file_name=dir_name+"tbat_temp.txt";
+ final static String select_file_name=dir_name+"select_"+sel_per+".txt";
+ final static String result_file_name=dir_name+"results/result-merge-progressive-select.txt";
+
+ public static void main(String[] args) throws IOException{
+ String program_start_date_time=new SimpleDateFormat("yyyy/MM/dd HH:mm:ssZ").format(Calendar.getInstance().getTime());
+ PrintWriter result_file= new PrintWriter(new FileWriter(result_file_name));
+
+ if(args.length<1){
+ System.out.println("Input: num_lines\n");
+ System.exit(0);
+ }else{
+ num_lines = Integer.parseInt(args[0]);
+ System.out.println("Number of lines for experiment:"+num_lines);
+ result_file.println("Number of lines for experiment:"+num_lines+"\n");
+ }
+
+ long start=System.currentTimeMillis();
+ for(int p=1;p<=5;p++){
+ pers.add(p*0.01);
+ }
+
+ //---prepare files---
+ prepareFiles();
+
+ //---do the experiment---
+ int tbat_line_length= DataRetriever.getLineLength(tbat_file_name);
+ List select_list= DataCreator.loadSelectionFile(select_file_name);
+ //for progressive approach
+ HashMap> all_times_select=new HashMap>();
+ HashMap> all_times_merge=new HashMap>();
+ HashMap all_memories=new HashMap();
+
+ //for eacher approach
+// HashMap all_times_select2=new HashMap();
+ HashMap all_memories_eager=new HashMap();
+
+ for(double per:pers){
+ System.out.println("exp: update "+per+"%");
+ result_file.println("* exp: update "+per+"%");
+
+ //-----progressive approach-----
+ BasicTools.copyFile(tbat_file_name, tbat_temp_file_name);
+ System.out.println("copy temp file");
+ ArrayList times_select=new ArrayList