Skip to content

Commit

Permalink
Sturff
Browse files Browse the repository at this point in the history
  • Loading branch information
iluvjava committed Mar 6, 2020
1 parent 5c63f71 commit c6effcb
Show file tree
Hide file tree
Showing 6 changed files with 107 additions and 14 deletions.
Binary file added CentroidMatrix.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Difference_Matrix.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added FinnMatrix.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
67 changes: 67 additions & 0 deletions adjustments_investigations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
"""
This file investigates some of the special things discovered
when writing the paper.
TODO: DO THIS
* Comparing Huckleberry Finn and The Prince and the Pauper from Twain's
writings.
"""


from core import *
import matplotlib.pyplot as plt


def f1():
    """
    Print the distance between two of Mark Twain's novels.

    Builds the MARK_TWAIN author with special nouns ignored, prints every
    work it lists, and then prints the distance (matrix 2 norm) between the
    matrices of "Adventures of Huckleberry Finn.txt" and
    "The Prince and The Pauper.txt".
    :return:
        None
    :raises KeyError:
        If either of the two novels is not among the listed works.
    """
    Novel1, Novel2 = "Adventures of Huckleberry Finn.txt", \
                     "The Prince and The Pauper.txt"
    Mark = Author(MARK_TWAIN, IgnoreSpecialNoun=True)
    # Names and matrices are kept apart on purpose: rebinding Novel1/Novel2
    # to a matrix inside the loop makes the next `Work == Novel1` compare a
    # string against an array, whose truth value is ambiguous.
    WorkToMatrix = {}
    for Work, Matrix in zip(Mark.list_of_works(), Mark.get_matrices()):
        print(Work)
        WorkToMatrix[Work] = Matrix
    print("The distance between \"Adventures of Huckleberry Finn.txt\" "
          "and \"The Prince and The Pauper.txt\" is: ")
    # A missing novel fails loudly here instead of passing a file name to dis.
    print(dis(WorkToMatrix[Novel1], WorkToMatrix[Novel2], MatrixMetric.TwoNorm))
    print("Ignoring Special Nouns and using matrix 2 norm")

def f2():
    """
    Compare the special nouns' impact on Huckleberry Finn, in tm27.

    Builds the MARK_TWAIN author with get_tm27 and special nouns ignored,
    then renders three heat maps with a colorbar: the difference between the
    Huckleberry Finn matrix and the author's center, the center itself, and
    the Huckleberry Finn matrix. Each figure is saved through plt.savefig
    under the same name as its title.
    :return:
        None
    """
    A1 = Author(MARK_TWAIN, matrixfunction=get_tm27, IgnoreSpecialNoun=True)
    A1WorkDict = A1.work_matrix_dict()
    WorkName = "Adventures of Huckleberry Finn.txt"
    FinnMatrix = A1WorkDict[WorkName]
    CentroidMatrix = A1.get_center()

    # (title, matrix) pairs; the "FinnMatrix" figure must plot FinnMatrix,
    # not the centroid a second time.
    Figures = [
        ("Difference_Matrix", FinnMatrix - CentroidMatrix),
        ("CentroidMatrix", CentroidMatrix),
        ("FinnMatrix", FinnMatrix),
    ]
    for Title, Matrix in Figures:
        plt.imshow(Matrix)
        plt.colorbar()
        plt.title(Title)
        plt.savefig(Title)
        # Clear the figure so the next heat map starts from a blank canvas.
        plt.clf()

if __name__ == "__main__":
    # Script entry point: run the Huckleberry Finn heat-map investigation.
    f2()
49 changes: 40 additions & 9 deletions core.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
from os import listdir
from os.path import isfile

__all__ = ["Author", "dis_between_authors", "get_tm27", "get_2ndtm","get_2ndlogarithm",
__all__ = ["Author", "dis_between_authors", "get_tm27", "get_2ndtm","get_2ndlogarithm",
"CHARLES_DICKENS",
"MARK_TWAIN", "CentroidOption", "MatrixMetric", "AuthorMetric"]
"MARK_TWAIN", "CentroidOption", "MatrixMetric", "AuthorMetric", "dis"]

# A list of authors' directory:
CHARLES_DICKENS = "data/Charles Dickens"
Expand Down Expand Up @@ -114,6 +114,7 @@ def s(letter):
npmatrix[i] /= s
return npmatrix


def get_2ndlogarithm(lines: List[str], skipSpecialNoun=False):
"""
Takes the logarithm after counting the frequency,
Expand Down Expand Up @@ -182,7 +183,7 @@ class AuthorMetric(enum.Enum):

AverageDis = 2 # Taking the average distance of the given transition matrix with respect to
# All the matrices of the author.
# TODO: SOMETHING IS WRONG HERE.


CentroidDis = 3 # This metric take the matrix norm on the difference of 2 centroids of the author.

Expand Down Expand Up @@ -273,6 +274,14 @@ def list_of_works(self):
def list_of_works_content(self):
    """
    Give the values held in the author's file-path-to-lines mapping.
    :return:
        A new list with every value of that mapping, in the mapping's
        iteration order.
    """
    Contents = self.__FilePathToLines.values()
    return list(Contents)

def work_matrix_dict(self):
    """
    Pair every entry of list_of_works() with the entry of get_matrices()
    at the same position.
    :return:
        A dictionary mapping each work to its matrix. If the two
        sequences differ in length, the extra entries are dropped.
    """
    Works = self.list_of_works()
    Matrices = self.get_matrices()
    return {Work: Matrix for Work, Matrix in zip(Works, Matrices)}

def name(self):
    """
    Give the author name stored on this instance.
    :return:
        The value of the private author-name attribute.
    """
    AuthorName = self.__AuthorName
    return AuthorName

Expand Down Expand Up @@ -510,7 +519,6 @@ def save_nparray(m, name):
print("Ok, we are going to save some centroid for both of the authors now: ")



def save_matrices_forall_data():
"""
This function will save all the works of the authors's transition
Expand All @@ -521,13 +529,36 @@ def save_matrices_forall_data():
None
"""
global Author
def save_listof_matrices():
pass
def gernate_all_authors():
def save(NpMatrix, dir:str, filename:str):
    """
    Write a matrix to a text file with np.savetxt.
    :param NpMatrix:
        The matrix to write; passed to np.savetxt as X.
    :param dir:
        Directory to write into; it is created if it does not exist yet.
        An empty string leaves the file name untouched.
    :param filename:
        Name of the output file.
    :return:
        None
    """
    # Imported locally so this helper does not rely on a module-level
    # `import os`.
    import os
    if dir:
        os.makedirs(dir, exist_ok=True)
    # Honour `dir`; joining with "" yields the bare file name.
    np.savetxt(fname=os.path.join(dir, filename), X=NpMatrix)
    return

def generate_all_authors():
    """
    Build one Author for every combination of data directory, matrix
    function and special-noun setting.
    :return:
        A list of Author instances: for each directory and each matrix
        function, one with IgnoreSpecialNoun=True followed by one with
        IgnoreSpecialNoun=False.
    """
    AllMatrices = [get_tm27, get_2ndtm, get_2ndlogarithm]
    FileLocations = [CHARLES_DICKENS, MARK_TWAIN]
    ListofAuthors = []
    for L in FileLocations:
        for G in AllMatrices:
            # NOTE(review): both settings are generated here; previously the
            # identical IgnoreSpecialNoun=True author was built twice, which
            # looks like a copy-paste slip. Confirm False is what is wanted.
            for Ignore in (True, False):
                ListofAuthors.append(
                    Author(dir=L,
                           matrixfunction=G,
                           IgnoreSpecialNoun=Ignore))

    return ListofAuthors

# TODO: FINISH THIS SHIT.
for Aut in generate_all_authors():
AuthorName = Aut.name()
for Work, Matrix in zip(Aut.list_of_works(), Aut.get_matrices()):
save(Matrix, "")
pass


pass # TODO: IMPLEMENT THIS SHIT
pass


if __name__ == '__main__':
Expand Down
5 changes: 0 additions & 5 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,10 +185,5 @@ def print_author(A, B):


if __name__ == "__main__":
# print_experiment(MatrixGeneratingFxn=get_tm27,
# IgnoreSpecialNouns=True,
# CentroidType=CentroidOption.AggregateMatrix,
# MatrixMetric=MatrixMetric.TwoNorm,
# AuthorMetric=AuthorMetric.CentroidDis)
print_cross_compare_experiement("data/Charles Dickens", "data/Mark Twain")

0 comments on commit c6effcb

Please sign in to comment.