-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathMCS.py
124 lines (101 loc) · 5.04 KB
/
MCS.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
import os
def load_graph(image_path):
# Load graph from image
print(f"Loading graph from image: {image_path}")
image = plt.imread(image_path)
# Check if image has valid dimensions for a graph (e.g., 3 channels for RGB)
if len(image.shape) != 3:
raise ValueError("Invalid image dimensions. Expected RGB image.")
# Convert image to grayscale
gray_image = np.mean(image, axis=2)
# Convert grayscale image to binary (0 or 1) based on thresholding
binary_image = (gray_image > np.mean(gray_image)) * 1
# Create a square image by cropping or resizing
min_dim = min(binary_image.shape)
square_image = binary_image[:min_dim, :min_dim]
# Convert binary image to graph
return nx.from_numpy_array(square_image)
def compute_mcs(graph1, graph2):
# Compute MCS between two graphs
mcs_size = 0
# Convert graphs to adjacency matrices
adjacency_matrix1 = nx.to_numpy_array(graph1)
adjacency_matrix2 = nx.to_numpy_array(graph2)
# Iterate over all pairs of nodes in graph1 and graph2
for node1 in graph1.nodes():
for node2 in graph2.nodes():
if adjacency_matrix1[node1, node1] == 1 and adjacency_matrix2[node2, node2] == 1:
if adjacency_matrix1[node1, node2] == 1 and adjacency_matrix2[node2, node1] == 1:
# Found a common node, increment MCS size
mcs_size += 1
return mcs_size
def distance_metric(graph1, graph2):
# Compute distance metric based on MCS between two graphs
return compute_mcs(graph1, graph2)
def knn(train_graphs, test_graph, train_labels, k):
# Compute distances between test graph and all training graphs
distances = [distance_metric(train_graph, test_graph) for train_graph in train_graphs]
# Get indices of k-nearest neighbors
nearest_indices = np.argsort(distances)[:k]
# Get class labels of k-nearest neighbors
nearest_labels = [train_labels[i] for i in nearest_indices]
# Predict class label based on majority vote
predicted_label = max(set(nearest_labels), key=nearest_labels.count)
return predicted_label
# Example usage
graph_folder = "E:\Github Repos\Classification-of-Documents-Using-Graph-Based-Features-and-KNN\prepped graphs"
graph_filenames = os.listdir(graph_folder)
num_graphs = len(graph_filenames)
# Load graphs from image files
train_graphs = []
train_labels = [] # Assuming labels are available for training graphs
for i, filename in enumerate(graph_filenames):
graph_path = os.path.join(graph_folder, filename)
print(f"Loading graph {i+1}/{num_graphs} from image: {graph_path}")
graph = load_graph(graph_path)
train_graphs.append(graph)
# Assuming labels are stored in the filename or any other way
# You need to implement this part to extract labels from filenames or any other source
# For now, let's assume labels are integers corresponding to different classes
train_labels.append(int(filename.split('.')[0].split('(')[-1]))
# Choose value of k (number of neighbors)
k = 5
# Load test graph (You need to implement this part)
test_graph = None # You need to load the test graph from file or any other source
# Perform classification for the test graph
predicted_label = knn(train_graphs, test_graph, train_labels, k)
print("Predicted label:", predicted_label)
#--- Load training graphs from image files
train_graphs = []
train_labels = [] # Assuming labels are available for training graphs
train_graph_folder = "E:\Github Repos\Classification-of-Documents-Using-Graph-Based-Features-and-KNN\training_graphs"
train_graph_filenames = os.listdir(train_graph_folder)
num_train_graphs = len(train_graph_filenames)
for i, filename in enumerate(train_graph_filenames):
graph_path = os.path.join(train_graph_folder, filename)
print(f"Loading training graph {i+1}/{num_train_graphs} from image: {graph_path}")
graph = load_graph(graph_path)
train_graphs.append(graph)
# Assuming labels are stored in the filename or any other way
# You need to implement this part to extract labels from filenames or any other source
# For now, let's assume labels are integers corresponding to different classes
train_labels.append(int(filename.split('.')[0].split('(')[-1]))
# Load test graphs from image files
test_graphs = []
test_graph_folder = "E:\Github Repos\Classification-of-Documents-Using-Graph-Based-Features-and-KNN\test_graphs"
test_graph_filenames = os.listdir(test_graph_folder)
num_test_graphs = len(test_graph_filenames)
for i, filename in enumerate(test_graph_filenames):
graph_path = os.path.join(test_graph_folder, filename)
print(f"Loading test graph {i+1}/{num_test_graphs} from image: {graph_path}")
graph = load_graph(graph_path)
test_graphs.append(graph)
# Choose value of k (number of neighbors)
k = 5
# Perform classification for each test graph
for i, test_graph in enumerate(test_graphs):
predicted_label = knn(train_graphs, test_graph, train_labels, k)
print(f"Predicted label for test graph {i+1}: {predicted_label}")