-
Notifications
You must be signed in to change notification settings - Fork 0
/
oruga2_nsga2.py
175 lines (139 loc) · 5.37 KB
/
oruga2_nsga2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
# -*- coding: utf-8 -*-
"""
ORUGA: Optimizing Readability Using Genetic Algorithms
[Martinez-Gil2023a] J. Martinez-Gil, "Optimizing Readability Using Genetic Algorithms", arXiv preprint arXiv:2301.00374, 2023
@author: Jorge Martinez-Gil
"""
from jmetal.algorithm.multiobjective import NSGAII
from jmetal.operator import SBXCrossover, PolynomialMutation
from jmetal.util.termination_criterion import StoppingByEvaluations
from readability import Readability
from nltk.corpus import wordnet
from jmetal.core.problem import FloatProblem
from jmetal.core.solution import FloatSolution
def listToString(s):
    """Join the items of ``s`` into one space-separated string.

    Every item is converted with ``str`` and followed by a single space, so
    a non-empty result ends with a trailing space and an empty input gives
    ``""``. Two clean-ups are then applied to the joined text:

    * a space directly before a comma is removed (``"a ,b"`` -> ``"a,b"``);
    * every underscore becomes a space (``"football_club"`` ->
      ``"football club"``).

    Args:
        s: Iterable of items (typically word strings) to join.

    Returns:
        The cleaned-up text as a ``str``.
    """
    # str.join builds the text in a single pass instead of repeated ``+=``.
    joined = "".join(str(item) + " " for item in s)
    return joined.replace(' ,', ',').replace('_', ' ')
def Synonym(word, number):
    """Pick a WordNet synonym for ``word`` by 1-based position.

    The lemma names of every synset returned by ``wordnet.synsets`` are
    collected, in lookup order, into one candidate list and ``number``
    selects an entry from it.

    Trailing ``,`` and ``.`` characters are set aside before the lookup and
    re-attached to the chosen synonym. This way a raw token taken from
    ``text.split()`` (e.g. ``"club,"``) is looked up under the same key as
    the punctuation-free token used when ``index_array`` is filled.

    Args:
        word: Token to replace; may carry trailing commas or full stops.
        number: Desired 1-based position in the candidate list. It may be a
            float; it is truncated with ``int``.

    Returns:
        A ``(index, replacement)`` tuple:

        * ``(-2, word)`` when there is no candidate (``word`` is returned
          unchanged);
        * ``(len(candidates) - 1, last_candidate)`` when ``number`` is at or
          past the end of the candidate list;
        * ``(int(number), candidates[int(number - 1)])`` otherwise, with the
          list position clamped to ``0`` so that values below 1 can never
          wrap around to the end of the list or raise ``IndexError``.
    """
    core = word.rstrip(',.')
    trailing = word[len(core):]
    if not core:
        # Nothing but punctuation: there is no word to look up.
        return -2, word

    synonyms = [
        lemma.name()
        for synset in wordnet.synsets(core)
        for lemma in synset.lemmas()
    ]
    if not synonyms:
        return -2, word

    last = len(synonyms) - 1
    if number >= len(synonyms):
        return last, synonyms[last] + trailing

    position = max(int(number - 1), 0)
    return int(number), synonyms[position] + trailing
def fitness_func1(solution):
    """Score the readability of ``text`` after applying ``solution``.

    ``solution`` carries one gene per whitespace-separated token of the
    module-level ``text``. A gene of 1 or more replaces the token with the
    word returned by ``Synonym(token, gene)``; any other gene keeps the
    token as it is.

    Before the text is built, ``solution`` is repaired **in place**: every
    position whose ``index_array`` entry is ``<= 0`` is set to ``0``.
    ``Oruga.evaluate`` counts the genes ``>= 1`` on the same list right
    after calling this function, so that mutation must stay in place.

    Args:
        solution: Mutable, indexable sequence of numbers with one entry per
            token of ``text`` (and per entry of ``index_array``).

    Returns:
        The ``score`` of ``Readability(rewritten_text).flesch_kincaid()``.
    """
    # Repair step: switch off genes of tokens that have no usable synonym.
    for position, synonym_index in enumerate(index_array):
        if synonym_index <= 0:
            solution[position] = 0

    text_converted = []
    for position, token in enumerate(text.split()):
        gene = solution[position]
        if gene >= 1:
            _, replacement = Synonym(token, gene)
            text_converted.append(replacement)
        else:
            # Also covers a gene that compares false both ways (NaN): the
            # token is kept rather than being dropped from the text.
            text_converted.append(token)

    result = listToString(text_converted)
    return Readability(result).flesch_kincaid().score
text = 'Real Madrid Club de Futbol, meaning Royal Madrid Football Club, commonly referred to as Real Madrid, is a Spanish professional football club based in Madrid. Founded in 1902 as Madrid Football Club, the club has traditionally worn a white home kit since its inception. The honorific title real is Spanish for Royal and was bestowed to the club by King Alfonso XIII in 1920 together with the royal crown in the emblem. Real Madrid have played their home matches in the Santiago Bernabeu Stadium in downtown Madrid since 1947. Unlike most European sporting entities, Real Madrid members (socios) have owned and operated the club throughout its history.'

# One entry per whitespace-separated token of ``text``:
#   text_array  - the token, or the word Synonym() returns for it, with its
#                 comma / full stop put back at the end;
#   index_array - the index Synonym() returns for the token, or 0 when the
#                 token is not a replacement candidate.
text_array = []
index_array = []
res = text.split()
for token in res:
    # Strip commas and full stops and remember which mark to re-attach;
    # when a token holds both, the full stop wins.
    suffix = ''
    if ',' in token:
        token = token.replace(',', '')
        suffix = ','
    if '.' in token:
        token = token.replace('.', '')
        suffix = '.'

    # Candidates are tokens longer than three characters that do not start
    # with an upper-case letter. The length is tested first so that a token
    # left empty by the stripping above never reaches ``token[0]``.
    if len(token) > 3 and not token[0].isupper():
        number, word = Synonym(token, 6)
    else:
        number, word = 0, token

    text_array.append(word + suffix)
    index_array.append(number)
def obtain_text(solution):
    """Build the rewritten version of ``text`` described by ``solution``.

    The module-level ``text`` is split on whitespace and each token is
    paired with the gene at the same position of ``solution``:

    * gene below 1: the token is copied unchanged;
    * gene of 1 or more: the token is replaced by the word returned by
      ``Synonym(token, gene)``, written in upper case.

    Args:
        solution: Indexable sequence of numbers, one per token of ``text``.

    Returns:
        The tokens joined by ``listToString``.
    """
    rendered = []
    for position, token in enumerate(text.split()):
        gene = solution[position]
        if gene < 1:
            rendered.append(token)
            continue
        if gene >= 1:
            replacement = Synonym(token, gene)[1]
            rendered.append(replacement.upper())
            continue
        # Only reached when neither comparison holds; the token is left out.
        print ('Error')
    return listToString(rendered)
class Oruga(FloatProblem):
    """Two-objective float problem over the tokens of ``text``.

    There is one variable per entry of ``index_array``, bounded to
    ``[-4, 4]``. Both objectives are minimised:

    * objective 0: number of variables whose value is ``>= 1``;
    * objective 1: the value returned by ``fitness_func1``.
    """

    def __init__(self):
        """Declare the problem dimensions, directions, labels and bounds."""
        super().__init__()
        self.number_of_objectives = 2
        self.number_of_variables = len(index_array)
        self.number_of_constraints = 0

        self.obj_directions = [self.MINIMIZE, self.MINIMIZE]
        self.obj_labels = ['f(x)', 'f(y)']

        self.lower_bound = [-4] * self.number_of_variables
        self.upper_bound = [4] * self.number_of_variables

        # The bounds are also stored on the FloatSolution class itself.
        # NOTE(review): presumably needed by the jMetalPy version in use;
        # confirm before removing.
        FloatSolution.lower_bound = self.lower_bound
        FloatSolution.upper_bound = self.upper_bound

    def evaluate(self, solution: FloatSolution) -> FloatSolution:
        """Fill both objectives of ``solution`` and return it."""
        # Readability goes first: fitness_func1 zeroes, in place, the
        # variables of tokens that cannot be replaced, and the count below
        # has to see those repaired values.
        solution.objectives[1] = fitness_func1(solution.variables)
        solution.objectives[0] = sum(
            1 for value in solution.variables if value >= 1
        )
        return solution

    def get_name(self):
        """Return the display name of the problem."""
        return 'Oruga'
# Build the problem and configure NSGA-II: 20 individuals, 30 offspring per
# generation, stop after 800 evaluations.
problem = Oruga()

# Mutation probability is one over the number of variables.
mutation_operator = PolynomialMutation(
    probability=1.0 / problem.number_of_variables,
    distribution_index=20,
)
crossover_operator = SBXCrossover(probability=1.0, distribution_index=20)
stop_condition = StoppingByEvaluations(max_evaluations=800)

algorithm = NSGAII(
    problem=problem,
    population_size=20,
    offspring_population_size=30,
    mutation=mutation_operator,
    crossover=crossover_operator,
    termination_criterion=stop_condition,
)
algorithm.run()
from jmetal.util.solution import get_non_dominated_solutions, print_function_values_to_file, print_variables_to_file
from jmetal.lab.visualization import Plot
# Keep only the non-dominated solutions of the final result.
front = get_non_dominated_solutions(algorithm.get_result())

# Save objective values and variables to files.
print_function_values_to_file(front, 'FUN.NSGAII')
print_variables_to_file(front, 'VAR.NSGAII')

# Plot the front to 'NSGAII-ORUGA' in PNG format.
plot_front = Plot(
    title='ORUGA',
    axis_labels=['Words to be replaced', 'Readability Score'],
)
plot_front.plot(front, label='NSGA-II', filename='NSGAII-ORUGA', format='png')

# Print the rewritten text of every solution in the front.
for solution in front:
    # We should call here a function to try to correct the text
    rewritten = obtain_text(solution.variables)
    print (rewritten)