-
Notifications
You must be signed in to change notification settings - Fork 5
/
GAMESS#2
328 lines (303 loc) · 12.4 KB
/
GAMESS#2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
# input4bsse.py
#
# USAGE: python input4bsse.py filename.out
# by: Victor M. Rosas Garcia rosas.victor@gmail.com
# 2009-08-14
# developed with python 2.5.2
#
# this program generates GAMESS-US input files for BSSE
# counterpoise correction calculations on clusters.
# It takes two input files:
# a GAMESS-US geometry optimization output file. The name
# of this file must have a 3-letter extension. The
# extension itself can be whatever you want.
# a "molecules.dat" text file
#
# It works by scanning for the xyz coordinates of the
# equilibrium geometry and for the basis functions used
# for each atom.
# Currently it only works for C1 symmetry groups, anything else
# will not yield enough atoms, with the probable exception of some
# Cs cases (when all the atoms are contained in the Cs plane).
#
# The purpose of the "molecules.dat" file is to supply
# information on which groups of atoms form molecules, as
# currently this program is not smart enough to find that
# out by itself. The user must generate "molecules.dat" as a
# plain text file.
#
# The format of "molecules.dat" is as follows:
#
# moleculeId=atomno1,atomno2,atomno3...
#
# where moleculeId is completely user-defined with ASCII characters. There should be
# no spaces at the sides of the "=" sign or after the commas
# e.g. if a water dimer was defined as follows:
# O x1 y1 z1
# H x2 y2 z2
# H x3 y3 z3
# O x4 y4 z4
# H x5 y5 z5
# H x6 y6 z6
# then the contents of "molecules.dat" would be (minus the '# '):
# water1=1,2,3
# water2=4,5,6
# it could equally well be:
# agua1=1,2,3
# agua2=4,5,6
# The program then generates a single geometry input file
# for each molecule in the cluster, and an input file
# for each molecule with all the atoms in the other molecules
# defined as ghost atoms. The moleculeIds become part of the
# filenames, and are included in the input file comment, for
# ease in keeping track of things.
# The filenames follow the pattern: filename_geom_moleculeid.inp
# and filename_bsse_moleculeid.inp
import sys, csv
eq_geom=dict() #dictionary for full geom
basis = dict() #dictionary for basis set
geom_basis = dict() #mapping eq_geom <--> basis
basis_count = dict()# this tells me how many lines after an atom contain its basis set
molecules = dict() # list of user-defined molecules
geom_file = dict() # dictionary of geometry files file streams
bsse_file = dict() # dictionary of bsse files file stremas
one_geom = []
bsse_out = dict()
if sys.argv[1] == '-h':
print "USO: python input4bsse.py nombrearchivo.out"
print "by: Victor M. Rosas Garcia rosas.victor@gmail.com"
print "2009-08-14"
print "desarrollado con python 2.5.2"
print "este programa genera archivos de entrada de GAMESS-US para calculos de correccion por BSSE en agregados."
print "Requiere dos archivos de entrada:"
print "***Un archivo de salida de optimacion geometrica de GAMESS-US. El nombre debe tener una extension de tres letras. La extension puede ser lo que el usuario quiera."
print "***Un archivo de texto \"molecules.dat\""
print "\n"
print "Funciona haciendo un barrido buscando la coordenadas xyz de la geometria de equilibrio y las funciones base usadas para cada atomo."
print "Al presente solamente funciona para el grupo de simetria C1, cualquier otro grupo no proporcionara suficientes atomos, con la probable excepcion de algunos casos de Cs (cuando todos los atomos estan contenidos en el plano Cs)."
print "El archivo \"molecules.dat\" proporciona informacion sobre cuales grupos de atomos forman moleculas, ya que el programa no es lo suficientemente listo como para averiguarlo por si mismo (y probablemente jamas lo sea). El usuario debe generar \"molecules.dat\" como un archivo de texto sencillo."
print "\n"
print "El formato de \"molecules.dat\" es como sigue:"
print "moleculeId=atomno1,atomno2,atomno3..."
print "donde moleculeId es completamente definido por el usuario con caracteres ASCII. No debe haber espacios a los lados del signo \"=\" o despues de las comas, p.ej., si definimos un dimero de agua asi:\n"
print "O x1 y1 z1"
print "H x2 y2 z2"
print "H x3 y3 z3"
print "O x4 y4 z4"
print "H x5 y5 z5"
print "H x6 y6 z6\n"
print "entonces el contenido de \"molecules.dat\" seria:\n"
print "water1=1,2,3"
print "water2=4,5,6\n"
print "tambien podria ser:\n"
print "agua1=1,2,3"
print "agua2=4,5,6\n"
print "El programa entonces genera un archivo entrada con la geometria individual para cada molecula del agregado, y un archivo de entrada para cada molecula del agregado con todos los demas atomos definidos como fantasmas. La particula moleculeId se vuelve parte de los nombres de archivo, y tambien queda incluida en el comentario/titulo del contenido del archivo de entrada, para facilitar seguir la pista de cada molecula. Los nombres de archivo siguen el patron:\n filename_geom_moleculeId.inp\n filename_bsse_moleculeId.inp"
exit(0)
else:
name=sys.argv[1]
f = open(name,'r')
# find_eq_geom() scans a GAMESS-US output file for an equilibrium geometry
# there is no error checking: it endlessly loops if there is no eq geom
# It gets the number of atoms from the output file itself
# all the atoms and their coords are stored in dictionary eq_geom
def find_eq_geom():
f.seek(0)
string = ''
i = 0
while string != " ***** EQUILIBRIUM GEOMETRY LOCATED *****\n":
string = f.readline()
if string[:22] == ' TOTAL NUMBER OF ATOMS':
num_atoms = int(string[-3:])
else:
eq_geom_begins = f.tell()
f.seek(eq_geom_begins+154) #the geom starts 154 characters after the EQUIL GEOM line
for i in range(num_atoms):
eq_geom[i] = f.readline()
return num_atoms
# find_basis loads into memory all the basis functions from the GAMESS-US output file
def find_basis():
f.seek(0)
string = ''
i = 0
while string != " ATOMIC BASIS SET\n":
string = f.readline()
else:
basis_begins = f.tell()
f.seek(basis_begins+219) #the basis start 219 characters after the ATOMIC BASIS line
while string[:47] != ' TOTAL NUMBER OF BASIS SET SHELLS =': #this indicates the end of the basis
string = f.readline()
# if string == '\n':
# print "Found empty line!"
# string = f.readline()
basis[i] = string
i = i + 1
# form_output scans the basis dictionary to find out the line where
# each element appears. The result is stored in a dictionary called
# geom_basis
def form_output():
j = 0
for i in range(len(basis)-1):
if basis[i][1:3] == "CA":
# print "Ca at key: ",i
geom_basis[j] = i
j = j + 1
if basis[i][1:3] == "O ":
# print "O at key: ",i
geom_basis[j] = i
j = j + 1
if basis[i][1:3] == "H ":
# print "H at key: ",i
geom_basis[j] = i
j = j + 1
if basis[i][1:3] == "F ":
# print "F at key: ",i
geom_basis[j] = i
j = j + 1
if basis[i][1:3] == "C ":
# print "C at key: ",i
geom_basis[j] = i
j = j + 1
if basis[i][1:3] == "S ":
# print "S at key: ",i
geom_basis[j] = i
j = j + 1
if basis[i][1:3] == "AS":
# print "As at key: ",i
geom_basis[j] = i
j = j + 1
for i in geom_basis:
basis[int(geom_basis[i])] = eq_geom[i]
num_atoms = find_eq_geom()
find_basis()
find_eq_geom()
form_output() #put xyz coords instead of only chemical symbol
counter = 0
# NEED TO TRANSFORM THIS TO A FUNCTION
# this routine counts the number of times a certain function type appears
# and then creates the string that goes immediately after the element-coords
# in the input file. Things like "S 6" or "D 1" that are required by the
# GAMESS-US input format
# an empty line indicates stop the function type counting and store the result
# in the previous empty line
for i in range(len(basis)-1):
# print basis[i]
if basis[i][10:12] == 'S ':
type = 's'
# print "Function is S type"
basis[i] = basis[i][14:]
counter = counter + 1
elif basis[i] == '\n' and counter != 0 and type == 's':
# print "Found empty line"
basis[i-(counter+1)] = ' S '+str(counter)+'\n'
# print basis[i-(counter+1)]
counter = 0
if basis[i][10:12] == 'L ':
type = 'l'
# print "Function is L type"
basis[i] = basis[i][14:]
counter = counter + 1
elif basis[i] == '\n' and counter != 0 and type == 'l':
# print "Found empty line"
basis[i-(counter+1)] = ' L '+str(counter)+'\n'
# print basis[i-(counter+1)]
counter = 0
if basis[i][10:12] == 'D ':
type = 'd'
# print "Function is D type"
basis[i] = basis[i][14:]
counter = counter + 1
elif basis[i] == '\n' and counter != 0 and type == 'd':
# print "Found empty line"
basis[i-(counter+1)] = ' D '+str(counter)+'\n'
# print basis[i-(counter+1)]
counter = 0
f.close()
# now I need to open as many writing streams as input files I want.
# The number of molecules defined in "molecules.dat" will tell me
# how many file streams to open.
moleculeReader = csv.reader(open('molecules.dat','r'), delimiter='=')
for row in moleculeReader:
try:
molecules[row[0]] = row[1]
except IndexError:
break
for i in molecules.keys():
geom_file[i] = open(name[:-4]+'_geom_'+i+'.inp', 'w')
bsse_file[i] = open(name[:-4]+'_bsse_'+i+'.inp', 'w')
# I need a way to select single molecules with their basis sets
for i in range(1,num_atoms): #here we find out all the lines with basis set of an atom
basis_count[i-1] = geom_basis[i]-geom_basis[i-1]-1
basis_count[num_atoms-1] = len(basis) - geom_basis[num_atoms-1] - 2
# Routine for saving individual geometries in single-point input files
# I'll have to renumber the basis functions
file_counter = 0
for k in molecules.keys():
commands = " $CONTRL COORD=UNIQUE UNITS=ANGS RUNTYP=ENERGY MAXIT=100 $END\n $SYSTEM MWORDS=10 $END\n $SCF DIIS=.TRUE. $END\n $DATA\n optimum "+name+" geom "+str(k)+" for bsse\n"
geom_file[k].write(commands)
# print "Working on molecule ", k
one_geom[:] = []
b = eval(molecules[k])
for j in b:
for i in range(geom_basis[j-1],geom_basis[j-1]+basis_count[j-1]):
one_geom.append(basis[i])
one_geom.append('\n')
# here I have to renumber the basis functions that go into the file
# probably traverse one_geom, skip the lines with letters at the beginning
# and renumber the others
counter = 1
for m in range(len(one_geom)):
if one_geom[m][0:3] == ' S' or one_geom[m][0:3] == ' L' or one_geom[m][0:3] == ' D' or one_geom[m][0:3] == ' CA' or one_geom[m][0:3] == ' F ' or one_geom[m][0:3] == ' O ' or one_geom[m][0:3] == ' H ' or one_geom[m][0:3] == ' C ' or one_geom[m][0:3] == ' AS' or one_geom[m][0:3] == ' S ' or one_geom[m] == '\n':
pass
else:
one_geom[m] = ' '+str(counter)+one_geom[m][5:]
counter = counter + 1
geom_file[k].write("C1\n")
for m in range(len(one_geom)):
if one_geom[m] == '\n':
geom_file[k].write(one_geom[m])
else:
geom_file[k].write(one_geom[m][1:])
geom_file[k].write(" $END\n")
file_counter = file_counter + 1
# In order to generate the ghost atoms, I need to use the whole contents
# of basis, I need to traverse the molecules, leave one real, and make
# all the others ghost atoms.
molecules_list = molecules.keys()
molecules_list.sort()
print molecules_list
# here we initialize the dictionary that will contain the real+ghost atoms
for i in range(len(basis)-1):
bsse_out[i] = "\n"
for i in molecules_list:
commands = " $CONTRL COORD=UNIQUE UNITS=ANGS RUNTYP=ENERGY MAXIT=100 $END\n $SYSTEM MWORDS=10 $END\n $SCF DIIS=.TRUE. $END\n $DATA\n optimum "+name+" geom "+str(i)+" for bsse\n"
bsse_file[i].write(commands)
bsse_file[i].write("C1\n")
for j in molecules_list:
b = eval(molecules[j])
if j == i:
for k in b:
for l in range(geom_basis[k-1],geom_basis[k-1]+basis_count[k-1]):
bsse_out[l] = basis[l]
else:
for k in b:
for l in range(geom_basis[k-1],geom_basis[k-1]+basis_count[k-1]):
if l == geom_basis[k-1]:
if basis[l][12] != ' ':
# print " X -"+basis[l][12:]
bsse_out[l] = " X -"+basis[l][12:]
else:
# print " X -"+basis[l][13:]
bsse_out[l] = " X -"+basis[l][13:]
# print bsse_out[l],
else:
# print basis[l]
bsse_out[l] = basis[l]
for j in range(len(bsse_out)):
bsse_file[i].write(bsse_out[j])
bsse_file[i].write(" $END\n")
#for i in range(len(bsse_out)):
# print bsse_out[i],
for i in molecules.keys():
geom_file[i].close()
bsse_file[i].close()