-
Notifications
You must be signed in to change notification settings - Fork 21
/
mysqldump_to_csv.py
156 lines (134 loc) · 5.14 KB
/
mysqldump_to_csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# The following code is a slight alteration of code written by James Mirsha: https://github.com/jamesmishra
# License is as follows:
"""
The MIT License (MIT)
Copyright (c) 2014 James Mishra
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""
#!/usr/bin/env python
import fileinput
import csv
import sys
# This prevents prematurely closed pipes from raising
# an exception in Python
from signal import signal, SIGPIPE, SIG_DFL
signal(SIGPIPE, SIG_DFL)
# allow large content in the dump
csv.field_size_limit(sys.maxsize)
def is_insert(line):
"""
Returns true if the line begins a SQL insert statement.
"""
return line.startswith('INSERT INTO') or False
def is_table(line):
"""
Returns true if the line begins a SQL create table statement.
"""
return line.startswith('CREATE TABLE') or False
def get_name(line):
"""
returns the name of the current table
"""
return line.partition('`')[2].partition('`')[0]
def get_values(line):
"""
Returns the portion of an INSERT statement containing values
"""
return line.partition('` VALUES ')[2]
def values_sanity_check(values):
"""
Ensures that values from the INSERT statement meet basic checks.
"""
assert values
assert values[0] == '('
# Assertions have not been raised
return True
def parse_values(values, outfile):
"""
Given a file handle and the raw values from a MySQL INSERT
statement, write the equivalent CSV to the file
"""
latest_row = []
reader = csv.reader([values], delimiter=',',
doublequote=False,
escapechar='\\',
quotechar="'",
strict=True
)
writer = csv.writer(outfile, quoting=csv.QUOTE_MINIMAL)
for reader_row in reader:
for column in reader_row:
# If our current string is empty...
if len(column) == 0 or column == 'NULL':
latest_row.append(chr(0))
continue
# If our string starts with an open paren
if column[0] == "(":
# Assume that this column does not begin
# a new row.
new_row = False
# If we've been filling out a row
if len(latest_row) > 0:
# Check if the previous entry ended in
# a close paren. If so, the row we've
# been filling out has been COMPLETED
# as:
# 1) the previous entry ended in a )
# 2) the current entry starts with a (
if latest_row[-1][-1] == ")":
# Remove the close paren.
latest_row[-1] = latest_row[-1][:-1]
new_row = True
# If we've found a new row, write it out
# and begin our new one
if new_row:
writer.writerow(latest_row)
latest_row = []
# If we're beginning a new row, eliminate the
# opening parentheses.
if len(latest_row) == 0:
column = column[1:]
# Add our column to the row we're working on.
latest_row.append(column)
# At the end of an INSERT statement, we'll
# have the semicolon.
# Make sure to remove the semicolon and
# the close paren.
if latest_row[-1][-2:] == ");":
latest_row[-1] = latest_row[-1][:-2]
writer.writerow(latest_row)
def main():
"""
Parse arguments and start the program
"""
# Iterate over all lines in all files
# listed in sys.argv[1:]
# or stdin if no args given.
try:
for line in fileinput.input():
# Look for an INSERT statement and parse it.
if is_table(line):
tableName = get_name(line)
sys.stdout = open(tableName + '.csv', 'w')
if is_insert(line):
values = get_values(line)
if values_sanity_check(values):
parse_values(values, sys.stdout)
except KeyboardInterrupt:
sys.exit(0)
if __name__ == "__main__":
main()