25
25
import sickbeard
26
26
27
27
from sickbeard import logger
28
+ from sickbeard import encodingKludge as ek
29
+ from sickbeard import helpers
30
+
28
31
29
32
class NameParser (object ):
30
def __init__(self, is_file_name=True):
    """Set up the parser and compile its regexes.

    is_file_name -- stored on the instance as ``is_file_name``; ``parse``
                    consults it to decide whether the extension should be
                    removed from the final path component before matching.
    """
    self.is_file_name = is_file_name

    # Start from an empty list of compiled patterns and then invoke the
    # compile step once, up front, at construction time.
    self.compiled_regexes = list()
    self._compile_regexes()
35
38
36
39
def clean_series_name (self , series_name ):
37
40
"""Cleans up series name by removing any . and _
38
41
characters, along with any trailing hyphens.
39
-
42
+
40
43
Is basically equivalent to replacing all _ and . with a
41
44
space, but handles decimal numbers in string, for example:
42
-
45
+
43
46
>>> cleanRegexedSeriesName("an.example.1.0.test")
44
47
'an example 1.0 test'
45
48
>>> cleanRegexedSeriesName("an_example_1.0_test")
46
49
'an example 1.0 test'
47
-
50
+
48
51
Stolen from dbr's tvnamer
49
52
"""
50
-
53
+
51
54
series_name = re .sub ("(\D)\.(?!\s)(\D)" , "\\ 1 \\ 2" , series_name )
52
- series_name = re .sub ("(\d)\.(\d{4})" , "\\ 1 \\ 2" , series_name ) # if it ends in a year then don't keep the dot
55
+ series_name = re .sub ("(\d)\.(\d{4})" , "\\ 1 \\ 2" , series_name ) # if it ends in a year then don't keep the dot
53
56
series_name = re .sub ("(\D)\.(?!\s)" , "\\ 1 " , series_name )
54
57
series_name = re .sub ("\.(?!\s)(\D)" , " \\ 1" , series_name )
55
58
series_name = series_name .replace ("_" , " " )
@@ -66,44 +69,44 @@ def _compile_regexes(self):
66
69
self .compiled_regexes .append ((cur_pattern_name , cur_regex ))
67
70
68
71
def _parse_string (self , name ):
69
-
72
+
70
73
if not name :
71
74
return None
72
-
75
+
73
76
for (cur_regex_name , cur_regex ) in self .compiled_regexes :
74
77
match = cur_regex .match (name )
75
78
76
79
if not match :
77
80
continue
78
-
81
+
79
82
result = ParseResult (name )
80
83
result .which_regex = [cur_regex_name ]
81
-
84
+
82
85
named_groups = match .groupdict ().keys ()
83
86
84
87
if 'series_name' in named_groups :
85
88
result .series_name = match .group ('series_name' )
86
89
if result .series_name :
87
90
result .series_name = self .clean_series_name (result .series_name )
88
-
91
+
89
92
if 'season_num' in named_groups :
90
93
tmp_season = int (match .group ('season_num' ))
91
- if cur_regex_name == 'bare' and tmp_season in (19 ,20 ):
94
+ if cur_regex_name == 'bare' and tmp_season in (19 , 20 ):
92
95
continue
93
96
result .season_number = tmp_season
94
-
97
+
95
98
if 'ep_num' in named_groups :
96
99
ep_num = self ._convert_number (match .group ('ep_num' ))
97
100
if 'extra_ep_num' in named_groups and match .group ('extra_ep_num' ):
98
- result .episode_numbers = range (ep_num , self ._convert_number (match .group ('extra_ep_num' ))+ 1 )
101
+ result .episode_numbers = range (ep_num , self ._convert_number (match .group ('extra_ep_num' )) + 1 )
99
102
else :
100
103
result .episode_numbers = [ep_num ]
101
104
102
105
if 'air_year' in named_groups and 'air_month' in named_groups and 'air_day' in named_groups :
103
106
year = int (match .group ('air_year' ))
104
107
month = int (match .group ('air_month' ))
105
108
day = int (match .group ('air_day' ))
106
-
109
+
107
110
# make an attempt to detect YYYY-DD-MM formats
108
111
if month > 12 :
109
112
tmp_month = month
@@ -148,18 +151,18 @@ def _combine_results(self, first, second, attr):
148
151
# if the second doesn't exist then return the first
149
152
if not second :
150
153
return getattr (first , attr )
151
-
154
+
152
155
a = getattr (first , attr )
153
156
b = getattr (second , attr )
154
-
157
+
155
158
# if a is good use it
156
159
if a != None or (type (a ) == list and len (a )):
157
160
return a
158
161
# if not use b (if b isn't set it'll just be default)
159
162
else :
160
163
return b
161
164
162
- def _unicodify (self , obj , encoding = "utf-8" ):
165
+ def _unicodify (self , obj , encoding = "utf-8" ):
163
166
if isinstance (obj , basestring ):
164
167
if not isinstance (obj , unicode ):
165
168
obj = unicode (obj , encoding )
@@ -203,30 +206,30 @@ def _convert_number(self, number):
203
206
return int (number )
204
207
205
208
def parse (self , name ):
206
-
209
+
207
210
name = self ._unicodify (name )
208
-
211
+
209
212
cached = name_parser_cache .get (name )
210
213
if cached :
211
214
return cached
212
215
213
216
# break it into parts if there are any (dirname, file name, extension)
214
- dir_name , file_name = os .path .split ( name )
215
- ext_match = re . match ( '(.*)\.\w{3,4}$' , file_name )
216
- if ext_match and self .file_name :
217
- base_file_name = ext_match . group ( 1 )
217
+ dir_name , file_name = ek . ek ( os .path .split , name )
218
+
219
+ if self .is_file_name :
220
+ base_file_name = helpers . remove_extension ( file_name )
218
221
else :
219
222
base_file_name = file_name
220
-
223
+
221
224
# use only the direct parent dir
222
- dir_name = os .path .basename ( dir_name )
223
-
225
+ dir_name = ek . ek ( os .path .basename , dir_name )
226
+
224
227
# set up a result to use
225
228
final_result = ParseResult (name )
226
-
229
+
227
230
# try parsing the file name
228
231
file_name_result = self ._parse_string (base_file_name )
229
-
232
+
230
233
# parse the dirname for extra info if needed
231
234
dir_name_result = self ._parse_string (dir_name )
232
235
@@ -263,6 +266,7 @@ def parse(self, name):
263
266
# return it
264
267
return final_result
265
268
269
+
266
270
class ParseResult (object ):
267
271
def __init__ (self ,
268
272
original_name ,
@@ -275,7 +279,7 @@ def __init__(self,
275
279
):
276
280
277
281
self .original_name = original_name
278
-
282
+
279
283
self .series_name = series_name
280
284
self .season_number = season_number
281
285
if not episode_numbers :
@@ -285,15 +289,15 @@ def __init__(self,
285
289
286
290
self .extra_info = extra_info
287
291
self .release_group = release_group
288
-
292
+
289
293
self .air_date = air_date
290
-
294
+
291
295
self .which_regex = None
292
-
296
+
293
297
def __eq__ (self , other ):
294
298
if not other :
295
299
return False
296
-
300
+
297
301
if self .series_name != other .series_name :
298
302
return False
299
303
if self .season_number != other .season_number :
@@ -306,7 +310,7 @@ def __eq__(self, other):
306
310
return False
307
311
if self .air_date != other .air_date :
308
312
return False
309
-
313
+
310
314
return True
311
315
312
316
def __str__ (self ):
@@ -315,10 +319,10 @@ def __str__(self):
315
319
else :
316
320
to_return = u''
317
321
if self .season_number != None :
318
- to_return += 'S' + str (self .season_number )
322
+ to_return += 'S' + str (self .season_number )
319
323
if self .episode_numbers and len (self .episode_numbers ):
320
324
for e in self .episode_numbers :
321
- to_return += 'E' + str (e )
325
+ to_return += 'E' + str (e )
322
326
323
327
if self .air_by_date :
324
328
to_return += str (self .air_date )
@@ -328,7 +332,7 @@ def __str__(self):
328
332
if self .release_group :
329
333
to_return += ' (' + self .release_group + ')'
330
334
331
- to_return += ' [ABD: ' + str (self .air_by_date )+ ']'
335
+ to_return += ' [ABD: ' + str (self .air_by_date ) + ']'
332
336
333
337
return to_return .encode ('utf-8' )
334
338
@@ -338,19 +342,20 @@ def _is_air_by_date(self):
338
342
return False
339
343
air_by_date = property (_is_air_by_date )
340
344
345
+
341
346
class NameParserCache (object ):
342
347
# TODO: check if the fifo list can be skipped and only use one dict
343
- _previous_parsed_list = [] # keep a fifo list of the cached items
348
+ _previous_parsed_list = [] # keep a fifo list of the cached items
344
349
_previous_parsed = {}
345
350
_cache_size = 100
346
-
351
+
347
352
def add(self, name, parse_result):
    """Cache ``parse_result`` under ``name``, evicting the oldest entries.

    name         -- key the result is stored under.
    parse_result -- value to cache; replaces any value already stored
                    for ``name``.

    The FIFO list ``_previous_parsed_list`` records insertion order and
    ``_previous_parsed`` holds the values. Once the FIFO list grows past
    ``_cache_size`` the oldest names are dropped from both.
    """
    # Record the name in the FIFO order only the first time it is cached.
    # A duplicate FIFO entry would later try to evict a key that has
    # already been removed from the dict.
    if name not in self._previous_parsed:
        self._previous_parsed_list.append(name)
    self._previous_parsed[name] = parse_result

    while len(self._previous_parsed_list) > self._cache_size:
        oldest = self._previous_parsed_list.pop(0)
        # Tolerate a missing key so a stale FIFO entry cannot raise KeyError.
        self._previous_parsed.pop(oldest, None)
353
-
358
+
354
359
def get (self , name ):
355
360
if name in self ._previous_parsed :
356
361
logger .log ("Using cached parse result for: " + name , logger .DEBUG )
@@ -360,5 +365,6 @@ def get(self, name):
360
365
361
366
name_parser_cache = NameParserCache ()
362
367
368
+
363
369
class InvalidNameException(Exception):
    """The given name is not valid"""
0 commit comments