-
Notifications
You must be signed in to change notification settings - Fork 0
/
mr_wordcount.py
executable file
·70 lines (55 loc) · 2.21 KB
/
mr_wordcount.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/python
#
#
# Vanderbilt University Computer Science
# Author: Aniruddha Gokhale
# Course: CS4287-5287 Principles of Cloud Computing
# Created: Fall 2016
#
# Purpose: Simple MapReduce Wordcount
#
# Note that this is a standalone main program. Although it can be invoked via mininet,
# you can also start it manually from an xterm opened on the master node
# from the mininet CLI.
# system and time
import os
import sys
import time
import argparse # argument parser
import re # regular expression
from mr_framework import MR_Framework # our wordcount MR framework
# @NOTE@: Clearly, this file is specifically for wordcount and so you will
# need to do things differently for Assignment #4 in the
# mr_framework.py file. However, this file may not need any
# change other than just renaming it (since it is not wordcount
# for your assignment)
##################################
# Command line parsing
##################################
def parseCmdLineArgs(argv=None):
    """Parse the command line of the wordcount driver.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line.  When None (the default) argparse reads
            sys.argv[1:], so existing zero-argument callers are unaffected.

    Returns:
        An argparse.Namespace with the attributes ``map``, ``reduce`` and
        ``masterport`` (ints) and ``datafile`` (string).

    Raises:
        SystemExit: raised by argparse itself when the arguments are
            invalid or the required ``datafile`` positional is missing.
    """
    parser = argparse.ArgumentParser()

    # optional arguments
    parser.add_argument("-m", "--map", type=int, default=10, help="Number of Map jobs, default 10")
    parser.add_argument("-r", "--reduce", type=int, default=3, help="Number of Reduce jobs, default 3")
    parser.add_argument("-p", "--masterport", type=int, default=5557, help="Master node port, default 5557")

    # positional arguments, in that order (the "addrfile" positional is
    # currently disabled)
    # parser.add_argument ("addrfile", help="File of host ip addresses")
    parser.add_argument("datafile", help="Big data file")

    # argv=None makes argparse fall back to sys.argv[1:]
    return parser.parse_args(argv)
#------------------------------------------
# main function
def main():
    """Main program: parse the command line and run the MapReduce framework.

    Prints a banner, parses the command-line arguments with
    parseCmdLineArgs, constructs an MR_Framework from the parsed
    arguments and calls its solve() method.
    """
    # The single-argument parenthesized form prints the same text under
    # both Python 2 and Python 3; the bare print statement is Python 2 only.
    print("MapReduce Wordcount Main program")

    parsed_args = parseCmdLineArgs()

    # Now invoke the mapreduce framework. Its constructor receives the
    # whole parsed-argument object rather than individual values.
    mrf = MR_Framework(parsed_args)

    # invoke the process
    mrf.solve()
#----------------------------------------------
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()