'''
Created on Jul 29, 2010

@author: dwmclary
'''
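
# Unit tests for hdmc: Hadoop streaming frame generation, checkpointing,
# HDFS data transfer, and inline job submission (with and without reducers).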
import unittest
import os
import hdmc
import hdfs.hdfs_access as hdfs
import hadoop_config as config
from glob import glob


class HDMCTest(unittest.TestCase):

    def setUp(self):
        self.wd = os.getcwd()
        self.script = self.wd+"/test/numpy_random_means.py"
        self.reducer = self.wd+"/test/numpy_mean_reduction.py"
        self.output_file = self.wd+"/test/random_means"
        self.checkpoint_names = [str(i) for i in range(1, 20)]
        self.checkpoint_dir = config.shared_tmp_space+os.getlogin()+"/hdmc_checkpoints"

    def tearDown(self):
        pass

    def test_make_frame(self):
        hdmc.make_frame(self.script)
        self.assertTrue(os.path.isfile(self.wd+"/frame.py"))

    def test_make_checkpointing_frame(self):
        hdmc.make_checkpointing_frame(self.script, self.checkpoint_names, self.checkpoint_dir)
        self.assertTrue(os.path.isfile(self.wd+"/checkpoint_frame.py"))

    def test_set_checkpoint_directory(self):
        # Remove any stale checkpoint directory so the call must recreate it.
        os.system('rmdir '+self.checkpoint_dir)
        checkpoint_dir = hdmc.set_checkpoint_directory(self.output_file)
        self.assertTrue(os.path.exists(checkpoint_dir))

    def test_download_hdfs_data(self):
        hdmc.download_hdfs_data(self.wd+"/test/dummy")
        self.assertTrue(os.path.isfile(self.wd+"/test/dummy"))
        os.system('rm '+self.wd+'/test/dummy')
        self.assertFalse(os.path.isfile(self.wd+"/test/dummy"))

    def test_submit_inline(self):
        hdfs.rm("random_means")
        hdmc.submit_inline(self.script, self.output_file, iterations=200)
        self.assertTrue(os.path.exists(self.wd+"/test/random_means"))

    def test_submit_inline_with_reduction(self):
        hdfs.rm("random_means")
        hdmc.submit_inline(self.script, self.output_file, iterations=200, reduction_script=self.reducer)
        self.assertTrue(os.path.exists(self.wd+"/test/random_means"))

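    # The two checkpoint tests below run line_counter.py over the Gutenberg
    # test corpus and compare the per-file counts produced on Hadoop against
    # the reference files wc_output.dat and wc_total.dat in the test directory.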
    def test_submit_checkpoint_inline(self):
        hdfs.rm("line_counts")
        file_list = glob(self.wd+"/test/gutenberg/*")
        self.script = self.wd+"/test/line_counter.py"
        self.output_file = self.wd+"/test/line_counts"
        checkpoints = hdmc.submit_checkpoint_inline(self.script, self.output_file, file_list, "")
        self.assertEqual(len(file_list), len(checkpoints))
        self.assertTrue(os.path.exists(self.wd+"/test/line_counts"))
        hadoop_result_file = self.wd+"/test/line_counts"
        master_result_file = self.wd+"/test/wc_output.dat"
        hadoop_results = {}
        master_results = {}

        # Each result line is "<count> <filename>"; index the counts by filename.
        with open(master_result_file) as f:
            for line in f:
                if len(line.rstrip()) > 0:
                    entry = line.split()
                    master_results[entry[1]] = int(entry[0])
        with open(hadoop_result_file) as f:
            for line in f:
                if len(line.rstrip()) > 0:
                    entry = line.split()
                    hadoop_results[entry[1]] = int(entry[0])

        # The Hadoop counts should match the reference counts file for file.
        for key in master_results:
            self.assertEqual(master_results[key], hadoop_results[key])

    def test_submit_checkpoint_inline_with_reduction(self):
        hdfs.rm("line_total")
        file_list = glob(self.wd+"/test/gutenberg/*")
        self.script = self.wd+"/test/line_counter.py"
        self.output_file = self.wd+"/test/line_total"
        self.reducer = self.wd+"/test/line_sum.py"
        checkpoints = hdmc.submit_checkpoint_inline(self.script, self.output_file, file_list, reduction_script=self.reducer, arguments="")
        self.assertEqual(len(file_list), len(checkpoints))
        self.assertTrue(os.path.exists(self.wd+"/test/line_total"))
        hadoop_result_file = self.wd+"/test/line_total"
        master_result_file = self.wd+"/test/wc_total.dat"
        hadoop_results = {}
        master_results = {}
        with open(master_result_file) as f:
            for line in f:
                if len(line.rstrip()) > 0:
                    entry = line.split()
                    master_results[entry[1]] = int(entry[0])
        with open(hadoop_result_file) as f:
            for line in f:
                if len(line.rstrip()) > 0:
                    entry = line.split()
                    hadoop_results[entry[1]] = int(entry[0])
        for key in master_results:
            self.assertEqual(master_results[key], hadoop_results[key])

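# Example invocation (assuming this file is saved as hdmc_test.py):
#   python hdmc_test.py                          # run the full suite
#   python hdmc_test.py HDMCTest.test_make_frame # run a single test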
if __name__ == "__main__":
    unittest.main()