Package ziggy :: Package hdmc :: Module hdmc_tests
[hide private]
[frames] | [no frames]

Source Code for Module ziggy.hdmc.hdmc_tests

  1  ''' 
  2  Created on Jul 29, 2010 
  3   
  4  @author: dwmclary 
  5  ''' 
  6  import unittest 
  7  import os 
  8  import hdmc 
  9  import hdfs.hdfs_access as hdfs 
 10  import hadoop_config as config 
 11  from glob import glob 
 12   
class HDMCTest(unittest.TestCase):
    '''
    Integration tests for hdmc Hadoop job submission.

    These tests exercise frame generation, dummy-data creation, checkpoint
    directory handling, and inline job submission. They require a working
    Hadoop/HDFS deployment (via the hdfs_access wrapper) and the fixture
    scripts/data under ./test, so they are integration tests, not unit tests.

    NOTE(review): several ``def`` header lines were lost from the extracted
    source (original lines 47, 58, 63, 93); the method names below are
    reconstructed from their bodies. ``testSubmitNoSupportingFiles`` is
    certain (it is called by ``runTest``); confirm the other three against
    the original module.
    '''

    def runTest(self):
        # Entry point when the case is instantiated directly as HDMCTest().
        # NOTE(review): under a standard unittest runner setUp()/tearDown()
        # are also invoked by the framework, so setUp effectively runs twice;
        # the explicit calls are kept so bare HDMCTest().runTest() still works.
        self.setUp()
        self.testSubmitNoSupportingFiles()
        self.tearDown()

    def setUp(self):
        '''Resolve fixture paths and the expected checkpoint locations.'''
        self.wd = os.getcwd()
        self.script = self.wd + "/test/numpy_random_means.py"
        self.reducer = self.wd + "/test/numpy_mean_reduction.py"
        self.output_file = self.wd + "/test/random_means"
        # Checkpoint names are the strings "1" .. "19".
        self.checkpoint_names = map(str, range(1, 20))
        # Per-user scratch directory on the shared filesystem.
        self.checkpoint_dir = config.shared_tmp_space + os.getlogin() + "/hdmc_checkpoints"

    def tearDown(self):
        # Nothing to clean up: each test manages its own HDFS/local state.
        pass

    def testMakeFrame(self):
        '''make_frame() writes frame.py into the working directory.'''
        hdmc.make_frame(self.script)
        self.assertTrue(os.path.isfile(self.wd + "/frame.py"))

    def testMakeCheckpointFrame(self):
        '''make_checkpointing_frame() writes checkpoint_frame.py.'''
        hdmc.make_checkpointing_frame(self.script, self.checkpoint_names, self.checkpoint_dir)
        self.assertTrue(os.path.isfile(self.wd + "/checkpoint_frame.py"))

    def testCreateDummyData(self):
        '''create_dummy_data() puts a file "dummy" containing "dummy data" into HDFS.'''
        hdfs.rm("dummy")
        hdmc.create_dummy_data()
        dummy_data = hdfs.cat("dummy")["stdout"]
        self.assertEqual("dummy data", dummy_data.rstrip())

    def testSetCheckpointDirectory(self):
        '''set_checkpoint_directory() (re)creates the checkpoint directory.'''
        # Remove the (empty) directory first so the call must recreate it.
        os.system('rmdir ' + self.checkpoint_dir)
        checkpoint_dir = hdmc.set_checkpoint_directory(self.output_file)
        self.assertTrue(os.path.exists(checkpoint_dir))

    def testDownloadHDFSData(self):
        '''download_hdfs_data() fetches an HDFS file to the given local path.'''
        hdmc.download_hdfs_data(self.wd + "/test/dummy")
        self.assertTrue(os.path.isfile(self.wd + "/test/dummy"))
        os.system('rm ' + self.wd + '/test/dummy')
        self.assertFalse(os.path.isfile(self.wd + "/test/dummy"))

    def testSubmitNoSupportingFiles(self):
        '''submit_inline() without a reducer produces the local output file.'''
        hdfs.rm("random_means")
        hdmc.submit_inline(self.script, self.output_file, iterations=200)
        self.assertTrue(os.path.exists(self.wd + "/test/random_means"))

    def testSubmitWithReduction(self):
        '''submit_inline() with a reduction script produces the output file.'''
        hdfs.rm("random_means")
        hdmc.submit_inline(self.script, self.output_file, iterations=200,
                           reduction_script=self.reducer)
        self.assertTrue(os.path.exists(self.wd + "/test/random_means"))

    def testSubmitCheckpoints(self):
        '''submit_checkpoint_inline() runs one checkpoint per input file and
        its per-file line counts match the known-good master output.'''
        hdfs.rm("line_counts")
        file_list = glob(self.wd + "/test/gutenberg/*")
        self.script = self.wd + "/test/line_counter.py"
        self.output_file = self.wd + "/test/line_counts"
        checkpoints = hdmc.submit_checkpoint_inline(self.script, self.output_file, file_list, "")
        self.assertEqual(len(file_list), len(checkpoints))
        self.assertTrue(os.path.exists(self.wd + "/test/line_counts"))
        self._assertCountsMatch(self.wd + "/test/line_counts",
                                self.wd + "/test/wc_output.dat")

    def testSubmitCheckpointsWithReduction(self):
        '''submit_checkpoint_inline() with a reducer matches the master totals.'''
        hdfs.rm("line_total")
        file_list = glob(self.wd + "/test/gutenberg/*")
        self.script = self.wd + "/test/line_counter.py"
        self.output_file = self.wd + "/test/line_total"
        self.reducer = self.wd + "/test/line_sum.py"
        checkpoints = hdmc.submit_checkpoint_inline(self.script, self.output_file, file_list,
                                                    reduction_script=self.reducer, arguments="")
        self.assertEqual(len(file_list), len(checkpoints))
        self.assertTrue(os.path.exists(self.wd + "/test/line_total"))
        self._assertCountsMatch(self.wd + "/test/line_total",
                                self.wd + "/test/wc_total.dat")

    def _parseCounts(self, path):
        '''Parse a wc-style result file: each non-blank line is
        "<count> <name>"; return {name: int(count)}.'''
        results = {}
        for line in open(path).readlines():
            if len(line.rstrip()) > 0:
                entry = line.split()
                results[entry[1]] = int(entry[0])
        return results

    def _assertCountsMatch(self, hadoop_result_file, master_result_file):
        '''Assert every entry in the master file equals the Hadoop result.'''
        hadoop_results = self._parseCounts(hadoop_result_file)
        master_results = self._parseCounts(master_result_file)
        for key in master_results.keys():
            self.assertEqual(master_results[key], hadoop_results[key])
# Allow the test module to be run directly; unittest.main() discovers and
# runs every test* method defined above.
if __name__ == "__main__":
    #import sys;sys.argv = ['', 'Test.testName']
    unittest.main()