Package starcluster :: Package plugins :: Module ipcluster
[hide private]
[frames | no frames]

Source Code for Module starcluster.plugins.ipcluster

  1  """ 
  2  A starcluster plugin for running an IPython cluster using SGE 
  3  (requires IPython 0.11+pyzmq or 0.10+twisted) 
  4   
  5  See ipythondev plugin for installing git master IPython and its dependencies 
  6  """ 
  7  import os 
  8  import time 
  9  import posixpath 
 10   
 11  from starcluster import utils 
 12  from starcluster import static 
 13  from starcluster import spinner 
 14  from starcluster.utils import print_timing 
 15  from starcluster.clustersetup import ClusterSetup 
 16   
 17  from starcluster.logger import log 
 18   
 19  IPCLUSTER_CACHE = os.path.join(static.STARCLUSTER_CFG_DIR, 'ipcluster') 
 20   
 21  STARTED_MSG_10 = """\ 
 22  IPCluster has been started on %(cluster)s for user '%(user)s'. 
 23   
 24  See the IPython 0.10.* parallel docs for usage details 
 25  (http://ipython.org/ipython-doc/rel-0.10.2/html/parallel) 
 26  """ 
class IPCluster10(ClusterSetup):
    """
    Starts an IPCluster (0.10.*) on StarCluster
    """
    # Remote path (on the master) of the generated cluster file that is
    # passed to 'ipcluster ssh --clusterfile'.
    cluster_file = '/etc/clusterfile.py'
    # NOTE(review): not referenced by any method of this class - presumably
    # kept for external users; confirm before removing.
    log_file = '/var/log/ipcluster.log'

    def _create_cluster_file(self, master, nodes):
        """
        Write the ipcluster cluster file to self.cluster_file on master.

        The file sets 'send_furl = True' and defines an 'engines' dict that
        maps each node's alias to its number of processors.
        """
        engines = dict((node.alias, node.num_processors) for node in nodes)
        cfile = 'send_furl = True\n'
        cfile += 'engines = %s\n' % engines
        f = master.ssh.remote_file(self.cluster_file, 'w')
        try:
            f.write(cfile)
        finally:
            # always release the remote handle, even if the write fails
            f.close()

    def run(self, nodes, master, user, user_shell, volumes):
        """
        Generate the cluster file and launch 'ipcluster ssh' on the master
        as the cluster user inside a detached screen session.
        """
        self._create_cluster_file(master, nodes)
        log.info("Starting ipcluster...")
        master.ssh.execute(
            "su - %s -c 'screen -d -m ipcluster ssh --clusterfile %s'" %
            (user, self.cluster_file))
        log.info(STARTED_MSG_10 % dict(cluster=master.parent_cluster,
                                       user=user))

    def on_add_node(self, node, nodes, master, user, user_shell, volumes):
        """
        Regenerate the cluster file and start an ipengine on the new node,
        pointing it at the controller's engine furl file in the user's home.
        """
        log.info("Adding %s to ipcluster" % node.alias)
        self._create_cluster_file(master, nodes)
        user_home = node.getpwnam(user).pw_dir
        furl_file = posixpath.join(user_home, '.ipython', 'security',
                                   'ipcontroller-engine.furl')
        node.ssh.execute(
            "su - %s -c 'screen -d -m ipengine --furl-file %s'" %
            (user, furl_file))

    def on_remove_node(self, node, nodes, master, user, user_shell, volumes):
        """
        Regenerate the cluster file without the departing node and kill the
        ipengine processes running on that node.
        """
        log.info("Removing %s from ipcluster" % node.alias)
        less_nodes = [other for other in nodes if other.id != node.id]
        self._create_cluster_file(master, less_nodes)
        node.ssh.execute('pkill ipengine')
# Message logged once an IPython 0.11+ cluster has been started; it is
# formatted with a mapping providing (at least) the 'cluster' and 'user' keys.
STARTED_MSG_11 = (
    "IPCluster has been started on %(cluster)s for user '%(user)s'.\n"
    "\n"
    "See the IPCluster plugin doc for usage details:\n"
    "http://web.mit.edu/starcluster/docs/latest/plugins/ipython.html\n"
)
class IPCluster11(ClusterSetup):
    """
    Start an IPython (0.11) cluster
    """
    def __init__(self, enable_notebook=False, notebook_passwd=None):
        """
        enable_notebook - when true, run() also starts the IPython notebook
        notebook_passwd - notebook password; a random 16 character password
                          is generated when none is given
        """
        self.enable_notebook = enable_notebook
        self.notebook_passwd = notebook_passwd or utils.generate_passwd(16)

    def _write_remote_file(self, master, path, lines):
        """
        Write lines (joined with newlines) to path on master, always closing
        the remote file handle afterwards.
        """
        f = master.ssh.remote_file(path)
        try:
            f.write('\n'.join(lines))
        finally:
            f.close()

    def _write_config(self, master, user, profile_dir):
        """
        Create cluster config

        Writes ipcontroller_config.py, ipcluster_config.py,
        ipengine_config.py and ipython_config.py into profile_dir on master.
        """
        log.info("Writing IPython cluster config files")
        master.ssh.execute('ipython profile create')
        ssh_server = "@".join([user, master.public_dns_name])
        self._write_remote_file(
            master, '%s/ipcontroller_config.py' % profile_dir, [
                "c = get_config()",
                "c.HubFactory.ip='%s'" % master.private_ip_address,
                "c.IPControllerApp.ssh_server='%s'" % ssh_server,
                "",
            ])
        self._write_remote_file(
            master, '%s/ipcluster_config.py' % profile_dir, [
                "c = get_config()",
                "c.IPClusterStart.controller_launcher_class=" +
                "'SGEControllerLauncher'",
                # restrict controller to master node:
                "c.SGEControllerLauncher.queue='all.q@master'",
                "c.IPClusterEngines.engine_launcher_class="
                "'SGEEngineSetLauncher'",
                "",
            ])
        self._write_remote_file(
            master, '%s/ipengine_config.py' % profile_dir, [
                "c = get_config()",
                "c.EngineFactory.timeout = 10",
                # Engines should wait a while for url files to arrive,
                # in case Controller takes a bit to start:
                "c.IPEngineApp.wait_for_url_file = 30",
                "",
            ])
        self._write_remote_file(
            master, '%s/ipython_config.py' % profile_dir, [
                "c = get_config()",
                "try:",
                "    import msgpack",
                "except ImportError:",
                # use pickle if msgpack is unavailable
                "    c.Session.packer='pickle'",
                "else:",
                # use msgpack if we can, because it's fast
                "    c.Session.packer='msgpack.packb'",
                "    c.Session.unpacker='msgpack.unpackb'",
                "c.EngineFactory.timeout = 10",
                # Engines should wait a while for url files to arrive,
                # in case Controller takes a bit to start via SGE
                "c.IPEngineApp.wait_for_url_file = 30",
                "",
            ])

    def _start_cluster(self, master, n, profile_dir):
        """
        Start ipcluster with n engines on master, wait for the controller's
        client JSON connector file to appear and download it into
        IPCLUSTER_CACHE.

        Returns the local path of the downloaded connector file.
        """
        log.info("Starting IPython cluster with %i engines" % n)
        # cleanup existing connection files, to prevent their use
        master.ssh.execute("rm -f %s/security/*.json" % profile_dir)
        master.ssh.execute("ipcluster start --n=%i --delay=5 --daemonize" % n,
                           source_profile=True)
        # wait for JSON file to exist
        remote_json = '%s/security/ipcontroller-client.json' % profile_dir
        log.info("Waiting for JSON connector file...",
                 extra=dict(__nonewline__=True))
        s = spinner.Spinner()
        s.start()
        try:
            while not master.ssh.isfile(remote_json):
                time.sleep(1)
        finally:
            # stop the spinner even if the ssh check raises or the user
            # interrupts the wait
            s.stop()
        # retrieve JSON connection info
        if not os.path.isdir(IPCLUSTER_CACHE):
            log.info("Creating IPCluster cache directory: %s" %
                     IPCLUSTER_CACHE)
            os.makedirs(IPCLUSTER_CACHE)
        local_json = os.path.join(IPCLUSTER_CACHE,
                                  '%s-%s.json' % (master.parent_cluster,
                                                  master.region.name))
        log.info("Saving JSON connector file to '%s'" %
                 os.path.abspath(local_json))
        master.ssh.get(remote_json, local_json)
        return local_json

    def _start_notebook(self, master, user, profile_dir):
        """
        Configure and launch the IPython notebook on master and make sure
        its tcp port is open to the world in the cluster's security group.

        A self-signed SSL certificate is created in profile_dir unless one
        already exists there for this user.
        """
        log.info("Setting up IPython web notebook for user: %s" % user)
        # key and certificate are stored together in a single .pem file
        ssl_cert = posixpath.join(profile_dir, '%s.pem' % user)
        if not master.ssh.isfile(ssl_cert):
            log.info("Creating SSL certificate for user %s" % user)
            ssl_subj = "/C=US/ST=SC/L=STAR/O=Dis/CN=%s" % master.dns_name
            master.ssh.execute(
                "openssl req -new -newkey rsa:4096 -days 365 "
                '-nodes -x509 -subj %s -keyout %s -out %s' %
                (ssl_subj, ssl_cert, ssl_cert))
        else:
            log.info("Using existing SSL certificate...")
        notebook_port = 8888
        # hash the password remotely using the cluster's own IPython
        sha1py = 'from IPython.lib import passwd; print passwd("%s")'
        sha1cmd = "python -c '%s'" % sha1py
        sha1pass = master.ssh.execute(sha1cmd % self.notebook_passwd)[0]
        self._write_remote_file(
            master, '%s/ipython_notebook_config.py' % profile_dir, [
                "c = get_config()",
                "c.IPKernelApp.pylab = 'inline'",
                "c.NotebookApp.certfile = u'%s'" % ssl_cert,
                "c.NotebookApp.ip = '*'",
                "c.NotebookApp.open_browser = False",
                "c.NotebookApp.password = u'%s'" % sha1pass,
                "c.NotebookApp.port = %d" % notebook_port,
            ])
        master.ssh.execute_async("ipython notebook")
        group = master.cluster_groups[0]
        world_cidr = '0.0.0.0/0'
        port_open = master.ec2.has_permission(group, 'tcp', notebook_port,
                                              notebook_port, world_cidr)
        if not port_open:
            log.info("Authorizing tcp port %s on %s" %
                     (notebook_port, world_cidr))
            group.authorize('tcp', notebook_port, notebook_port, world_cidr)
        log.info("IPython notebook URL: https://%s:%s" %
                 (master.dns_name, notebook_port))
        log.info("The notebook password is: %s" % self.notebook_passwd)

    @print_timing("IPCluster")
    def run(self, nodes, master, user, user_shell, volumes):
        """
        Write the IPython config files, start the cluster and, if enabled,
        the notebook - all as the cluster user on the master node.
        """
        # one engine less than the total processor count
        # NOTE(review): presumably leaves a slot for the controller - confirm
        n = sum(node.num_processors for node in nodes) - 1
        # The profile lives on the master, so resolve the home directory
        # there rather than relying on a loop variable leaking out of a
        # list comprehension (Python 2 only behaviour).
        user_home = master.getpwnam(user).pw_dir
        profile_dir = posixpath.join(user_home, '.ipython', 'profile_default')
        master.ssh.switch_user(user)
        try:
            self._write_config(master, user, profile_dir)
            cfile = self._start_cluster(master, n, profile_dir)
            if self.enable_notebook:
                self._start_notebook(master, user, profile_dir)
            log.info(STARTED_MSG_11 % dict(cluster=master.parent_cluster,
                                           user=user, connector_file=cfile,
                                           key_location=master.key_location))
        finally:
            # never leave the shared ssh connection running as the user
            master.ssh.switch_user('root')

    def _stop_cluster(self, master, user):
        """
        Kill the IPython engine and controller processes on master.

        user is accepted but not used.
        """
        master.ssh.execute("pkill -f ipengineapp.py")
        master.ssh.execute("pkill -f ipcontrollerapp.py")

    def on_add_node(self, node, nodes, master, user, user_shell, volumes):
        """
        Start one engine per processor on the newly added node.
        """
        n = node.num_processors
        log.info("Adding %i engines on %s to ipcluster" % (n, node.alias))
        node.ssh.execute("ipcluster engines --n=%i --daemonize" % n,
                         source_profile=True)
class IPCluster(ClusterSetup):
    """
    Dispatching plugin: detects the IPython version installed on the master
    and delegates to IPCluster10 (older than 0.11) or IPCluster11 (0.11 and
    newer).
    """

    def __init__(self, enable_notebook=False, notebook_passwd=None):
        """
        Both settings are only forwarded to IPCluster11; IPCluster10 takes
        no options.
        """
        self.enable_notebook = enable_notebook
        self.notebook_passwd = notebook_passwd

    def _get_ipy_version(self, node):
        """
        Return the first line printed by the node's python for
        IPython.__version__.
        """
        version_cmd = "python -c 'import IPython; print IPython.__version__'"
        return node.ssh.execute(version_cmd)[0]

    @staticmethod
    def _version_tuple(version):
        """
        Convert a version string into a tuple of its leading integer
        components, e.g. '0.10.2' -> (0, 10, 2) and '0.12.dev' -> (0, 12).

        Parsing stops at the first component that is not purely numeric, so
        suffixes such as 'dev' or 'rc1' are ignored.
        """
        parts = []
        for piece in version.strip().split('.'):
            digits = ''
            for char in piece:
                if not char.isdigit():
                    break
                digits += char
            if not digits:
                break
            parts.append(int(digits))
            if len(digits) != len(piece):
                # e.g. '0rc1': keep the number, drop everything after it
                break
        return tuple(parts)

    def _get_ipcluster_plugin(self, node):
        """
        Return the plugin instance matching the IPython version on node.
        """
        ipyversion = self._get_ipy_version(node)
        # Compare numerically: as plain strings '0.9' sorts after '0.11'
        # and would wrongly be treated as a 0.11+ install.
        if self._version_tuple(ipyversion) < (0, 11):
            if not ipyversion.startswith('0.10'):
                log.warn("Trying unsupported IPython version %s" % ipyversion)
            return IPCluster10()
        else:
            return IPCluster11(self.enable_notebook, self.notebook_passwd)

    def _check_ipython_installed(self, node):
        """
        Return whether 'ipython' and 'ipcluster' are available on node,
        logging an error when they are not.
        """
        has_ipy = node.ssh.has_required(['ipython', 'ipcluster'])
        if not has_ipy:
            log.error("IPython is not installed...skipping plugin")
        return has_ipy

    def run(self, nodes, master, user, user_shell, volumes):
        """
        Delegate run() to the version-specific plugin; does nothing when
        IPython is missing on the master.
        """
        if not self._check_ipython_installed(master):
            return
        plug = self._get_ipcluster_plugin(master)
        plug.run(nodes, master, user, user_shell, volumes)

    def on_add_node(self, node, nodes, master, user, user_shell, volumes):
        """
        Delegate on_add_node() to the version-specific plugin; does nothing
        when IPython is missing on the master.
        """
        if not self._check_ipython_installed(master):
            return
        plug = self._get_ipcluster_plugin(master)
        plug.on_add_node(node, nodes, master, user, user_shell, volumes)

    def on_remove_node(self, node, nodes, master, user, user_shell, volumes):
        """
        Delegate on_remove_node() to the version-specific plugin; does
        nothing when IPython is missing on the master.
        """
        if not self._check_ipython_installed(master):
            return
        plug = self._get_ipcluster_plugin(master)
        plug.on_remove_node(node, nodes, master, user, user_shell, volumes)
class IPClusterStop(ClusterSetup):
    """
    Shut down a running IPython cluster: stop ipcluster on the master, then
    kill any remaining controller and engine processes.
    """

    def run(self, nodes, master, user, user_shell, volumes):
        """
        Stop ipcluster as the cluster user on the master, then kill leftover
        controller processes on the master and engine processes on every
        node.
        """
        log.info("Shutting down IPython cluster")
        master.ssh.switch_user(user)
        try:
            master.ssh.execute("ipcluster stop", source_profile=True)
            # give ipcluster a moment to shut its processes down itself
            time.sleep(2)
            master.ssh.execute("pkill -f ipcontrollerapp.py",
                               ignore_exit_status=True)
        finally:
            # never leave the shared ssh connection running as the user
            master.ssh.switch_user('root')
        for node in nodes:
            # engines run on each node, so the kill has to be issued over
            # that node's own connection rather than the master's
            node.ssh.execute("pkill -f ipengineapp.py",
                             ignore_exit_status=True)