1 """
2 A starcluster plugin for running an IPython cluster using SGE
3 (requires IPython 0.11+pyzmq or 0.10+twisted)
4
5 See ipythondev plugin for installing git master IPython and its dependencies
6 """
7 import os
8 import time
9 import posixpath
10
11 from starcluster import utils
12 from starcluster import static
13 from starcluster import spinner
14 from starcluster.utils import print_timing
15 from starcluster.clustersetup import ClusterSetup
16
17 from starcluster.logger import log
18
# Local directory (under static.STARCLUSTER_CFG_DIR) into which the JSON
# connector file fetched from the master is saved.
19 IPCLUSTER_CACHE = os.path.join(static.STARCLUSTER_CFG_DIR, 'ipcluster')
20
# %-format template for the IPython 0.10 plugin; it references the 'cluster'
# and 'user' keys.
21 STARTED_MSG_10 = """\
22 IPCluster has been started on %(cluster)s for user '%(user)s'.
23
24 See the IPython 0.10.* parallel docs for usage details
25 (http://ipython.org/ipython-doc/rel-0.10.2/html/parallel)
26 """
# NOTE(review): the ``class`` statement that owns this docstring and these
# attributes is not visible in this view.
30 """
31 Starts an IPCluster (0.10.*) on StarCluster
32 """
# Absolute file paths; their uses are not visible here (presumably consumed by
# the elided cluster-file / run code) -- TODO confirm.
33 cluster_file = '/etc/clusterfile.py'
34 log_file = '/var/log/ipcluster.log'
35
45
46 - def run(self, nodes, master, user, user_shell, volumes):
54
def on_add_node(self, node, nodes, master, user, user_shell, volumes):
    """
    Attach a newly added node to the IPython 0.10 cluster.

    Logs the node alias, calls ``self._create_cluster_file(master, nodes)``
    and then starts an ``ipengine`` for ``user`` on the node inside a
    detached ``screen`` session, pointing it at the engine FURL file under
    the user's ``~/.ipython/security`` directory.

    ``user_shell`` and ``volumes`` are not used by this method.
    """
    log.info("Adding %s to ipcluster" % node.alias)
    self._create_cluster_file(master, nodes)
    # Home directory as reported by the node being added.
    home_dir = node.getpwnam(user).pw_dir
    engine_furl = posixpath.join(
        home_dir, '.ipython', 'security', 'ipcontroller-engine.furl')
    launch_cmd = "su - %s -c 'screen -d -m ipengine --furl-file %s'" % (
        user, engine_furl)
    node.ssh.execute(launch_cmd)
64
65 - def on_remove_node(self, node, nodes, master, user, user_shell, volumes):
70
71
# %-format template logged by the 0.11 plugin's run() after startup. Only the
# 'cluster' and 'user' keys are referenced; run() also supplies
# 'connector_file' and 'key_location', which this template does not use.
72 STARTED_MSG_11 = """\
73 IPCluster has been started on %(cluster)s for user '%(user)s'.
74
75 See the IPCluster plugin doc for usage details:
76 http://web.mit.edu/starcluster/docs/latest/plugins/ipython.html
77 """
81 """
82 Start an IPython (0.11) cluster
83 """
def __init__(self, enable_notebook=False, notebook_passwd=None):
    """
    Record the notebook options for this plugin instance.

    ``enable_notebook`` is stored as given. A truthy ``notebook_passwd`` is
    stored as given; otherwise the result of ``utils.generate_passwd(16)``
    is stored in its place.
    """
    self.enable_notebook = enable_notebook
    if notebook_passwd:
        self.notebook_passwd = notebook_passwd
    else:
        self.notebook_passwd = utils.generate_passwd(16)
87
# NOTE(review): the ``def`` line for this body is not visible in this view; it
# is presumably the ``_write_config(master, user, profile_dir)`` helper that
# run() calls -- confirm. The body writes four config files into profile_dir
# on the master through master.ssh.remote_file.
89 """
90 Create cluster config
91 """
92 log.info("Writing IPython cluster config files")
# Runs ``ipython profile create`` on the master before any file is written.
93 master.ssh.execute('ipython profile create')
# ipcontroller_config.py: HubFactory.ip is the master's private IP address and
# IPControllerApp.ssh_server is "<user>@<master public DNS name>".
94 f = master.ssh.remote_file('%s/ipcontroller_config.py' % profile_dir)
95 ssh_server = "@".join([user, master.public_dns_name])
96 f.write('\n'.join([
97 "c = get_config()",
98 "c.HubFactory.ip='%s'" % master.private_ip_address,
99 "c.IPControllerApp.ssh_server='%s'" % ssh_server,
100
101 "",
102 ]))
103 f.close()
# ipcluster_config.py: SGE launcher classes for both the controller and the
# engine set; the controller launcher's queue is 'all.q@master'.
104 f = master.ssh.remote_file('%s/ipcluster_config.py' % profile_dir)
105 f.write('\n'.join([
106 "c = get_config()",
107 "c.IPClusterStart.controller_launcher_class=" +
108 "'SGEControllerLauncher'",
109
110 "c.SGEControllerLauncher.queue='all.q@master'",
111 "c.IPClusterEngines.engine_launcher_class='SGEEngineSetLauncher'",
112
113 "",
114 ]))
115 f.close()
# ipengine_config.py: EngineFactory.timeout = 10 and
# IPEngineApp.wait_for_url_file = 30.
116 f = master.ssh.remote_file('%s/ipengine_config.py' % profile_dir)
117 f.write('\n'.join([
118 "c = get_config()",
119 "c.EngineFactory.timeout = 10",
120
121
122 "c.IPEngineApp.wait_for_url_file = 30",
123
124 "",
125 ]))
126 f.close()
# ipython_config.py: the generated file selects msgpack for Session
# packing/unpacking when ``import msgpack`` succeeds and pickle otherwise, and
# repeats the two engine settings written to ipengine_config.py above.
# NOTE(review): none of the four remote files is closed if write() raises.
127 f = master.ssh.remote_file('%s/ipython_config.py' % profile_dir)
128 f.write('\n'.join([
129 "c = get_config()",
130 "try:",
131 " import msgpack",
132 "except ImportError:",
133
134 " c.Session.packer='pickle'",
135 "else:",
136
137 " c.Session.packer='msgpack.packb'",
138 " c.Session.unpacker='msgpack.unpackb'",
139 "c.EngineFactory.timeout = 10",
140
141
142 "c.IPEngineApp.wait_for_url_file = 30",
143
144 "",
145 ]))
146 f.close()
147
# NOTE(review): the ``def`` line for this body is not visible in this view; it
# is presumably the ``_start_cluster(master, n, profile_dir)`` helper that
# run() calls -- confirm. Starts ipcluster with n engines on the master and
# returns the local path of the downloaded JSON connector file.
149 log.info("Starting IPython cluster with %i engines" % n)
150
# Delete any existing *.json files in the profile's security directory before
# starting (presumably so the wait below only sees a freshly written file).
151 master.ssh.execute("rm -f %s/security/*.json" % profile_dir)
152 master.ssh.execute("ipcluster start --n=%i --delay=5 --daemonize" % n,
153 source_profile=True)
154
155 json = '%s/security/ipcontroller-client.json' % profile_dir
156 log.info("Waiting for JSON connector file...",
157 extra=dict(__nonewline__=True))
158 s = spinner.Spinner()
159 s.start()
# Poll once per second until the client connector file exists on the master.
# NOTE(review): there is no timeout, so this loops forever if the file never
# appears, and the spinner is not stopped if isfile() raises.
160 while not master.ssh.isfile(json):
161 time.sleep(1)
162 s.stop()
163
# Create the local cache directory on first use, then download the connector
# file into it as "<parent_cluster>-<region name>.json".
164 if not os.path.isdir(IPCLUSTER_CACHE):
165 log.info("Creating IPCluster cache directory: %s" %
166 IPCLUSTER_CACHE)
167 os.makedirs(IPCLUSTER_CACHE)
168 local_json = os.path.join(IPCLUSTER_CACHE,
169 '%s-%s.json' % (master.parent_cluster,
170 master.region.name))
171 log.info("Saving JSON connector file to '%s'" %
172 os.path.abspath(local_json))
173 master.ssh.get(json, local_json)
174 return local_json
175
# NOTE(review): the ``def`` line for this body is not visible in this view; it
# is presumably the ``_start_notebook(master, user, profile_dir)`` helper that
# run() calls -- confirm. Sets up a certificate and notebook config on the
# master, launches ``ipython notebook`` and opens the notebook port.
177 log.info("Setting up IPython web notebook for user: %s" % user)
# NOTE(review): user_cert and ssl_cert are built from identical arguments, so
# both names hold the same path.
178 user_cert = posixpath.join(profile_dir, '%s.pem' % user)
179 ssl_cert = posixpath.join(profile_dir, '%s.pem' % user)
# Create a certificate only when the .pem file is missing; key and certificate
# are both written to that one file (-keyout and -out get the same path).
180 if not master.ssh.isfile(user_cert):
181 log.info("Creating SSL certificate for user %s" % user)
182 ssl_subj = "/C=US/ST=SC/L=STAR/O=Dis/CN=%s" % master.dns_name
183 master.ssh.execute(
184 "openssl req -new -newkey rsa:4096 -days 365 "
185 '-nodes -x509 -subj %s -keyout %s -out %s' %
186 (ssl_subj, ssl_cert, ssl_cert))
187 else:
188 log.info("Using existing SSL certificate...")
189 f = master.ssh.remote_file('%s/ipython_notebook_config.py' %
190 profile_dir)
191 notebook_port = 8888
# Hash the notebook password on the master with IPython.lib.passwd; the remote
# snippet uses the Python 2 print statement.
# NOTE(review): the password is interpolated unescaped into a single-quoted
# shell command, so a password containing quote characters breaks the command.
192 sha1py = 'from IPython.lib import passwd; print passwd("%s")'
193 sha1cmd = "python -c '%s'" % sha1py
194 sha1pass = master.ssh.execute(sha1cmd % self.notebook_passwd)[0]
195 f.write('\n'.join([
196 "c = get_config()",
197 "c.IPKernelApp.pylab = 'inline'",
198 "c.NotebookApp.certfile = u'%s'" % ssl_cert,
199 "c.NotebookApp.ip = '*'",
200 "c.NotebookApp.open_browser = False",
201 "c.NotebookApp.password = u'%s'" % sha1pass,
202 "c.NotebookApp.port = %d" % notebook_port,
203 ]))
204 f.close()
# The notebook server is launched through execute_async rather than execute.
205 master.ssh.execute_async("ipython notebook")
# Authorize tcp access to the notebook port from 0.0.0.0/0 on
# master.cluster_groups[0], unless has_permission reports it is already open.
206 group = master.cluster_groups[0]
207 world_cidr = '0.0.0.0/0'
208 port_open = master.ec2.has_permission(group, 'tcp', notebook_port,
209 notebook_port, world_cidr)
210 if not port_open:
211 log.info("Authorizing tcp port %s on %s" %
212 (notebook_port, world_cidr))
213 group.authorize('tcp', notebook_port, notebook_port, world_cidr)
214 log.info("IPython notebook URL: https://%s:%s" %
215 (master.dns_name, notebook_port))
# NOTE(review): the plaintext notebook password is written to the log here.
216 log.info("The notebook password is: %s" % self.notebook_passwd)
217
@print_timing("IPCluster")
def run(self, nodes, master, user, user_shell, volumes):
    """
    Configure and launch the IPython (0.11) cluster on the master as ``user``.

    The engine count is the sum of ``num_processors`` over ``nodes`` minus
    one. While the master SSH session is switched to ``user`` this writes
    the profile config files, starts the cluster and, when
    ``self.enable_notebook`` is set, the notebook server, then logs
    STARTED_MSG_11. The session is switched back to 'root' afterwards, even
    if one of the steps raises.

    ``user_shell`` and ``volumes`` are not used by this method.
    """
    n = sum(node.num_processors for node in nodes) - 1
    # Resolve the home directory on the master: the profile directory is only
    # handed to helpers that operate on the master, and the comprehension
    # variable above is not bound in this scope on Python 3.
    user_home = master.getpwnam(user).pw_dir
    profile_dir = posixpath.join(user_home, '.ipython', 'profile_default')
    master.ssh.switch_user(user)
    try:
        self._write_config(master, user, profile_dir)
        cfile = self._start_cluster(master, n, profile_dir)
        if self.enable_notebook:
            self._start_notebook(master, user, profile_dir)
        log.info(STARTED_MSG_11 % dict(cluster=master.parent_cluster,
                                       user=user, connector_file=cfile,
                                       key_location=master.key_location))
    finally:
        # Never leave the master session running as ``user``.
        master.ssh.switch_user('root')
232
# NOTE(review): the ``def`` line that owns these statements is not visible in
# this view. They pkill processes on the master whose command line matches
# ipengineapp.py and ipcontrollerapp.py; no ignore_exit_status argument is
# passed here, unlike the pkill calls in the stop plugin's run().
234 master.ssh.execute("pkill -f ipengineapp.py")
235 master.ssh.execute("pkill -f ipcontrollerapp.py")
236
237 - def on_add_node(self, node, nodes, master, user, user_shell, volumes):
242
245
def __init__(self, enable_notebook=False, notebook_passwd=None):
    """
    Keep the notebook options exactly as passed in; no default password is
    generated here.
    """
    self.enable_notebook, self.notebook_passwd = (
        enable_notebook, notebook_passwd)
249
# NOTE(review): the ``def`` line for this body is not visible in this view; it
# is presumably the ``_get_ipy_version(node)`` helper called in this file --
# confirm. Runs python on the node to print IPython.__version__ (Python 2
# print statement) and returns element 0 of the execute() result.
251 version_cmd = "python -c 'import IPython; print IPython.__version__'"
252 return node.ssh.execute(version_cmd)[0]
253
# NOTE(review): the ``def`` line for this body is not visible in this view.
# Chooses the plugin implementation from the IPython version reported by the
# node: IPCluster10 below '0.11', IPCluster11 otherwise (with the notebook
# options forwarded).
255 ipyversion = self._get_ipy_version(node)
# NOTE(review): this is a lexicographic string comparison, not a version
# comparison -- e.g. '0.9' < '0.11' is False, so 0.9 would select IPCluster11.
256 if ipyversion < '0.11':
257 if not ipyversion.startswith('0.10'):
258 log.warn("Trying unsupported IPython version %s" % ipyversion)
259 return IPCluster10()
260 else:
261 return IPCluster11(self.enable_notebook, self.notebook_passwd)
262
# NOTE(review): the ``def`` line for this body is not visible in this view.
# Returns the result of node.ssh.has_required(['ipython', 'ipcluster']) and
# logs an error when that result is falsy.
264 has_ipy = node.ssh.has_required(['ipython', 'ipcluster'])
265 if not has_ipy:
266 log.error("IPython is not installed...skipping plugin")
267 return has_ipy
268
269 - def run(self, nodes, master, user, user_shell, volumes):
274
275 - def on_add_node(self, node, nodes, master, user, user_shell, volumes):
280
281 - def on_remove_node(self, node, nodes, master, user, user_shell, volumes):
286
289
def run(self, nodes, master, user, user_shell, volumes):
    """
    Shut down the IPython cluster.

    With the master SSH session switched to ``user``: run ``ipcluster stop``
    on the master, wait two seconds, pkill any remaining controller process
    on the master, then pkill any remaining engine processes on every node
    in ``nodes`` over that node's own SSH connection. pkill exit statuses
    are ignored. The master session is switched back to 'root' afterwards,
    even if one of the steps raises.

    ``user_shell`` and ``volumes`` are not used by this method.
    """
    log.info("Shutting down IPython cluster")
    master.ssh.switch_user(user)
    try:
        master.ssh.execute("ipcluster stop", source_profile=True)
        time.sleep(2)
        master.ssh.execute("pkill -f ipcontrollerapp.py",
                           ignore_exit_status=True)
        for node in nodes:
            # Run on each node itself; issuing this through master.ssh would
            # only ever kill engines on the master.
            node.ssh.execute("pkill -f ipengineapp.py",
                             ignore_exit_status=True)
    finally:
        # Never leave the master session running as ``user``.
        master.ssh.switch_user('root')
301