1
2 import os
3 import re
4 import time
5 import zlib
6 import string
7 import pprint
8 import base64
9 import cPickle
10 import traceback
11
12 from starcluster import utils
13 from starcluster import static
14 from starcluster import spinner
15 from starcluster import iptools
16 from starcluster import managers
17 from starcluster import exception
18 from starcluster import progressbar
19 from starcluster import clustersetup
20 from starcluster.node import Node
21 from starcluster.utils import print_timing
22 from starcluster.templates import user_msgs
23 from starcluster.logger import log
27 """
28 Manager class for Cluster objects
29 """
31 return "<ClusterManager: %s>" % self.ec2.region.name
32
33 def get_cluster(self, cluster_name, group=None, load_receipt=True,
34 load_plugins=True):
55
56 def get_clusters(self, load_receipt=True, load_plugins=True):
66
72
74 """
75 Returns a new Cluster object using the settings from the cluster
76 template template_name
77
78 If tag_name is passed, the Cluster object's cluster_tag setting will
79 be set to tag_name
80 """
81 cl = self.cfg.get_cluster_template(template_name, tag_name=tag_name,
82 ec2_conn=self.ec2)
83 return cl
84
86 """
87 Same as get_cluster but returns None instead of throwing an exception
88 if the cluster does not exist
89 """
90 try:
91 return self.get_cluster(cluster_name)
92 except exception.ClusterDoesNotExist:
93 pass
94
96 """
97 Returns True if cluster exists
98 """
99 return self.get_cluster_or_none(tag_name) is not None
100
101 def ssh_to_master(self, cluster_name, user='root', command=None):
102 """
103 ssh to master node of cluster_name
104
105 user keyword specifies an alternate user to login as
106 """
107 cluster = self.get_cluster(cluster_name)
108 return cluster.ssh_to_master(user=user, command=command)
109
112 """
113 ssh to a node in cluster_name that has either an id,
114 dns name, or alias matching node_id
115
116 user keyword specifies an alternate user to login as
117 """
118 cluster = self.get_cluster(cluster_name)
119 return cluster.ssh_to_node(node_id, user=user, command=command)
120
128
129 def add_node(self, cluster_name, alias=None, no_create=False):
132
133 def add_nodes(self, cluster_name, num_nodes, aliases=None,
134 no_create=False):
135 """
136 Add one or more nodes to cluster
137 """
138 cl = self.get_cluster(cluster_name)
139 cl.add_nodes(num_nodes, aliases=aliases, no_create=no_create)
140
141 def remove_node(self, cluster_name, alias, terminate=True):
150
157
158 def stop_cluster(self, cluster_name, terminate_unstoppable=False):
159 """
160 Stop an EBS-backed cluster
161 """
162 cl = self.get_cluster(cluster_name)
163 cl.stop_cluster(terminate_unstoppable)
164
171
178
186
188 """
189 Returns the cluster tag name from a security group name that starts
190 with static.SECURITY_GROUP_PREFIX
191
192 Example:
193 sg = '@sc-mycluster'
194 print get_tag_from_sg(sg)
195 mycluster
196 """
197 regex = re.compile(static.SECURITY_GROUP_PREFIX + '-(.*)')
198 match = regex.match(sg)
199 if match:
200 return match.groups()[0]
201
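# --- Illustrative sketch, not part of the original module ---
# A minimal standalone version of the tag extraction above. The '@sc'
# prefix mirrors the docstring example; the real value comes from
# static.SECURITY_GROUP_PREFIX.
def _example_get_tag_from_sg(sg_name, prefix='@sc'):
    import re
    match = re.compile(prefix + '-(.*)').match(sg_name)
    if match:
        return match.groups()[0]
# _example_get_tag_from_sg('@sc-mycluster') == 'mycluster'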
202 def list_clusters(self, cluster_groups=None, show_ssh_status=False):
203 """
204 Prints a summary for each active cluster on EC2
205 """
206 if not cluster_groups:
207 cluster_groups = self.get_cluster_security_groups()
208 if not cluster_groups:
209 log.info("No clusters found...")
210 else:
211 try:
212 cluster_groups = [self.get_cluster_security_group(g) for g \
213 in cluster_groups]
214 except exception.SecurityGroupDoesNotExist:
215 raise exception.ClusterDoesNotExist(g)
216 for scg in cluster_groups:
217 tag = self.get_tag_from_sg(scg.name)
218 try:
219 cl = self.get_cluster(tag, group=scg, load_plugins=False)
220 except exception.IncompatibleCluster, e:
221 sep = '*' * 60
222 log.error('\n'.join([sep, e.msg, sep]),
223 extra=dict(__textwrap__=True))
224 continue
225 header = '%s (security group: %s)' % (tag, scg.name)
226 print '-' * len(header)
227 print header
228 print '-' * len(header)
229 nodes = cl.nodes
230 try:
231 n = nodes[0]
232 except IndexError:
233 n = None
234 state = getattr(n, 'state', None)
235 ltime = 'N/A'
236 uptime = 'N/A'
237 if state in ['pending', 'running']:
238 ltime = getattr(n, 'local_launch_time', 'N/A')
239 uptime = getattr(n, 'uptime', 'N/A')
240 print 'Launch time: %s' % ltime
241 print 'Uptime: %s' % uptime
242 print 'Zone: %s' % getattr(n, 'placement', 'N/A')
243 print 'Keypair: %s' % getattr(n, 'key_name', 'N/A')
244 ebs_nodes = [n for n in nodes if n.attached_vols]
245 if ebs_nodes:
246 print 'EBS volumes:'
247 for node in ebs_nodes:
248 devices = node.attached_vols
249 node_id = node.alias or node.id
250 for dev in devices:
251 d = devices.get(dev)
252 vol_id = d.volume_id
253 status = d.status
254 print ' %s on %s:%s (status: %s)' % \
255 (vol_id, node_id, dev, status)
256 else:
257 print 'EBS volumes: N/A'
258 spot_reqs = cl.spot_requests
259 if spot_reqs:
260 active = len([s for s in spot_reqs if s.state == 'active'])
261 opn = len([s for s in spot_reqs if s.state == 'open'])
262 msg = ''
263 if active != 0:
264 msg += '%d active' % active
265 if opn != 0:
266 if msg:
267 msg += ', '
268 msg += '%d open' % opn
269 print 'Spot requests: %s' % msg
270 if nodes:
271 print 'Cluster nodes:'
272 for node in nodes:
273 nodeline = " %7s %s %s %s" % (node.alias, node.state,
274 node.id, node.dns_name)
275 if node.spot_id:
276 nodeline += ' (spot %s)' % node.spot_id
277 if show_ssh_status:
278 ssh_status = {True: 'Up', False: 'Down'}
279 nodeline += ' (SSH: %s)' % ssh_status[node.is_up()]
280 print nodeline
281 print 'Total nodes: %d' % len(nodes)
282 else:
283 print 'Cluster nodes: N/A'
284 print
285
299
302 def __init__(self,
303 ec2_conn=None,
304 spot_bid=None,
305 cluster_tag=None,
306 cluster_description=None,
307 cluster_size=None,
308 cluster_user=None,
309 cluster_shell=None,
310 master_image_id=None,
311 master_instance_type=None,
312 node_image_id=None,
313 node_instance_type=None,
314 node_instance_types=[],
315 availability_zone=None,
316 keyname=None,
317 key_location=None,
318 volumes=[],
319 plugins=[],
320 permissions=[],
321 refresh_interval=30,
322 disable_queue=False,
323 disable_threads=False,
324 cluster_group=None,
325 force_spot_master=False,
326 **kwargs):
327
328 now = time.strftime("%Y%m%d%H%M")
329 self.ec2 = ec2_conn
330 self.spot_bid = spot_bid
331 self.cluster_tag = cluster_tag
332 self.cluster_description = cluster_description
333 if self.cluster_tag is None:
334 self.cluster_tag = "cluster%s" % now
335 if cluster_description is None:
336 self.cluster_description = "Cluster created at %s" % now
337 self.cluster_size = cluster_size or 0
338 self.cluster_user = cluster_user
339 self.cluster_shell = cluster_shell
340 self.master_image_id = master_image_id
341 self.master_instance_type = master_instance_type
342 self.node_image_id = node_image_id
343 self.node_instance_type = node_instance_type
344 self.node_instance_types = node_instance_types
345 self.availability_zone = availability_zone
346 self.keyname = keyname
347 self.key_location = key_location
348 self.volumes = self.load_volumes(volumes)
349 self.plugins = self.load_plugins(plugins)
350 self.permissions = permissions
351 self.refresh_interval = refresh_interval
352 self.disable_queue = disable_queue
353 self.disable_threads = disable_threads
354 self.force_spot_master = force_spot_master
355
356 self.__instance_types = static.INSTANCE_TYPES
357 self.__cluster_settings = static.CLUSTER_SETTINGS
358 self.__available_shells = static.AVAILABLE_SHELLS
359 self.__protocols = static.PROTOCOLS
360 self._progress_bar = None
361 self._master_reservation = None
362 self._node_reservation = None
363 self._nodes = []
364 self._master = None
365 self._zone = None
366 self._plugins = plugins
367 self._cluster_group = None
368 self._placement_group = None
369
371 return '<Cluster: %s (%s-node)>' % (self.cluster_tag,
372 self.cluster_size)
373
374 @property
376 """
377 If volumes are specified, this method determines the common
378 availability zone between those volumes. If an availability zone
379 is explicitly specified in the config and does not match the common
380 availability zone of the volumes, an exception is raised. If the
381 volumes are not all in the same availability zone, an exception is
382 raised. If no volumes are specified, this returns the user-specified
383 availability zone if it exists.
384 """
385 if not self._zone:
386 zone = None
387 if self.availability_zone:
388 zone = self.ec2.get_zone(self.availability_zone).name
389 common_zone = None
390 for volume in self.volumes:
391 volid = self.volumes.get(volume).get('volume_id')
392 vol = self.ec2.get_volume(volid)
393 if not common_zone:
394 common_zone = vol.zone
395 elif vol.zone != common_zone:
396 vols = [self.volumes.get(v).get('volume_id')
397 for v in self.volumes]
398 raise exception.VolumesZoneError(vols)
399 if common_zone and zone and zone != common_zone:
400 raise exception.InvalidZone(zone, common_zone)
401 if not zone and common_zone:
402 zone = common_zone
403 self._zone = zone
404 return self._zone
405
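# --- Illustrative sketch, not part of the original module ---
# The zone-resolution rule implemented above, reduced to plain values: all
# volume zones must agree, and an explicitly configured zone must match
# them. The zone names below are placeholders for illustration only.
def _example_resolve_zone(config_zone, volume_zones):
    common = None
    for vzone in volume_zones:
        if common is None:
            common = vzone
        elif vzone != common:
            raise ValueError("volumes span multiple zones: %s" % volume_zones)
    if common and config_zone and config_zone != common:
        raise ValueError("config zone %s != volume zone %s" %
                         (config_zone, common))
    return config_zone or common
# _example_resolve_zone(None, ['us-east-1a', 'us-east-1a']) == 'us-east-1a'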
407 """
408 Iterate through vols and set device/partition settings automatically if
409 not specified.
410
411 This method assigns the first volume to /dev/sdz, second to /dev/sdy,
412 etc., for all volumes that do not include a device/partition setting.
413 """
414 devices = ['/dev/sd%s' % s for s in string.lowercase]
415 devmap = {}
416 for volname in vols:
417 vol = vols.get(volname)
418 dev = vol.get('device')
419 if dev in devices:
420
421 devices.remove(dev)
422 volid = vol.get('volume_id')
423 if dev and not volid in devmap:
424 devmap[volid] = dev
425 volumes = {}
426 for volname in vols:
427 vol = vols.get(volname)
428 vol_id = vol.get('volume_id')
429 device = vol.get('device')
430 if not device:
431 if vol_id in devmap:
432 device = devmap.get(vol_id)
433 else:
434 device = devices.pop()
435 devmap[vol_id] = device
436 if not utils.is_valid_device(device):
437 raise exception.InvalidDevice(device)
438 v = volumes[volname] = utils.AttributeDict()
439 v.update(vol)
440 v['device'] = device
441 part = vol.get('partition')
442 if part:
443 partition = device + str(part)
444 if not utils.is_valid_partition(partition):
445 raise exception.InvalidPartition(part)
446 v['partition'] = partition
447 return volumes
448
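# --- Illustrative sketch, not part of the original module ---
# A rough sketch of the device auto-assignment above: volumes without a
# DEVICE setting are handed /dev/sdz, /dev/sdy, ... in order, and volumes
# sharing a volume_id reuse the same device. The volume ids below are
# placeholders; pre-assigned devices are not reserved in this simplified
# version.
def _example_assign_devices(vols):
    import string
    free = ['/dev/sd%s' % c for c in string.ascii_lowercase]
    assigned = {}
    for name in vols:
        vol_id = vols[name]['volume_id']
        if vol_id not in assigned:
            assigned[vol_id] = vols[name].get('device') or free.pop()
    return assigned
# _example_assign_devices({'data': {'volume_id': 'vol-12345678'}})
# => {'vol-12345678': '/dev/sdz'}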
450 plugs = []
451 for plugin in plugins:
452 setup_class = plugin.get('setup_class')
453 plugin_name = plugin.get('__name__').split()[-1]
454 mod_name = '.'.join(setup_class.split('.')[:-1])
455 class_name = setup_class.split('.')[-1]
456 try:
457 mod = __import__(mod_name, globals(), locals(), [class_name])
458 except SyntaxError, e:
459 raise exception.PluginSyntaxError(
460 "Plugin %s (%s) contains a syntax error at line %s" % \
461 (plugin_name, e.filename, e.lineno))
462 except ImportError, e:
463 raise exception.PluginLoadError(
464 "Failed to import plugin %s: %s" % \
465 (plugin_name, e[0]))
466 klass = getattr(mod, class_name, None)
467 if not klass:
468 raise exception.PluginError(
469 'Plugin class %s does not exist' % setup_class)
470 if not issubclass(klass, clustersetup.ClusterSetup):
471 raise exception.PluginError(
472 ("Plugin %s must be a subclass of " + \
473 "starcluster.clustersetup.ClusterSetup") % setup_class)
474 args, kwargs = utils.get_arg_spec(klass.__init__)
475 config_args = []
476 missing_args = []
477 for arg in args:
478 if arg in plugin:
479 config_args.append(plugin.get(arg))
480 else:
481 missing_args.append(arg)
482 log.debug("config_args = %s" % config_args)
483 if missing_args:
484 raise exception.PluginError(
485 "Not enough settings provided for plugin %s (missing: %s)"
486 % (plugin_name, ', '.join(missing_args)))
487 config_kwargs = {}
488 for arg in kwargs:
489 if arg in plugin:
490 config_kwargs[arg] = plugin.get(arg)
491 log.debug("config_kwargs = %s" % config_kwargs)
492 plugs.append((plugin_name, klass(*config_args, **config_kwargs)))
493 return plugs
494
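# --- Illustrative sketch, not part of the original module ---
# The core of the plugin-loading steps above: split the setup_class path
# into module and class, import the module, fetch the class, and
# instantiate it with the settings from the plugin's config section. The
# 'mypackage.myplugin.MyPlugin' path and settings are hypothetical.
def _example_load_plugin(setup_class='mypackage.myplugin.MyPlugin',
                         settings=None):
    settings = settings or {}
    mod_name = '.'.join(setup_class.split('.')[:-1])
    class_name = setup_class.split('.')[-1]
    mod = __import__(mod_name, globals(), locals(), [class_name])
    klass = getattr(mod, class_name)
    return klass(**settings)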
496 for key in kwargs.keys():
497 if hasattr(self, key):
498 self.__dict__[key] = kwargs[key]
499
501 """
502 Validate existing instances against this cluster's settings
503 """
504 self.wait_for_active_spots()
505 nodes = self.nodes
506 if not nodes:
507 raise exception.ClusterValidationError("No existing nodes found!")
508 log.info("Validating existing instances...")
509 mazone = self.master_node.placement
510 rlmap = self._get_launch_map(reverse=True)
511 for node in nodes:
512 itype, image = rlmap.get(node.alias)
513 alias = node.alias
514 ntype = node.instance_type
515 if ntype != itype:
516 raise exception.ClusterValidationError(
517 "%s's instance type (%s) != %s" % (alias, ntype, itype))
518 nimage = node.image_id
519 if nimage != image:
520 raise exception.ClusterValidationError(
521 "%s's image id (%s) != %s" % (alias, nimage, image))
522 if node.key_name != self.keyname:
523 raise exception.ClusterValidationError(
524 "%s's key_name (%s) != %s" % (alias, node.key_name,
525 self.keyname))
526 nazone = node.placement
527 if mazone != nazone:
528 raise exception.ClusterValidationError(
529 "Node '%s' zone (%s) does not match master's zone (%s)" %
530 (alias, nazone, mazone))
531
532 self._zone = None
533 if self.zone and self.zone != mazone:
534 raise exception.ClusterValidationError(
535 "Running cluster's availability_zone (%s) != %s" %
536 (mazone, self.zone))
537
538 def get(self, name):
539 return self.__dict__.get(name)
540
544
546 """
547 Load the original settings used to launch this cluster into this
548 Cluster object. The settings are loaded from the cluster group's
549 description field.
550 """
551 try:
552 desc = self.cluster_group.description
553 version, b64data = desc.split('-', 1)
554 if utils.program_version_greater(version, static.VERSION):
555 d = dict(cluster=self.cluster_tag, old_version=static.VERSION,
556 new_version=version)
557 msg = user_msgs.version_mismatch % d
558 sep = '*' * 60
559 log.warn('\n'.join([sep, msg, sep]), extra={'__textwrap__': 1})
560 compressed_data = base64.b64decode(b64data)
561 pkl_data = zlib.decompress(compressed_data)
562 cluster_settings = cPickle.loads(str(pkl_data)).__dict__
563 except (cPickle.PickleError, zlib.error, ValueError, TypeError,
564 EOFError, IndexError), e:
565 log.debug('load receipt exception: ', exc_info=True)
566 raise exception.IncompatibleCluster(self.cluster_group)
567 except Exception, e:
568 raise exception.ClusterReceiptError(
569 'failed to load cluster receipt: %s' % e)
570 for key in cluster_settings:
571 if hasattr(self, key):
572 setattr(self, key, cluster_settings.get(key))
573 if load_plugins:
574 try:
575 self.plugins = self.load_plugins(self._plugins)
576 except exception.PluginError, e:
577 log.warn(e)
578 log.warn("An error occured while loading plugins")
579 log.warn("Not running any plugins")
580 except Exception, e:
581 raise exception.ClusterReceiptError(
582 'failed to load cluster receipt: %s' % e)
583 return True
584
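# --- Illustrative sketch, not part of the original module ---
# The "cluster receipt" encoding that load_receipt above reverses (and the
# cluster_group property below produces): pickled settings are compressed
# with zlib, base64-encoded, and prefixed with the StarCluster version so
# they fit in the security group's description field. The version string
# and settings dict here are placeholders.
def _example_receipt_roundtrip(settings, version='0.92'):
    import zlib
    import base64
    import cPickle
    desc = '-'.join([version,
                     base64.b64encode(zlib.compress(cPickle.dumps(settings)))])
    stored_version, b64data = desc.split('-', 1)
    restored = cPickle.loads(zlib.decompress(base64.b64decode(b64data)))
    assert stored_version == version and restored == settings
    return desc
# _example_receipt_roundtrip({'cluster_size': 2, 'keyname': 'mykey'})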
586 cfg = {}
587 exclude = ['key_location', 'plugins']
588 include = ['_zone', '_plugins']
589 for key in self.__dict__.keys():
590 private = key.startswith('_')
591 if (not private or key in include) and not key in exclude:
592 val = getattr(self, key)
593 if type(val) in [str, unicode, bool, int, float, list, dict]:
594 cfg[key] = val
595 elif type(val) is utils.AttributeDict:
596 cfg[key] = dict(val)
597 return cfg
598
599 @property
602
603 @property
605 if self._cluster_group is None:
606 ssh_port = static.DEFAULT_SSH_PORT
607 desc = base64.b64encode(zlib.compress(cPickle.dumps(self)))
608 desc = '-'.join([static.VERSION, desc])
609 sg = self.ec2.get_or_create_group(self._security_group,
610 desc,
611 auth_ssh=True,
612 auth_group_traffic=True)
613 for p in self.permissions:
614 perm = self.permissions.get(p)
615 ip_protocol = perm.get('ip_protocol', 'tcp')
616 from_port = perm.get('from_port')
617 to_port = perm.get('to_port')
618 cidr_ip = perm.get('cidr_ip', static.WORLD_CIDRIP)
619 if not self.ec2.has_permission(sg, ip_protocol, from_port,
620 to_port, cidr_ip):
621 log.info("Opening %s port range %s-%s for CIDR %s" %
622 (ip_protocol, from_port, to_port, cidr_ip))
623 sg.authorize(ip_protocol, from_port, to_port, cidr_ip)
624 if ip_protocol == 'tcp' and from_port <= ssh_port <= to_port:
625 sg.revoke(ip_protocol, ssh_port, ssh_port,
626 static.WORLD_CIDRIP)
627 self._cluster_group = sg
628 return self._cluster_group
629
630 @property
636
637 @property
646
647 @property
649 states = ['pending', 'running', 'stopping', 'stopped']
650 filters = {'group-name': self._security_group,
651 'instance-state-name': states}
652 nodes = self.ec2.get_all_instances(filters=filters)
653
654 current_ids = [n.id for n in nodes]
655 remove_nodes = [n for n in self._nodes if n.id not in current_ids]
656 for node in remove_nodes:
657 self._nodes.remove(node)
658
659 existing_nodes = dict([(n.id, n) for n in self._nodes])
660 log.debug('existing nodes: %s' % existing_nodes)
661 for node in nodes:
662 if node.id in existing_nodes:
663 log.debug('updating existing node %s in self._nodes' % node.id)
664 enode = existing_nodes.get(node.id)
665 enode.key_location = self.key_location
666 enode.instance = node
667 else:
668 log.debug('adding node %s to self._nodes list' % node.id)
669 n = Node(node, self.key_location)
670 if n.is_master():
671 self._master = n
672 self._nodes.insert(0, n)
673 else:
674 self._nodes.append(n)
675 self._nodes.sort(key=lambda n: n.alias)
676 log.debug('returning self._nodes = %s' % self._nodes)
677 return self._nodes
678
686
692
698
704
706 return filter(lambda x: x.state in states, self.nodes)
707
708 @property
711
712 @property
715
716 @property
721
727
728 def create_node(self, alias, image_id=None, instance_type=None, zone=None,
729 placement_group=None, spot_bid=None, force_flat=False):
734
735 def create_nodes(self, aliases, image_id=None, instance_type=None, count=1,
736 zone=None, placement_group=None, spot_bid=None,
737 force_flat=False):
738 """
739 Convenience method for requesting instances with this cluster's
740 settings. All settings (kwargs) except force_flat default to cluster
741 settings if not provided. Passing force_flat=True ignores spot_bid
742 completely, forcing a flat-rate instance to be requested.
743 """
744 spot_bid = spot_bid or self.spot_bid
745 if force_flat:
746 spot_bid = None
747 cluster_sg = self.cluster_group.name
748 instance_type = instance_type or self.node_instance_type
749 if not placement_group and instance_type in static.CLUSTER_TYPES:
750 placement_group = self.placement_group.name
751 image_id = image_id or self.node_image_id
752 kwargs = dict(price=spot_bid, instance_type=instance_type,
753 min_count=count, max_count=count, count=count,
754 key_name=self.keyname, security_groups=[cluster_sg],
755 availability_zone_group=cluster_sg,
756 launch_group=cluster_sg, placement=zone or self.zone,
757 user_data='|'.join(aliases),
758 placement_group=placement_group)
759 resvs = []
760 if spot_bid:
761 for alias in aliases:
762 kwargs['user_data'] = alias
763 resvs.extend(self.ec2.request_instances(image_id, **kwargs))
764 else:
765 resvs.append(self.ec2.request_instances(image_id, **kwargs))
766 for resv in resvs:
767 log.info(str(resv), extra=dict(__raw__=True))
768 return resvs
769
771 nodes = self._nodes_in_states(['pending', 'running'])
772 nodes = filter(lambda x: not x.is_master(), nodes)
773 highest = 0
774 for n in nodes:
775 try:
776 highest = max(highest, int(n.alias[4:8]))
777 except ValueError:
778 pass
779 next = highest + 1
780 log.debug("Highest node number is %d. choosing %d." % (highest, next))
781 return next
782
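# --- Illustrative sketch, not part of the original module ---
# The alias numbering scheme used above: worker aliases look like
# 'node001', 'node002', ..., and the next id is one more than the highest
# numeric suffix currently in use.
def _example_next_node_num(aliases):
    highest = 0
    for alias in aliases:
        try:
            highest = max(highest, int(alias[4:8]))
        except ValueError:
            pass
    return highest + 1
# _example_next_node_num(['node001', 'node003']) == 4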
783 def add_node(self, alias=None, no_create=False):
784 """
785 Add a single node to this cluster
786 """
787 aliases = None
788 if alias:
789 aliases = [alias]
790 self.add_nodes(1, aliases=aliases, no_create=no_create)
791
792 def add_nodes(self, num_nodes, aliases=None, no_create=False):
793 """
794 Add new nodes to this cluster
795
796 aliases - list of aliases to assign to new nodes (len must equal
797 num_nodes)
798 """
799 running_pending = self._nodes_in_states(['pending', 'running'])
800 aliases = aliases or []
801 if not aliases:
802 next_node_id = self._get_next_node_num()
803 for i in range(next_node_id, next_node_id + num_nodes):
804 alias = 'node%.3d' % i
805 aliases.append(alias)
806 assert len(aliases) == num_nodes
807 if "master" in aliases:
808 raise exception.ClusterValidationError(
809 "worker nodes cannot have master as an alias")
810 if not no_create:
811 for node in running_pending:
812 if node.alias in aliases:
813 raise exception.ClusterValidationError(
814 "node with alias %s already exists" % node.alias)
815 log.info("Launching node(s): %s" % ', '.join(aliases))
816 self.create_nodes(aliases, count=len(aliases))
817 self.wait_for_cluster(msg="Waiting for node(s) to come up...")
818 log.debug("Adding node(s): %s" % aliases)
819 default_plugin = clustersetup.DefaultClusterSetup(self.disable_queue,
820 self.disable_threads)
821 for alias in aliases:
822 node = self.get_node_by_alias(alias)
823 default_plugin.on_add_node(
824 node, self.nodes, self.master_node,
825 self.cluster_user, self.cluster_shell,
826 self.volumes)
827 self.run_plugins(method_name="on_add_node", node=node)
828
834
856
858 """
859 Groups all node aliases that share the same instance type/image id.
860 Returns a dictionary that is used to launch all nodes of the same
861 instance type and image id in a single request. Example return value:
862
863 {('c1.xlarge', 'ami-a5c02dcc'): ['node001', 'node002'],
864 ('m1.large', 'ami-a5c02dcc'): ['node003'],
865 ('m1.small', 'ami-17b15e7e'): ['master', 'node005', 'node006'],
866 ('m1.small', 'ami-19e17a2b'): ['node004']}
867
868 Passing reverse=True will return the same information only keyed by
869 node aliases:
870
871 {'master': ('m1.small', 'ami-17b15e7e'),
872 'node001': ('c1.xlarge', 'ami-a5c02dcc'),
873 'node002': ('c1.xlarge', 'ami-a5c02dcc'),
874 'node003': ('m1.large', 'ami-a5c02dcc'),
875 'node004': ('m1.small', 'ami-19e17a2b'),
876 'node005': ('m1.small', 'ami-17b15e7e'),
877 'node006': ('m1.small', 'ami-17b15e7e')}
878 """
879 lmap = {}
880 mtype = self.master_instance_type or self.node_instance_type
881 mimage = self.master_image_id or self.node_image_id
882 lmap[(mtype, mimage)] = ['master']
883 id_start = 1
884 for itype in self.node_instance_types:
885 count = itype['size']
886 image_id = itype['image'] or self.node_image_id
887 type = itype['type'] or self.node_instance_type
888 if not (type, image_id) in lmap:
889 lmap[(type, image_id)] = []
890 for id in range(id_start, id_start + count):
891 alias = 'node%.3d' % id
892 log.debug("Launch map: %s (ami: %s, type: %s)..." % \
893 (alias, image_id, type))
894 lmap[(type, image_id)].append(alias)
895 id_start += 1
896 ntype = self.node_instance_type
897 nimage = self.node_image_id
898 if not (ntype, nimage) in lmap:
899 lmap[(ntype, nimage)] = []
900 for id in range(id_start, self.cluster_size):
901 alias = 'node%.3d' % id
902 log.debug("Launch map: %s (ami: %s, type: %s)..." % \
903 (alias, nimage, ntype))
904 lmap[(ntype, nimage)].append(alias)
905 if reverse:
906 rlmap = {}
907 for (itype, image_id) in lmap:
908 aliases = lmap.get((itype, image_id))
909 for alias in aliases:
910 rlmap[alias] = (itype, image_id)
911 return rlmap
912 return lmap
913
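# --- Illustrative sketch, not part of the original module ---
# A condensed version of the launch-map grouping above for a 4-node cluster
# with a single node_instance_types entry. The AMI ids and instance types
# are made-up values for illustration.
def _example_launch_map():
    default = ('m1.small', 'ami-17b15e7e')
    lmap = {default: ['master']}
    node_instance_types = [{'size': 2, 'type': 'c1.xlarge',
                            'image': 'ami-a5c02dcc'}]
    cluster_size = 4
    next_id = 1
    for itype in node_instance_types:
        key = (itype['type'], itype['image'])
        for i in range(next_id, next_id + itype['size']):
            lmap.setdefault(key, []).append('node%.3d' % i)
        next_id += itype['size']
    for i in range(next_id, cluster_size):
        lmap[default].append('node%.3d' % i)
    return lmap
# => {('m1.small', 'ami-17b15e7e'): ['master', 'node003'],
#     ('c1.xlarge', 'ami-a5c02dcc'): ['node001', 'node002']}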
915 """
916 Returns (instance_type,image_id) for a given alias based
917 on the map returned from self._get_launch_map
918 """
919 lmap = self._get_launch_map()
920 for (type, image) in lmap:
921 key = (type, image)
922 if alias in lmap.get(key):
923 return key
924
926 """
927 Launches all EC2 instances based on this cluster's settings.
928 """
929 log.info("Launching a %d-node cluster..." % self.cluster_size)
930 mtype = self.master_instance_type or self.node_instance_type
931 self.master_instance_type = mtype
932 if self.spot_bid:
933 self._create_spot_cluster()
934 else:
935 self._create_flat_rate_cluster()
936
938 """
939 Launches cluster using flat-rate instances. This method attempts to
940 minimize the number of launch requests by grouping nodes of the same
941 type/ami and launching each group simultaneously within a single launch
942 request. This is especially important for Cluster Compute instances
943 given that Amazon *highly* recommends requesting all CCI in a single
944 launch request.
945 """
946 lmap = self._get_launch_map()
947 zone = None
948 master_map = None
949 for (type, image) in lmap:
950
951 aliases = lmap.get((type, image))
952 if 'master' in aliases:
953 master_map = (type, image)
954 for alias in aliases:
955 log.debug("Launching %s (ami: %s, type: %s)" % \
956 (alias, image, type))
957 master_response = self.create_nodes(aliases, image_id=image,
958 instance_type=type,
959 count=len(aliases),
960 force_flat=True)[0]
961 zone = master_response.instances[0].placement
962 lmap.pop(master_map)
963 if self.cluster_size <= 1:
964 return
965 for (type, image) in lmap:
966 aliases = lmap.get((type, image))
967 for alias in aliases:
968 log.debug("Launching %s (ami: %s, type: %s)" % \
969 (alias, image, type))
970 self.create_nodes(aliases, image_id=image, instance_type=type,
971 count=len(aliases), zone=zone, force_flat=True)
972
974 """
975 Launches cluster using all spot instances. This method makes a single
976 spot request for each node in the cluster since spot instances
977 *always* have an ami_launch_index of 0. This is needed in order to
978 correctly assign aliases to nodes.
979 """
980 (mtype, mimage) = self._get_type_and_image_id('master')
981 log.info("Launching master node (ami: %s, type: %s)..." % \
982 (mimage, mtype))
983 force_flat = not self.force_spot_master and self.cluster_size > 1
984 master_response = self.create_node('master',
985 image_id=mimage,
986 instance_type=mtype,
987 force_flat=force_flat)
988 zone = None
989 if not force_flat and self.spot_bid:
990
991 launch_spec = master_response.launch_specification
992 zone = launch_spec.placement
993 else:
994
995 zone = master_response.instances[0].placement
996 if self.cluster_size <= 1:
997 return
998 for id in range(1, self.cluster_size):
999 alias = 'node%.3d' % id
1000 (ntype, nimage) = self._get_type_and_image_id(alias)
1001 log.info("Launching %s (ami: %s, type: %s)" %
1002 (alias, nimage, ntype))
1003 self.create_node(alias, image_id=nimage, instance_type=ntype,
1004 zone=zone)
1005
1007 """
1008 Returns True if all nodes are spot instances
1009 """
1010 nodes = self.nodes
1011 if not nodes:
1012 return False
1013 for node in nodes:
1014 if not node.is_spot():
1015 return False
1016 return True
1017
1019 """
1020 Returns True if any nodes are spot instances
1021 """
1022 for node in self.nodes:
1023 if node.is_spot():
1024 return True
1025 return False
1026
1028 """
1029 Returns True if all nodes are EBS-backed
1030 """
1031 nodes = self.nodes
1032 if not nodes:
1033 return False
1034 for node in nodes:
1035 if not node.is_ebs_backed():
1036 return False
1037 return True
1038
1040 """
1041 Returns True if any nodes are EBS-backed
1042 """
1043 for node in self.nodes:
1044 if node.is_ebs_backed():
1045 return True
1046 return False
1047
1049 """
1050 Returns True if all nodes are stoppable (i.e. non-spot and EBS-backed)
1051 """
1052 nodes = self.nodes
1053 if not nodes:
1054 return False
1055 for node in self.nodes:
1056 if not node.is_stoppable():
1057 return False
1058 return True
1059
1061 """
1062 Returns True if any nodes are stoppable (i.e. non-spot and EBS-backed)
1063 """
1064 nodes = self.nodes
1065 if not nodes:
1066 return False
1067 for node in nodes:
1068 if node.is_stoppable():
1069 return True
1070 return False
1071
1073 """
1074 Returns True if all instances are Cluster/GPU Compute type
1075 """
1076 nodes = self.nodes
1077 if not nodes:
1078 return False
1079 for node in nodes:
1080 if not node.is_cluster_compute():
1081 return False
1082 return True
1083
1089
1091 """
1092 Check that all nodes are 'running' and that SSH is up on all nodes.
1093 This method will return False if any spot requests are in an 'open'
1094 state.
1095 """
1096 spots = self.spot_requests
1097 active_spots = filter(lambda x: x.state == 'active', spots)
1098 if len(spots) != len(active_spots):
1099 return False
1100 nodes = self.nodes
1101 if not nodes:
1102 return False
1103 for node in nodes:
1104 if not node.is_up():
1105 return False
1106 return True
1107
1109 """
1110 Logs a status msg, starts a spinner, and returns the spinner object.
1111 This is useful for long running processes:
1112
1113 s = self.get_spinner("Long running process running...")
1114 (do something)
1115 s.stop()
1116 """
1117 s = spinner.Spinner()
1118 log.info(msg, extra=dict(__nonewline__=True))
1119 s.start()
1120 return s
1121
1122 @property
1133
1135 """
1136 Wait for all open spot requests for this cluster to transition to
1137 'active'.
1138 """
1139 spots = spots or self.spot_requests
1140 open_spots = [spot for spot in spots if spot.state == "open"]
1141 if open_spots:
1142 pbar = self.progress_bar.reset()
1143 log.info('Waiting for open spot requests to become active...')
1144 pbar.maxval = len(spots)
1145 pbar.update(0)
1146 while not pbar.finished:
1147 active_spots = filter(lambda x: x.state == "active", spots)
1148 pbar.maxval = len(spots)
1149 pbar.update(len(active_spots))
1150 if not pbar.finished:
1151 time.sleep(self.refresh_interval)
1152 spots = self.get_spot_requests_or_raise()
1153 pbar.reset()
1154
1156 """
1157 Wait indefinitely for cluster nodes to show up.
1158 """
1159 nodes = nodes or self.nodes
1160 if len(nodes) == 0:
1161 s = self.get_spinner("Waiting for instances to activate...")
1162 while len(nodes) == 0:
1163 time.sleep(self.refresh_interval)
1164 nodes = self.nodes
1165 s.stop()
1166
1184
1186 """
1187 Wait until SSH comes up on all cluster nodes
1188 """
1189 log.info("Waiting for SSH to come up on all nodes...")
1190 nodes = nodes or self.get_nodes_or_raise()
1191 pbar = self.progress_bar.reset()
1192 pbar.maxval = len(nodes)
1193 pbar.update(0)
1194 while not pbar.finished:
1195 active_nodes = filter(lambda n: n.is_up(), nodes)
1196 pbar.maxval = len(nodes)
1197 pbar.update(len(active_nodes))
1198 if not pbar.finished:
1199 time.sleep(self.refresh_interval)
1200 nodes = self.get_nodes_or_raise()
1201 pbar.finish()
1202
1203 @print_timing("Waiting for cluster to come up")
1205 """
1206 Wait for cluster to come up and display progress bar. Waits for all
1207 spot requests to become 'active', all instances to be in a 'running'
1208 state, and for all SSH daemons to come up.
1209
1210 msg - custom message to print out before waiting on the cluster
1211 """
1212 interval = self.refresh_interval
1213 log.info("%s %s" % (msg, "(updating every %ds)" % interval))
1214 self.wait_for_active_spots()
1215 self.wait_for_active_instances()
1216 self.wait_for_running_instances()
1217 self.wait_for_ssh()
1218
1220 """
1221 Check whether all nodes are in the 'stopped' state
1222 """
1223 nodes = self.nodes
1224 if not nodes:
1225 return False
1226 for node in nodes:
1227 if node.state != 'stopped':
1228 return False
1229 return True
1230
1232 """
1233 Check whether all nodes are in a 'terminated' state
1234 """
1235 states = filter(lambda x: x != 'terminated', static.INSTANCE_STATES)
1236 filters = {'group-name': self._security_group,
1237 'instance-state-name': states}
1238 insts = self.ec2.get_all_instances(filters=filters)
1239 return len(insts) == 0
1240
1242 """
1243 Attach each volume to the master node
1244 """
1245 for vol in self.volumes:
1246 volume = self.volumes.get(vol)
1247 device = volume.get('device')
1248 vol_id = volume.get('volume_id')
1249 vol = self.ec2.get_volume(vol_id)
1250 if vol.attach_data.instance_id == self.master_node.id:
1251 log.info("Volume %s already attached to master...skipping" % \
1252 vol.id)
1253 continue
1254 if vol.status != "available":
1255 log.error(('Volume %s not available...' +
1256 'please check and try again') % vol.id)
1257 continue
1258 log.info("Attaching volume %s to master node on %s ..." % (vol.id,
1259 device))
1260 resp = vol.attach(self.master_node.id, device)
1261 log.debug("resp = %s" % resp)
1262 while True:
1263 vol.update()
1264 if vol.attachment_state() == 'attached':
1265 break
1266 time.sleep(5)
1267
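# --- Illustrative sketch, not part of the original module ---
# The attach-and-poll pattern used above, with an optional timeout added
# (the loop above waits indefinitely). 'vol' is expected to be a boto
# Volume-like object exposing attach/update/attachment_state, as used in
# the method above; the timeout value is arbitrary.
def _example_attach_and_wait(vol, instance_id, device, timeout=300):
    import time
    vol.attach(instance_id, device)
    waited = 0
    while vol.attachment_state() != 'attached':
        if waited >= timeout:
            raise RuntimeError("timed out attaching volume %s" % vol.id)
        time.sleep(5)
        waited += 5
        vol.update()
    return vol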
1274
1275 @print_timing('Restarting cluster')
1291
1293 """
1294 Shut down this cluster by detaching all volumes and 'stopping' all nodes
1295
1296 In general, all nodes in the cluster must be 'stoppable' meaning all
1297 nodes are backed by flat-rate EBS-backed instances. If any
1298 'unstoppable' nodes are found an exception is raised. A node is
1299 'unstoppable' if it is backed by either a spot or S3-backed instance.
1300
1301 If the cluster contains a mix of 'stoppable' and 'unstoppable' nodes
1302 you can stop all stoppable nodes and terminate any unstoppable nodes by
1303 setting terminate_unstoppable=True.
1304
1305 This will stop all nodes that can be stopped and terminate the rest.
1306 """
1307 nodes = self.nodes
1308 if not nodes:
1309 raise exception.ClusterValidationError("No running nodes found")
1310 if not self.is_stoppable():
1311 has_stoppable_nodes = self.has_stoppable_nodes()
1312 if not terminate_unstoppable and has_stoppable_nodes:
1313 raise exception.InvalidOperation(
1314 "Cluster contains nodes that are not stoppable")
1315 if not has_stoppable_nodes:
1316 raise exception.InvalidOperation(
1317 "Cluster does not contain any stoppable nodes")
1318 try:
1319 self.run_plugins(method_name="on_shutdown", reverse=True)
1320 except exception.MasterDoesNotExist, e:
1321 log.warn("Cannot run plugins: %s" % e)
1322 self.detach_volumes()
1323 for node in nodes:
1324 node.shutdown()
1325
1357
1358 def start(self, create=True, create_only=False, validate=True,
1359 validate_only=False, validate_running=False):
1360 """
1361 Creates and configures a cluster from this cluster template's settings.
1362
1363 create - create new nodes when starting the cluster. set to False to
1364 use existing nodes
1365 create_only - only create the cluster node instances, don't configure
1366 the cluster
1367 validate - whether or not to validate the cluster settings used.
1368 False will ignore validate_only and validate_running
1369 keywords and is effectively the same as running _start
1370 validate_only - only validate cluster settings, do not create or
1371 configure cluster
1372 validate_running - whether or not to validate the existing instances
1373 being used against this cluster's settings
1374 """
1375 if validate:
1376 if not create and validate_running:
1377 try:
1378 self._validate_running_instances()
1379 except exception.ClusterValidationError, e:
1380 msg = "Existing nodes are not compatible with cluster "
1381 msg += "settings:\n"
1382 e.msg = msg + e.msg
1383 raise
1384 elif create:
1385 self._validate()
1386 if validate_only:
1387 return
1388 else:
1389 log.warn("SKIPPING VALIDATION - USE AT YOUR OWN RISK")
1390 return self._start(create=create, create_only=create_only)
1391
1392 @print_timing("Starting cluster")
1393 def _start(self, create=True, create_only=False):
1394 """
1395 Create and configure a cluster from this cluster template's settings
1396 (Does not attempt to validate before running)
1397
1398 create - create new nodes when starting the cluster. set to False to
1399 use existing nodes
1400 create_only - only create the cluster node instances, don't configure
1401 the cluster
1402 """
1403 log.info("Starting cluster...")
1404 if create:
1405 self.create_cluster()
1406 else:
1407 assert self.master_node is not None
1408 for node in self.stopped_nodes:
1409 log.info("Starting stopped node: %s" % node.alias)
1410 node.start()
1411 if create_only:
1412 return
1413 self.setup_cluster()
1414
1416 """
1417 Waits for all nodes to come up and then runs the default
1418 StarCluster setup routines followed by any additional plugin setup
1419 routines
1420 """
1421 self.wait_for_cluster()
1422 self._setup_cluster()
1423
1424 @print_timing("Configuring cluster")
1426 """
1427 Runs the default StarCluster setup routines followed by any additional
1428 plugin setup routines. Does not wait for nodes to come up.
1429 """
1430 log.info("The master node is %s" % self.master_node.dns_name)
1431 log.info("Setting up the cluster...")
1432 if self.volumes:
1433 self.attach_volumes_to_master()
1434 default_plugin = clustersetup.DefaultClusterSetup(self.disable_queue,
1435 self.disable_threads)
1436 default_plugin.run(self.nodes, self.master_node, self.cluster_user,
1437 self.cluster_shell, self.volumes)
1438 self.run_plugins()
1439
1440 def run_plugins(self, plugins=None, method_name="run", node=None,
1441 reverse=False):
1442 """
1443 Run all plugins specified in this Cluster object's self.plugins list.
1444 If a plugins list is passed, it is used instead of self.plugins.
1445
1446 plugins must be a list of tuples: the first element of each tuple is the
1447 plugin's name, the second is the plugin object (a subclass of ClusterSetup)
1448 """
1449 plugs = plugins or self.plugins
1450 if reverse:
1451 plugs = plugs[:]
1452 plugs.reverse()
1453 for plug in plugs:
1454 name, plugin = plug
1455 self.run_plugin(plugin, name, method_name=method_name, node=node)
1456
1457 def run_plugin(self, plugin, name='', method_name='run', node=None):
1458 """
1459 Run a StarCluster plugin.
1460
1461 plugin - an instance of the plugin's class
1462 name - a user-friendly label for the plugin
1463 method_name - the method to run within the plugin (default: "run")
1464 node - optional node to pass as first argument to plugin method (used
1465 for on_add_node/on_remove_node)
1466 """
1467 plugin_name = name or str(plugin)
1468 try:
1469 func = getattr(plugin, method_name, None)
1470 if not func:
1471 log.warn("Plugin %s has no %s method...skipping" % \
1472 (plugin_name, method_name))
1473 return
1474 args = [self.nodes, self.master_node, self.cluster_user,
1475 self.cluster_shell, self.volumes]
1476 if node:
1477 args.insert(0, node)
1478 log.info("Running plugin %s" % plugin_name)
1479 func(*args)
1480 except NotImplementedError:
1481 log.debug("method %s not implemented by plugin %s" % (method_name,
1482 plugin_name))
1483 except exception.MasterDoesNotExist:
1484 raise
1485 except Exception, e:
1486 log.error("Error occured while running plugin '%s':" % plugin_name)
1487 if isinstance(e, exception.ThreadPoolException):
1488 e.print_excs()
1489 log.debug(e.format_excs())
1490 else:
1491 traceback.print_exc()
1492 log.debug(traceback.format_exc())
1493
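# --- Illustrative sketch, not part of the original module ---
# A bare-bones plugin showing the method signatures that run_plugins/
# run_plugin above invoke: run() during cluster setup and on_add_node()
# when a node is added. It reuses the clustersetup and log imports from the
# top of this module; the class name, setting, and log messages are
# hypothetical.
class ExamplePlugin(clustersetup.ClusterSetup):
    def __init__(self, my_setting='default'):
        self.my_setting = my_setting

    def run(self, nodes, master, user, user_shell, volumes):
        log.info("ExamplePlugin.run on %d node(s)" % len(nodes))

    def on_add_node(self, node, nodes, master, user, user_shell, volumes):
        log.info("ExamplePlugin.on_add_node: %s" % node.alias)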
1505
1531
1533 """
1534 Returns True if all cluster template settings are valid
1535 """
1536 try:
1537 self._validate()
1538 return True
1539 except exception.ClusterValidationError, e:
1540 log.error(e.msg)
1541 return False
1542
1552
1554 try:
1555 size = int(self.cluster_size)
1556 if size < 1:
1557 raise ValueError
1558 except (ValueError, TypeError):
1559 raise exception.ClusterValidationError(
1560 'cluster_size must be an integer >= 1')
1561 num_itypes = sum([i.get('size') for i in self.node_instance_types])
1562 num_nodes = self.cluster_size - 1
1563 if num_itypes > num_nodes:
1564 raise exception.ClusterValidationError(
1565 ("total number of nodes specified in node_instance_type (%s)" +
1566 " must be <= cluster_size-1 (%s)") % (num_itypes, num_nodes))
1567 return True
1568
1570 cluster_shell = self.cluster_shell
1571 if not self.__available_shells.get(cluster_shell):
1572 raise exception.ClusterValidationError(
1573 'Invalid user shell specified. Options are %s' % \
1574 ' '.join(self.__available_shells.keys()))
1575 return True
1576
1591
1593 availability_zone = self.availability_zone
1594 if availability_zone:
1595 zone = self.ec2.get_zone(availability_zone)
1596 if not zone:
1597 azone = self.availability_zone
1598 raise exception.ClusterValidationError(
1599 'availability_zone = %s does not exist' % azone)
1600 if zone.state != 'available':
1601 log.warn('The availability_zone = %s ' % zone +
1602 'is not available at this time')
1603 return True
1604
1635
1637 master_image_id = self.master_image_id
1638 node_image_id = self.node_image_id
1639 master_instance_type = self.master_instance_type
1640 node_instance_type = self.node_instance_type
1641 instance_types = self.__instance_types
1642 instance_type_list = ', '.join(instance_types.keys())
1643 if not node_instance_type in instance_types:
1644 raise exception.ClusterValidationError(
1645 ("You specified an invalid node_instance_type %s \n" +
1646 "Possible options are:\n%s") % \
1647 (node_instance_type, instance_type_list))
1648 elif master_instance_type:
1649 if not master_instance_type in instance_types:
1650 raise exception.ClusterValidationError(
1651 ("You specified an invalid master_instance_type %s\n" + \
1652 "Possible options are:\n%s") % \
1653 (master_instance_type, instance_type_list))
1654 try:
1655 self.__check_platform(node_image_id, node_instance_type)
1656 except exception.ClusterValidationError, e:
1657 raise exception.ClusterValidationError(
1658 'Incompatible node_image_id and node_instance_type:\n' + e.msg)
1659 if master_image_id and not master_instance_type:
1660 try:
1661 self.__check_platform(master_image_id, node_instance_type)
1662 except exception.ClusterValidationError, e:
1663 raise exception.ClusterValidationError(
1664 'Incompatible master_image_id and ' +
1665 'node_instance_type\n' + e.msg)
1666 elif master_image_id and master_instance_type:
1667 try:
1668 self.__check_platform(master_image_id, master_instance_type)
1669 except exception.ClusterValidationError, e:
1670 raise exception.ClusterValidationError(
1671 'Incompatible master_image_id and ' +
1672 'master_instance_type\n' + e.msg)
1673 elif master_instance_type and not master_image_id:
1674 try:
1675 self.__check_platform(node_image_id, master_instance_type)
1676 except exception.ClusterValidationError, e:
1677 raise exception.ClusterValidationError(
1678 'Incompatible node_image_id and ' +
1679 'master_instance_type\n' + e.msg)
1680 for itype in self.node_instance_types:
1681 type = itype.get('type')
1682 img = itype.get('image') or node_image_id
1683 if not type in instance_types:
1684 raise exception.ClusterValidationError(
1685 ("You specified an invalid instance type %s \n" +
1686 "Possible options are:\n%s") % (type, instance_type_list))
1687 try:
1688 self.__check_platform(img, type)
1689 except exception.ClusterValidationError, e:
1690 raise exception.ClusterValidationError(
1691 "Invalid settings for node_instance_type %s: %s" %
1692 (type, e.msg))
1693 return True
1694
1705
1707 """
1708 Verify that EBS volumes exist and that each volume's zone matches this
1709 cluster's zone setting.
1710 """
1711 for vol in self.volumes:
1712 v = self.volumes.get(vol)
1713 vol_id = v.get('volume_id')
1714 vol = self.ec2.get_volume(vol_id)
1715 if vol.status != 'available':
1716 if self.master_node:
1717 if vol.attach_data.instance_id == self.master_node.id:
1718 continue
1719 msg = "volume %s is not available (status: %s)" % (vol_id,
1720 vol.status)
1721 raise exception.ClusterValidationError(msg)
1722
1724 permissions = self.permissions
1725 for perm in permissions:
1726 permission = permissions.get(perm)
1727 protocol = permission.get('ip_protocol')
1728 if protocol not in self.__protocols:
1729 raise exception.InvalidProtocol(protocol)
1730 from_port = permission.get('from_port')
1731 to_port = permission.get('to_port')
1732 try:
1733 from_port = int(from_port)
1734 to_port = int(to_port)
1735 except ValueError:
1736 raise exception.InvalidPortRange(
1737 from_port, to_port, reason="integer range required")
1738 if from_port < 0 or to_port < 0:
1739 raise exception.InvalidPortRange(
1740 from_port, to_port,
1741 reason="from/to must be positive integers")
1742 if from_port > to_port:
1743 raise exception.InvalidPortRange(
1744 from_port, to_port,
1745 reason="'from_port' must be <= 'to_port'")
1746 cidr_ip = permission.get('cidr_ip')
1747 if not iptools.validate_cidr(cidr_ip):
1748 raise exception.InvalidCIDRSpecified(cidr_ip)
1749
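# --- Illustrative sketch, not part of the original module ---
# A permissions mapping that would pass the checks above: a known protocol,
# a non-negative integer port range with from_port <= to_port, and a valid
# CIDR block. The 'www' section name and values are made up.
_example_permissions = {
    'www': {
        'ip_protocol': 'tcp',
        'from_port': 80,
        'to_port': 80,
        'cidr_ip': '0.0.0.0/0',
    },
}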
1751 """
1752 Check EBS vols for missing/duplicate DEVICE/PARTITION/MOUNT_PATHs
1753 and validate these settings. Does not require AWS credentials.
1754 """
1755 volmap = {}
1756 devmap = {}
1757 mount_paths = []
1758 for vol in self.volumes:
1759 vol_name = vol
1760 vol = self.volumes.get(vol)
1761 vol_id = vol.get('volume_id')
1762 device = vol.get('device')
1763 partition = vol.get('partition')
1764 mount_path = vol.get("mount_path")
1765 vmap = volmap.get(vol_id, {})
1766 devices = vmap.get('device', [])
1767 partitions = vmap.get('partition', [])
1768 if devices and device not in devices:
1769 raise exception.ClusterValidationError(
1770 "Can't attach volume %s to more than one device" % vol_id)
1771 elif partitions and partition in partitions:
1772 raise exception.ClusterValidationError(
1773 "Multiple configurations for %s\n"
1774 "Either pick one or specify a separate partition for "
1775 "each configuration" % vol_id)
1776 vmap['partition'] = partitions + [partition]
1777 vmap['device'] = devices + [device]
1778 volmap[vol_id] = vmap
1779 dmap = devmap.get(device, {})
1780 vol_ids = dmap.get('volume_id', [])
1781 if vol_ids and vol_id not in vol_ids:
1782 raise exception.ClusterValidationError(
1783 "Can't attach more than one volume on device %s" % device)
1784 dmap['volume_id'] = vol_ids + [vol_id]
1785 devmap[device] = dmap
1786 mount_paths.append(mount_path)
1787 if not device:
1788 raise exception.ClusterValidationError(
1789 'Missing DEVICE setting for volume %s' % vol_name)
1790 if not utils.is_valid_device(device):
1791 raise exception.ClusterValidationError(
1792 "Invalid DEVICE value for volume %s" % vol_name)
1793 if partition:
1794 if not utils.is_valid_partition(partition):
1795 raise exception.ClusterValidationError(
1796 "Invalid PARTITION value for volume %s" % vol_name)
1797 if not partition.startswith(device):
1798 raise exception.ClusterValidationError(
1799 "Volume PARTITION must start with %s" % device)
1800 if not mount_path:
1801 raise exception.ClusterValidationError(
1802 'Missing MOUNT_PATH setting for volume %s' % vol_name)
1803 if not mount_path.startswith('/'):
1804 raise exception.ClusterValidationError(
1805 "MOUNT_PATH for volume %s should start with /" % vol_name)
1806 for path in mount_paths:
1807 if mount_paths.count(path) > 1:
1808 raise exception.ClusterValidationError(
1809 "Can't mount more than one volume on %s" % path)
1810 return True
1811
1813 has_all_required = True
1814 for opt in self.__cluster_settings:
1815 requirements = self.__cluster_settings[opt]
1816 name = opt
1817 required = requirements[1]
1818 if required and self.get(name.lower()) is None:
1819 log.warn('Missing required setting %s' % name)
1820 has_all_required = False
1821 return has_all_required
1822
1828
1854
1856 return self.ssh_to_node('master', user=user, command=command)
1857
1858 def ssh_to_node(self, alias, user='root', command=None):
1872
1873 if __name__ == "__main__":
1874 from starcluster.config import StarClusterConfig
1875 cfg = StarClusterConfig().load()
1876 sc = cfg.get_cluster_template('smallcluster', 'mynewcluster')
1877 if sc.is_valid():
1878 sc.start(create=True)
1879