1
2 import os
3 import pwd
4 import grp
5 import time
6 import stat
7 import base64
8 import posixpath
9
10 from starcluster import ssh
11 from starcluster import utils
12 from starcluster import static
13 from starcluster import awsutils
14 from starcluster import managers
15 from starcluster import exception
16 from starcluster.logger import log
20 """
21 Manager class for Node objects
22 """
26
27 - def get_node(self, node_id, user='root'):
43
44
45 -class Node(object):
46 """
47 This class represents a single compute node in a StarCluster.
48
49 It contains all useful metadata for the node such as the internal/external
50 hostnames, ips, etc as well as a paramiko ssh object for executing
51 commands, creating/modifying files on the node.
52
53 'instance' arg must be an instance of boto.ec2.instance.Instance
54
55 'key_location' arg is a string that contains the full path to the
56 private key corresponding to the keypair used to launch this node
57
58 'alias' keyword arg optionally names the node. If no alias is provided,
59 the alias is retrieved from the node's user_data based on the node's
60 launch index
61
62 'user' keyword optionally specifies user to ssh as (defaults to root)
63 """
def __init__(self, instance, key_location, alias=None, user='root'):
    """
    Set up a Node around an existing instance object.

    instance - stored as self.instance; its 'connection' attribute is
    reused as the connection of this node's EasyEC2 helper (self.ec2)
    key_location - path to the private key, kept for opening ssh
    connections later on
    alias - optional name for the node (stored as-is, may be None)
    user - user to ssh as (defaults to root)
    """
    # settings handed in by the caller
    self.instance, self.key_location, self.user = (instance, key_location,
                                                   user)
    # build the EC2 helper without credentials and point it at the
    # connection the instance already carries
    easy_ec2 = awsutils.EasyEC2(None, None)
    easy_ec2._conn = instance.connection
    self.ec2 = easy_ec2
    self._alias = alias
    # cached values start out unset
    self._groups = self._ssh = None
    self._num_procs = self._memory = None
75
77 return '<Node: %s (%s)>' % (self.alias, self.id)
78
79 @property
81 """
82 Fetches the node's alias stored in a tag from either the instance
83 or the instance's parent spot request. If no alias tag is found an
84 exception is raised.
85 """
86 if not self._alias:
87 alias = self.tags.get('alias')
88 if not alias:
89 user_data = self.ec2.get_instance_user_data(self.id)
90 aliases = user_data.split('|')
91 index = self.ami_launch_index
92 alias = aliases[index]
93 if not alias:
94
95 raise exception.BaseException(
96 "instance %s has no alias" % self.id)
97 self.add_tag('alias', alias)
98 self._alias = alias
99 return self._alias
100
105
106 @property
109
def add_tag(self, key, value=None):
    """
    Add a tag to this node's underlying instance.

    key - name of the tag
    value - optional value of the tag (defaults to None)

    Returns whatever the instance's own add_tag call returns.
    """
    instance = self.instance
    return instance.add_tag(key, value)
112
115
116 @property
122
123 @property
127
128 @property
130 if not self._num_procs:
131 self._num_procs = int(
132 self.ssh.execute(
133 'cat /proc/cpuinfo | grep processor | wc -l')[0])
134 return self._num_procs
135
136 @property
138 if not self._memory:
139 self._memory = float(
140 self.ssh.execute(
141 "free -m | grep -i mem | awk '{print $2}'")[0])
142 return self._memory
143
144 @property
147
148 @property
151
152 @property
155
156 @property
159
160 @property
163
164 @property
166 return self.instance.id
167
168 @property
171
172 @property
175
176 @property
178 return self.instance.state
179
180 @property
183
184 @property
188
189 @property
192
193 @property
195 try:
196 return int(self.instance.ami_launch_index)
197 except TypeError:
198 log.error("instance %s (state: %s) has no ami_launch_index" % \
199 (self.id, self.state))
200 log.error("returning 0 as ami_launch_index...")
201 return 0
202
203 @property
206
207 @property
209 return self.instance.architecture
210
211 @property
213 return self.instance.kernel
214
215 @property
218
219 @property
222
223 @property
226
227 @property
230
231 @property
234
235 @property
238
240 """
241 Returns dictionary where keys are remote group names and values are
242 grp.struct_grp objects from the standard grp module
243
244 key_by_gid=True will use the integer gid as the returned dictionary's
245 keys instead of the group's name
246 """
247 grp_file = self.ssh.remote_file('/etc/group', 'r')
248 groups = [l.strip().split(':') for l in grp_file.readlines()]
249 grp_file.close()
250 grp_map = {}
251 for group in groups:
252 print grp
253 name, passwd, gid, mems = group
254 gid = int(gid)
255 mems = mems.split(',')
256 key = name
257 if key_by_gid:
258 key = gid
259 grp_map[key] = grp.struct_group([name, passwd, gid, mems])
260 return grp_map
261
263 """
264 Returns dictionary where keys are remote usernames and values are
265 pwd.struct_passwd objects from the standard pwd module
266
267 key_by_uid=True will use the integer uid as the returned dictionary's
268 keys instead of the user's login name
269 """
270 etc_passwd = self.ssh.remote_file('/etc/passwd', 'r')
271 users = [l.strip().split(':') for l in etc_passwd.readlines()]
272 etc_passwd.close()
273 user_map = {}
274 for user in users:
275 name, passwd, uid, gid, gecos, home, shell = user
276 uid = int(uid)
277 gid = int(gid)
278 key = name
279 if key_by_uid:
280 key = uid
281 user_map[key] = pwd.struct_passwd([name, passwd, uid, gid,
282 gecos, home, shell])
283 return user_map
284
286 """
287 Remote version of the getgrgid method in the standard grp module
288
289 returns a grp.struct_group
290 """
291 gmap = self.get_group_map(key_by_gid=True)
292 return gmap.get(gid)
293
295 """
296 Remote version of the getgrnam method in the standard grp module
297
298 returns a grp.struct_group
299 """
300 gmap = self.get_group_map()
301 return gmap.get(groupname)
302
304 """
305 Remote version of the getpwuid method in the standard pwd module
306
307 returns a pwd.struct_passwd
308 """
309 umap = self.get_user_map(key_by_uid=True)
310 return umap.get(uid)
311
313 """
314 Remote version of the getpwnam method in the standard pwd module
315
316 returns a pwd.struct_passwd
317 """
318 umap = self.get_user_map()
319 return umap.get(username)
320
def add_user(self, name, uid=None, gid=None, shell="bash"):
    """
    Create a user account on the remote system with useradd.

    name - login name of the new user; also used as the name of the
    group that is created when gid is given
    uid - optional user id for the new user
    gid - optional group id; when given, a group with this id is created
    first and passed to useradd as the user's group
    shell - optional login shell, resolved on the remote side with
    `which` (default: bash)
    """
    if gid:
        self.ssh.execute('groupadd -o -g %s %s' % (gid, name))
    # collect the useradd options and join them with single spaces
    options = ['useradd', '-o']
    if uid:
        options.append('-u %s' % uid)
    if gid:
        options.append('-g %s' % gid)
    if shell:
        options.append('-s `which %s`' % shell)
    options.append("-m %s" % name)
    self.ssh.execute(' '.join(options))
341
def generate_key_for_user(self, username, ignore_existing=False,
                          auth_new_key=False, auth_conn_key=False):
    """
    Generates an id_rsa/id_rsa.pub keypair combo for a user on the remote
    machine and returns the key object.

    username - remote user to create the keypair for. The user is looked
    up with self.getpwnam and must already exist on the node

    ignore_existing - if False, an existing ~/.ssh/id_rsa is loaded and
    reused rather than generating a new RSA key. If True, a new key is
    always generated and written over any existing id_rsa/id_rsa.pub

    auth_new_key - if True, add the (new or reused) public key to the
    remote user's authorized_keys file

    auth_conn_key - if True, add the public key used to establish this ssh
    connection to the remote user's authorized_keys

    auth_new_key and auth_conn_key are independent of each other: setting
    either one is enough for authorized_keys to be updated.
    """
    user = self.getpwnam(username)
    ssh_folder = posixpath.join(user.pw_dir, '.ssh')
    if not self.ssh.isdir(ssh_folder):
        self.ssh.mkdir(ssh_folder)
    private_key = posixpath.join(ssh_folder, 'id_rsa')
    public_key = private_key + '.pub'
    authorized_keys = posixpath.join(ssh_folder, 'authorized_keys')
    reuse_key = self.ssh.isfile(private_key) and not ignore_existing
    if reuse_key:
        log.info("Using existing key: %s" % private_key)
        key = self.ssh.load_remote_rsa_key(private_key)
    else:
        key = self.ssh.generate_rsa_key()
    pubkey_contents = self.ssh.get_public_key(key)
    if not reuse_key:
        # a fresh key was generated: write both halves, owned by the user
        # and readable only by the owner (mode 0400)
        key_mode = stat.S_IRUSR
        pub_key = self.ssh.remote_file(public_key, 'w')
        pub_key.write(pubkey_contents)
        pub_key.chown(user.pw_uid, user.pw_gid)
        pub_key.chmod(key_mode)
        pub_key.close()
        priv_key = self.ssh.remote_file(private_key, 'w')
        key.write_private_key(priv_key)
        priv_key.chown(user.pw_uid, user.pw_gid)
        priv_key.chmod(key_mode)
        priv_key.close()
    # only skip authorized_keys when NEITHER flag was requested; the old
    # check returned here unless both flags were set
    if not (auth_new_key or auth_conn_key):
        return key
    auth_keys_contents = ''
    if self.ssh.isfile(authorized_keys):
        auth_keys = self.ssh.remote_file(authorized_keys, 'r')
        auth_keys_contents = auth_keys.read()
        auth_keys.close()
    auth_keys = self.ssh.remote_file(authorized_keys, 'a')
    try:
        if auth_new_key and pubkey_contents not in auth_keys_contents:
            log.debug("adding auth_key_contents")
            new_line = '%s\n' % pubkey_contents
            auth_keys.write(new_line)
            # remember what was appended so the connection key below is
            # not written a second time if it is the same key
            auth_keys_contents += new_line
        if auth_conn_key and self.ssh._pkey:
            # authorize the key this ssh connection itself was made with
            conn_pubkey_contents = self.ssh.get_public_key(self.ssh._pkey)
            if conn_pubkey_contents not in auth_keys_contents:
                log.debug("adding conn_pubkey_contents")
                auth_keys.write('%s\n' % conn_pubkey_contents)
        auth_keys.chown(user.pw_uid, user.pw_gid)
        # authorized_keys is read/write for the owner only (mode 0600)
        auth_keys.chmod(stat.S_IRUSR | stat.S_IWUSR)
    finally:
        auth_keys.close()
    return key
410
412 """
413 Populate user's known_hosts file with pub keys from hosts in nodes list
414
415 username - name of the user to add to known hosts for
416 nodes - the nodes to add to the user's known hosts file
417 add_self - add this Node to known_hosts in addition to nodes
418
419 NOTE: this node's hostname will also be added to the known_hosts
420 file
421 """
422 user = self.getpwnam(username)
423 known_hosts_file = posixpath.join(user.pw_dir, '.ssh', 'known_hosts')
424 self.remove_from_known_hosts(username, nodes)
425 khosts = []
426 for node in nodes:
427 server_pkey = node.ssh.get_server_public_key()
428 khosts.append(' '.join([node.alias, server_pkey.get_name(),
429 base64.b64encode(str(server_pkey))]))
430 if add_self and self not in nodes:
431 server_pkey = self.ssh.get_server_public_key()
432 khosts.append(' '.join([self.alias, server_pkey.get_name(),
433 base64.b64encode(str(server_pkey))]))
434 khostsf = self.ssh.remote_file(known_hosts_file, 'a')
435 khostsf.write('\n'.join(khosts) + '\n')
436 khostsf.chown(user.pw_uid, user.pw_gid)
437 khostsf.close()
438
440 """
441 Remove all network names for nodes from username's known_hosts file
442 on this Node
443 """
444 user = self.getpwnam(username)
445 known_hosts_file = posixpath.join(user.pw_dir, '.ssh', 'known_hosts')
446 hostnames = map(lambda n: n.alias, nodes)
447 if self.ssh.isfile(known_hosts_file):
448 regex = '|'.join(hostnames)
449 self.ssh.remove_lines_from_file(known_hosts_file, regex)
450
452 """
453 Configure passwordless ssh for user between this Node and nodes
454 """
455 user = self.getpwnam(username)
456 ssh_folder = posixpath.join(user.pw_dir, '.ssh')
457 priv_key_file = posixpath.join(ssh_folder, 'id_rsa')
458 pub_key_file = priv_key_file + '.pub'
459 known_hosts_file = posixpath.join(ssh_folder, 'known_hosts')
460 auth_key_file = posixpath.join(ssh_folder, 'authorized_keys')
461 self.add_to_known_hosts(username, nodes)
462
463 nodes = filter(lambda n: n.id != self.id, nodes)
464
465 self.copy_remote_file_to_nodes(priv_key_file, nodes)
466 self.copy_remote_file_to_nodes(pub_key_file, nodes)
467
468 self.copy_remote_file_to_nodes(auth_key_file, nodes)
469 self.copy_remote_file_to_nodes(known_hosts_file, nodes)
470
473
475 """
476 Copies a remote file from this Node instance to another Node instance
477 without passwordless ssh between the two.
478
479 dest - path to store the data in on the node (defaults to remote_file)
480 """
481 if not dest:
482 dest = remote_file
483 rf = self.ssh.remote_file(remote_file, 'r')
484 contents = rf.read()
485 sts = rf.stat()
486 mode = stat.S_IMODE(sts.st_mode)
487 uid = sts.st_uid
488 gid = sts.st_gid
489 rf.close()
490 for node in nodes:
491 if self.id == node.id and remote_file == dest:
492 log.warn("src and destination are the same: %s, skipping" %
493 remote_file)
494 continue
495 nrf = node.ssh.remote_file(dest, 'w')
496 nrf.write(contents)
497 nrf.chown(uid, gid)
498 nrf.chmod(mode)
499 nrf.close()
500
502 """
503 Remove a user from the remote system
504 """
505 self.ssh.execute('userdel %s' % name)
506 self.ssh.execute('groupdel %s' % name)
507
509 """
510 Export each path in export_paths to each node in nodes via NFS
511
512 nodes - list of nodes to export each path to
513 export_paths - list of paths on this remote host to export to each node
514
515 Example:
516 # export /home and /opt/sge6 to each node in nodes
517 $ node.start_nfs_server()
518 $ node.export_fs_to_nodes(\
519 nodes=[node1,node2], export_paths=['/home', '/opt/sge6']
520 """
521
522 nfs_export_settings = "(async,no_root_squash,no_subtree_check,rw)"
523 etc_exports = self.ssh.remote_file('/etc/exports')
524 for node in nodes:
525 for path in export_paths:
526 etc_exports.write(' '.join([path, node.alias + \
527 nfs_export_settings + '\n']))
528 etc_exports.close()
529 self.ssh.execute('exportfs -a')
530
532 """
533 Removes nodes from this node's /etc/exportfs
534
535 nodes - list of nodes to stop
536
537 Example:
538 $ node.remove_export_fs_to_nodes(nodes=[node1,node2])
539 """
540 regex = '|'.join(map(lambda x: x.alias, nodes))
541 self.ssh.remove_lines_from_file('/etc/exports', regex)
542 self.ssh.execute('exportfs -a')
543
545 self.ssh.execute('/etc/init.d/portmap start')
546 self.ssh.execute('mount -t rpc_pipefs sunrpc /var/lib/nfs/rpc_pipefs/',
547 ignore_exit_status=True)
548 self.ssh.execute('/etc/init.d/nfs start')
549 self.ssh.execute('/usr/sbin/exportfs -r')
550
552 """
553 Mount each path in remote_paths from the remote server_node
554
555 server_node - remote server node that is sharing the remote_paths
556 remote_paths - list of remote paths to mount from server_node
557 """
558 self.ssh.execute('/etc/init.d/portmap start')
559
560 self.ssh.execute('mount -t devpts none /dev/pts',
561 ignore_exit_status=True)
562 remote_paths_regex = '|'.join(map(lambda x: x.center(len(x) + 2),
563 remote_paths))
564 self.ssh.remove_lines_from_file('/etc/fstab', remote_paths_regex)
565 fstab = self.ssh.remote_file('/etc/fstab', 'a')
566 for path in remote_paths:
567 fstab.write('%s:%s %s nfs user,rw,exec,noauto 0 0\n' %
568 (server_node.alias, path, path))
569 fstab.close()
570 for path in remote_paths:
571 if not self.ssh.path_exists(path):
572 self.ssh.makedirs(path)
573 self.ssh.execute('mount %s' % path)
574
576 mount_map = {}
577 mount_lines = self.ssh.execute('mount')
578 for line in mount_lines:
579 dev, on_label, path, type_label, fstype, options = line.split()
580 mount_map[dev] = [path, fstype, options]
581 return mount_map
582
584 """
585 Mount device to path
586 """
587 self.ssh.remove_lines_from_file('/etc/fstab',
588 path.center(len(path) + 2))
589 master_fstab = self.ssh.remote_file('/etc/fstab', mode='a')
590 master_fstab.write("%s %s auto noauto,defaults 0 0\n" % \
591 (device, path))
592 master_fstab.close()
593 if not self.ssh.path_exists(path):
594 self.ssh.makedirs(path)
595 self.ssh.execute('mount %s' % path)
596
606
608 """
609 Remove all network names for node in nodes arg from this node's
610 /etc/hosts file
611 """
612 aliases = map(lambda x: x.alias, nodes)
613 self.ssh.remove_lines_from_file('/etc/hosts', '|'.join(aliases))
614
616 """
617 Set this node's hostname to self.alias
618
619 hostname - optional hostname to set (defaults to self.alias)
620 """
621 hostname = hostname or self.alias
622 hostname_file = self.ssh.remote_file("/etc/hostname", "w")
623 hostname_file.write(hostname)
624 hostname_file.close()
625 self.ssh.execute('hostname -F /etc/hostname')
626
627 @property
636
637 @property
650
652 """
653 Detaches all volumes returned by self.attached_vols
654 """
655 block_devs = self.attached_vols
656 for dev in block_devs:
657 vol_id = block_devs[dev].volume_id
658 vol = self.ec2.get_volume(vol_id)
659 log.info("Detaching volume %s from %s" % (vol.id, self.alias))
660 if vol.status not in ['available', 'detaching']:
661 vol.detach()
662
664 """
665 Detach and destroy EBS root volume (EBS-backed node only)
666 """
667 if not self.is_ebs_backed():
668 return
669 root_vol = self.block_device_mapping[self.root_device_name]
670 vol_id = root_vol.volume_id
671 vol = self.ec2.get_volume(vol_id)
672 vol.detach()
673 while vol.update() != 'availabile':
674 time.sleep(5)
675 log.info("Deleting node %s's root volume" % self.alias)
676 root_vol.delete()
677
678 @property
680 if self.instance.spot_instance_request_id:
681 return self.instance.spot_instance_request_id
682
688
690 return self.alias == "master"
691
694
697
700
703
706
708 return self.spot_id is not None
709
711 """
712 Starts EBS-backed instance and puts it in the 'running' state.
713 Only works if this node is EBS-backed, raises
714 exception.InvalidOperation otherwise.
715 """
716 if not self.is_ebs_backed():
717 raise exception.InvalidOperation(
718 "Only EBS-backed instances can be started")
719 return self.instance.start()
720
722 """
723 Shutdown EBS-backed instance and put it in the 'stopped' state.
724 Only works if this node is EBS-backed, raises
725 exception.InvalidOperation otherwise.
726
727 NOTE: The EBS root device will *not* be deleted and the instance can
728 be 'started' later on.
729 """
730 if self.is_spot():
731 raise exception.InvalidOperation(
732 "spot instances can not be stopped")
733 elif not self.is_ebs_backed():
734 raise exception.InvalidOperation(
735 "Only EBS-backed instances can be stopped")
736 log.info("Stopping instance: %s (%s)" % (self.alias, self.id))
737 return self.instance.stop()
738
740 """
741 Shutdown and destroy this instance. For EBS-backed nodes, this
742 will also destroy the node's EBS root device. Puts this node
743 into a 'terminated' state.
744 """
745 log.info("Terminating node: %s (%s)" % (self.alias, self.id))
746 return self.instance.terminate()
747
749 """
750 Shutdown this instance. This method will terminate traditional
751 instance-store instances and stop EBS-backed instances
752 (ie not destroy EBS root dev)
753 """
754 if self.is_ebs_backed() and not self.is_spot():
755 self.stop()
756 else:
757 self.terminate()
758
760 """
761 Reboot this instance.
762 """
763 self.instance.reboot()
764
770
772 if self.update() != 'running':
773 return False
774 if not self.is_ssh_up():
775 return False
776 if self.private_ip_address is None:
777 log.debug("instance %s has no private_ip_address" % self.id)
778 log.debug(("attempting to determine private_ip_address for" + \
779 "instance %s") % self.id)
780 try:
781 private_ip = self.ssh.execute((
782 'python -c ' + \
783 '"import socket; print socket.gethostbyname(\'%s\')"') % \
784 self.private_dns_name)[0].strip()
785 log.debug("determined instance %s's private ip to be %s" % \
786 (self.id, private_ip))
787 self.instance.private_ip_address = private_ip
788 except Exception, e:
789 print e
790 return False
791 return True
792
797
798 @property
800 if not self._ssh:
801 self._ssh = ssh.SSHClient(self.instance.dns_name,
802 username=self.user,
803 private_key=self.key_location)
804 return self._ssh
805
def shell(self, user=None):
    """
    Open an interactive shell on this node.

    The system's ssh command is used when it is available; otherwise the
    pure-python ssh client's interactive shell is used instead.

    user - optional user to log in as (defaults to self.user)

    Raises exception.InstanceNotRunning when the node's state is anything
    other than 'running'.
    """
    if self.state != 'running':
        # pick a human readable label for the error message
        if self.alias == "master":
            label = "master node"
        elif self.alias:
            label = "node '%s'" % self.alias
        else:
            label = 'instance'
        raise exception.InstanceNotRunning(self.id, self.state,
                                           label=label)
    login = user or self.user
    if not utils.has_required(['ssh']):
        # no ssh binary on this machine: fall back to the built-in client
        log.debug("using pure-python ssh client")
        self.ssh.interactive_shell(user=login)
        return
    log.debug("using system's ssh client")
    ssh_cmd = static.SSH_TEMPLATE % (self.key_location, login,
                                     self.dns_name)
    log.debug("ssh_cmd: %s" % ssh_cmd)
    os.system(ssh_cmd)
830
def get_hosts_entry(self):
    """
    Returns the /etc/hosts entry for this node: the INTERNAL_IP and
    INTERNAL_ALIAS values of self.network_names separated by a space
    """
    template = "%(INTERNAL_IP)s %(INTERNAL_ALIAS)s"
    return template % self.network_names
836
838 if self._ssh:
839 self._ssh.close()
840