#!/bin/sh

# Abort on the first failing command (-e) and treat the expansion of an
# unset variable as an error (-u).
set -e -u

# Options handed to rsync on every invocation: -rptgoD plus -v and -P.
# The usage text describes the first group as -a/--archive without -l/--links.
default_rsync_opts='-rptgoD -vP'
# Print the help text on stdout.
#
# Reads:   $0 (program name shown in the synopsis), default_rsync_opts
# Outputs: the usage message
usage(){
    # The here-document delimiter is deliberately unquoted: the program name
    # and $default_rsync_opts are expanded, while \$HOME is printed literally.
    # ${0##*/} strips the directory part of $0 with plain POSIX parameter
    # expansion; this needs neither the non-POSIX 'local' nor an external
    # basename call, and it keeps working when the script path contains
    # whitespace.
    cat << eof
Usage:
    ${0##*/} <host> [<extra rsync opts>]

rsync data from a remote host using the same path names, so

  mybox$ pwd
  /home/user42/path/to/my_study
  mybox$ ls
  calc 10input.py 20eval.py
  mybox$ psweep-pull hpc.machine.edu

will rsync

  hpc.machine.edu:/home/hpcuser23/path/to/my_study/calc -> /home/user42/path/to/my_study/calc

We remove \$HOME so all paths are relative to /home/<user> on each machine.

The default rsync options are $default_rsync_opts. The first set of opts is
-a/--archive, *excluding* -l/--links (copy symlinks as symlinks), which means
we ignore links by default. This is b/c links which point to absolute paths on
the remote machine are often meaningless on the local one. If you pass further
arguments after <host>, they will be treated as extra rsync opts and added to
the default.

Sometimes want to use -l/--links (copy links as links) or -L/--copy-links
(transform symlink into referent file/dir). Then we can do

  $ psweep-pull hpc.machine.edu -l
  $ psweep-pull hpc.machine.edu -L

Also, it is always a good idea to pass -n/--dry-run as first test.

  $ psweep-pull hpc.machine.edu -n

If we find rules files 'excl_pull' or 'rsync.exclude', we add
--exclude-from=<file>. If we find 'rsync.rules' or 'rsync.filter', we add
--filter='merge <file>'. If your rules files have a different name, then do

  $ psweep-pull hpc.machine.edu --exclude-from=<file>
  $ psweep-pull hpc.machine.edu --filter='merge <file>'
eof
}

# Parse leading command-line flags. Only -h is accepted: it prints the help
# text and exits successfully. For any other flag getopts sets the variable
# to '?', and the script exits with status 1.
while getopts h flag; do
    if [ "$flag" = h ]; then
        usage
        exit 0
    elif [ "$flag" = "?" ]; then
        exit 1
    fi
done
# Discard the flags that were consumed so $1 is the first remaining argument.
shift "$((OPTIND - 1))"


# Main part: pull ./calc from the same $HOME-relative directory on <host>.
#
# Positional parameters at this point:
#   $1    remote host (required)
#   $2..  extra rsync options, handed to rsync exactly as the user typed them

# Print one word in a form that can be pasted back into a shell: words made
# only of harmless characters are printed unchanged, anything else is wrapped
# in single quotes. Used only for the "executing:" log line.
quote_word() {
    case $1 in
        '' | *[!A-Za-z0-9_@%+=:,./-]*)
            printf "'%s'" "$(printf '%s' "$1" | sed "s/'/'\\\\''/g")"
            ;;
        *)
            printf '%s' "$1"
            ;;
    esac
}

# Without this check 'set -u' would abort on $1 with a cryptic
# "parameter not set" message instead of telling the user what is missing.
if [ "$#" -lt 1 ]; then
    usage >&2
    exit 1
fi
host=$1
shift

# Express the current directory relative to $HOME so the same relative path
# can be used on the remote side. Only a leading $HOME is removed, and it is
# matched literally (not as a regular expression). A directory outside $HOME
# is kept as an absolute path.
cwd=$(pwd)
case $cwd in
    "$HOME")   basedir= ;;
    "$HOME"/*) basedir=${cwd#"$HOME"/} ;;
    *)         basedir=$cwd ;;
esac
if [ -n "$basedir" ]; then
    src="$host:$basedir/calc"
else
    # Running directly in $HOME: the remote path is relative to the remote home.
    src="$host:calc"
fi

# Build the rsync argument vector in the positional parameters. This keeps
# every user-supplied option as exactly one word (e.g. --filter='merge f' or
# --exclude='*.tmp'), which a flattened string passed through eval does not.
# $default_rsync_opts is intentionally unquoted: it is a space-separated list.
# shellcheck disable=SC2086
set -- $default_rsync_opts "$@"

for excl_fn in excl_pull rsync.exclude; do
    if [ -e "$excl_fn" ]; then
        set -- "$@" "--exclude-from=$excl_fn"
    fi
done
for rules_fn in rsync.rules rsync.filter; do
    if [ -e "$rules_fn" ]; then
        set -- "$@" "--filter=merge $rules_fn"
    fi
done
set -- "$@" "$src" ./

# Log a copy-pasteable rendering of the command, then run it directly.
cmd=rsync
for arg in "$@"; do
    cmd="$cmd $(quote_word "$arg")"
done
printf 'executing: %s\n' "$cmd"
rsync "$@"
