Differences between revisions 12 and 13
Revision 12 as of 2010-09-13 03:39:15
Size: 10852
Editor: ?DominiqueBelhachemi
Comment:
Revision 13 as of 2010-09-13 03:44:05
Size: 17106
Editor: ?DominiqueBelhachemi
Comment:
Deletions are marked like this. Additions are marked like this.
Line 81: Line 81:
During the installation phase we compiled simple "MPI-HelloWorld" program.

Start it without torque
During the installation phase we compiled a simple MPI Hello World program.

Start it without Torque
Line 129: Line 129:

#NOTES:
# in whatever mode you are running your Eucalyptus system, you will have two interfaces, one is called public(you can reach the instances from outside),
# one is called inside(you can reach the instances usually only from inside, or maybe from your front end)

# you have to specify a TORQUE SERVER and one or more TORQUE NODES, because I am accessing those instances you have to specify working IP addresses.

# example: SYSTEM mode, both interfaces are using the same address, so there is no
# example: MANAGED modes, public and private interface are different, TORQUE has to be setup for the PRIVATE mode (intra-cloud communtcation need firewall settings)

# usage: bash start_torque.sh --verbose -s="192.168.0.13" -n="192.168.0.14,192.168.0.17,192.168.0.45" -k="~/.euca/mykey.priv"

# default
VERBOSE=0
IN_INSTANCE=0

echo `hostname` : `/sbin/ifconfig eth0 | grep "inet addr" | awk '{print $2}' | sed 's/addr\://'`

for i in $*
do
 case $i in
     -s=*|--torque-server=*)
                # remove option from string
  PUBLIC_TORQUE_SERVER_IP=`echo $i | sed 's/[-a-zA-Z0-9]*=//'`
                echo $PUBLIC_TORQUE_SERVER_IP
  ;;
        -n=*|--torque-nodes=*)
                # remove option from string
                NIPS=`echo $i | sed 's/[-a-zA-Z0-9]*=//'`
                PUBLIC_NODES_IP=`echo $NIPS | sed 's/\,/ /g'`
                echo $NIPS
                echo $PUBLIC_NODES_IP
                ;;
        -k=*|--key=*)
                # remove option from string
                KEY=`echo $i | sed 's/[-a-zA-Z0-9]*=//'`
                echo $KEY
                ;;
     --verbose)
  VERBOSE=1
  ;;
        -i|--in-instance)
                IN_INSTANCE=1
                ;;
        -m=*|--with-mpi=*)
                MPI=`echo $i | sed 's/[-a-zA-Z0-9]*=//'`
                #TODO: only 0 or 1 are feasible values
                ;;
     *)
                echo "unknown option"
  ;;
   esac
done


cat > keygen_in_instance.sh << EOF
#!/bin/bash
su guest -c 'ssh-keygen -t rsa -N "" -f /home/guest/.ssh/id_rsa'
EOF
chmod 755 keygen_in_instance.sh


# BEGIN execution on master #################################################
if [ $IN_INSTANCE -eq 0 ] ; then


    # join server and nodes
    if [[ $PUBLIC_NODES_IP == *$PUBLIC_TORQUE_SERVER_IP* ]]
    then
        ALL_INSTANCES="$PUBLIC_NODES_IP"
    else
        ALL_INSTANCES="$PUBLIC_TORQUE_SERVER_IP $PUBLIC_NODES_IP"
    fi
    echo $ALL_INSTANCES


    # copy setup-torque-script to eucalyptus instances
    for NODE_IP in `echo $ALL_INSTANCES`
    do
        echo $NODE_IP
        # make this host known to ~/.ssh/known_hosts
        eval "ssh -i $KEY -o StrictHostKeychecking=no root@$NODE_IP echo ''"
        eval "/usr/bin/scp -p -i $KEY start_torque.sh root@$NODE_IP:/root/start_torque.sh"

        # MPI example
        eval "scp -p -i $KEY compileMPI.sh helloworld.c root@$NODE_IP:/root/"

        # start script in instance
        eval "ssh -X -i $KEY root@$NODE_IP \"/root/start_torque.sh -s=\"$PUBLIC_TORQUE_SERVER_IP\" -n=\"$NIPS\" -i -m=$MPI \""
    done


    # generate keys in instances - for user guest
    for NODE_IP in `echo $ALL_INSTANCES`
    do
        echo $NODE_IP
        eval "/usr/bin/scp -p -i $KEY keygen_in_instance.sh root@$NODE_IP:/root/keygen_in_instance.sh"
        eval "ssh -X -i $KEY root@$NODE_IP \"/root/keygen_in_instance.sh\""
    done


    # distribute keys
    for NODE_IP in `echo $ALL_INSTANCES`
    do
        # distribute this key to all other nodes
        for NODE_IP2 in `echo $ALL_INSTANCES`
        do
            echo $NODE_IP2
            eval "/usr/bin/scp -p -i $KEY root@$NODE_IP:/home/guest/.ssh/id_rsa.pub /tmp/id_rsa.pub"
            eval "/usr/bin/scp -p -i $KEY /tmp/id_rsa.pub root@$NODE_IP2:/tmp/id_rsa.pub"
            eval "ssh -X -i $KEY root@$NODE_IP2 \"cat /tmp/id_rsa.pub >> /home/guest/.ssh/authorized_keys\""
            #TODO, I need an entry in known_hosts, for now the following happens from within the instances
            #eval "ssh -X -i $KEY root@$NODE_IP \"ssh -o StrictHostKeychecking=no guest@$NODE_IP2 & echo '' & wait\""
        done
        eval "ssh -X -i $KEY root@$NODE_IP /root/hosts.sh"
    done


    exit # on master don't execute commands for instances
fi
# END execution on master #################################################


# BEGIN execution in instance ###############################################
usage()
{
cat << EOF
usage: $0 options

This script starts the torque environment.

OPTIONS:

   -h Show this message
   -n nodes e.g. "192.168.0.14,192.168.0.14"
   -s torque server ip e.g. "192.168.0.13"
   -k key file
   -v Verbose
   -m With MPI support

example: start_torque.sh --verbose -s="192.168.0.13" -n="192.168.0.14,192.168.0.17,192.168.0.45" -k="~/.euca/mykey.priv"
EOF
}

#SERVER_IP NIPS KEY
#if [[ -z $PUBLIC_NODES_IP ]] || [[ -z $PUBLIC_TORQUE_SERVER_IP ]]
#then
# usage
# exit 1
#fi
Line 162: Line 312:
#SET values
#conf1
PUBLIC_TORQUE_SERVER_IP="192.168.0.2"
PUBLIC_NODES="192.168.0.3 192.168.0.4 192.168.0.5 192.168.0.6 192.168.0.7 192.168.0.8"
Line 174: Line 319:
#MODE="system"
Line 184: Line 329:
PUBLIC_INSTANCE_IP=`curl -s $METADATA_URL/public-ipv4`
if [ $MODE == "public" ] ; then
 PUBLIC_INSTANCE_IP=`/sbin/ifconfig eth0 | grep "inet addr" | awk '{print $2}' | sed 's/addr\://'`
else
Line 186: Line 334:
 PUBLIC_INSTANCE_IP=`curl -s $METADATA_URL/public-ipv4`
fi
Line 199: Line 350:
   NODES=$PUBLIC_NODES    NODES=$PUBLIC_NODES_IP
Line 259: Line 410:
# install libopenmpi-dev
install_package "libopenmpi-dev"

#
install openmpi-bin
install_package "openmpi-bin"
# install OpenMPI packages
if [ $MPI -eq 1 ] ; then
   install_package "libopenmpi-dev"
   install_package "openmpi-bin"
   #compile MPI test program
   bash compileMPI.sh
fi
Line 266: Line 419:
Line 273: Line 427:
      echo "$NODE_IP $NODE_HOSTNAME" >> /etc/torque/hostfile       mkdir -p /etc/torque
echo "$NODE_HOSTNAME slots=1" >> /etc/torque/hostfile
Line 279: Line 434:
   for NODE_IP in `echo $PUBLIC_NODES`    for NODE_IP in `echo $PUBLIC_NODES_IP`
Line 282: Line 437:
      echo "$NODE_IP $NODE_HOSTNAME" >> /etc/hosts       if [ $INSTANCE_IP != $TORQUE_SERVER_IP ] || [ $NODE_IP != $TORQUE_SERVER_IP ]; then
         if ! egrep -q "$NODE_IP|$NODE_HOSTNAME" /etc/hosts ; then
            echo "$NODE_IP $NODE_HOSTNAME" >> /etc/hosts
         fi
      fi
Line 284: Line 443:
      echo "$NODE_IP $NODE_HOSTNAME" >> /etc/torque/hostfile       mkdir -p /etc/torque
      if ! egrep -q "$NODE_HOSTNAME" /etc/torque/hostfile ; then
         echo "$NODE_HOSTNAME slots=1" >> /etc/torque/hostfile
         echo "(su - guest -c \"ssh -t -t -o StrictHostKeychecking=no guest@$NODE_HOSTNAME echo ''\")& wait" >> /root/hosts.sh # for key distribution
      fi
Line 286: Line 449:
fi
   if ! egrep -q "$PUBLIC_TORQUE_SERVER_HOSTNAME" /etc/torque/hostfile ; then
      echo "(su - guest -c \"ssh -t -t -o StrictHostKeychecking=no guest@$PUBLIC_TORQUE_SERVER_HOSTNAME echo ''\")& wait" >> /root/hosts.sh # for key distribution
   fi
fi
chmod 755 /root/hosts.sh
Line 294: Line 460:
   echo "127.0.1.1 $PUBLIC_INSTANCE_HOSTNAME" >> /etc/hosts

   echo "$PRIVATE_INSTANCE_IP $PRIVATE_INSTANCE_HOSTNAME" >> /etc/hosts
   if ! egrep -q "127.0.1.1|$PUBLIC_INSTANCE_HOSTNAME" /etc/hosts ; then
      echo "127.0.1.1 $PUBLIC_INSTANCE_HOSTNAME" >> /etc/hosts
   fi

# echo "$PRIVATE_INSTANCE_IP $PRIVATE_INSTANCE_HOSTNAME" >> /etc/hosts
Line 298: Line 466:
   echo "$TORQUE_SERVER_IP $TORQUE_SERVER_HOSTNAME" >> /etc/hosts    if ! egrep -q "$TORQUE_SERVER_IP|$TORQUE_SERVER_HOSTNAME" /etc/hosts ; then
   
echo "$TORQUE_SERVER_IP $TORQUE_SERVER_HOSTNAME" >> /etc/hosts
   fi
Line 312: Line 482:
   apt-get -o Dpkg::Options::="--force-confnew" --force-yes -y install torque-mom torque-server torque-scheduler torque-client
   #aptitude -y install torque-mom torque-server torque-scheduler torque-client
else
   apt-get -o Dpkg::Options::="--force-confnew" --force-yes -y install torque-server torque-scheduler torque-client
   #aptitude -y install torque-server torque-scheduler torque-client
fi

if [[ $PUBLIC_NODES_IP == *$INSTANCE_IP* ]]; then
Line 322: Line 494:
Line 323: Line 496:
USER=userA USER=auser
Line 341: Line 514:
## on TORQUE mom
echo $TORQUE_SERVER_HOSTNAME > /etc/torque/server_name
echo "\$timeout 120" > /var/spool/torque/mom_priv/config # more options possible (NFS...)
echo "\$loglevel 5" >> /var/spool/torque/mom_priv/config # more options possible (NFS...)

/etc/init.d/torque-mom restart
cat /var/spool/torque/mom_logs/$DATE


## on
TORQUE server
## for TORQUE mom
if [[ $PUBLIC_NODES_IP == *$INSTANCE_IP* ]]; then
    
echo $TORQUE_SERVER_HOSTNAME > /etc/torque/server_name
    echo "\$timeout 120" > /var/spool/torque/mom_priv/config # more options possible (NFS...)
    echo "\$loglevel 5" >> /var/spool/torque/mom_priv/config # more options possible (NFS...)
    /etc/init.d/torque-mom restart
    cat /var/spool/torque/mom_logs/$DATE
fi

## for
TORQUE server
Line 379: Line 552:
#debug
Line 383: Line 556:
fi
}}}
   cat /etc/torque/server_name
fi
}}}

Running Torque inside of Eucalyptus

This page describes how to set up a Torque cluster inside a Eucalyptus cloud.

TORQUE

Debian TORQUE package

 $ source ~/.euca/eucarc

Specify a Squeeze image

 $ EMI=emi-1AF00C98

Start two instances of our Squeeze image

 $ euca-run-instances $EMI -k mykey -t c1.medium -n2
 RESERVATION    r-4488080C      myuser  myuser-default
 INSTANCE       i-57E309BE      emi-1AF00C98    0.0.0.0 0.0.0.0 pending mykey   2010-09-13T02:31:51.172Z        eki-D224100C    eri-059910F2
 INSTANCE       i-4C1F0986      emi-1AF00C98    0.0.0.0 0.0.0.0 pending mykey   2010-09-13T02:31:51.173Z        eki-D224100C    eri-059910F2

After a few seconds it will be running

 $ euca-describe-instances 
 RESERVATION    r-4488080C      myuser default
 INSTANCE       i-4C1F0986      emi-1AF00C98    192.168.0.14    192.168.0.14    running         mykey   1       c1.medium       2010-09-13T02:31:51.173Z   mycloud    eki-D224100C    eri-059910F2
 INSTANCE       i-57E309BE      emi-1AF00C98    192.168.0.15    192.168.0.15    running         mykey   0       c1.medium       2010-09-13T02:31:51.172Z   mycloud    eki-D224100C    eri-059910F2

Let's say you want to start a Torque server on 192.168.0.14 and two Torque workers on 192.168.0.14 and 192.168.0.15, with MPI enabled

  • $ bash start_torque.sh -s="192.168.0.14" -n="192.168.0.14,192.168.0.15" -k="~/.euca/mykey.priv" -m=1

}}}

This will install all necessary torque packages in the instances. It might take a few minutes, depending on the internet connection and processor speed of the instances.

Connect to an instance as root with your key

 ssh -X -i ~/.euca/mykey.priv root@192.168.0.14

virtual: Switch to the guest user

 su - guest

Check if nodes are up

 pbsnodes

Perform some simple tests

 echo "sleep 10" | qsub
 echo "sleep 5" | qsub
 echo "hostname" | qsub
 echo "sleep 15" | qsub
 echo "hostname" | qsub
 echo "sleep 3" | qsub

Look at the queue

 qstat

Run a sleep job on two worker nodes

 echo "sleep 10" | qsub -l nodes=2

Check if both nodes are in state 'job-exclusive'

 pbsnodes

During the installation phase we compiled a simple MPI Hello World program.

Start it without Torque

$ mpiexec -n 4 /tmp/hello.out
Hello MPI from the server process!
Hello MPI!
 mesg from 1 of 4 on ip-192-168-0-14
Hello MPI!
 mesg from 2 of 4 on ip-192-168-0-14
Hello MPI!
 mesg from 3 of 4 on ip-192-168-0-14

Start it with Torque (without -tm support)

# The delimiter is quoted so that $PBS_O_WORKDIR is written literally into the
# job script and expanded when the job runs, not by the shell creating the file.
cat <<'EOF' > mpi-test_2_1_mpirun
#PBS -N helloworld
#PBS -l nodes=2:ppn=1
cd $PBS_O_WORKDIR

/usr/bin/mpirun -np 2 --hostfile /etc/torque/hostfile -v -v -v /tmp/hello.out
EOF

qsub mpi-test_2_1_mpirun

Check the output files

cat helloworld.o*
Hello MPI from the server process!
Hello MPI!
 mesg from 1 of 2 on ip-192-168-0-15

cat helloworld.e*

Start it with Torque (with -tm support)

package is ready but not in squeeze yet

example script for setting up torque:

# -e: abort on the first command that fails; -x: trace every command as it runs
set -ex

#NOTES:
# in whatever mode you are running your Eucalyptus system, you will have two interfaces, one is called public(you can reach the instances from outside),
# one is called inside(you can reach the instances usually only from inside, or maybe from your front end)

# you have to specify a TORQUE SERVER and one or more TORQUE NODES, because I am accessing those instances you have to specify working IP addresses.

# example: SYSTEM mode,   both interfaces use the same address, so there is no difference between the public and the private setup
# example: MANAGED modes, public and private interfaces are different; TORQUE has to be set up for the PRIVATE mode (intra-cloud communication needs firewall settings)

# usage: bash start_torque.sh --verbose -s="192.168.0.13" -n="192.168.0.14,192.168.0.17,192.168.0.45" -k="~/.euca/mykey.priv"

# defaults; --verbose and -i/--in-instance switch them to 1
IN_INSTANCE=0
VERBOSE=0

# print "<hostname> : <IPv4 address of eth0>" for the log
echo $(hostname) : $(/sbin/ifconfig eth0 | awk '/inet addr/ {print $2}' | sed 's/addr\://')

#######################################
# Parse the command-line options of this script.
# Globals (written): PUBLIC_TORQUE_SERVER_IP, NIPS, PUBLIC_NODES_IP, KEY,
#                    VERBOSE, IN_INSTANCE, MPI
# Arguments: the script's arguments; value options have the form
#            -x=value or --long-name=value
# Outputs:   echoes each parsed value (and unknown options) to stdout
#######################################
parse_args() {
        local arg
        # "$@" keeps every argument intact; $* would re-split and glob them
        for arg in "$@"
        do
                case "$arg" in
                -s=*|--torque-server=*)
                        # keep everything after the first '='
                        PUBLIC_TORQUE_SERVER_IP=${arg#*=}
                        echo "$PUBLIC_TORQUE_SERVER_IP"
                        ;;
                -n=*|--torque-nodes=*)
                        # NIPS keeps the comma-separated form (it is passed on to
                        # the instances), PUBLIC_NODES_IP is the space-separated list
                        NIPS=${arg#*=}
                        PUBLIC_NODES_IP=${NIPS//,/ }
                        echo "$NIPS"
                        echo "$PUBLIC_NODES_IP"
                        ;;
                -k=*|--key=*)
                        KEY=${arg#*=}
                        echo "$KEY"
                        ;;
                --verbose)
                        VERBOSE=1
                        ;;
                -i|--in-instance)
                        IN_INSTANCE=1
                        ;;
                -m=*|--with-mpi=*)
                        MPI=${arg#*=}
                        #TODO: only 0 or 1 are feasible values
                        ;;
                *)
                        echo "unknown option: $arg"
                        ;;
                esac
        done
}

parse_args "$@"


# Write the two-line helper that creates a passphrase-less RSA key pair for
# user guest; it is copied to every instance and executed there.
printf '%s\n' \
  '#!/bin/bash' \
  "su guest -c 'ssh-keygen -t rsa -N \"\" -f /home/guest/.ssh/id_rsa'" \
  > keygen_in_instance.sh
chmod 755 keygen_in_instance.sh


# BEGIN execution on master #################################################
# Runs where the user started the script (no -i flag): copies this script and
# the MPI example to every instance, runs the script there with -i, creates an
# ssh key pair for user guest on every instance and appends every public key
# to every instance's authorized_keys.
if [ "$IN_INSTANCE" -eq 0 ] ; then

    # The key path usually arrives as a literal "~/..." because it was quoted
    # on the command line. Expand the leading "~/" here so the ssh/scp calls
    # below can be run directly instead of through eval.
    case "$KEY" in
        "~/"*) KEY_FILE="$HOME/${KEY#"~/"}" ;;
        *)     KEY_FILE="$KEY" ;;
    esac

    # join server and nodes; compare whole addresses so that e.g. a server at
    # 192.168.0.1 is not taken for the node 192.168.0.14
    if [[ " $PUBLIC_NODES_IP " == *" $PUBLIC_TORQUE_SERVER_IP "* ]]
    then
        ALL_INSTANCES="$PUBLIC_NODES_IP"
    else
        ALL_INSTANCES="$PUBLIC_TORQUE_SERVER_IP $PUBLIC_NODES_IP"
    fi
    echo $ALL_INSTANCES


    # copy setup-torque-script to eucalyptus instances
    # ($ALL_INSTANCES is deliberately unquoted: it is a space-separated list)
    for NODE_IP in $ALL_INSTANCES
    do
        echo "$NODE_IP"
        # make this host known to ~/.ssh/known_hosts
        ssh -i "$KEY_FILE" -o StrictHostKeychecking=no "root@$NODE_IP" echo ''
        /usr/bin/scp -p -i "$KEY_FILE" start_torque.sh "root@$NODE_IP:/root/start_torque.sh"

        # MPI example
        scp -p -i "$KEY_FILE" compileMPI.sh helloworld.c "root@$NODE_IP:/root/"

        # start script in instance; an MPI flag that was never given is sent as 0
        ssh -X -i "$KEY_FILE" "root@$NODE_IP" \
            "/root/start_torque.sh -s=$PUBLIC_TORQUE_SERVER_IP -n=$NIPS -i -m=${MPI:-0} "
    done


    # generate keys in instances - for user guest
    for NODE_IP in $ALL_INSTANCES
    do
        echo "$NODE_IP"
        /usr/bin/scp -p -i "$KEY_FILE" keygen_in_instance.sh "root@$NODE_IP:/root/keygen_in_instance.sh"
        ssh -X -i "$KEY_FILE" "root@$NODE_IP" "/root/keygen_in_instance.sh"
    done


    # distribute keys
    # private local scratch file instead of a fixed name in /tmp; removed on exit
    PUBKEY_TMP=$(mktemp)
    trap 'rm -f -- "$PUBKEY_TMP"' EXIT
    for NODE_IP in $ALL_INSTANCES
    do
        # fetch this node's public key once ...
        /usr/bin/scp -p -i "$KEY_FILE" "root@$NODE_IP:/home/guest/.ssh/id_rsa.pub" "$PUBKEY_TMP"
        # ... and distribute it to all nodes (including the node itself)
        for NODE_IP2 in $ALL_INSTANCES
        do
            echo "$NODE_IP2"
            /usr/bin/scp -p -i "$KEY_FILE" "$PUBKEY_TMP" "root@$NODE_IP2:/tmp/id_rsa.pub"
            ssh -X -i "$KEY_FILE" "root@$NODE_IP2" "cat /tmp/id_rsa.pub >> /home/guest/.ssh/authorized_keys"
            #TODO, I need an entry in known_hosts, for now the following happens from within the instances
            #ssh -X -i "$KEY_FILE" "root@$NODE_IP" "ssh -o StrictHostKeychecking=no guest@$NODE_IP2 & echo '' & wait"
        done
        # hosts.sh was written by the in-instance run of this script
        ssh -X -i "$KEY_FILE" "root@$NODE_IP" /root/hosts.sh
    done


    exit # on master don't execute commands for instances
fi
# END   execution on master #################################################


# BEGIN execution in instance ###############################################
# Print the help text (synopsis, option list and an example call) to stdout.
usage() {
  printf '%s\n' \
    "usage: $0 options" \
    '' \
    'This script starts the torque environment.' \
    '' \
    'OPTIONS:' \
    '' \
    '   -h      Show this message' \
    '   -n      nodes              e.g. "192.168.0.14,192.168.0.14"' \
    '   -s      torque server ip   e.g. "192.168.0.13"' \
    '   -k      key file' \
    '   -v      Verbose' \
    '   -m      With MPI support' \
    '' \
    'example: start_torque.sh --verbose -s="192.168.0.13" -n="192.168.0.14,192.168.0.17,192.168.0.45" -k="~/.euca/mykey.priv"'
}

#SERVER_IP  NIPS   KEY
#if [[ -z $PUBLIC_NODES_IP ]] || [[ -z $PUBLIC_TORQUE_SERVER_IP ]]
#then
#     usage
#     exit 1
#fi

#######################################
# Install a Debian package unless dpkg already reports it as installed.
# Arguments: $1 - package name
# Outputs:   a status line on stdout; a failure message on stderr
# Returns:   0 if the package is (now) installed, 1 if apt-get failed
#######################################
function install_package {
 local package=$1
 local status

 # dpkg-query exits non-zero for packages it does not know; only its output
 # matters here, so the exit status must not trip `set -e`
 status=$(dpkg-query -W -f='${Status}\n' "$package") || true

 if [ "$status" = "install ok installed" ] ; then
  echo "package $package is already installed"
  return 0
 fi

 # test apt-get directly: a separate `$?` check is never reached under `set -e`
 if ! apt-get -o Dpkg::Options::="--force-confnew" --force-yes -y install "$package" ; then
  echo "apt-get install $package failed" >&2
  return 1
 fi
}


# front-end settings for unattended package installation
export DEBIAN_FRONTEND="noninteractive" APT_LISTCHANGES_FRONTEND="none"

# instance metadata service
API_VERSION="2008-02-01"
METADATA_URL="http://169.254.169.254/${API_VERSION}/meta-data"
CURL="/usr/bin/curl"


# those variables are needed for the locales package
export LANGUAGE=en_US.UTF-8 LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8

# for dialog frontend
export PATH="$PATH:/sbin:/usr/sbin:/usr/local/sbin"
export TERM=linux


# addresses used when MODE is "private"
PRIVATE_TORQUE_SERVER_IP="172.16.1.2"
PRIVATE_NODES="172.16.1.2   172.16.1.3   172.16.1.4   172.16.1.5   172.16.1.6   172.16.1.7   172.16.1.8"



# which interface TORQUE is set up on
MODE="public"
#MODE="private"
#MODE="system"

# host names are derived from the address: "ip-" followed by the IP with dots turned into dashes
PUBLIC_TORQUE_SERVER_HOSTNAME="ip-${PUBLIC_TORQUE_SERVER_IP//./-}"
echo $PUBLIC_TORQUE_SERVER_IP $PUBLIC_TORQUE_SERVER_HOSTNAME

PRIVATE_TORQUE_SERVER_HOSTNAME="ip-${PRIVATE_TORQUE_SERVER_IP//./-}"
echo $PRIVATE_TORQUE_SERVER_IP $PRIVATE_TORQUE_SERVER_HOSTNAME


#GET INSTANCE IPs, create hostnames

# public address: taken from eth0 in public mode, otherwise from the metadata service
case $MODE in
 public)
  PUBLIC_INSTANCE_IP=$(/sbin/ifconfig eth0 | awk '/inet addr/ {print $2}' | sed 's/addr\://')
  ;;
 *)
  #PUBLIC_INSTANCE_IP=192.168.0.115
  PUBLIC_INSTANCE_IP=$(curl -s $METADATA_URL/public-ipv4)
  ;;
esac

#PUBLIC_INSTANCE_HOSTNAME=`curl -s $METADATA_URL/public-hostname`
PUBLIC_INSTANCE_HOSTNAME=ip-$(echo $PUBLIC_INSTANCE_IP | sed 's/\./-/g')
echo $PUBLIC_INSTANCE_IP $PUBLIC_INSTANCE_HOSTNAME

# private address: always whatever eth0 carries
PRIVATE_INSTANCE_IP=$(/sbin/ifconfig eth0 | awk '/inet addr/ {print $2}' | sed 's/addr\://')
PRIVATE_INSTANCE_HOSTNAME=ip-$(echo $PRIVATE_INSTANCE_IP | sed 's/\./-/g')
echo $PRIVATE_INSTANCE_IP $PRIVATE_INSTANCE_HOSTNAME



#using PUBLIC or PRIVATE interface
# Copy the public or the private set of values into the mode-independent
# variables used from here on.
# NOTE(review): any other MODE only prints a message and the script carries on
# with these variables unset - confirm whether it should abort instead.
case $MODE in
   public)
      INSTANCE_HOSTNAME=$PUBLIC_INSTANCE_HOSTNAME
      NODES=$PUBLIC_NODES_IP
      INSTANCE_IP=$PUBLIC_INSTANCE_IP
      TORQUE_SERVER_IP=$PUBLIC_TORQUE_SERVER_IP
      TORQUE_SERVER_HOSTNAME=$PUBLIC_TORQUE_SERVER_HOSTNAME
      ;;
   private)
      INSTANCE_HOSTNAME=$PRIVATE_INSTANCE_HOSTNAME
      NODES=$PRIVATE_NODES
      INSTANCE_IP=$PRIVATE_INSTANCE_IP
      TORQUE_SERVER_IP=$PRIVATE_TORQUE_SERVER_IP
      TORQUE_SERVER_HOSTNAME=$PRIVATE_TORQUE_SERVER_HOSTNAME
      ;;
   *)
      echo "please specify private or public interface"
      ;;
esac


# using Google's nameserver; added only once so that re-running the script
# does not pile up duplicate lines
if ! grep -qsxF "nameserver 8.8.8.8" /etc/resolv.conf ; then
   echo "nameserver 8.8.8.8" >> /etc/resolv.conf
fi


# update the package index first
#echo "deb http://ftp.us.debian.org/debian squeeze main" > /etc/apt/sources.list
#echo "deb http://security.debian.org/ squeeze/updates main" >> /etc/apt/sources.list
#aptitude update
# Tested inside the `if` so that the warning is actually printed: with `set -e`
# a separate `$?` check after the command is never reached.
if ! apt-get -o Dpkg::Options::="--force-confnew" --force-yes -y update ; then
   echo "apt-get update failed" >&2
fi



# locales first: gets rid of error messages caused by the missing locales package
install_package locales
printf '%s\n' "en_US.UTF-8 UTF-8" > /etc/locale.gen
locale-gen

# portmap is needed for NFS
install_package portmap
#TODO mount here


# nmap, used right away to scan ports 1-20000 of this instance
install_package nmap
nmap localhost -p 1-20000

# lsb-release provides lsb_release
install_package lsb-release

# Print some Information about the Operating System
DISTRIBUTOR=$(lsb_release -i | awk '{print $3}')
CODENAME=$(lsb_release -c | awk '{print $2}')
echo $DISTRIBUTOR $CODENAME


# ntpdate, followed by a one-off clock synchronisation
install_package ntpdate
###ntpdate pool.ntp.org
ntpdate ntp.ubuntu.com

# install OpenMPI packages
# MPI stays unset when -m/--with-mpi is not given (and is empty for "-m="), so
# treat that as 0. A string comparison also avoids a "[: integer expression
# expected" error for any other value.
if [ "${MPI:-0}" = "1" ] ; then
   install_package "libopenmpi-dev"
   install_package "openmpi-bin"
   #compile MPI test program
   bash compileMPI.sh
fi

# make hostnames known to all the TORQUE nodes and server/scheduler

#######################################
# Tell whether a file already mentions one of the given words.
# Matching is fixed-string and whole-word, so 192.168.0.1 is not "found"
# inside 192.168.0.14 and the dots of an IP are not regex wildcards.
# Arguments: $1 - file to search; $2... - words (empty ones are ignored)
# Returns:   0 if any word occurs in the file, non-zero otherwise
#            (also for a missing file or when no word is given)
#######################################
file_has_word() {
   local file=$1 word
   local -a patterns=()
   shift
   for word in "$@"; do
      if [ -n "$word" ]; then
         patterns+=(-e "$word")
      fi
   done
   if [ "${#patterns[@]}" -eq 0 ]; then
      return 1
   fi
   grep -qswF "${patterns[@]}" -- "$file"
}


if [ "$MODE" == "private" ] ; then
   mkdir -p /etc/torque
   for NODE_IP in $PRIVATE_NODES
   do
      NODE_HOSTNAME=ip-${NODE_IP//./-}
      # same duplicate protection as in public mode, so re-runs stay idempotent
      if ! file_has_word /etc/hosts "$NODE_IP" "$NODE_HOSTNAME" ; then
         echo "$NODE_IP   $NODE_HOSTNAME" >> /etc/hosts
      fi
      #MPI support
      if ! file_has_word /etc/torque/hostfile "$NODE_HOSTNAME" ; then
         echo "$NODE_HOSTNAME slots=1" >> /etc/torque/hostfile
      fi
   done
fi


if [ "$MODE" == "public" ] ; then
   mkdir -p /etc/torque
   for NODE_IP in $PUBLIC_NODES_IP
   do
      NODE_HOSTNAME=ip-${NODE_IP//./-}
      # skipped only for the server's own address when running on the server;
      # that case gets its 127.0.1.1 entry in the server section below
      if [ "$INSTANCE_IP" != "$TORQUE_SERVER_IP" ] || [ "$NODE_IP" != "$TORQUE_SERVER_IP" ]; then
         if ! file_has_word /etc/hosts "$NODE_IP" "$NODE_HOSTNAME" ; then
            echo "$NODE_IP   $NODE_HOSTNAME" >> /etc/hosts
         fi
      fi
      #MPI support
      if ! file_has_word /etc/torque/hostfile "$NODE_HOSTNAME" ; then
         echo "$NODE_HOSTNAME slots=1" >> /etc/torque/hostfile
         echo "(su - guest -c \"ssh -t -t -o StrictHostKeychecking=no guest@$NODE_HOSTNAME echo ''\")& wait" >> /root/hosts.sh                # for key distribution
      fi
   done
   # the server may not be a worker node; it still needs a known_hosts entry
   if ! file_has_word /etc/torque/hostfile "$PUBLIC_TORQUE_SERVER_HOSTNAME" ; then
      echo "(su - guest -c \"ssh -t -t -o StrictHostKeychecking=no guest@$PUBLIC_TORQUE_SERVER_HOSTNAME echo ''\")& wait" >> /root/hosts.sh   # for key distribution
   fi
fi
# hosts.sh is only written in public mode; make sure it exists so that chmod
# (and the master, which runs it over ssh) does not fail
touch /root/hosts.sh
chmod 755 /root/hosts.sh



## on TORQUE server
if [ "$INSTANCE_IP" == "$TORQUE_SERVER_IP" ]; then
   #this one is for the scheduler, if using the public interface
   if ! file_has_word /etc/hosts "127.0.1.1" "$PUBLIC_INSTANCE_HOSTNAME" ; then
      echo "127.0.1.1 $PUBLIC_INSTANCE_HOSTNAME" >> /etc/hosts
   fi

#   echo "$PRIVATE_INSTANCE_IP $PRIVATE_INSTANCE_HOSTNAME" >> /etc/hosts
else
   if ! file_has_word /etc/hosts "$TORQUE_SERVER_IP" "$TORQUE_SERVER_HOSTNAME" ; then
      echo "$TORQUE_SERVER_IP $TORQUE_SERVER_HOSTNAME" >> /etc/hosts
   fi
fi


# need to set a hostname before installing torque packages
echo "$INSTANCE_HOSTNAME" > /etc/hostname # preserve hostname if rebooting is necessary
hostname "$INSTANCE_HOSTNAME" # immediately change
#getent hosts `hostname`
#PUBLIC_INSTANCE_HOSTNAME=`curl -s $METADATA_URL/public-hostname`


#echo "deb http://ftp.us.debian.org/debian sid main" > /etc/apt/sources.list
apt-get -o Dpkg::Options::="--force-confnew" --force-yes -y update

# server, scheduler and client tools go on the TORQUE server only
if [ "$INSTANCE_IP" == "$TORQUE_SERVER_IP" ]; then
   apt-get -o Dpkg::Options::="--force-confnew" --force-yes -y install torque-server torque-scheduler torque-client
   #aptitude -y install torque-server torque-scheduler torque-client
fi

# torque-mom goes on every worker node. The padding spaces make this a
# whole-address comparison, so 192.168.0.1 does not match 192.168.0.14.
# NOTE(review): this checks PUBLIC_NODES_IP even when MODE is "private" -
# confirm whether $NODES was intended.
if [[ " $PUBLIC_NODES_IP " == *" $INSTANCE_IP "* ]]; then
   apt-get -o Dpkg::Options::="--force-confnew" --force-yes -y install torque-mom
   #aptitude -y install torque-mom
fi


## fix /tmp directory in debian eucalyptus image
# world-writable WITH the sticky bit (1777): plain 777 would let every user
# delete or replace other users' temporary files
chmod 1777 /tmp

## add user to all nodes
USER=auser

# create the account only if it does not exist yet
if id "$USER" > /dev/null 2>&1
then
   echo "user exist!"
else
   adduser "$USER" --disabled-password --gecos ""
fi



#echo $PUBLIC_TORQUE_SERVER_HOSTNAME > /etc/torque/server_name
#echo $PUBLIC_INSTANCE_HOSTNAME > /etc/hostname # preserve hostname  if rebooting is necessary
#hostname $PUBLIC_INSTANCE_HOSTNAME # immediately change


# today's date; the TORQUE log files read below are named after it
DATE=$(date '+%Y%m%d')

## for TORQUE mom
# The padding spaces make this a whole-address comparison, so an instance at
# 192.168.0.1 is not mistaken for the worker node 192.168.0.14.
if [[ " $PUBLIC_NODES_IP " == *" $INSTANCE_IP "* ]]; then
    echo "$TORQUE_SERVER_HOSTNAME" > /etc/torque/server_name
    echo "\$timeout 120" > /var/spool/torque/mom_priv/config # more options possible (NFS...)
    echo "\$loglevel 5" >> /var/spool/torque/mom_priv/config # more options possible (NFS...)
    /etc/init.d/torque-mom restart
    cat "/var/spool/torque/mom_logs/$DATE"
fi

## for TORQUE server
if [ $INSTANCE_IP == $TORQUE_SERVER_IP ]; then
   echo $TORQUE_SERVER_HOSTNAME > /etc/torque/server_name

   # rebuild the node list from scratch: one "<hostname> np=1" line per node
   rm -f /var/spool/torque/server_priv/nodes
   touch /var/spool/torque/server_priv/nodes
   for NODE_IP in $NODES
   do
      NODE_HOSTNAME=ip-$(echo $NODE_IP | sed 's/\./-/g')
      printf '%s np=1\n' "$NODE_HOSTNAME" >> /var/spool/torque/server_priv/nodes
   done

   /etc/init.d/torque-server restart
   /etc/init.d/torque-scheduler restart

   # server and queue settings, applied in this order
   # (presumably "s s" = set server, "c q" = create queue, "s q" = set queue)
   for QMGR_CMD in \
      "s s scheduling=true" \
      "c q batch queue_type=execution" \
      "s q batch started=true" \
      "s q batch enabled=true" \
      "s q batch resources_default.nodes=1" \
      "s q batch resources_default.walltime=3600" \
      "s q batch resources_min.nodes=1" \
      "s s default_queue=batch" \
      "s s allow_node_submit=true"
   do
      # resources_min.nodes: had to set this for MPI, TODO: double check
      # allow_node_submit: let all nodes submit jobs, not only the server
      qmgr -c "$QMGR_CMD"
   done
   #qmgr -c 'set server submit_hosts += $TORQUE_SERVER_IP'
   #qmgr -c 'set server submit_hosts += $INSTANCE_IP'

   # adding extra nodes
   #qmgr -c "create node $INSTANCE_HOSTNAME"

#debug
   cat /var/spool/torque/server_logs/$DATE
   qstat -q
   pbsnodes -a
   cat /etc/torque/server_name
fi