Friday, May 15, 2009

Setting up Torque Server on xCAT 2.x

Modified from xCAT 2 Advanced Cookbook
Step 1: Setup Torque Server
# cd /tmp
# wget http://www.clusterresources.com/downloads/torque/torque-2.3.0.tar.gz
# tar zxvf torque-2.3.0.tar.gz
# cd torque-2.3.0
# CFLAGS=-D__TRR ./configure \
--prefix=/opt/torque \
--exec-prefix=/opt/torque/x86_64 \
--enable-docs \
--disable-gui \
--with-server-home=/var/spool/pbs \
--enable-syslog \
--with-scp \
--disable-rpp \
--disable-spool

# make

# make install


Step 2: Configure Torque

# cd /opt/torque/x86_64/lib
# ln -s libtorque.so.2.0.0 libtorque.so.0
# echo "/opt/torque/x86_64/lib" >>/etc/ld.so.conf.d/torque.conf
# ldconfig
# cp -f /opt/xcat/share/xcat/netboot/add-on/torque/xpbsnodes /opt/torque/x86_64/bin/
# cp -f /opt/xcat/share/xcat/netboot/add-on/torque/pbsnodestat /opt/torque/x86_64/bin/

Create /etc/profile.d/torque.sh:
# vim /etc/profile.d/torque.sh
Type:
export PBS_DEFAULT=n00 (where n00 is the Head Node)
export PATH=/opt/torque/x86_64/bin:$PATH


Step 3: Define Nodes
# cd /var/spool/pbs/server_priv
# vim nodes
Type:
n01 np=8 (where n01 is the compute node's hostname and np is the number of cores on that node)


Step 4: Setup and Start Service
# cp -f /opt/xcat/share/xcat/netboot/add-on/torque/pbs /etc/init.d/
# cp -f /opt/xcat/share/xcat/netboot/add-on/torque/pbs_mom /etc/init.d/
# cp -f /opt/xcat/share/xcat/netboot/add-on/torque/pbs_sched /etc/init.d/
# cp -f /opt/xcat/share/xcat/netboot/add-on/torque/pbs_server /etc/init.d/
# chkconfig --del pbs
# chkconfig --del pbs_mom
# chkconfig --del pbs_sched
# chkconfig --level 345 pbs_server on
# service pbs_server start


Step 5: Edit pbs_mom, pbs_sched, pbs_server
# vim /etc/init.d/pbs_sched
(Ensure your path is correct for BASE_PBS_PREFIX=/opt/torque) 
# vim /etc/init.d/pbs_mom 
(Ensure your path is correct for chmod 777 /var/spool/pbs/spool /var/spool/pbs/undelivered)
(Ensure your path is correct for chmod o+t /var/spool/pbs/spool /var/spool/pbs/undelivered)
# vim /etc/init.d/pbs_server
(Ensure your path is correct for BASE_PBS_PREFIX=/opt/torque)
(Ensure your path is correct for PBS_HOME=/var/spool/pbs)


Step 6: Install pbstop
# cp -f /opt/xcat/share/xcat/netboot/add-on/torque/pbstop /opt/torque/x86_64/bin/
# chmod 755 /opt/torque/x86_64/bin/pbstop


Step 7: Install Perl Curses for pbstop
# yum install perl-Curses (You will need RPMForge Repository enabled. Do check Installing RPMForge)


Step 8: Create a Torque Default Queue
# qmgr

create queue dque
set queue dque queue_type = Execution
set queue dque enabled = True
set queue dque started = True
set server scheduling = True
set server default_queue = dque
set server log_events = 127
set server mail_from = adm
set server query_other_jobs = True
set server resources_default.walltime = 00:01:00
set server scheduler_iteration = 60
set server node_pack = False
set server keep_completed=300

Step 9: Setup Torque Clients (x86_64) (using manual installation)

Ensure that /etc/hosts on every node contains entries for the head node and all compute nodes
# vim /etc/hosts (include all the hosts)

# pscp -r /opt/torque compute:/opt/
# pscp -r /var/spool/pbs compute:/var/spool/

Set world-writable permissions and the sticky bit on /var/spool/pbs/spool and /var/spool/pbs/undelivered
# chmod 777 /var/spool/pbs/spool /var/spool/pbs/undelivered
# chmod o+t /var/spool/pbs/spool /var/spool/pbs/undelivered
(Ensure your path is correct for chmod 777 /var/spool/pbs/spool /var/spool/pbs/undelivered)
(Ensure your path is correct for chmod o+t /var/spool/pbs/spool /var/spool/pbs/undelivered)

Step 10: Start pbs_mom for Torque Client Node
Go to the Head Node
# pscp /etc/init.d/pbs_mom compute:/etc/init.d/
Edit pbs_mom
# vim /etc/init.d/pbs_mom

BASE_PBS_PREFIX=/opt/torque
chmod 777 /var/spool/pbs/spool /var/spool/pbs/undelivered
chmod o+t /var/spool/pbs/spool /var/spool/pbs/undelivered

# service pbs_mom start


Step 11: Start the pbs_mom, pbs_server, pbs_sched services at Head Node
# service pbs_mom start
# service pbs_sched start
# service pbs_server start


Step 12: Check that PBS is working
Go to Head Node
# pbstop
# pbsnodes -a (You should see some of the nodes listed as "free")

No comments: