Friday, 8 March 2013

Cluster for LAN Services switching to EXT4 (Part 4)

The rest of the services are pretty much configured as they were in the GFS2 version of this cluster, except using the ext4 filesystem. Here is my final cluster.conf file:


<?xml version="1.0"?>
<cluster config_version="84" name="bldg1ux01clu">
  <!-- Two-node cluster: two_node="1" together with expected_votes="1" is the
       cman special case that lets a single surviving node remain quorate. -->
  <cman expected_votes="1" two_node="1"/>
  <clusternodes>
    <!-- Each node has two fence methods, listed in the order they are tried:
         1. apc7920-dual: switch both PDU outlets off, then both back on
            (presumably one outlet per power supply; confirm against cabling).
         2. the node's DRAC (fence_ipmilan device defined under fencedevices). -->
    <clusternode name="bldg1ux01n1i" nodeid="1" votes="1">
      <fence>
        <method name="apc7920-dual">
          <device action="off" name="apc7920" port="1"/>
          <device action="off" name="apc7920" port="2"/>
          <device action="on" name="apc7920" port="1"/>
          <device action="on" name="apc7920" port="2"/>
        </method>
        <method name="bldg1ux01n1drac">
          <device name="bldg1ux01n1drac"/>
        </method>
      </fence>
    </clusternode>
    <clusternode name="bldg1ux01n2i" nodeid="2" votes="1">
      <fence>
        <method name="apc7920-dual">
          <device action="off" name="apc7920" port="3"/>
          <device action="off" name="apc7920" port="4"/>
          <device action="on" name="apc7920" port="3"/>
          <device action="on" name="apc7920" port="4"/>
        </method>
        <method name="bldg1ux01n2drac">
          <device name="bldg1ux01n2drac"/>
        </method>
      </fence>
    </clusternode>
  </clusternodes>
  <rm>
    <!-- Failover domains: "A" domains prefer node 1, "B" domains prefer node 2.
         The "nfb" variants are identical except that nofailback="1". -->
    <failoverdomains>
      <failoverdomain name="bldg1ux01A" nofailback="0" ordered="1" restricted="1">
        <failoverdomainnode name="bldg1ux01n1i" priority="1"/>
        <failoverdomainnode name="bldg1ux01n2i" priority="2"/>
      </failoverdomain>
      <failoverdomain name="bldg1ux01B" nofailback="0" ordered="1" restricted="1">
        <failoverdomainnode name="bldg1ux01n1i" priority="2"/>
        <failoverdomainnode name="bldg1ux01n2i" priority="1"/>
      </failoverdomain>
      <failoverdomain name="bldg1ux01Anfb" nofailback="1" ordered="1" restricted="1">
        <failoverdomainnode name="bldg1ux01n1i" priority="1"/>
        <failoverdomainnode name="bldg1ux01n2i" priority="2"/>
      </failoverdomain>
      <failoverdomain name="bldg1ux01Bnfb" nofailback="1" ordered="1" restricted="1">
        <failoverdomainnode name="bldg1ux01n1i" priority="2"/>
        <failoverdomainnode name="bldg1ux01n2i" priority="1"/>
      </failoverdomain>
    </failoverdomains>
    <!-- Shared resource definitions; the services below pull them in by ref.
         Every ip resource has monitor_link="0" and every fs resource has
         force_fsck="1" and force_unmount="1". -->
    <resources>
      <ip address="10.1.10.25" monitor_link="0"/>
      <fs device="/dev/cluvg00/lv00dhcpd" force_fsck="1" force_unmount="1" mountpoint="/data/dhcpd" name="dhcpdfs" nfslock="0" options="acl" quick_status="0" self_fence="0"/>
      <ip address="10.1.10.26" monitor_link="0"/>
      <fs device="/dev/cluvg00/lv00named" force_fsck="1" force_unmount="1" mountpoint="/data/named" name="namedfs" nfslock="0" options="acl" quick_status="0" self_fence="0"/>
      <ip address="10.1.10.27" monitor_link="0"/>
      <fs device="/dev/cluvg00/lv00cups" force_fsck="1" force_unmount="1" mountpoint="/data/cups" name="cupsfs" nfslock="0" options="acl" quick_status="0" self_fence="0"/>
      <ip address="10.1.10.28" monitor_link="0"/>
      <fs device="/dev/cluvg00/lv00httpd" force_fsck="1" force_unmount="1" mountpoint="/data/httpd" name="httpdfs" nfslock="0" options="acl" quick_status="0" self_fence="0"/>
      <ip address="10.1.10.29" monitor_link="0"/>
      <fs device="/dev/cluvg00/lv00projects" force_fsck="1" force_unmount="1" mountpoint="/data/projects" name="projectsfs" nfslock="1" options="acl" quick_status="0" self_fence="0"/>
      <nfsexport name="exportbldg1clunfsprojects"/>
      <nfsclient name="nfsdprojects" options="rw" target="10.0.0.0/8"/>
      <ip address="10.1.10.30" monitor_link="0"/>
      <fs device="/dev/cluvg00/lv00home" force_fsck="1" force_unmount="1" mountpoint="/data/home" name="homefs" nfslock="1" options="acl" quick_status="0" self_fence="0"/>
      <nfsexport name="exportbldg1clunfshome"/>
      <nfsclient name="nfsdhome" options="rw" target="10.0.0.0/8"/>
      <ip address="10.1.10.32" monitor_link="0"/>
      <fs device="/dev/cluvg00/lv00smbprj" force_fsck="1" force_unmount="1" mountpoint="/data/smbprj" name="smbdprjfs" nfslock="0" options="acl" quick_status="0" self_fence="0"/>
      <ip address="10.1.10.33" monitor_link="0"/>
      <fs device="/dev/cluvg00/lv00smbhome" force_fsck="1" force_unmount="1" mountpoint="/data/smbhome" name="smbdhomefs" nfslock="0" options="acl" quick_status="0" self_fence="0"/>
    </resources>
    <service autostart="1" domain="bldg1ux01B" exclusive="0" name="cups" recovery="relocate">
      <script file="/etc/init.d/cups" name="cups"/>
      <ip ref="10.1.10.27"/>
      <fs ref="cupsfs"/>
    </service>
    <!-- nfsdprojects carries both the NFS export of /data/projects and the
         Samba instance that uses /data/smbprj. -->
    <service autostart="0" domain="bldg1ux01Anfb" exclusive="0" name="nfsdprojects" nfslock="1" recovery="relocate">
      <ip ref="10.1.10.29"/>
      <fs ref="projectsfs">
        <nfsexport ref="exportbldg1clunfsprojects">
          <nfsclient ref="nfsdprojects"/>
        </nfsexport>
      </fs>
      <ip ref="10.1.10.32">
        <fs ref="smbdprjfs"/>
        <samba config_file="/etc/samba/smb.conf.prj" name="bldg1clusmbprj" smbd_options="-p 445 -l /data/smbprj/var/log/samba"/>
      </ip>
    </service>
    <service autostart="1" domain="bldg1ux01A" exclusive="0" name="httpd" nfslock="0" recovery="relocate">
      <ip ref="10.1.10.28">
        <fs ref="httpdfs"/>
        <apache config_file="conf/httpd.conf" name="httpd" server_root="/data/httpd/etc/httpd" shutdown_wait="10"/>
      </ip>
    </service>
    <!-- nfsdhome carries both the NFS export of /data/home and the Samba
         instance that uses /data/smbhome. -->
    <service autostart="0" domain="bldg1ux01Bnfb" exclusive="0" name="nfsdhome" nfslock="1" recovery="relocate">
      <ip ref="10.1.10.30"/>
      <fs ref="homefs">
        <nfsexport ref="exportbldg1clunfshome">
          <nfsclient ref="nfsdhome"/>
        </nfsexport>
      </fs>
      <ip ref="10.1.10.33">
        <fs ref="smbdhomefs"/>
        <samba config_file="/etc/samba/smb.conf.home" name="bldg1clusmbhome" smbd_options="-p 445 -l /data/smbhome/var/log/samba"/>
      </ip>
    </service>
    <service autostart="1" domain="bldg1ux01A" exclusive="0" name="dhcpd" recovery="relocate">
      <script file="/etc/init.d/dhcpd" name="dhcpd"/>
      <ip ref="10.1.10.25"/>
      <fs ref="dhcpdfs"/>
    </service>
    <service autostart="1" domain="bldg1ux01A" exclusive="0" name="named" recovery="relocate">
      <script file="/etc/init.d/named" name="named"/>
      <ip ref="10.1.10.26"/>
      <fs ref="namedfs"/>
    </service>
  </rm>
  <!-- NOTE(review): fence credentials are stored here in plain text; keep this
       file readable by root only, or consider the fence agents' passwd_script
       option (verify it is supported by your agent versions). -->
  <fencedevices>
    <fencedevice agent="fence_apc" ipaddr="192.168.2.3" login="apc" name="apc7920" passwd="securepassword"/>
    <fencedevice agent="fence_ipmilan" ipaddr="10.1.10.22" login="fence" name="bldg1ux01n1drac" passwd="securepassword"/>
    <fencedevice agent="fence_ipmilan" ipaddr="10.1.10.23" login="fence" name="bldg1ux01n2drac" passwd="securepassword"/>
  </fencedevices>
  <fence_daemon clean_start="0" post_fail_delay="0" post_join_delay="3"/>
</cluster>

One change I made was to set monitor_link="0" on the IP resources, as I don't really want my services relocating whenever the switch stack reboots (my network resilience comes from bonding).

I was also somewhat paranoid that an admin might accidentally leave one of my filesystems mounted on two nodes at once (a big no-no on ext4), so I wrote the following script to check that a node doesn't have anything mounted that the cluster says it shouldn't.


#!/usr/bin/perl

# Parse /etc/cluster/cluster.conf and check that no filesystem belonging to a
# service which is started on ANOTHER node is mounted on this node.
#
# Exit status: 0 if nothing suspicious was found, 1 if at least one such
# filesystem is mounted here (one line is printed per finding). If clustat,
# mount or the config file cannot be read the script dies, which also gives a
# non-zero exit status, so a wrapper that checks the return code will notice
# instead of silently assuming all is well.

use strict;
use warnings;

my $clusterconfig = "/etc/cluster/cluster.conf";

my $hostname = `hostname`;
chomp($hostname);
die "Can't determine hostname\n" if $hostname eq '';

# Services that clustat reports as "started" with an owner other than this node.
my %notmyservices;

open(my $clustat, '-|', '/usr/sbin/clustat')
    or die "Can't run /usr/sbin/clustat: $!";
while (my $line = <$clustat>)
{
    if ($line =~ /service:(.+?)\s+(.+?)\sstarted/)
    {
        my ($service, $owner) = ($1, $2);
        # Literal substring test: the output of `hostname` must be contained
        # in the owner column for the service to count as ours.
        $notmyservices{$service} = 1 if index($owner, $hostname) < 0;
    }
}
close($clustat);

# Mount points of device-mapper devices currently mounted on this node.
my %mymounts;

open(my $mounts, '-|', '/bin/mount')
    or die "Can't run /bin/mount: $!";
while (my $line = <$mounts>)
{
    if ($line =~ /\/dev\/mapper.+?on\s+?(.+?)\s+?type/)
    {
        $mymounts{$1} = 1;
    }
}
close($mounts);

# Report $fs if it is mounted here; returns 1 when a report was printed.
sub report_if_mounted
{
    my ($fs) = @_;
    return 0 unless defined $fs && $mymounts{$fs};
    print "Double mounted Filesystem: $fs on $hostname not running that service\n";
    return 1;
}

my $retval    = 0;
my $checkthis = 0;   # true while inside a <service> that another node holds
my %fslookup;        # fs resource name => mountpoint

open(my $conf, '<', $clusterconfig)
    or die "Can't open cluster config file $clusterconfig: $!";

while (my $line = <$conf>)
{
    if ($line =~ /<fs\s/)
    {
        # Pull the attributes out individually so attribute order is irrelevant.
        my ($mountpoint) = $line =~ /\bmountpoint="([^"]+)"/;
        my ($name)       = $line =~ /\bname="([^"]+)"/;
        my ($ref)        = $line =~ /\bref="([^"]+)"/;

        if (defined $mountpoint)
        {
            # A definition: remember it so later <fs ref="..."/> lines resolve.
            $fslookup{$name} = $mountpoint if defined $name;
        }
        elsif (defined $ref)
        {
            # A reference: stays undef if the resource was never defined.
            $mountpoint = $fslookup{$ref};
        }

        # Inside a service held elsewhere, this filesystem must not be mounted here.
        $retval = 1 if $checkthis && report_if_mounted($mountpoint);
        next;
    }

    if ($line =~ /<service\s.*?\bname="([^"]+)"/)
    {
        $checkthis = exists $notmyservices{$1} ? 1 : 0;
    }

    if ($line =~ /<\/service>/)
    {
        $checkthis = 0;
    }
}

close($conf);
exit($retval);



This parses cluster.conf for the filesystems associated with each service and checks whether we should have them mounted (based on which services we hold). I wrapped another script around this that checks the return code and sends an email if it suspects a double mount. I then run it from cron every 15 minutes, which should minimise how long any double mount can go unnoticed. This is purely paranoia and, if you do things properly, you should never end up here! Just a bit of belt and suspenders.

After switching to ext4 for my cluster I have found this setup to be very stable. It has been happily running on several clusters for well over a year now.



No comments:

Post a Comment