hashnao's Blog: August 2011

As MySQL MHA Node and Manager have been installed, next is switching master server manually.

The relation Master and Slaves, including MHA manager and nodes, is below.

ha-db01: Master + MHA Node → MySQL stopped
ha-db02: Slave + MHA Node → Master
ha-mgr01: Slave + MHA Manager → Slave

This is how switchover occurs.

stop mysql on master.
switch master to Slave(ha-db02) manually.
slave server(ha-mgr02) recognizes ha-db02 as master.

Currently logged in MHA Manger(ha-mgr01)
Setting up hosts file to connect via hostname

# cat >> /etc/hosts <<EOF
192.168.100.197 ha-db01
192.168.100.198 ha-db02
192.168.100.200 ha-mgr01
EOF

Displaying a list of replication slaves registered with Master

# mysql -uroot -pmysql -e 'SHOW SLAVE HOSTS\G' -h ha-db01
*************************** 1. row ***************************
Server_id: 300
     Host: 
     Port: 3306
Master_id: 100
*************************** 2. row ***************************
Server_id: 200
     Host: 
     Port: 3306
Master_id: 100

Checking the replication status of the slave servers

# mysql -uroot -pmysql -e 'SHOW SLAVE STATUS\G' -h ha-db02
*************************** 1. row ***************************
               Slave_IO_State: Waiting for master to send event
                  Master_Host: 192.168.100.197
                  Master_User: replication
                  Master_Port: 3306
                Connect_Retry: 60
              Master_Log_File: mysql-bin.000045
          Read_Master_Log_Pos: 107
               Relay_Log_File: mysqld-relay-bin.00001
                Relay_Log_Pos: 253
        Relay_Master_Log_File: mysql-bin.000045
             Slave_IO_Running: Yes
            Slave_SQL_Running: Yes
              Replicate_Do_DB: 
          Replicate_Ignore_DB: 
           Replicate_Do_Table: 
       Replicate_Ignore_Table: 
      Replicate_Wild_Do_Table: 
  Replicate_Wild_Ignore_Table: 
                   Last_Errno: 0
                   Last_Error: 
                 Skip_Counter: 0
          Exec_Master_Log_Pos: 107
              Relay_Log_Space: 556
              Until_Condition: None
               Until_Log_File: 
                Until_Log_Pos: 0
           Master_SSL_Allowed: No
           Master_SSL_CA_File: 
           Master_SSL_CA_Path: 
              Master_SSL_Cert: 
            Master_SSL_Cipher: 
               Master_SSL_Key: 
        Seconds_Behind_Master: 0
Master_SSL_Verify_Server_Cert: No
                Last_IO_Errno: 0
                Last_IO_Error: 
               Last_SQL_Errno: 0
               Last_SQL_Error: 
  Replicate_Ignore_Server_Ids: 
             Master_Server_Id: 100

# mysql -uroot -pmysql -e 'SHOW SLAVE STATUS\G' -h localhost
*************************** 1. row ***************************
               Slave_IO_State: Waiting for master to send event
                  Master_Host: 192.168.100.197
                  Master_User: replication
                  Master_Port: 3306
                Connect_Retry: 60
              Master_Log_File: mysql-bin.000045
          Read_Master_Log_Pos: 107
               Relay_Log_File: ha-mgr01-relay-bin.000002
                Relay_Log_Pos: 253
        Relay_Master_Log_File: mysql-bin.000045
             Slave_IO_Running: Yes
            Slave_SQL_Running: Yes
              Replicate_Do_DB: 
          Replicate_Ignore_DB: 
           Replicate_Do_Table: 
       Replicate_Ignore_Table: 
      Replicate_Wild_Do_Table: 
  Replicate_Wild_Ignore_Table: 
                   Last_Errno: 0
                   Last_Error: 
                 Skip_Counter: 0
          Exec_Master_Log_Pos: 107
              Relay_Log_Space: 412
              Until_Condition: None
               Until_Log_File: 
                Until_Log_Pos: 0
           Master_SSL_Allowed: No
           Master_SSL_CA_File: 
           Master_SSL_CA_Path: 
              Master_SSL_Cert: 
            Master_SSL_Cipher: 
               Master_SSL_Key: 
        Seconds_Behind_Master: 0
Master_SSL_Verify_Server_Cert: No
                Last_IO_Errno: 0
                Last_IO_Error: 
               Last_SQL_Errno: 0
               Last_SQL_Error: 
  Replicate_Ignore_Server_Ids: 
             Master_Server_Id: 100

Setting up the Global configuration file

# mkdir -p /var/log/masterha/app1
# cat > /etc/masterha_default.cnf <<EOF
[server default]
user=root
password=mysql
ssh_user=root
master_binlog_dir=/var/lib/mysql
remote_workdir=/tmp/masterha
secondary_check_script=/usr/local/bin/masterha_secondary_check -s ha-db02 --user=root --master_ip=192.168.100.197 --master_port=3306 --master_host=ha-db01 -s ha-db01
ping_interval=3
#master_ip_failover_script=/usr/local/bin/master_ip_failover
#master_ip_online_change_script=/usr/local/bin/master_ip_online_change
#shutdown_script=/usr/local/bin/power_manager
report_script=/usr/local/bin/send_report
EOF

Setting up the Application configuration file

# cat > /etc/app1.cnf <<EOF
[server default]
user=root
password=mysql
manager_workdir=/var/log/masterha/app1
manager_log=/var/log/masterha/app1/app1.log
remote_workdir=/var/log/masterha/app1
[server1]
hostname=ha-db01
[server2]
hostname=ha-db02
[server2]
hostname=ha-mgr01
EOF

Checking to connect via ssh with MHA Manager and Nodes

# masterha_check_ssh --conf=/etc/app1.cnf 
Thu Jul 28 10:32:27 2011 - [info] Reading default configuratoins from /etc/masterha_default.cnf..
Thu Jul 28 10:32:27 2011 - [info] Reading application default configurations from /etc/app1.cnf..
Thu Jul 28 10:32:27 2011 - [info] Reading server configurations from /etc/app1.cnf..
Thu Jul 28 10:32:27 2011 - [info] Starting SSH connection tests..
Thu Jul 28 10:32:28 2011 - [debug] 
Thu Jul 28 10:32:27 2011 - [debug]  Connecting via SSH from root@ha-db01(192.168.100.197) to root@ha-db02(192.168.100.198)..
Thu Jul 28 10:32:28 2011 - [debug]   ok.
Thu Jul 28 10:32:28 2011 - [debug]  Connecting via SSH from root@ha-db01(192.168.100.197) to root@ha-mgr02(192.168.100.200)..
Thu Jul 28 10:32:28 2011 - [debug]   ok.
Thu Jul 28 10:32:29 2011 - [debug] 
Thu Jul 28 10:32:28 2011 - [debug]  Connecting via SSH from root@ha-db02(192.168.100.198) to root@ha-db01(192.168.100.197)..
Thu Jul 28 10:32:28 2011 - [debug]   ok.
Thu Jul 28 10:32:28 2011 - [debug]  Connecting via SSH from root@ha-db02(192.168.100.198) to root@ha-mgr02(192.168.100.200)..
Thu Jul 28 10:32:28 2011 - [debug]   ok.
Thu Jul 28 10:32:29 2011 - [debug] 
Thu Jul 28 10:32:28 2011 - [debug]  Connecting via SSH from root@ha-mgr02(192.168.100.200) to root@ha-db01(192.168.100.197)..
Thu Jul 28 10:32:29 2011 - [debug]   ok.
Thu Jul 28 10:32:29 2011 - [debug]  Connecting via SSH from root@ha-mgr02(192.168.100.200) to root@ha-db02(192.168.100.198)..
Thu Jul 28 10:32:29 2011 - [debug]   ok.
Thu Jul 28 10:32:29 2011 - [info] All SSH connection tests passed successfully.

Checking if MySQL replication is enabled

# masterha_check_repl --conf=/etc/app1.cnf 
Thu Jul 28 10:32:39 2011 - [info] Reading default configuratoins from /etc/masterha_default.cnf..
Thu Jul 28 10:32:39 2011 - [info] Reading application default configurations from /etc/app1.cnf..
Thu Jul 28 10:32:39 2011 - [info] Reading server configurations from /etc/app1.cnf..
Thu Jul 28 10:32:39 2011 - [info] MHA::MasterMonitor version 0.50.
Thu Jul 28 10:32:39 2011 - [info] Dead Servers:
Thu Jul 28 10:32:39 2011 - [info] Alive Servers:
Thu Jul 28 10:32:39 2011 - [info]   ha-db01(192.168.100.197:3306)
Thu Jul 28 10:32:39 2011 - [info]   ha-db02(192.168.100.198:3306)
Thu Jul 28 10:32:39 2011 - [info]   ha-mgr02(192.168.100.200:3306)
Thu Jul 28 10:32:39 2011 - [info] Alive Slaves:
Thu Jul 28 10:32:39 2011 - [info]   ha-db02(192.168.100.198:3306)  Version=5.5.12-log (oldest major version between slaves) log-bin:enabled
Thu Jul 28 10:32:39 2011 - [info]     Replicating from 192.168.100.197(192.168.100.197:3306)
Thu Jul 28 10:32:39 2011 - [info]   ha-mgr02(192.168.100.200:3306)  Version=5.5.12-log (oldest major version between slaves) log-bin:enabled
Thu Jul 28 10:32:39 2011 - [info]     Replicating from 192.168.100.197(192.168.100.197:3306)
Thu Jul 28 10:32:39 2011 - [info] Current Master: ha-db01(192.168.100.197:3306)
Thu Jul 28 10:32:39 2011 - [info] Checking slave configurations..
Thu Jul 28 10:32:39 2011 - [warn]  read_only=1 is not set on slave ha-db02(192.168.100.198:3306).
Thu Jul 28 10:32:39 2011 - [warn]  relay_log_purge=0 is not set on slave ha-db02(192.168.100.198:3306).
Thu Jul 28 10:32:39 2011 - [warn]  read_only=1 is not set on slave ha-mgr02(192.168.100.200:3306).
Thu Jul 28 10:32:39 2011 - [warn]  relay_log_purge=0 is not set on slave ha-mgr02(192.168.100.200:3306).
Thu Jul 28 10:32:39 2011 - [info] Checking replication filtering settings..
Thu Jul 28 10:32:39 2011 - [info]  binlog_do_db= , binlog_ignore_db= 
Thu Jul 28 10:32:39 2011 - [info]  Replication filtering check ok.
Thu Jul 28 10:32:39 2011 - [info] Starting SSH connection tests..
Thu Jul 28 10:32:41 2011 - [info] All SSH connection tests passed successfully.
Thu Jul 28 10:32:41 2011 - [info] Checking MHA Node version..
Thu Jul 28 10:32:41 2011 - [info]  Version check ok.
Thu Jul 28 10:32:41 2011 - [info] Checking SSH publickey authentication and checking recovery script configurations on the current master..
Thu Jul 28 10:32:42 2011 - [info]   Executing command: save_binary_logs --command=test --start_file=mysql-bin.000045 --start_pos=4 --binlog_dir=/var/lib/mysql --output_file=/var/log/masterha/app1/save_binary_logs_test --manager_version=0.50 
Thu Jul 28 10:32:42 2011 - [info]   Connecting to root@ha-db01(ha-db01).. 
  Creating /var/log/masterha/app1 if not exists..    ok.
  Checking output directory is accessible or not..
   ok.
  Binlog found at /var/lib/mysql, up to mysql-bin.000045
Thu Jul 28 10:32:42 2011 - [info] Master setting check done.
Thu Jul 28 10:32:42 2011 - [info] Checking SSH publickey authentication and checking recovery script configurations on all alive slave servers..
Thu Jul 28 10:32:42 2011 - [info]   Executing command : apply_diff_relay_logs --command=test --slave_user=root --slave_host=ha-db02 --slave_ip=192.168.100.198 --slave_port=3306 --workdir=/var/log/masterha/app1 --target_version=5.5.12-log --manager_version=0.50 --relay_log_info=/var/lib/mysql/relay-log.info  --slave_pass=xxx
Thu Jul 28 10:32:42 2011 - [info]   Connecting to root@192.168.100.198(ha-db02).. 
  Checking slave recovery environment settings..
    Opening /var/lib/mysql/relay-log.info ... ok.
    Relay log found at /var/lib/mysql, up to mysqld-relay-bin.000017
    Temporary relay log file is /var/lib/mysql/mysqld-relay-bin.000017
    Testing mysql connection and privileges.. done.
    Testing mysqlbinlog output.. done.
    Cleaning up test file(s).. done.
Thu Jul 28 10:32:42 2011 - [info]   Executing command : apply_diff_relay_logs --command=test --slave_user=root --slave_host=ha-mgr02 --slave_ip=192.168.100.200 --slave_port=3306 --workdir=/var/log/masterha/app1 --target_version=5.5.12-log --manager_version=0.50 --relay_log_info=/var/lib/mysql/relay-log.info  --slave_pass=xxx
Thu Jul 28 10:32:42 2011 - [info]   Connecting to root@192.168.100.200(ha-mgr02).. 
  Checking slave recovery environment settings..
    Opening /var/lib/mysql/relay-log.info ... ok.
    Relay log found at /var/lib/mysql, up to ha-mgr02-relay-bin.000002
    Temporary relay log file is /var/lib/mysql/ha-mgr02-relay-bin.000002
    Testing mysql connection and privileges.. done.
    Testing mysqlbinlog output.. done.
    Cleaning up test file(s).. done.
Thu Jul 28 10:32:42 2011 - [info] Slaves settings check done.
Thu Jul 28 10:32:42 2011 - [info] 
ha-db01 (current master)
 +--ha-db02
 +--ha-mgr02
Thu Jul 28 10:32:42 2011 - [info] Checking replication health on ha-db02..
Thu Jul 28 10:32:42 2011 - [info]  ok.
Thu Jul 28 10:32:42 2011 - [info] Checking replication health on ha-mgr02..
Thu Jul 28 10:32:42 2011 - [info]  ok.
Thu Jul 28 10:32:42 2011 - [warn] master_ip_failover_script is not defined.
Thu Jul 28 10:32:42 2011 - [warn] shutdown_script is not defined.
Thu Jul 28 10:32:42 2011 - [info] Got exit code 0 (Not master dead).
MySQL Replication Health is OK.

Starting up the Node Manager

# masterha_manager --conf=/etc/app1.cnf

Checking the status

# masterha_check_status --conf=/etc/app1.cnf

Stopping MySQL Daemon

# ssh ha-db01 '/etc/init.d/mysql stop'

Changing Master Server ha-db01 to ha-db02 manually

# masterha_master_switch --master_state=dead --conf=/etc/app1.cnf --dead_master_host=ha-db01 --new_master_host=ha-db02
--dead_master_ip=<dead_master_ip> is not set. Using 192.168.100.197.
--dead_master_port=<dead_master_port> is not set. Using 3306.
Thu Jul 28 10:39:09 2011 - [info] Reading default configuratoins from /etc/masterha_default.cnf..
Thu Jul 28 10:39:09 2011 - [info] Reading application default configurations from /etc/app1.cnf..
Thu Jul 28 10:39:09 2011 - [info] Reading server configurations from /etc/app1.cnf..
Thu Jul 28 10:39:09 2011 - [info] MHA::MasterFailover version 0.50.
Thu Jul 28 10:39:09 2011 - [info] Starting master failover.
Thu Jul 28 10:39:09 2011 - [info] 
Thu Jul 28 10:39:09 2011 - [info] * Phase 1: Configuration Check Phase..
Thu Jul 28 10:39:09 2011 - [info] 
Thu Jul 28 10:39:09 2011 - [info] Dead Servers:
Thu Jul 28 10:39:09 2011 - [info]   ha-db01(192.168.100.197:3306)
Thu Jul 28 10:39:09 2011 - [info] Checking master reachability via mysql(double check)..
Thu Jul 28 10:39:09 2011 - [info]  ok.
Thu Jul 28 10:39:09 2011 - [info] Alive Servers:
Thu Jul 28 10:39:09 2011 - [info]   ha-db02(192.168.100.198:3306)
Thu Jul 28 10:39:09 2011 - [info]   ha-mgr01(192.168.100.200:3306)
Thu Jul 28 10:39:09 2011 - [info] Alive Slaves:
Thu Jul 28 10:39:09 2011 - [info]   ha-db02(192.168.100.198:3306)  Version=5.5.12-log (oldest major version between slaves) log-bin:enabled
Thu Jul 28 10:39:09 2011 - [info]     Replicating from 192.168.100.197(192.168.100.197:3306)
Thu Jul 28 10:39:09 2011 - [info]   ha-mgr01(192.168.100.200:3306)  Version=5.5.12-log (oldest major version between slaves) log-bin:enabled
Thu Jul 28 10:39:09 2011 - [info]     Replicating from 192.168.100.197(192.168.100.197:3306)
Master ha-db01 is dead. Proceed? (yes/NO): yes
Thu Jul 28 10:39:19 2011 - [info] ** Phase 1: Configuration Check Phase completed.
Thu Jul 28 10:39:19 2011 - [info] 
Thu Jul 28 10:39:19 2011 - [info] * Phase 2: Dead Master Shutdown Phase..
Thu Jul 28 10:39:19 2011 - [info] 
Thu Jul 28 10:39:19 2011 - [info] HealthCheck: SSH to ha-db01 is reachable.
Thu Jul 28 10:39:19 2011 - [info] Forcing shutdown so that applications never connect to the current master..
Thu Jul 28 10:39:19 2011 - [warn] master_ip_failover_script is not set. Skipping invalidating dead master ip address.
Thu Jul 28 10:39:19 2011 - [warn] shutdown_script is not set. Skipping explicit shutting down of the dead master.
Thu Jul 28 10:39:19 2011 - [info] * Phase 2: Dead Master Shutdown Phase completed.
Thu Jul 28 10:39:19 2011 - [info] 
Thu Jul 28 10:39:19 2011 - [info] * Phase 3: Master Recovery Phase..
Thu Jul 28 10:39:19 2011 - [info] 
Thu Jul 28 10:39:19 2011 - [info] * Phase 3.1: Getting Latest Slaves Phase..
Thu Jul 28 10:39:19 2011 - [info] 
Thu Jul 28 10:39:19 2011 - [info] The latest binary log file/position on all slaves is mysql-bin.000045:107
Thu Jul 28 10:39:19 2011 - [info] Latest slaves (Slaves that received relay log files to the latest):
Thu Jul 28 10:39:19 2011 - [info]   ha-db02(192.168.100.198:3306)  Version=5.5.12-log (oldest major version between slaves) log-bin:enabled
Thu Jul 28 10:39:19 2011 - [info]     Replicating from 192.168.100.197(192.168.100.197:3306)
Thu Jul 28 10:39:19 2011 - [info]   ha-mgr01(192.168.100.200:3306)  Version=5.5.12-log (oldest major version between slaves) log-bin:enabled
Thu Jul 28 10:39:19 2011 - [info]     Replicating from 192.168.100.197(192.168.100.197:3306)
Thu Jul 28 10:39:19 2011 - [info] The oldest binary log file/position on all slaves is mysql-bin.000045:107
Thu Jul 28 10:39:19 2011 - [info] Oldest slaves:
Thu Jul 28 10:39:19 2011 - [info]   ha-db02(192.168.100.198:3306)  Version=5.5.12-log (oldest major version between slaves) log-bin:enabled
Thu Jul 28 10:39:19 2011 - [info]     Replicating from 192.168.100.197(192.168.100.197:3306)
Thu Jul 28 10:39:19 2011 - [info]   ha-mgr01(192.168.100.200:3306)  Version=5.5.12-log (oldest major version between slaves) log-bin:enabled
Thu Jul 28 10:39:19 2011 - [info]     Replicating from 192.168.100.197(192.168.100.197:3306)
Thu Jul 28 10:39:19 2011 - [info] 
Thu Jul 28 10:39:19 2011 - [info] * Phase 3.2: Saving Dead Master's Binlog Phase..
Thu Jul 28 10:39:19 2011 - [info] 
Thu Jul 28 10:39:19 2011 - [info] Fetching dead master's binary logs..
Thu Jul 28 10:39:19 2011 - [info] Executing command on the dead master ha-db01(192.168.100.197:3306): save_binary_logs --command=save --start_file=mysql-bin.000045  --start_pos=107 --binlog_dir=/var/lib/mysql --output_file=/var/log/masterha/app1/saved_master_binlog_from_ha-db01_3306_20110728103909.binlog --handle_raw_binlog=1 --disable_log_bin=0 --manager_version=0.50
  Creating /var/log/masterha/app1 if not exists..    ok.
 Concat binary/relay logs from mysql-bin.000045 pos 107 to mysql-bin.000045 EOF into /var/log/masterha/app1/saved_master_binlog_from_ha-db01_3306_20110728103909.binlog ..
  Dumping binlog format description event, from position 0 to 107.. ok.
  Dumping effective binlog data from /var/lib/mysql/mysql-bin.000045 position 107 to tail(126).. ok.
 Concat succeeded.
saved_master_binlog_from_ha-db01_3306_20110728103909.binlog                                    100%  126     0.1KB/s   00:00    
Thu Jul 28 10:39:20 2011 - [info] scp from root@192.168.100.197:/var/log/masterha/app1/saved_master_binlog_from_ha-db01_3306_20110728103909.binlog to local:/var/log/masterha/app1/saved_master_binlog_from_ha-db01_3306_20110728103909.binlog succeeded.
Thu Jul 28 10:39:20 2011 - [info] HealthCheck: SSH to ha-db02 is reachable.
Thu Jul 28 10:39:21 2011 - [info] HealthCheck: SSH to ha-mgr01 is reachable.
Thu Jul 28 10:39:21 2011 - [info] 
Thu Jul 28 10:39:21 2011 - [info] * Phase 3.3: Determining New Master Phase..
Thu Jul 28 10:39:21 2011 - [info] 
Thu Jul 28 10:39:21 2011 - [info] Finding the latest slave that has all relay logs for recovering other slaves..
Thu Jul 28 10:39:21 2011 - [info] All slaves received relay logs to the same position. No need to resync each other.
Thu Jul 28 10:39:21 2011 - [info] ha-db02 can be new master.
Thu Jul 28 10:39:21 2011 - [info] New master is ha-db02(192.168.100.198:3306)
Thu Jul 28 10:39:21 2011 - [info] Starting master failover..
Thu Jul 28 10:39:21 2011 - [info] 
From:
ha-db01 (current master)
 +--ha-db02
 +--ha-mgr01
To:
ha-db02 (new master)
 +--ha-mgr01
Starting master switch from ha-db01(192.168.100.197:3306) to ha-db02(192.168.100.198:3306)? (yes/NO): yes
Thu Jul 28 10:39:29 2011 - [info] New master decided manually is ha-db02(192.168.100.198:3306)
Thu Jul 28 10:39:29 2011 - [info] 
Thu Jul 28 10:39:29 2011 - [info] * Phase 3.3: New Master Diff Log Generation Phase..
Thu Jul 28 10:39:29 2011 - [info] 
Thu Jul 28 10:39:29 2011 - [info]  This server has all relay logs. No need to generate diff files from the latest slave.
Thu Jul 28 10:39:29 2011 - [info] Sending binlog..
saved_master_binlog_from_ha-db01_3306_20110728103909.binlog                                    100%  126     0.1KB/s   00:00    
Thu Jul 28 10:39:30 2011 - [info] scp from local:/var/log/masterha/app1/saved_master_binlog_from_ha-db01_3306_20110728103909.binlog to root@ha-db02:/var/log/masterha/app1/saved_master_binlog_from_ha-db01_3306_20110728103909.binlog succeeded.
Thu Jul 28 10:39:30 2011 - [info] 
Thu Jul 28 10:39:30 2011 - [info] * Phase 3.4: Master Log Apply Phase..
Thu Jul 28 10:39:30 2011 - [info] 
Thu Jul 28 10:39:30 2011 - [info] *NOTICE: If any error happens from this phase, manual recovery is needed.
Thu Jul 28 10:39:30 2011 - [info] Starting recovery on ha-db02(192.168.100.198:3306)..
Thu Jul 28 10:39:30 2011 - [info]  Generating diffs succeeded.
Thu Jul 28 10:39:30 2011 - [info] Waiting until all relay logs are applied.
Thu Jul 28 10:39:30 2011 - [info]  done.
Thu Jul 28 10:39:30 2011 - [info] Getting slave status..
Thu Jul 28 10:39:30 2011 - [info] This slave(ha-db02)'s Exec_Master_Log_Pos equals to Read_Master_Log_Pos(mysql-bin.000045:107). No need to recover from Exec_Master_Log_Pos.
Thu Jul 28 10:39:30 2011 - [info] Connecting to the target slave host ha-db02, running recover script..
Thu Jul 28 10:39:30 2011 - [info] Executing command: apply_diff_relay_logs --command=apply --slave_user=root --slave_host=ha-db02 --slave_ip=192.168.100.198  --slave_port=3306 --apply_files=/var/log/masterha/app1/saved_master_binlog_from_ha-db01_3306_20110728103909.binlog --workdir=/var/log/masterha/app1 --target_version=5.5.12-log --timestamp=20110728103909 --handle_raw_binlog=1 --disable_log_bin=0 --manager_version=0.50 --slave_pass=xxx
Thu Jul 28 10:39:30 2011 - [info] 
Applying differential binary/relay log files /var/log/masterha/app1/saved_master_binlog_from_ha-db01_3306_20110728103909.binlog on ha-db02:3306. This may take long time...
Applying log files succeeded.
Thu Jul 28 10:39:30 2011 - [info]  All relay logs were successfully applied.
Thu Jul 28 10:39:30 2011 - [info] Getting new master's binlog name and position..
Thu Jul 28 10:39:30 2011 - [info]  mysql-bin.000006:107
Thu Jul 28 10:39:30 2011 - [info]  All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='ha-db02 or 192.168.100.198', MASTER_PORT=3306, MASTER_LOG_FILE='mysql-bin.000006', MASTER_LOG_POS=107, MASTER_USER='replication', MASTER_PASSWORD='xxx';
Thu Jul 28 10:39:30 2011 - [warn] master_ip_failover_script is not set. Skipping taking over new master ip address.
Thu Jul 28 10:39:30 2011 - [info] ** Finished master recovery successfully.
Thu Jul 28 10:39:30 2011 - [info] * Phase 3: Master Recovery Phase completed.
Thu Jul 28 10:39:30 2011 - [info] 
Thu Jul 28 10:39:30 2011 - [info] * Phase 4: Slaves Recovery Phase..
Thu Jul 28 10:39:30 2011 - [info] 
Thu Jul 28 10:39:30 2011 - [info] * Phase 4.1: Starting Parallel Slave Diff Log Generation Phase..
Thu Jul 28 10:39:30 2011 - [info] 
Thu Jul 28 10:39:30 2011 - [info] -- Slave diff file generation on host ha-mgr01(192.168.100.200:3306) started, pid: 18983. Check tmp log /var/log/masterha/app1/ha-mgr02_3306_20110728103909.log if it takes time..
Thu Jul 28 10:39:30 2011 - [info] 
Thu Jul 28 10:39:30 2011 - [info] Log messages from ha-mgr01 ...
Thu Jul 28 10:39:30 2011 - [info] 
Thu Jul 28 10:39:30 2011 - [info]  This server has all relay logs. No need to generate diff files from the latest slave.
Thu Jul 28 10:39:30 2011 - [info] End of log messages from ha-mgr01.
Thu Jul 28 10:39:30 2011 - [info] -- ha-mgr01(192.168.100.200:3306) has the latest relay log events.
Thu Jul 28 10:39:30 2011 - [info] Generating relay diff files from the latest slave succeeded.
Thu Jul 28 10:39:30 2011 - [info] 
Thu Jul 28 10:39:30 2011 - [info] * Phase 4.2: Starting Parallel Slave Log Apply Phase..
Thu Jul 28 10:39:30 2011 - [info] 
Thu Jul 28 10:39:30 2011 - [info] -- Slave recovery on host ha-mgr01(192.168.100.200:3306) started, pid: 18985. Check tmp log /var/log/masterha/app1/ha-mgr02_3306_20110728103909.log if it takes time..
saved_master_binlog_from_ha-db01_3306_20110728103909.binlog                                    100%  126     0.1KB/s   00:00    
Thu Jul 28 10:39:31 2011 - [info] 
Thu Jul 28 10:39:31 2011 - [info] Log messages from ha-mgr01 ...
Thu Jul 28 10:39:31 2011 - [info] 
Thu Jul 28 10:39:30 2011 - [info] Sending binlog..
Thu Jul 28 10:39:31 2011 - [info] scp from local:/var/log/masterha/app1/saved_master_binlog_from_ha-db01_3306_20110728103909.binlog to root@ha-mgr01:/var/log/masterha/app1/saved_master_binlog_from_ha-db01_3306_20110728103909.binlog succeeded.
Thu Jul 28 10:39:31 2011 - [info] Starting recovery on ha-mgr01(192.168.100.200:3306)..
Thu Jul 28 10:39:31 2011 - [info]  Generating diffs succeeded.
Thu Jul 28 10:39:31 2011 - [info] Waiting until all relay logs are applied.
Thu Jul 28 10:39:31 2011 - [info]  done.
Thu Jul 28 10:39:31 2011 - [info] Getting slave status..
Thu Jul 28 10:39:31 2011 - [info] This slave(ha-mgr01)'s Exec_Master_Log_Pos equals to Read_Master_Log_Pos(mysql-bin.000045:107). No need to recover from Exec_Master_Log_Pos.
Thu Jul 28 10:39:31 2011 - [info] Connecting to the target slave host ha-mgr01, running recover script..
Thu Jul 28 10:39:31 2011 - [info] Executing command: apply_diff_relay_logs --command=apply --slave_user=root --slave_host=ha-mgr01 --slave_ip=192.168.100.200  --slave_port=3306 --apply_files=/var/log/masterha/app1/saved_master_binlog_from_ha-db01_3306_20110728103909.binlog --workdir=/var/log/masterha/app1 --target_version=5.5.12-log --timestamp=20110728103909 --handle_raw_binlog=1 --disable_log_bin=0 --manager_version=0.50 --slave_pass=xxx
Thu Jul 28 10:39:31 2011 - [info] 
Applying differential binary/relay log files /var/log/masterha/app1/saved_master_binlog_from_ha-db01_3306_20110728103909.binlog on ha-mgr01:3306. This may take long time...
Applying log files succeeded.
Thu Jul 28 10:39:31 2011 - [info]  All relay logs were successfully applied.
Thu Jul 28 10:39:31 2011 - [info]  Resetting slave ha-mgr01(192.168.100.200:3306) and starting replication from the new master ha-db02(192.168.100.198:3306)..
Thu Jul 28 10:39:31 2011 - [info]  Executed CHANGE MASTER.
Thu Jul 28 10:39:31 2011 - [info]  Slave started.
Thu Jul 28 10:39:31 2011 - [info] End of log messages from ha-mgr01.
Thu Jul 28 10:39:31 2011 - [info] -- Slave recovery on host ha-mgr01(192.168.100.200:3306) succeeded.
Thu Jul 28 10:39:31 2011 - [info] All new slave servers recovered successfully.
Thu Jul 28 10:39:31 2011 - [info] 
Thu Jul 28 10:39:31 2011 - [info] * Phase 5: New master cleanup phease..
Thu Jul 28 10:39:31 2011 - [info] 
Thu Jul 28 10:39:31 2011 - [info] Resetting slave info on the new master..
Thu Jul 28 10:39:31 2011 - [info] Master failover to ha-db02(192.168.100.198:3306) completed successfully.
Thu Jul 28 10:39:31 2011 - [info] 
----- Failover Report -----
app1: MySQL Master failover ha-db01 to ha-db02 succeeded
Master ha-db01 is down!

Check MHA Manager logs at ha-mgr01 for details.


Started manual(interactive) failover.
The latest slave ha-db02(192.168.100.198:3306) has all relay logs for recovery.
Selected ha-db02 as a new master.
ha-db02: OK: Applying all logs succeeded.
ha-mgr01: This host has the latest relay log events.
Generating relay diff files from the latest slave succeeded.
ha-mgr01: OK: Applying all logs succeeded. Slave started, replicating from ha-db02.
ha-db02: Resetting slave info succeeded.
Master failover to ha-db02(192.168.100.198:3306) completed successfully.
Thu Jul 28 10:39:31 2011 - [info] Sending mail..
Unknown option: conf

Displaying a list of replication slaves registered with new Master

# mysql -uroot -pmysql -e 'SHOW SLAVE HOSTS\G' -h ha-db02
*************************** 1. row ***************************
Server_id: 300
     Host: 
     Port: 3306
Master_id: 200

Checking the replication status of the slave servers

# mysql -uroot -pmysql -e 'SHOW SLAVE STATUS\G' -h localhost
*************************** 1. row ***************************
               Slave_IO_State: Waiting for master to send event
                  Master_Host: 192.168.100.198
                  Master_User: replication
                  Master_Port: 3306
                Connect_Retry: 60
              Master_Log_File: mysql-bin.000006
          Read_Master_Log_Pos: 107
               Relay_Log_File: ha-mgr01-relay-bin.000002
                Relay_Log_Pos: 253
        Relay_Master_Log_File: mysql-bin.000006
             Slave_IO_Running: Yes
            Slave_SQL_Running: Yes
              Replicate_Do_DB: 
          Replicate_Ignore_DB: 
           Replicate_Do_Table: 
       Replicate_Ignore_Table: 
      Replicate_Wild_Do_Table: 
  Replicate_Wild_Ignore_Table: 
                   Last_Errno: 0
                   Last_Error: 
                 Skip_Counter: 0
          Exec_Master_Log_Pos: 107
              Relay_Log_Space: 412
              Until_Condition: None
               Until_Log_File: 
                Until_Log_Pos: 0
           Master_SSL_Allowed: No
           Master_SSL_CA_File: 
           Master_SSL_CA_Path: 
              Master_SSL_Cert: 
            Master_SSL_Cipher: 
               Master_SSL_Key: 
        Seconds_Behind_Master: 0
Master_SSL_Verify_Server_Cert: No
                Last_IO_Errno: 0
                Last_IO_Error: 
               Last_SQL_Errno: 0
               Last_SQL_Error: 
  Replicate_Ignore_Server_Ids: 
             Master_Server_Id: 200

# mysql -uroot -pmysql -e 'SHOW SLAVE STATUS\G' -h ha-db02
*************************** 1. row ***************************
               Slave_IO_State: 
                  Master_Host: 192.168.100.197
                  Master_User: replication
                  Master_Port: 3306
                Connect_Retry: 60
              Master_Log_File: 
          Read_Master_Log_Pos: 4
               Relay_Log_File: mysqld-relay-bin.000001
                Relay_Log_Pos: 4
        Relay_Master_Log_File: 
             Slave_IO_Running: No
            Slave_SQL_Running: No
              Replicate_Do_DB: 
          Replicate_Ignore_DB: 
           Replicate_Do_Table: 
       Replicate_Ignore_Table: 
      Replicate_Wild_Do_Table: 
  Replicate_Wild_Ignore_Table: 
                   Last_Errno: 0
                   Last_Error: 
                 Skip_Counter: 0
          Exec_Master_Log_Pos: 0
              Relay_Log_Space: 126
              Until_Condition: None
               Until_Log_File: 
                Until_Log_Pos: 0
           Master_SSL_Allowed: No
           Master_SSL_CA_File: 
           Master_SSL_CA_Path: 
              Master_SSL_Cert: 
            Master_SSL_Cipher: 
               Master_SSL_Key: 
        Seconds_Behind_Master: NULL
Master_SSL_Verify_Server_Cert: No
                Last_IO_Errno: 0
                Last_IO_Error: 
               Last_SQL_Errno: 0
               Last_SQL_Error: 
  Replicate_Ignore_Server_Ids: 
             Master_Server_Id: 100

Lastly, let's see how master server fails over automatically.