Difference between revisions of "Dataguard"
(→Dataguard broker managed standby (dgmgrl) not applying logs after restart) |
(→See how up-to-date a standby is) |
||
| (22 intermediate revisions by the same user not shown) | |||
| Line 1: | Line 1: | ||
* [https://docs.oracle.com/cd/B28359_01/server.111/b28282/configbp006.htm#CHDFBGFE Configuring Oracle Database 11g with Oracle Data Guard (SDU size etc...)] | * [https://docs.oracle.com/cd/B28359_01/server.111/b28282/configbp006.htm#CHDFBGFE Configuring Oracle Database 11g with Oracle Data Guard (SDU size etc...)] | ||
| − | ===Start up a physical standby=== | + | * [http://www.datadisk.co.uk/html_docs/oracle_dg/monitoring.htm Monitoring a data guard setup - datadisk.co.uk] |
| + | * [http://emrebaransel.blogspot.co.uk/2013/07/data-guard-queries.html Some data guard queries - emre baransel] | ||
| + | * [http://www.oracle.com/technetwork/articles/sql/11g-dataguard-083323.html Arup Nanda - some good standby stuff (Active Data Guard, Snapshot Standby, Conversion from Physical to Logical Standby, Rolling Upgrade, Redo Compression...)] | ||
| + | * [https://saruamit4.wordpress.com/2014/05/03/recovering-standby-database-using-scn-based-backup/#more-441 Amit Saraswat - Use incremental SCN backup to allow a physical standby to catch up with primary] | ||
| + | * [https://www.pythian.com/blog/oracle-database-12c-network-recovery-in-rman/ Pythian - Close a big gap in physical standby using network recovery (12c)] | ||
| + | * [http://www.dba-scripts.com/articles/dataguard-standby/recover-standby-over-network-oracle-12c/ dba-scripts.com recover standby over network oracle 12c] | ||
| + | ===Start up the apply on a physical standby=== | ||
startup nomount | startup nomount | ||
alter database mount standby database; | alter database mount standby database; | ||
| − | alter database recover managed standby database disconnect; | + | alter database recover managed standby database disconnect; |
| − | ===Stop a physical standby=== | + | |
| + | ===Stop the apply on a physical standby=== | ||
alter database recover managed standby database cancel; | alter database recover managed standby database cancel; | ||
| + | |||
===Register a missing logfile=== | ===Register a missing logfile=== | ||
alter database register physical logfile '<fullpath/filename>'; | alter database register physical logfile '<fullpath/filename>'; | ||
| + | ===Show apply activity on the standby=== | ||
| + | <pre> | ||
| + | select process | ||
| + | , status | ||
| + | , client_process | ||
| + | , sequence# | ||
| + | , block# | ||
| + | , active_agents | ||
| + | , known_agents | ||
| + | from v$managed_standby | ||
| + | / | ||
| + | </pre> | ||
| + | shows something like this | ||
| + | <pre> | ||
| + | PROCESS STATUS CLIENT_P SEQUENCE# BLOCK# ACTIVE_AGENTS KNOWN_AGENTS | ||
| + | --------- ------------ -------- ---------- ---------- ------------- ------------ | ||
| + | ARCH CLOSING ARCH 321071 45056 0 0 | ||
| + | ARCH CONNECTED ARCH 0 0 0 0 | ||
| + | ARCH CLOSING ARCH 197886 6144 0 0 | ||
| + | ARCH CLOSING ARCH 197887 10240 0 0 | ||
| + | MRP0 APPLYING_LOG N/A 197888 2063 17 17 | ||
| + | RFS IDLE UNKNOWN 0 0 0 0 | ||
| + | RFS IDLE LGWR 197888 2063 0 0 | ||
| + | RFS IDLE ARCH 0 0 0 0 | ||
| + | RFS RECEIVING LGWR 321072 34506 0 0 | ||
| + | RFS IDLE UNKNOWN 0 0 0 0 | ||
| + | RFS IDLE UNKNOWN 0 0 0 0 | ||
| + | |||
| + | 11 rows selected. | ||
| + | </pre> | ||
| + | |||
| + | ===See Dataguard error log messages=== | ||
| + | If data guard is not functioning correctly, check the errors in this log. | ||
| + | <pre> | ||
| + | select gvi.thread# | ||
| + | , timestamp | ||
| + | , message | ||
| + | from gv$dataguard_status gvds | ||
| + | , gv$instance gvi | ||
| + | where gvds.inst_id = gvi.inst_id | ||
| + | and severity in ('Error','Fatal') | ||
| + | order by timestamp | ||
| + | , thread# | ||
| + | / | ||
| + | </pre> | ||
| + | and | ||
| + | <pre> | ||
| + | select thread# | ||
| + | , dest_id | ||
| + | , gvad.status | ||
| + | , error | ||
| + | , fail_sequence | ||
| + | from gv$archive_dest gvad | ||
| + | , gv$instance gvi | ||
| + | where gvad.inst_id = gvi.inst_id | ||
| + | and destination is not null | ||
| + | order by thread# | ||
| + | , dest_id | ||
| + | / | ||
| + | </pre> | ||
| + | |||
===Check which logs are missing=== | ===Check which logs are missing=== | ||
Assuming dest_2 is the standby | Assuming dest_2 is the standby | ||
| Line 34: | Line 103: | ||
select database_role | select database_role | ||
from v$database; | from v$database; | ||
| + | ===Show various details on health of standby database=== | ||
| + | <pre> | ||
| + | -- This script is to be run on the Standby of a Data Guard Physical Standby Site | ||
| + | |||
| + | set echo off | ||
| + | set feedback off | ||
| + | column timecol new_value tstamp | ||
| + | column spool_extension new_value suffix | ||
| + | select to_char(sysdate,'Mondd_hhmi') timecol from sys.dual; | ||
| + | column output new_value dbname | ||
| + | select value || '_' output from v$parameter where name = 'db_name'; | ||
| + | |||
| + | -- Output the results to this file | ||
| + | |||
| + | spool dg_Standby_diag_&&dbname&&tstamp | ||
| + | set lines 132 | ||
| + | set pagesize 500 | ||
| + | set numformat 999999999999999 | ||
| + | set trim on | ||
| + | set trims on | ||
| + | |||
| + | -- Get the current Date | ||
| + | |||
| + | set feedback on | ||
| + | select systimestamp from dual; | ||
| + | |||
| + | -- Standby Site Details | ||
| + | set heading off | ||
| + | set feedback off | ||
| + | select 'Standby Site Details' from dual; | ||
| + | select '********************' from dual; | ||
| + | set heading on | ||
| + | set feedback on | ||
| + | |||
| + | col db_unique_name format a15 | ||
| + | col flashb_on format a10 | ||
| + | |||
| + | select DB_UNIQUE_NAME,DATABASE_ROLE DB_ROLE,FORCE_LOGGING F_LOG,FLASHBACK_ON FLASHB_ON,LOG_MODE,OPEN_MODE, | ||
| + | GUARD_STATUS GUARD,PROTECTION_MODE PROT_MODE | ||
| + | from v$database; | ||
| + | |||
| + | -- Current SCN - this value on the primary and standby sites where real time apply is in place should be nearly the same | ||
| + | |||
| + | select DB_UNIQUE_NAME,SWITCHOVER_STATUS,CURRENT_SCN from v$database; | ||
| + | |||
| + | -- Incarnation Information | ||
| + | -- | ||
| + | |||
| + | set heading off | ||
| + | set feedback off | ||
| + | select 'Incarnation Destination Configuration' from dual; | ||
| + | select '*************************************' from dual; | ||
| + | set heading on | ||
| + | set feedback on | ||
| + | |||
| + | select INCARNATION# INC#, RESETLOGS_CHANGE# RS_CHANGE#, RESETLOGS_TIME, PRIOR_RESETLOGS_CHANGE# PRIOR_RS_CHANGE#, STATUS,FLASHBACK_DATABASE_ALLOWED FB_OK from v$database_incarnation; | ||
| + | |||
| + | |||
| + | set heading off | ||
| + | set feedback off | ||
| + | select 'Archive Destination Configuration' from dual; | ||
| + | select '*********************************' from dual; | ||
| + | set heading on | ||
| + | set feedback on | ||
| + | -- Current Archive Locations | ||
| + | -- | ||
| + | |||
| + | column host_name format a30 tru | ||
| + | column version format a10 tru | ||
| + | select INSTANCE_NAME,HOST_NAME,VERSION,ARCHIVER from v$instance; | ||
| + | |||
| + | column destination format a35 wrap | ||
| + | column process format a7 | ||
| + | column archiver format a8 | ||
| + | column dest_id format 99999999 | ||
| + | |||
| + | select DEST_ID,DESTINATION,STATUS,TARGET,ARCHIVER,PROCESS,REGISTER,TRANSMIT_MODE | ||
| + | from v$archive_dest | ||
| + | where DESTINATION IS NOT NULL; | ||
| + | |||
| + | column name format a22 | ||
| + | column value format a100 | ||
| + | select NAME,VALUE from v$parameter where NAME like 'log_archive_dest%' and upper(VALUE) like 'SERVICE%'; | ||
| + | |||
| + | set heading off | ||
| + | set feedback off | ||
| + | select 'Archive Destination Errors' from dual; | ||
| + | select '**************************' from dual; | ||
| + | set heading on | ||
| + | set feedback on | ||
| + | |||
| + | column error format a55 tru | ||
| + | select DEST_ID,STATUS,ERROR from v$archive_dest | ||
| + | where DESTINATION IS NOT NULL; | ||
| + | |||
| + | column message format a80 | ||
| + | select MESSAGE, TIMESTAMP | ||
| + | from v$dataguard_status | ||
| + | where SEVERITY in ('Error','Fatal') | ||
| + | order by TIMESTAMP; | ||
| + | |||
| + | -- Redo Log configuration | ||
| + | -- The size of the standby redo logs must match exactly the size on the online redo logs | ||
| + | |||
| + | set heading off | ||
| + | set feedback off | ||
| + | select 'Data Guard Redo Log Configuration' from dual; | ||
| + | select '*********************************' from dual; | ||
| + | set heading on | ||
| + | set feedback on | ||
| + | |||
| + | select GROUP# STANDBY_GROUP#,THREAD#,SEQUENCE#,BYTES,USED,ARCHIVED,STATUS from v$standby_log order by GROUP#,THREAD#; | ||
| + | |||
| + | select GROUP# ONLINE_GROUP#,THREAD#,SEQUENCE#,BYTES,ARCHIVED,STATUS from v$log order by GROUP#,THREAD#; | ||
| + | |||
| + | -- Data Guard Parameters | ||
| + | -- | ||
| + | set heading off | ||
| + | set feedback off | ||
| + | select 'Data Guard Related Parameters' from dual; | ||
| + | select '*****************************' from dual; | ||
| + | set heading on | ||
| + | set feedback on | ||
| + | |||
| + | column name format a30 | ||
| + | column value format a100 | ||
| + | select NAME,VALUE from v$parameter where NAME IN ('db_unique_name','cluster_database','dg_broker_start','dg_broker_config_file1','dg_broker_config_file2','fal_client','fal_server','log_archive_config','log_archive_trace','log_archive_max_processes','archive_lag_target','remote_login_password_file','redo_transport_user') order by name; | ||
| + | |||
| + | -- Managed Recovery State | ||
| + | |||
| + | set heading off | ||
| + | set feedback off | ||
| + | select 'Data Guard Apply Status' from dual; | ||
| + | select '***********************' from dual; | ||
| + | set heading on | ||
| + | set feedback on | ||
| + | |||
| + | select systimestamp from dual; | ||
| + | |||
| + | column client_pid format a10 | ||
| + | select PROCESS,STATUS,CLIENT_PROCESS,CLIENT_PID,THREAD#,SEQUENCE#,BLOCK#,ACTIVE_AGENTS,KNOWN_AGENTS | ||
| + | from v$managed_standby order by CLIENT_PROCESS,THREAD#,SEQUENCE#; | ||
| + | |||
| + | exec DBMS_LOCK.SLEEP(10); | ||
| + | |||
| + | select systimestamp from dual; | ||
| + | |||
| + | select PROCESS,STATUS,CLIENT_PROCESS,CLIENT_PID,THREAD#,SEQUENCE#,BLOCK#,ACTIVE_AGENTS,KNOWN_AGENTS | ||
| + | from v$managed_standby order by CLIENT_PROCESS,THREAD#,SEQUENCE#; | ||
| + | |||
| + | exec DBMS_LOCK.SLEEP(10); | ||
| + | |||
| + | select systimestamp from dual; | ||
| + | |||
| + | select PROCESS,STATUS,CLIENT_PROCESS,CLIENT_PID,THREAD#,SEQUENCE#,BLOCK#,ACTIVE_AGENTS,KNOWN_AGENTS | ||
| + | from v$managed_standby order by CLIENT_PROCESS,THREAD#,SEQUENCE#; | ||
| + | |||
| + | |||
| + | set heading off | ||
| + | set feedback off | ||
| + | select 'Data Guard Apply Lag' from dual; | ||
| + | select '********************' from dual; | ||
| + | set heading on | ||
| + | set feedback on | ||
| + | |||
| + | column name format a12 | ||
| + | column lag_time format a20 | ||
| + | column datum_time format a20 | ||
| + | column time_computed format a20 | ||
| + | SELECT NAME, VALUE LAG_TIME, DATUM_TIME, TIME_COMPUTED | ||
| + | from V$DATAGUARD_STATS where name like 'apply lag'; | ||
| + | |||
| + | -- If there is a lag remove the comment for the select below | ||
| + | --SELECT * FROM V$STANDBY_EVENT_HISTOGRAM WHERE NAME = 'apply lag' AND COUNT > 0; | ||
| + | |||
| + | set heading off | ||
| + | set feedback off | ||
| + | select 'Data Guard Gap Problems' from dual; | ||
| + | select '***********************' from dual; | ||
| + | set heading on | ||
| + | set feedback on | ||
| + | |||
| + | select * from v$archive_gap; | ||
| + | |||
| + | set heading off | ||
| + | set feedback off | ||
| + | select 'Data Guard Errors in the Last Hour' from dual; | ||
| + | select '**********************************' from dual; | ||
| + | set heading on | ||
| + | set feedback on | ||
| + | |||
| + | select TIMESTAMP,SEVERITY,ERROR_CODE,MESSAGE from v$dataguard_status where timestamp > systimestamp-1/24; | ||
| + | |||
| + | </pre> | ||
===See how up-to-date a standby is=== | ===See how up-to-date a standby is=== | ||
On the primary | On the primary | ||
| Line 76: | Line 339: | ||
and arch.thread# = appl.thread# | and arch.thread# = appl.thread# | ||
order by 1; | order by 1; | ||
| + | </pre> | ||
| + | |||
| + | or | ||
| + | <pre> | ||
| + | select a.thread# | ||
| + | , b.last_seq | ||
| + | , a.applied_seq | ||
| + | , to_char(a.last_app_timestamp,'DD-MON-YYYY HH24:MI:SS') last_app_timestamp | ||
| + | , b.last_seq-a.applied_seq arc_diff | ||
| + | from (select thread# | ||
| + | , max(sequence#) applied_seq | ||
| + | , max(next_time) last_app_timestamp | ||
| + | from gv$archived_log | ||
| + | where applied = 'YES' | ||
| + | group by thread# | ||
| + | ) a | ||
| + | , (select thread# | ||
| + | , max(sequence#) last_seq | ||
| + | from gv$archived_log | ||
| + | group by thread# | ||
| + | ) b | ||
| + | where a.thread# = b.thread# | ||
| + | / | ||
| + | |||
| + | </pre> | ||
| + | ===See how up-to-date a standby is (another version)=== | ||
| + | <pre> | ||
| + | PRIMARY_SQL> | ||
| + | select thread# | ||
| + | , max(sequence#) "Last Primary Seq Generated" | ||
| + | from v$archived_log val | ||
| + | , v$database vdb | ||
| + | where val.resetlogs_change# = vdb.resetlogs_change# | ||
| + | group by thread# | ||
| + | order by 1 | ||
| + | / | ||
| + | |||
| + | STANDBY_SQL> | ||
| + | select thread# | ||
| + | , max(sequence#) "Last Standby Seq Received" | ||
| + | from v$archived_log val | ||
| + | , v$database vdb | ||
| + | where val.resetlogs_change# = vdb.resetlogs_change# | ||
| + | group by thread# | ||
| + | order by 1 | ||
| + | / | ||
| + | |||
| + | STANDBY_SQL> | ||
| + | select thread# | ||
| + | , max(sequence#) "Last Standby Seq Applied" | ||
| + | from v$archived_log val | ||
| + | , v$database vdb | ||
| + | where val.resetlogs_change# = vdb.resetlogs_change# | ||
| + | and val.applied in ('YES', 'IN-MEMORY') | ||
| + | group by thread# | ||
| + | order by 1 | ||
| + | / | ||
</pre> | </pre> | ||
| Line 138: | Line 458: | ||
where 1=1 | where 1=1 | ||
and st.group# = lf.group# | and st.group# = lf.group# | ||
| + | |||
| + | ===Dataguard broker (dgmgrl) setup for management of physical standby databases=== | ||
| + | Assuming primary and standby already exist and have standby redologs setup, force logging etc...<br /> | ||
| + | Example: server1 - db_primary, server2 - db_standby<br /> | ||
| + | ====Preparation==== | ||
| + | On both systems... | ||
| + | <pre> | ||
| + | alter system set dg_broker_start=true; | ||
| + | </pre> | ||
| + | ====Create a configuration==== | ||
| + | From server1 | ||
| + | <pre> | ||
| + | dgmgrl sys/sys@db_primary | ||
| + | |||
| + | create configuration dg_config as primary database is db_primary connect identifier is db_primary; | ||
| + | </pre> | ||
| + | Configuration created, add the physical standby to it | ||
| + | <pre> | ||
| + | add database db_standby as connect identifier is db_standby maintained as physical; | ||
| + | </pre> | ||
| + | ====Configuration complete, enable it==== | ||
| + | <pre> | ||
| + | enable configuration; | ||
| + | </pre> | ||
| + | ====Show configuration==== | ||
| + | <pre> | ||
| + | show configuration; | ||
| + | </pre> | ||
| + | ====Show individual database information==== | ||
| + | <pre> | ||
| + | show database db_primary; | ||
| + | show database db_standby; | ||
| + | </pre> | ||
| + | ====Switchover primary to standby and vice versa==== | ||
| + | From server1 | ||
| + | <pre> | ||
| + | dgmgrl sys/sys@db_primary | ||
| + | switchover to db_standby; | ||
| + | </pre> | ||
| + | ====Switch back to the original situation==== | ||
| + | From server2 | ||
| + | <pre> | ||
| + | dgmgrl sys/sys@db_standby | ||
| + | switchover to db_primary; | ||
| + | </pre> | ||
| + | ====Failover==== | ||
| + | The difference between switchover and failover is that you control a switchover, a failover happens bacause the primary db is no longer available<br /> | ||
| + | From server2 | ||
| + | <pre> | ||
| + | dgmgrl sys/sys@db_standby | ||
| + | failover to db_standby; | ||
| + | </pre> | ||
| + | Backup this database immediately as it is now the only database and has no standby. This can be done with RMAN while it is running.<br /> | ||
| + | If flashback was not enabled, the old primary is now useless and will need to be recreated as a standby.<br /> | ||
| + | If, however flashback was enabled, we can use this to restart the old primary as a standby.<br /> | ||
| + | From server1 | ||
| + | <pre> | ||
| + | dgmgrl sys/sys@db_primary | ||
| + | reinstate database db_primary; | ||
| + | </pre> | ||
| + | ====If flashback was not enabled, recreate old primary as a standby something like this==== | ||
| + | <pre> | ||
| + | sqlplus / as sysdba | ||
| + | shu abort; | ||
| + | exit; | ||
| + | </pre> | ||
| + | <pre> | ||
| + | rm $ORACLE_HOME/dbs/*<SID>.ora | ||
| + | rm -Rf /oracle/<SID>/oradata*/* | ||
| + | rm -Rf /oracle/<SID>/fra/* | ||
| + | rm -Rf /oracle/<SID>/admin/* | ||
| + | mkdir -p /oracle/<SID>/oradata1/ /oracle/<SID>/oradata2/ /oracle/<SID>/oradata3/ | ||
| + | mkdir -p /oracle/<SID>/fra/ | ||
| + | mkdir -p /oracle/<SID>/admin/adump | ||
| + | </pre> | ||
| + | <pre> | ||
| + | echo "*.db_name='db'" > /tmp/initdb_primary.ora | ||
| + | export ORACLE_SID=db | ||
| + | sqlplus / as sysdba | ||
| + | startup nomount pfile='/tmp/initdb_primary.ora'; | ||
| + | exit | ||
| + | </pre> | ||
| + | <pre> | ||
| + | rman target sys/sys@db_standby auxiliary sys/sys@db_primary | ||
| + | |||
| + | duplicate target database | ||
| + | for standby | ||
| + | from active database | ||
| + | dorecover | ||
| + | spfile | ||
| + | set db_unique_name='DB_PRIMARY' comment 'Standby' | ||
| + | set db_file_name_convert='/old/dbfile/dir1/','/new/dbfile/dir1/','/old/dbfile/dir2/','/new/dbfile/dir2/' | ||
| + | set log_file_name_convert='/old/logfile/dir1/','/new/logfile/dir1/','/old/logfile/dir2/','/new/logfile/dir2/' | ||
| + | nofilenamecheck; | ||
| + | </pre> | ||
| + | <pre> | ||
| + | dgmgrl sys/sys@db_standby | ||
| + | enable database db_primary; | ||
| + | show configuration; | ||
| + | </pre> | ||
===Dataguard broker managed standby (dgmgrl) not applying logs after restart=== | ===Dataguard broker managed standby (dgmgrl) not applying logs after restart=== | ||
| Line 186: | Line 606: | ||
SUCCESS | SUCCESS | ||
Now check alert log. All is working again. | Now check alert log. All is working again. | ||
| + | |||
| + | ===(Re)Build a physical standby from an active primary database=== | ||
| + | Run from the standby server. Trivial mods to parameterise it soon! | ||
| + | <pre> | ||
| + | #!/usr/bin/ksh | ||
| + | # ----------- | ||
| + | # How to call: ./dataguard_rebuild.ksh OTMPROD | ||
| + | # ----------- | ||
| + | # Set environment Variables | ||
| + | ############################################## | ||
| + | export ORACLE_SID=$1 | ||
| + | ORAENV_ASK="NO" | ||
| + | . oraenv | ||
| + | ORAENV_ASK="YES" | ||
| + | echo ' ORACLE_HOME is ==> ' $ORACLE_HOME | ||
| + | export PATH=$ORACLE_HOME/bin:$PATH | ||
| + | |||
| + | $ORACLE_HOME/bin/rman <<EORMAN >> /oracle/${ORACLE_SID}/admin/change/rebuild_dataguard_${ORACLE_SID}_$(date +%Y%m%d_%H%M).log | ||
| + | connect target sys/********@OTMPROD | ||
| + | connect auxiliary sys/********@OTMPRODS | ||
| + | |||
| + | DUPLICATE TARGET DATABASE | ||
| + | FOR STANDBY | ||
| + | FROM ACTIVE DATABASE | ||
| + | SPFILE | ||
| + | SET db_unique_name='OTMPRODS' | ||
| + | set LOG_ARCHIVE_DEST_1='LOCATION=/oracle/arch/OTMPROD/ VALID_FOR=(ALL_LOGFILES,ALL_ROLES) DB_UNIQUE_NAME=OTMPRODS' | ||
| + | SET LOG_ARCHIVE_DEST_2='service=OTMPROD LGWR ASYNC valid_for=(ONLINE_LOGFILE,Primary_ROLE) db_unique_name=OTMPROD' | ||
| + | SET FAL_SERVER='OTMPROD' | ||
| + | SET FAL_CLIENT='OTMPRODS' | ||
| + | SET LOCAL_LISTENER='(ADDRESS = (PROTOCOL = TCP)(HOST = <standby_server>)(PORT = 1522))' | ||
| + | NOFILENAMECHECK; | ||
| + | |||
| + | EORMAN | ||
| + | |||
| + | </pre> | ||
| + | |||
| + | ===Recover a physical standby from an gap using SCN based incremental (catch up) backup=== | ||
| + | Essential steps using nfs mounted filesystem (/shared_backups) shared between both primary and secondary nodes to avoid scp copies | ||
| + | <pre> | ||
| + | Primary | ||
| + | SQL> alter system set log_archive_dest_state_2=defer scope=both; | ||
| + | |||
| + | Standby | ||
| + | SQL> select min(checkpoint_change#) lowest_scn from v$datafile_header order by 1; | ||
| + | |||
| + | Primary | ||
| + | RMAN> backup incremental from scn <lowest_scn> database format '/shared_backups/catchup_backup_%U'; | ||
| + | |||
| + | Standby | ||
| + | SQL> alter database recover managed standby database cancel; | ||
| + | |||
| + | Standby | ||
| + | RMAN> catalog start with '/shared_backups'; | ||
| + | RMAN> recover database noredo; | ||
| + | |||
| + | Primary | ||
| + | SQL> backup current controlfile for standby format '/shared_backups/catchup_control.ctl'; | ||
| + | |||
| + | Standby | ||
| + | RMAN> report schema; | ||
| + | RMAN> shutdown immediate; | ||
| + | RMAN> startup nomount; | ||
| + | RMAN> restore standby controlfile from '/shared_backups/catchup_control.ctl' | ||
| + | SQL> alter database mount; | ||
| + | RMAN> report schema; | ||
| + | RMAN> run { | ||
| + | set newname for datafile 1 to '+DATA/<standbySID>/data1/system.dbf'; | ||
| + | set newname for datafile 2 to '+DATA/<standbySID>/data1/sysaux.dbf'; | ||
| + | set newname for datafile 3 to '+DATA/<standbySID>/data2/users.dbf'; | ||
| + | set newname for datafile 4 to '+DATA/<standbySID>/data3/undotbs.dbf'; | ||
| + | switch datafile all; | ||
| + | } | ||
| + | RMAN> run { | ||
| + | set newname for tempfile 1 to '+DATA/<standbySID>/data2/temptbs.dbf'; | ||
| + | switch tempfile all; | ||
| + | } | ||
| + | RMAN> report schema; | ||
| + | |||
| + | Primary | ||
| + | SQL> alter system set log_archive_dest_state_2=enable scope=both; | ||
| + | |||
| + | Standby | ||
| + | SQL> alter database recover managed standby database cancel; | ||
| + | SQL> alter database recover managed standby database using current logfile disconnect from session; | ||
| + | SQL> select inst_id, process, thread#, sequence#, blocks, status from gv$managed_standby where process like '%MRP%'; | ||
| + | |||
| + | </pre> | ||
===Other resources=== | ===Other resources=== | ||
Latest revision as of 10:48, 1 June 2017
- Configuring Oracle Database 11g with Oracle Data Guard (SDU size etc...)
- Monitoring a data guard setup - datadisk.co.uk
- Some data guard queries - emre baransel
- Arup Nanda - some good standby stuff (Active Data Guard, Snapshot Standby, Conversion from Physical to Logical Standby, Rolling Upgrade, Redo Compression...)
- Amit Saraswat - Use incremental SCN backup to allow a physical standby to catch up with primary
- Pythian - Close a big gap in physical standby using network recovery (12c)
- dba-scripts.com recover standby over network oracle 12c
Contents
- 1 Start up the apply on a physical standby
- 2 Stop the apply on a physical standby
- 3 Register a missing logfile
- 4 Show apply activity on the standby
- 5 See Dataguard error log messages
- 6 Check which logs are missing
- 7 Stop/Start log file shipping
- 8 Start FAL tracing on the primary
- 9 Stop the DataGuard broker
- 10 Are we a standby?
- 11 Show various details on health of standby database
- 12 See how up-to-date a standby is
- 13 See how up-to-date a standby is (another version)
- 14 Show info on all log destinations
- 15 Display log destinations options
- 16 Show any standby logs
- 17 Dataguard broker (dgmgrl) setup for management of physical standby databases
- 17.1 Preparation
- 17.2 Create a configuration
- 17.3 Configuration complete, enable it
- 17.4 Show configuration
- 17.5 Show individual database information
- 17.6 Switchover primary to standby and vice versa
- 17.7 Switch back to the original situation
- 17.8 Failover
- 17.9 If flashback was not enabled, recreate old primary as a standby something like this
- 18 Dataguard broker managed standby (dgmgrl) not applying logs after restart
- 19 (Re)Build a physical standby from an active primary database
- 20 Recover a physical standby from an gap using SCN based incremental (catch up) backup
- 21 Other resources
Start up the apply on a physical standby[edit]
startup nomount alter database mount standby database; alter database recover managed standby database disconnect;
Stop the apply on a physical standby[edit]
alter database recover managed standby database cancel;
Register a missing logfile[edit]
alter database register physical logfile '<fullpath/filename>';
Show apply activity on the standby[edit]
select process , status , client_process , sequence# , block# , active_agents , known_agents from v$managed_standby /
shows something like this
PROCESS STATUS CLIENT_P SEQUENCE# BLOCK# ACTIVE_AGENTS KNOWN_AGENTS --------- ------------ -------- ---------- ---------- ------------- ------------ ARCH CLOSING ARCH 321071 45056 0 0 ARCH CONNECTED ARCH 0 0 0 0 ARCH CLOSING ARCH 197886 6144 0 0 ARCH CLOSING ARCH 197887 10240 0 0 MRP0 APPLYING_LOG N/A 197888 2063 17 17 RFS IDLE UNKNOWN 0 0 0 0 RFS IDLE LGWR 197888 2063 0 0 RFS IDLE ARCH 0 0 0 0 RFS RECEIVING LGWR 321072 34506 0 0 RFS IDLE UNKNOWN 0 0 0 0 RFS IDLE UNKNOWN 0 0 0 0 11 rows selected.
See Dataguard error log messages[edit]
If data guard is not functioning correctly, check the errors in this log.
select gvi.thread#
, timestamp
, message
from gv$dataguard_status gvds
, gv$instance gvi
where gvds.inst_id = gvi.inst_id
and severity in ('Error','Fatal')
order by timestamp
, thread#
/
and
select thread# , dest_id , gvad.status , error , fail_sequence from gv$archive_dest gvad , gv$instance gvi where gvad.inst_id = gvi.inst_id and destination is not null order by thread# , dest_id /
Check which logs are missing[edit]
Assuming dest_2 is the standby
select local.thread#
, local.sequence# from
(select thread#
, sequence#
from v$archived_log
where dest_id=1) local
where local.sequence# not in
(select sequence#
from v$archived_log
where dest_id=2 and
thread# = local.thread#);
Stop/Start log file shipping[edit]
On the primary
alter system set log_archive_dest_state_2 = 'defer'; alter system set log_archive_dest_state_2 = 'enable';
Start FAL tracing on the primary[edit]
alter system set LOG_ARCHIVE_TRACE = 128;
Stop the DataGuard broker[edit]
alter system set dg_broker_start=false;
Are we a standby?[edit]
select database_role from v$database;
Show various details on health of standby database[edit]
-- This script is to be run on the Standby of a Data Guard Physical Standby Site
set echo off
set feedback off
column timecol new_value tstamp
column spool_extension new_value suffix
select to_char(sysdate,'Mondd_hhmi') timecol from sys.dual;
column output new_value dbname
select value || '_' output from v$parameter where name = 'db_name';
-- Output the results to this file
spool dg_Standby_diag_&&dbname&&tstamp
set lines 132
set pagesize 500
set numformat 999999999999999
set trim on
set trims on
-- Get the current Date
set feedback on
select systimestamp from dual;
-- Standby Site Details
set heading off
set feedback off
select 'Standby Site Details' from dual;
select '********************' from dual;
set heading on
set feedback on
col db_unique_name format a15
col flashb_on format a10
select DB_UNIQUE_NAME,DATABASE_ROLE DB_ROLE,FORCE_LOGGING F_LOG,FLASHBACK_ON FLASHB_ON,LOG_MODE,OPEN_MODE,
GUARD_STATUS GUARD,PROTECTION_MODE PROT_MODE
from v$database;
-- Current SCN - this value on the primary and standby sites where real time apply is in place should be nearly the same
select DB_UNIQUE_NAME,SWITCHOVER_STATUS,CURRENT_SCN from v$database;
-- Incarnation Information
--
set heading off
set feedback off
select 'Incarnation Destination Configuration' from dual;
select '*************************************' from dual;
set heading on
set feedback on
select INCARNATION# INC#, RESETLOGS_CHANGE# RS_CHANGE#, RESETLOGS_TIME, PRIOR_RESETLOGS_CHANGE# PRIOR_RS_CHANGE#, STATUS,FLASHBACK_DATABASE_ALLOWED FB_OK from v$database_incarnation;
set heading off
set feedback off
select 'Archive Destination Configuration' from dual;
select '*********************************' from dual;
set heading on
set feedback on
-- Current Archive Locations
--
column host_name format a30 tru
column version format a10 tru
select INSTANCE_NAME,HOST_NAME,VERSION,ARCHIVER from v$instance;
column destination format a35 wrap
column process format a7
column archiver format a8
column dest_id format 99999999
select DEST_ID,DESTINATION,STATUS,TARGET,ARCHIVER,PROCESS,REGISTER,TRANSMIT_MODE
from v$archive_dest
where DESTINATION IS NOT NULL;
column name format a22
column value format a100
select NAME,VALUE from v$parameter where NAME like 'log_archive_dest%' and upper(VALUE) like 'SERVICE%';
set heading off
set feedback off
select 'Archive Destination Errors' from dual;
select '**************************' from dual;
set heading on
set feedback on
column error format a55 tru
select DEST_ID,STATUS,ERROR from v$archive_dest
where DESTINATION IS NOT NULL;
column message format a80
select MESSAGE, TIMESTAMP
from v$dataguard_status
where SEVERITY in ('Error','Fatal')
order by TIMESTAMP;
-- Redo Log configuration
-- The size of the standby redo logs must match exactly the size on the online redo logs
set heading off
set feedback off
select 'Data Guard Redo Log Configuration' from dual;
select '*********************************' from dual;
set heading on
set feedback on
select GROUP# STANDBY_GROUP#,THREAD#,SEQUENCE#,BYTES,USED,ARCHIVED,STATUS from v$standby_log order by GROUP#,THREAD#;
select GROUP# ONLINE_GROUP#,THREAD#,SEQUENCE#,BYTES,ARCHIVED,STATUS from v$log order by GROUP#,THREAD#;
-- Data Guard Parameters
--
set heading off
set feedback off
select 'Data Guard Related Parameters' from dual;
select '*****************************' from dual;
set heading on
set feedback on
column name format a30
column value format a100
select NAME,VALUE from v$parameter where NAME IN ('db_unique_name','cluster_database','dg_broker_start','dg_broker_config_file1','dg_broker_config_file2','fal_client','fal_server','log_archive_config','log_archive_trace','log_archive_max_processes','archive_lag_target','remote_login_password_file','redo_transport_user') order by name;
-- Managed Recovery State
set heading off
set feedback off
select 'Data Guard Apply Status' from dual;
select '***********************' from dual;
set heading on
set feedback on
select systimestamp from dual;
column client_pid format a10
select PROCESS,STATUS,CLIENT_PROCESS,CLIENT_PID,THREAD#,SEQUENCE#,BLOCK#,ACTIVE_AGENTS,KNOWN_AGENTS
from v$managed_standby order by CLIENT_PROCESS,THREAD#,SEQUENCE#;
exec DBMS_LOCK.SLEEP(10);
select systimestamp from dual;
select PROCESS,STATUS,CLIENT_PROCESS,CLIENT_PID,THREAD#,SEQUENCE#,BLOCK#,ACTIVE_AGENTS,KNOWN_AGENTS
from v$managed_standby order by CLIENT_PROCESS,THREAD#,SEQUENCE#;
exec DBMS_LOCK.SLEEP(10);
select systimestamp from dual;
select PROCESS,STATUS,CLIENT_PROCESS,CLIENT_PID,THREAD#,SEQUENCE#,BLOCK#,ACTIVE_AGENTS,KNOWN_AGENTS
from v$managed_standby order by CLIENT_PROCESS,THREAD#,SEQUENCE#;
set heading off
set feedback off
select 'Data Guard Apply Lag' from dual;
select '********************' from dual;
set heading on
set feedback on
column name format a12
column lag_time format a20
column datum_time format a20
column time_computed format a20
SELECT NAME, VALUE LAG_TIME, DATUM_TIME, TIME_COMPUTED
from V$DATAGUARD_STATS where name like 'apply lag';
-- If there is a lag remove the comment for the select below
--SELECT * FROM V$STANDBY_EVENT_HISTOGRAM WHERE NAME = 'apply lag' AND COUNT > 0;
set heading off
set feedback off
select 'Data Guard Gap Problems' from dual;
select '***********************' from dual;
set heading on
set feedback on
select * from v$archive_gap;
set heading off
set feedback off
select 'Data Guard Errors in the Last Hour' from dual;
select '**********************************' from dual;
set heading on
set feedback on
select TIMESTAMP,SEVERITY,ERROR_CODE,MESSAGE from v$dataguard_status where timestamp > systimestamp-1/24;
See how up-to-date a standby is[edit]
On the primary
set numwidth 15 select max(sequence#) current_seq from v$log;
On the standby
set numwidth 15
select max(applied_seq#) last_seq
from v$archive_dest_status;
select arch.thread# "Thread"
, arch.sequence# "Last Sequence Received"
, appl.sequence# "Last Sequence Applied"
, (arch.sequence# - appl.sequence#) "Difference"
from (
select thread#
, sequence#
from v$archived_log
where 1=1
and ( thread#,first_time ) in (
select thread#, max(first_time)
from v$archived_log
group by thread#
)
) arch
,
(
select thread#
, sequence#
from v$log_history
where 1=1
and ( thread#,first_time ) in ( select thread# ,max(first_time)
from v$log_history
group by thread#
)
) appl
where 1=1
and arch.thread# = appl.thread#
order by 1;
or
select a.thread#
, b.last_seq
, a.applied_seq
, to_char(a.last_app_timestamp,'DD-MON-YYYY HH24:MI:SS') last_app_timestamp
, b.last_seq-a.applied_seq arc_diff
from (select thread#
, max(sequence#) applied_seq
, max(next_time) last_app_timestamp
from gv$archived_log
where applied = 'YES'
group by thread#
) a
, (select thread#
, max(sequence#) last_seq
from gv$archived_log
group by thread#
) b
where a.thread# = b.thread#
/
See how up-to-date a standby is (another version)[edit]
PRIMARY_SQL>
select thread#
, max(sequence#) "Last Primary Seq Generated"
from v$archived_log val
, v$database vdb
where val.resetlogs_change# = vdb.resetlogs_change#
group by thread#
order by 1
/
STANDBY_SQL>
select thread#
, max(sequence#) "Last Standby Seq Received"
from v$archived_log val
, v$database vdb
where val.resetlogs_change# = vdb.resetlogs_change#
group by thread#
order by 1
/
STANDBY_SQL>
select thread#
, max(sequence#) "Last Standby Seq Applied"
from v$archived_log val
, v$database vdb
where val.resetlogs_change# = vdb.resetlogs_change#
and val.applied in ('YES', 'IN-MEMORY')
group by thread#
order by 1
/
Show info on all log destinations[edit]
On the primary
set lines 100 set numwidth 15 column ID format 99 column "SRLs" format 99 column active format 99 col type format a4 select ds.dest_id id , ad.status , ds.database_mode db_mode , ad.archiver type , ds.recovery_mode , ds.protection_mode , ds.standby_logfile_count "SRLs" , ds.standby_logfile_active active , ds.archived_seq# from v$archive_dest_status ds , v$archive_dest ad where 1=1 and ds.dest_id = ad.dest_id and ad.status != 'INACTIVE' order by ds.dest_id;
Display log destinations options[edit]
On the standby
set numwidth 8 set lines 100 column id format 99 select dest_id id , archiver , transmit_mode , affirm , async_blocks async , net_timeout net_time , delay_mins delay , reopen_secs reopen , register , binding from v$archive_dest order by dest_id;
Show any standby logs[edit]
set lines 100 set pages 999 col member format a70 select st.group# , st.sequence# , ceil(st.bytes/1048576) mb , lf.member from v$standby_log st , v$logfile lf where 1=1 and st.group# = lf.group#
Dataguard broker (dgmgrl) setup for management of physical standby databases[edit]
Assuming primary and standby already exist and have standby redologs setup, force logging etc...
Example: server1 - db_primary, server2 - db_standby
Preparation[edit]
On both systems...
alter system set dg_broker_start=true;
Create a configuration[edit]
From server1
dgmgrl sys/sys@db_primary create configuration dg_config as primary database is db_primary connect identifier is db_primary;
Configuration created, add the physical standby to it
add database db_standby as connect identifier is db_standby maintained as physical;
Configuration complete, enable it[edit]
enable configuration;
Show configuration[edit]
show configuration;
Show individual database information[edit]
show database db_primary; show database db_standby;
Switchover primary to standby and vice versa[edit]
From server1
dgmgrl sys/sys@db_primary switchover to db_standby;
Switch back to the original situation[edit]
From server2
dgmgrl sys/sys@db_standby switchover to db_primary;
Failover[edit]
The difference between switchover and failover is that you control a switchover, a failover happens bacause the primary db is no longer available
From server2
dgmgrl sys/sys@db_standby failover to db_standby;
Backup this database immediately as it is now the only database and has no standby. This can be done with RMAN while it is running.
If flashback was not enabled, the old primary is now useless and will need to be recreated as a standby.
If, however flashback was enabled, we can use this to restart the old primary as a standby.
From server1
dgmgrl sys/sys@db_primary reinstate database db_primary;
If flashback was not enabled, recreate old primary as a standby something like this[edit]
sqlplus / as sysdba shu abort; exit;
rm $ORACLE_HOME/dbs/*<SID>.ora rm -Rf /oracle/<SID>/oradata*/* rm -Rf /oracle/<SID>/fra/* rm -Rf /oracle/<SID>/admin/* mkdir -p /oracle/<SID>/oradata1/ /oracle/<SID>/oradata2/ /oracle/<SID>/oradata3/ mkdir -p /oracle/<SID>/fra/ mkdir -p /oracle/<SID>/admin/adump
echo "*.db_name='db'" > /tmp/initdb_primary.ora export ORACLE_SID=db sqlplus / as sysdba startup nomount pfile='/tmp/initdb_primary.ora'; exit
rman target sys/sys@db_standby auxiliary sys/sys@db_primary
duplicate target database
for standby
from active database
dorecover
spfile
set db_unique_name='DB_PRIMARY' comment 'Standby'
set db_file_name_convert='/old/dbfile/dir1/','/new/dbfile/dir1/','/old/dbfile/dir2/','/new/dbfile/dir2/'
set log_file_name_convert='/old/logfile/dir1/','/new/logfile/dir1/','/old/logfile/dir2/','/new/logfile/dir2/'
nofilenamecheck;
dgmgrl sys/sys@db_standby enable database db_primary; show configuration;
Dataguard broker managed standby (dgmgrl) not applying logs after restart[edit]
After restarting the Primary and Standby databases manually, the redo logs were not being applied on the standby.
The command used to start the standby database was:
alter database recover managed standby database using current logfile disconnect from session;
It seems this is not good enough, we see a problem in the broker configuration...
(0) OTMPRODS oracle@ravotm14:/home/oracle] dgmgrl
DGMGRL for Linux: Version 11.2.0.4.0 - 64bit Production
Copyright (c) 2000, 2009, Oracle. All rights reserved.
Welcome to DGMGRL, type "help" for information.
DGMGRL> connect sys/********@otmprod
Connected.
DGMGRL> show configuration
Configuration - dg_otmprod
Protection Mode: MaxPerformance
Databases:
otmprod - Primary database
otmprods - Physical standby database (disabled)
Fast-Start Failover: DISABLED
Configuration Status:
SUCCESS
DGMGRL>
We need to re-enable the Standby database in the broker configuration
DGMGRL> enable database OTMPRODS; Enabled. DGMGRL> show configuration Configuration - dg_otmprod Protection Mode: MaxPerformance Databases: otmprod - Primary database otmprods - Physical standby database Fast-Start Failover: DISABLED Configuration Status: SUCCESS
Now check alert log. All is working again.
(Re)Build a physical standby from an active primary database[edit]
Run from the standby server. Trivial mods to parameterise it soon!
#!/usr/bin/ksh
# -----------
# How to call: ./dataguard_rebuild.ksh OTMPROD
# -----------
# Set environment Variables
##############################################
export ORACLE_SID=$1
ORAENV_ASK="NO"
. oraenv
ORAENV_ASK="YES"
echo ' ORACLE_HOME is ==> ' $ORACLE_HOME
export PATH=$ORACLE_HOME/bin:$PATH
$ORACLE_HOME/bin/rman <<EORMAN >> /oracle/${ORACLE_SID}/admin/change/rebuild_dataguard_${ORACLE_SID}_$(date +%Y%m%d_%H%M).log
connect target sys/********@OTMPROD
connect auxiliary sys/********@OTMPRODS
DUPLICATE TARGET DATABASE
FOR STANDBY
FROM ACTIVE DATABASE
SPFILE
SET db_unique_name='OTMPRODS'
set LOG_ARCHIVE_DEST_1='LOCATION=/oracle/arch/OTMPROD/ VALID_FOR=(ALL_LOGFILES,ALL_ROLES) DB_UNIQUE_NAME=OTMPRODS'
SET LOG_ARCHIVE_DEST_2='service=OTMPROD LGWR ASYNC valid_for=(ONLINE_LOGFILE,Primary_ROLE) db_unique_name=OTMPROD'
SET FAL_SERVER='OTMPROD'
SET FAL_CLIENT='OTMPRODS'
SET LOCAL_LISTENER='(ADDRESS = (PROTOCOL = TCP)(HOST = <standby_server>)(PORT = 1522))'
NOFILENAMECHECK;
EORMAN
Recover a physical standby from an gap using SCN based incremental (catch up) backup[edit]
Essential steps using nfs mounted filesystem (/shared_backups) shared between both primary and secondary nodes to avoid scp copies
Primary
SQL> alter system set log_archive_dest_state_2=defer scope=both;
Standby
SQL> select min(checkpoint_change#) lowest_scn from v$datafile_header order by 1;
Primary
RMAN> backup incremental from scn <lowest_scn> database format '/shared_backups/catchup_backup_%U';
Standby
SQL> alter database recover managed standby database cancel;
Standby
RMAN> catalog start with '/shared_backups';
RMAN> recover database noredo;
Primary
SQL> backup current controlfile for standby format '/shared_backups/catchup_control.ctl';
Standby
RMAN> report schema;
RMAN> shutdown immediate;
RMAN> startup nomount;
RMAN> restore standby controlfile from '/shared_backups/catchup_control.ctl'
SQL> alter database mount;
RMAN> report schema;
RMAN> run {
set newname for datafile 1 to '+DATA/<standbySID>/data1/system.dbf';
set newname for datafile 2 to '+DATA/<standbySID>/data1/sysaux.dbf';
set newname for datafile 3 to '+DATA/<standbySID>/data2/users.dbf';
set newname for datafile 4 to '+DATA/<standbySID>/data3/undotbs.dbf';
switch datafile all;
}
RMAN> run {
set newname for tempfile 1 to '+DATA/<standbySID>/data2/temptbs.dbf';
switch tempfile all;
}
RMAN> report schema;
Primary
SQL> alter system set log_archive_dest_state_2=enable scope=both;
Standby
SQL> alter database recover managed standby database cancel;
SQL> alter database recover managed standby database using current logfile disconnect from session;
SQL> select inst_id, process, thread#, sequence#, blocks, status from gv$managed_standby where process like '%MRP%';