Normally, to decide whether a database's redo logs need to be resized, we look at how frequently the online redo logs switch, check the alert log for "checkpoint not complete" messages, and check whether the log file wait events in the AWR report look abnormal, and from that decide whether to increase the redo log size or the number of log groups. Here is a very handy script that reports the average hourly redo generated over a recent window (as written it looks back 30 days of AWR snapshots) together with LGWR wait statistics, so it also serves as an estimate of redo generation.
The last column is the suggested amount of redo to add (MB); the output also includes checkpoint-related waits, overall CPU usage, physical read and write activity, log wait events, and process counts.
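Before running the full script, the switch-frequency check mentioned above can also be done by hand. A minimal sketch, counting log switches per hour from the standard v$log_history view (the 7-day window is an arbitrary choice):

-- Log switches per hour over the last 7 days
select to_char(first_time, 'YYYY-MM-DD HH24') hr,
       count(*)                               log_switches
from   v$log_history
where  first_time > sysdate - 7
group  by to_char(first_time, 'YYYY-MM-DD HH24')
order  by 1;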
The full script is as follows:
set pagesize 50000 linesize 300

col instance_number     format 99             head 'In|st'
col tim                                       head 'Period end'
col cpu_sec             format 999,999,999.9  head 'CPU used|sec'
col phy_reads           format 999,999,999    head 'Physical|reads'
col phy_writes          format 999,999,999    head 'Physical|writes'
col cr_served           format 999,999,999    head 'CR blocks|served'
col current_served      format 999,999,999    head 'CUR blocks|served'
col redo_mb             format 999,999,999.9  head 'Redo, MB'
col processes           format 999,999        head 'Proc|esses'
col avg_df_seq          format 9,999.9        head 'Avg 1|read'
col avg_df_scat         format 9,999.9        head 'Avg N|read'
col redo_diff_to_md_pct format 999,999        head 'Redo Diff|to median, %'
col avg_lfpw            format 999.99         head 'Avg|LFPW'
col avg_log_sync        format 9,999.99       head 'Avg Log|Sync, ms'
col log_ckpt_sec        format 999,999        head 'CKPT|waits, s'
col redo_needed         format 999,999        head 'Redo to|Add, MB'

compute max of cpu_sec        on instance_number
compute max of phy_reads      on instance_number
compute max of phy_writes     on instance_number
compute max of cr_served      on instance_number
compute max of current_served on instance_number
compute max of phy_writes     on instance_number
compute max of redo_needed    on instance_number
compute max of log_ckpt_sec   on instance_number
compute max of avg_log_sync   on instance_number
compute max of avg_lfpw       on instance_number
compute max of redo_mb        on instance_number
compute max of processes      on instance_number
compute max of avg_df_seq     on instance_number
compute max of avg_df_scat    on instance_number

break on instance_number skip page

with t_interval as (
  select /*+ inline */ sysdate-30 begin, sysdate as end from dual
)
select
  stats.dbid dbid,
  stats.instance_number instance_number,
  to_char(stats.begin_interval_time, 'YYYYMMDD HH24MI') tim,
  stats.cpu_used / 100 cpu_sec,
  stats.phy_reads phy_reads,
  stats.phy_writes phy_writes,
  stats.cr_served cr_served,
  stats.current_served current_served,
  stats.redo_size / 1024 / 1024 redo_mb,
  procs.current_utilization processes,
  -- waits.df_seq_micro / 1000 / nullif(waits.df_seq_waits,0) avg_df_seq,
  waits.df_scat_micro / 1000 / nullif(waits.df_scat_waits,0) avg_df_scat,
  (stats.redo_size - stats.md_redo_size) * 100 / stats.md_redo_size redo_diff_to_md_pct,
  stats.redo_write_time * 10 / stats.redo_writes avg_lfpw,
  waits.log_sync_micro / nullif(waits.log_sync_waits, 0) / 1000 avg_log_sync,
  waits.log_ckpt_micro / 1e6 log_ckpt_sec,
  ( stats.redo_size / ( waits.snap_interval * 86400 ) )
    * ( waits.log_ckpt_micro / 1e6 ) / 1024 / 1024 redo_needed,
  stats.is_restart
from
  (
    select
      snap_id, begin_interval_time, snap_interval, instance_number, dbid,
      log_sync_micro, log_sync_waits, log_ckpt_micro, log_ckpt_waits,
      df_seq_micro, df_seq_waits, df_scat_micro, df_scat_waits,
      direct_micro, direct_waits,
      median(log_sync_micro / nullif(log_sync_waits, 0))
        over (partition by dbid, instance_number) md_log_sync_micro
    from
      (
        select
          snap_id, begin_interval_time, instance_number, dbid,
          max(snap_interval) snap_interval,
          max(decode(event_name, 'log file sync', wait_micro))  log_sync_micro,
          max(decode(event_name, 'log file sync', total_waits)) log_sync_waits,
          max(decode(event_name, 'log file switch (checkpoint incomplete)', wait_micro))  log_ckpt_micro,
          max(decode(event_name, 'log file switch (checkpoint incomplete)', total_waits)) log_ckpt_waits,
          max(decode(event_name, 'db file sequential read', wait_micro))  df_seq_micro,
          max(decode(event_name, 'db file sequential read', total_waits)) df_seq_waits,
          max(decode(event_name, 'db file scattered read', wait_micro))  df_scat_micro,
          max(decode(event_name, 'db file scattered read', total_waits)) df_scat_waits,
          max(decode(event_name, 'direct path read', wait_micro))  direct_micro,
          max(decode(event_name, 'direct path read', total_waits)) direct_waits
        from
          (
            select
              e.snap_id, e.instance_number, e.dbid,
              sn.begin_interval_time,
              cast(begin_interval_time as date)
                - cast(lag(begin_interval_time)
                         over (partition by e.dbid, e.instance_number, e.event_name
                               order by sn.begin_interval_time) as date) snap_interval,
              sn.startup_time,
              e.event_name,
              case
                when (sn.begin_interval_time >= sn.startup_time
                      and lag(sn.begin_interval_time)
                            over (partition by e.dbid, e.instance_number, e.event_name
                                  order by sn.begin_interval_time) < sn.startup_time)
                then e.time_waited_micro
                else e.time_waited_micro
                     - lag(e.time_waited_micro)
                         over (partition by e.dbid, e.instance_number, e.event_name
                               order by sn.begin_interval_time)
              end wait_micro,
              case
                when (sn.begin_interval_time >= sn.startup_time
                      and lag(sn.begin_interval_time)
                            over (partition by e.dbid, e.instance_number, e.event_name
                                  order by sn.begin_interval_time) < sn.startup_time)
                then e.total_waits
                else e.total_waits
                     - lag(e.total_waits)
                         over (partition by e.dbid, e.instance_number, e.event_name
                               order by sn.begin_interval_time)
              end total_waits
            from
              dba_hist_system_event e,
              dba_hist_snapshot sn,
              t_interval t
            where
              sn.snap_id = e.snap_id
              and sn.dbid = e.dbid
              and sn.instance_number = e.instance_number
              and sn.begin_interval_time between t.begin and t.end
              and e.event_name in (
                    'log file sync',
                    'log file switch (checkpoint incomplete)',
                    'db file sequential read',
                    'db file scattered read',
                    'direct path read'
                  )
          )
        group by dbid, instance_number, begin_interval_time, snap_id
      )
  ) waits,
  (
    select
      snap_id, begin_interval_time, instance_number, dbid,
      redo_size, redo_write_time, redo_writes, is_restart,
      cpu_used, phy_reads, phy_reads_cache, phy_writes, phy_writes_cache,
      cr_served, current_served,
      median(redo_size) over (partition by dbid, instance_number) md_redo_size
    from
      (
        select
          snap_id, begin_interval_time, instance_number, dbid,
          max(is_restart) is_restart,
          max(decode(stat_name, 'redo size', stat_diff))                         redo_size,
          max(decode(stat_name, 'redo write time', stat_diff))                   redo_write_time,
          max(decode(stat_name, 'redo writes', stat_diff))                       redo_writes,
          max(decode(stat_name, 'CPU used by this session', stat_diff))          cpu_used,
          max(decode(stat_name, 'physical read total IO requests', stat_diff))   phy_reads,
          max(decode(stat_name, 'physical reads cache', stat_diff))              phy_reads_cache,
          max(decode(stat_name, 'physical write total IO requests', stat_diff))  phy_writes,
          max(decode(stat_name, 'physical writes from cache', stat_diff))        phy_writes_cache,
          max(decode(stat_name, 'gc cr blocks served', stat_diff))               cr_served,
          max(decode(stat_name, 'gc current blocks served', stat_diff))          current_served
        from
          (
            select
              stats.snap_id, stats.instance_number, stats.dbid,
              sn.begin_interval_time,
              sn.startup_time,
              stats.stat_name,
              case
                when (sn.begin_interval_time >= sn.startup_time
                      and lag(sn.begin_interval_time)
                            over (partition by stats.dbid, stats.instance_number, stats.stat_id
                                  order by sn.begin_interval_time) < sn.startup_time)
                then stats.value
                else stats.value
                     - lag(stats.value)
                         over (partition by stats.dbid, stats.instance_number, stats.stat_id
                               order by stats.snap_id)
              end stat_diff,
              case
                when (sn.begin_interval_time >= sn.startup_time
                      and lag(sn.begin_interval_time)
                            over (partition by stats.dbid, stats.instance_number, stats.stat_id
                                  order by sn.begin_interval_time) < sn.startup_time)
                then 'Yes'
              end is_restart
            from
              dba_hist_sysstat stats,
              dba_hist_snapshot sn,
              t_interval t
            where
              sn.snap_id = stats.snap_id
              and sn.dbid = stats.dbid
              and sn.instance_number = stats.instance_number
              and sn.begin_interval_time between t.begin and t.end
              and stats.stat_name in (
                    'redo size',
                    'redo write time',
                    'redo writes',
                    'CPU used by this session',
                    'physical read total IO requests',
                    'physical reads cache',
                    'physical write total IO requests',
                    'physical writes from cache',
                    'gc cr blocks served',
                    'gc current blocks served'
                  )
          )
        group by dbid, instance_number, begin_interval_time, snap_id
      )
  ) stats,
  (
    select
      stats.snap_id, stats.instance_number, stats.dbid,
      stats.resource_name, stats.current_utilization
    from
      dba_hist_resource_limit stats,
      dba_hist_snapshot sn,
      t_interval t
    where
      sn.snap_id = stats.snap_id
      and sn.dbid = stats.dbid
      and sn.instance_number = stats.instance_number
      and sn.begin_interval_time between t.begin and t.end
      and stats.resource_name = 'processes'
  ) procs
where
  waits.dbid = stats.dbid
  and waits.instance_number = stats.instance_number
  and waits.snap_id = stats.snap_id
  and waits.dbid = procs.dbid
  and waits.instance_number = procs.instance_number
  and waits.snap_id = procs.snap_id
order by
  stats.dbid, stats.instance_number, stats.begin_interval_time
;
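The 'Redo to Add, MB' column is simply the snapshot's redo generation rate multiplied by the time spent waiting on 'log file switch (checkpoint incomplete)': redo_size / (snap_interval * 86400) gives bytes of redo per second, multiplying by log_ckpt_micro/1e6 gives the bytes that piled up while sessions waited on the checkpoint, and /1024/1024 converts to MB. As a rough worked example with hypothetical numbers: an instance generating 3,600 MB of redo in a one-hour snapshot (1 MB/s) that spends 200 seconds in checkpoint-incomplete waits gets a recommendation of about 200 MB of extra redo capacity.

To compare the recommendation with what is currently configured, a minimal sketch against the standard v$log view (sizes shown in MB, per thread and group):

-- Current online redo log configuration, for comparison with 'Redo to Add, MB'
select thread#, group#, bytes/1024/1024 size_mb, members, status
from   v$log
order  by thread#, group#;

If the recommended addition is consistently a sizeable fraction of the current group size, growing the log groups (or adding groups) is the usual response.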
This script is adapted from Pythian.