Monday, May 10, 2010

Diagnosing RAC cluster waits.

We encountered high cluster waits on RAC, and found that it was caused by stats running on the second instance via DBMS_SCHEDULER. Some scripts to help diagnose the issue :-

# Find what programs are connected to what instance :-

select inst_id, service_name, username, program, machine, count(*)
from gv$session
where username = 'MYAPP'
group by inst_id, service_name, username, program,machine;

# find what blocks are cached on each instance in the cluster for a particular segment

select
inst_id,
s.owner owner,
object_name objname,
subobject_name subobjname,
substr(object_type,1,10) objtype,
ts.block_size / 1024 blockkb,
buffer.blocks blocks,
s.blocks totalblocks,
(buffer.blocks * ts.block_size / 1024) memkb,
(buffer.blocks/decode(s.blocks, 0, .001, s.blocks))*100 bufferpercent
from
(select inst_id, o.owner, o.object_name, o.subobject_name,
o.object_type object_type, count(*) blocks
from dba_objects o, gv$bh bh
where o.object_id = bh.objd and o.owner not in ('SYS','SYSTEM')
group by inst_id, o.owner, o.object_name, o.subobject_name, o.object_type) buffer,
dba_segments s,
dba_tablespaces ts
where s.tablespace_name = ts.tablespace_name
and s.owner = buffer.owner
and s.segment_name = buffer.object_name
and s.SEGMENT_TYPE = buffer.object_type
and (s.PARTITION_NAME = buffer.subobject_name or buffer.subobject_name is null)
and s.segment_name = 'MYTABLE'
order by bufferpercent desc;

# Find what scheduler jobs ran on the second instance.

select log_date, job_name from DBA_SCHEDULER_JOB_RUN_DETAILS where instance_id = 2 and log_date > sysdate -7 order by log_date;