Monday, February 25, 2013

Java program to extract data from Oracle datafile blocks.

I wrote the following Java program to explore the Oracle data block.  It scans the specified Oracle datafile for blocks belonging to a particular object and then extracts the row data directly from datafile blocks.

Because it extracts directly from the block it can be used to extract data from corrupt files or blocks, truncated tables, and even extract deleted rows.  

The program is written purely for learning, so I don't guarantee it will extract all records in a block, or that it can handle all data types.  I am happy for anyone to copy the program or improve it, but I do not provide any guarantee for it.  Use it at your own risk.

It is a large Java program laid out in order of execution for easy reading and learning purposes.  Yes, I know that  a "real Java programmer" would do a better job, and there is lots of room for improvement, but it is good enough for learning about Oracle data structures.

An example of the program output can be found here :- http://blog.contractoracle.com/2013/02/extracting-data-directly-from-oracle.html

# OraTabExport.java


import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

public class OraTabExport {

public static void main(String[] args) {
try {

        RandomAccessFile raf = new RandomAccessFile("D:\\oracle\\WIN64\\DATAFILE\\O1_MF_TEST_8LRBSQ59_.DBF", "r");
int BlockSize = 8192;
int ObjectID = 73520;

int StartByte = 0;
int ObjectIDLoc = 24;
int ITLSlotsLoc = 36;
int ITLSlots = 2;
int ITLSlot = 0;
int NumTablesLoc = 0;
int BlockObjectID = 0;
int FileSize =(int)raf.length();
int NumBlocks = FileSize / BlockSize;
int BlockNum = 0;
int DataHeaderStart = 0;
int TableDirectoryStart = 0;
int RowDirectoryStart =0;
int RowStart = 0;
int BlockHeaderSize = 20;
int TransactionHeaderSize = 24;
int DataHeaderSize = 14;
int TableDirectorySize = 4;
int NumTables = 0;
int TableNum = 0;
int NumTableRows = 0;
int TotalTableRows = 0;
int RowNumber = 0;
int NumColumns = 0;
int ColNum = 0;
int ColumnSize = 0;
int RowFlag = 0;
short RowOffset = 0;
int MinOffset = 0;
int MaxOffset = 0;
int OffsetCount = 0;
int ByteNum = 0;
Byte MyByte = 0;
char MyChar = 0;


for(BlockNum = 0; BlockNum <= NumBlocks -1; BlockNum++)
{
StartByte = BlockSize * BlockNum;
raf.seek(StartByte + ObjectIDLoc);
BlockObjectID = Integer.reverseBytes(raf.readInt());


raf.seek(StartByte + ITLSlotsLoc);
ITLSlots = raf.read();  

if ( BlockObjectID == ObjectID)
{

raf.seek(StartByte);

System.out.println("\nBlock " + BlockNum + " Contains Object ID " + BlockObjectID);

// The first 20 bytes is the Block Header

System.out.println("Block Header start       :- " + 0);
System.out.println(" Block Type 06-DATA          :- " + String.format("%02X ", raf.readByte()));
System.out.println(" Block Format                :- " + String.format("%02X ", raf.readByte()));
System.out.println(" Spare1                      :- " + String.format("%02X ", raf.readByte()));
System.out.println(" Spare2                      :- " + String.format("%02X ", raf.readByte()));
System.out.println(" Relative Block Address      :- " + Integer.reverseBytes(raf.readInt()));
System.out.println(" SCN Base                    :- " + Integer.reverseBytes(raf.readInt()));
System.out.println(" SCN Wrap                    :- " + Short.reverseBytes(raf.readShort()));
System.out.println(" Sequence                    :- " + String.format("%02X ", raf.readByte()));
System.out.println(" Flag 01-NEW                 :- " + String.format("%02X ", raf.readByte()));
System.out.println(" CheckSum                    :- " + Short.reverseBytes(raf.readShort()));
System.out.println(" Spare3                      :- " + Short.reverseBytes(raf.readShort()));

// The next 24 bytes are Fixed Transaction Header

System.out.println("Transaction Header start  :- " + BlockHeaderSize);
System.out.println(" Type 01-DATA 02-INDEX       :- " + String.format("%02X ", raf.readByte()));
System.out.println(" Spare 1                     :- " + String.format("%02X ", raf.readByte()));
System.out.println(" Spare 2                     :- " + String.format("%02X ", raf.readByte()));
System.out.println(" Spare 3                     :- " + String.format("%02X ", raf.readByte()));
System.out.println(" Object ID                   :- " + Integer.reverseBytes(raf.readInt()));
System.out.println(" Cleanout SCN Base           :- " + Integer.reverseBytes(raf.readInt()));
System.out.println(" Cleanout SCN Wrap           :- " + Short.reverseBytes(raf.readShort()));
System.out.println(" Spare 4                     :- " + String.format("%02X ", raf.readByte()));
System.out.println(" Spare 5                     :- " + String.format("%02X ", raf.readByte()));
System.out.println(" ITL Slots                   :- " + raf.readByte());
System.out.println(" UNKNOWN Byte                :- " + String.format("%02X ", raf.readByte()));
System.out.println(" Flag 00-FREE                :- " + String.format("%02X ", raf.readByte()));
System.out.println(" ITL TX Feeelist Slot        :- " + String.format("%02X ", raf.readByte()));
System.out.println(" Next Block On Free List     :- " + Integer.reverseBytes(raf.readInt()));

// Each ITL Slot has 24 bytes allocated.  

for(ITLSlot = 1; ITLSlot <= ITLSlots; ITLSlot++) 
{
System.out.println("ITLSlot                     :- " + ITLSlot);
System.out.println(" Undo Segment               :- " + Short.reverseBytes(raf.readShort()));
System.out.println(" Undo Segment Slot          :- " + Short.reverseBytes(raf.readShort()));
System.out.println(" Transaction Sequence       :- " + Integer.reverseBytes(raf.readInt()));
System.out.println(" Undo Block Address         :- " + Integer.reverseBytes(raf.readInt()));
System.out.println(" Undo Sequence              :- " + Short.reverseBytes(raf.readShort()));
System.out.println(" Undo Record Number         :- " + raf.readByte());
System.out.println(" Spare 1                    :- " + raf.readByte());
System.out.println(" Flag                       :- " + Short.reverseBytes(raf.readShort()));
System.out.println(" _ktbitun                   :- " + Short.reverseBytes(raf.readShort()));
System.out.println(" Base                       :- " + Integer.reverseBytes(raf.readInt()));
}

DataHeaderStart = BlockHeaderSize + TransactionHeaderSize + ITLSlots*TransactionHeaderSize + 8; 
raf.seek(StartByte + DataHeaderStart);

// The Data header contains details of the number of tables, rows, and free space in the block.

System.out.println("Data Header start        :- " + DataHeaderStart);
System.out.println(" Flags                       :- " + String.format("%02X ", raf.readByte()));
NumTables = raf.readByte(); 
System.out.println(" Number of Tables            :- " + NumTables);
System.out.println(" Number of Rows              :- " + Short.reverseBytes(raf.readShort()));
System.out.println(" Offset to Freespace Start   :- " + Short.reverseBytes(raf.readShort()));
System.out.println(" Offset to Freespace End     :- " + Short.reverseBytes(raf.readShort()));
System.out.println(" Available Space             :- " + Short.reverseBytes(raf.readShort()));
System.out.println(" Available Space after Commit:- " + Short.reverseBytes(raf.readShort()));

TableDirectoryStart = DataHeaderStart + DataHeaderSize;
raf.seek(StartByte + TableDirectoryStart);

// The Table Directory has a list of tables in the block and row counts

System.out.println("Table Directory start    :-" + TableDirectoryStart);
TotalTableRows = 0;

for(TableNum = 1; TableNum <= NumTables; TableNum++) 
{
System.out.println(" Table                       :- " + TableNum);
System.out.println("  Offset                      :- " + Short.reverseBytes(raf.readShort()));
NumTableRows = Short.reverseBytes(raf.readShort());
TotalTableRows = TotalTableRows + NumTableRows;
System.out.println("  Number of Rows              :- " + NumTableRows);
}

System.out.println(" Total Number of Rows        :- " + TotalTableRows);

RowDirectoryStart = TableDirectoryStart + NumTables*TableDirectorySize;
raf.seek(StartByte + RowDirectoryStart);

// The Row Directory contains a list of rows in the block, with offsets to the first byte

System.out.println("Row Directory start      :-" + RowDirectoryStart);

MinOffset = TotalTableRows*2;
MaxOffset = BlockSize - DataHeaderStart - 4;
RowOffset = 1;
OffsetCount = 0;

List RowOffsetList = new ArrayList();
while (RowOffset != 0 && OffsetCount < TotalTableRows) 
{
RowOffset = Short.reverseBytes(raf.readShort());
if (RowOffset > MinOffset && RowOffset < MaxOffset) 
{
OffsetCount = OffsetCount + 1;
System.out.println(" Row Offset " + OffsetCount + " :- " + RowOffset + "(+" + DataHeaderStart + ")");
RowOffsetList.add(RowOffset);
}
}

// Now go to each offset and get the row header which contains number of columns, row status etc

Iterator OffsetIterator = RowOffsetList.iterator();
RowNumber = 0;
while(OffsetIterator.hasNext())
{
RowOffset = (Short)OffsetIterator.next();
RowNumber = RowNumber + 1;
RowStart = DataHeaderStart + RowOffset;
raf.seek(StartByte + RowStart);
System.out.println(RowNumber + " Row Header start  :-" + RowStart);

RowFlag = raf.readByte();
String RowFlagString = String.format("%02X ", RowFlag );
String RowFlagDecode = "";

// Translate the Row Flag to identify deleted rows, table data etc

switch (RowFlag) {
case 3:  RowFlagDecode = "Junk ?";
                    break;
 case 12:  RowFlagDecode = "Chained ?";
                    break;
case 32:  RowFlagDecode = "Chained ?";
                    break;
case 44:  RowFlagDecode = "Table Data";
                    break;
 case 60:  RowFlagDecode = "Deleted";
                     break;
case 84:  RowFlagDecode = "Cluster Key ?";
                    break;
case -84:  RowFlagDecode = "Cluster Key ?";
                    break;
case 108:  RowFlagDecode = "Cluster Data";
                     break;
            case 124:  RowFlagDecode = "Deleted";
                     break;
default: RowFlagDecode = "Invalid Flag";
                     break;
}

System.out.println(" Flags              :-  " + RowFlagString + " " + RowFlagDecode);
System.out.println(" Lock Status        :- " + String.format("%02X ", raf.readByte()));

NumColumns = raf.readByte();
System.out.println(" Number of Columns  :- " + NumColumns);
if (NumColumns == 0)
{
System.out.println("Integer         :- " + Integer.reverseBytes(raf.readInt()));
System.out.println("Short           :- " + Short.reverseBytes(raf.readShort()));
}

if (NumTables > 1) 
{
System.out.println(" Cluster Byte       :- " + String.format("%02X ", raf.readByte()));
}
if (RowFlag == 76) 
{
System.out.println("Read Byte :- " + String.format("%02X ", raf.readByte()));
System.out.println("Read Byte :- " + String.format("%02X ", raf.readByte()));
System.out.println("Read Byte :- " + String.format("%02X ", raf.readByte()));
System.out.println("Read Byte :- " + String.format("%02X ", raf.readByte()));
System.out.println("Read Byte :- " + String.format("%02X ", raf.readByte()));
System.out.println("Read Byte :- " + String.format("%02X ", raf.readByte()));
}
if (RowFlag == 12) 
{
//System.out.println("Read Byte :- " + String.format("%02X ", raf.readByte()));
//System.out.println("Read Byte :- " + String.format("%02X ", raf.readByte()));
//System.out.println("Read Byte :- " + String.format("%02X ", raf.readByte()));
//System.out.println("Read Byte :- " + String.format("%02X ", raf.readByte()));
//System.out.println("Read Byte :- " + String.format("%02X ", raf.readByte()));
//System.out.println("Read Byte :- " + String.format("%02X ", raf.readByte()));
System.out.println("Integer         :- " + Integer.reverseBytes(raf.readInt()));
System.out.println("Short           :- " + Short.reverseBytes(raf.readShort()));
}

if (RowFlag != 3) {

for(ColNum = 1; ColNum <= NumColumns; ColNum++){
ColumnSize = raf.read();


if (ColumnSize == 255) 
{
System.out.print("Column " + ColNum + " Bytes " + ColumnSize + " Data :- ");
System.out.println("NULL");
}
else if (ColumnSize == 254)
{
ColumnSize = Short.reverseBytes(raf.readShort());
System.out.print("Column " + ColNum + " Bytes " + ColumnSize + " Data :- ");
for(ByteNum = 0; ByteNum < ColumnSize; ByteNum++) 
{

//System.out.print("" + String.format("%02X ", MyByte));
//MyByte = raf.readByte();
System.out.print("" + (char)raf.readByte());
}
System.out.println("");
}
else 
{

System.out.print("Column " + ColNum + " Bytes " + ColumnSize + " Data :- ");
for(ByteNum = 0; ByteNum < ColumnSize; ByteNum++) 
{

MyByte = raf.readByte();

//System.out.print("" + raf.readByte());


if (MyByte < 32 || MyByte > 126)
{
System.out.print("" + String.format("%02X ", MyByte));
}
else
{
 MyChar = ((char)Integer.parseInt(String.format("%02X", MyByte), 16));
   System.out.print("" + MyChar);

}
}
System.out.println("");

}
}
}


}
}

raf.close();

      } catch (IOException ex) {
         ex.printStackTrace();
      }
}
}