In Oracle by default VARCHAR2/CLOB data is encoded using UTF-8, that can result in a character take up to 4 bytes of storage. However NVARCHAR/NCLOB data is encoded in UTF-16, that results in a character taking 2 bytes each.
First consider the relevant portion of the UTF-8 encoding table along with UNICODE
-----------------------------------------------------
Unicode Character UTF-8 Character Name
-----------------------------------------------------
U+3053 こ e3 81 93 HIRAGANA LETTER KO
U+3093 ん e3 82 93 HIRAGANA LETTER N
U+306B に e3 81 ab HIRAGANA LETTER NI
U+3061 ち e3 81 a1 HIRAGANA LETTER TI
U+306F は e3 81 af HIRAGANA LETTER HA
-----------------------------------------------------
The following example provides the details for varchar2 & nvarchar2
1. Check the relevant database parameters
SELECT * FROM nls_database_parameters WHERE PARAMETER = 'NLS_NCHAR_CHARACTERSET';
-----------------------------------------------------
NLS_NCHAR_CHARACTERSET AL16UTF16
-----------------------------------------------------
SELECT * FROM nls_database_parameters WHERE parameter = 'NLS_CHARACTERSET';
-----------------------------------------------------
NLS_CHARACTERSET AL32UTF8
-----------------------------------------------------
2. Create the VARCHAR2 table
CREATE TABLE varchar2_demo (description VARCHAR2(50));
INSERT INTO varchar2_demo VALUES('こんにちは');
INSERT INTO varchar2_demo VALUES('ABCDE');
COMMIT;
3. Check the data and the encoded byte stream
SELECT description, DUMP(description,1016) FROM varchar2_demo;
--------------------------------------------------------------
こんにちは Typ=1 Len=15 CharacterSet=AL32UTF8: e3,81,93,e3,82,93,e3,81,ab,e3,81,a1,e3,81,af
ABCDE Typ=1 Len=5 CharacterSet=AL32UTF8: 41,42,43,44,45
--------------------------------------------------------------
4. Create the NVARCHAR2 table
CREATE TABLE nvarchar2_demo (description NVARCHAR2(50));
INSERT INTO nvarchar2_demo VALUES('こんにちは');
INSERT INTO nvarchar2_demo VALUES('ABCDE');
COMMIT;
5. Check the data and the encoded byte stream
SELECT description, DUMP(description,1016) FROM nvarchar2_demo;
--------------------------------------------------------
こんにちは Typ=1 Len=10 CharacterSet=AL16UTF16: 30,53,30,93,30,6b,30,61,30,6f
ABCDE Typ=1 Len=10 CharacterSet=AL16UTF16: 0,41,0,42,0,43,0,44,0,45
---------------------------------------------------------
The results of step 4 & 5 show that each character took either 1 or 3 bytes for VARCHAR2 whereas each character took 2 bytes for NVARCHAR2.
The following example provides the details for CLOB & NCLOB
6. Create the CLOB & NCLOB tables
CREATE TABLE clob_demo (id integer, description clob);
INSERT INTO clob_demo VALUES(1, 'こんにちは');
INSERT INTO clob_demo VALUES(2, 'ABCDE');
CREATE TABLE nclob_demo (id integer, description nclob);
INSERT INTO nclob_demo VALUES(1, 'こんにちは');
INSERT INTO nclob_demo VALUES(2, 'ABCDE');
COMMIT;
7. Check the data in the tables
SELECT * FROM clob_demo;
---------------------
1 こんにちは
2 ABCDE
---------------------
SELECT * FROM nclob_demo;
---------------------
1 こんにちは
2 ABCDE
---------------------
8. Check the encoded byte stream of first row in CLOB table
declare
cursor c1 is select description clb from clob_demo where id = 1;
c1rec c1%ROWTYPE;
dest_offset integer;
src_offset integer;
lang_context integer;
warning varchar2(1000);
blb blob;
buf raw(200);
blb_len integer;
blb_offset integer;
begin
open c1;
fetch c1 into c1rec;
close c1;
dbms_lob.createtemporary(blb, FALSE);
dest_offset := 1;
src_offset := 1;
lang_context := 0;
dbms_lob.converttoblob(blb, c1rec.clb, dbms_lob.getlength(c1rec.clb),
dest_offset, src_offset, 0, lang_context, warning);
blb_len := dbms_lob.getlength(blb);
blb_offset := 1;
dbms_lob.read(blb, blb_len, blb_offset, buf);
dbms_output.put_line(blb_len);
dbms_output.put_line(buf);
end;
/
-------------------------------------------------------
15
E3 81 93 E3 82 93 E3 81 AB E3 81 A1 E3 81 AF
-------------------------------------------------------
9. Check the encoded byte stream of second row in CLOB table
declare
cursor c1 is select description clb from clob_demo where id = 2;
c1rec c1%ROWTYPE;
dest_offset integer;
src_offset integer;
lang_context integer;
warning varchar2(1000);
blb blob;
buf raw(200);
blb_len integer;
blb_offset integer;
begin
open c1;
fetch c1 into c1rec;
close c1;
dbms_lob.createtemporary(blb, FALSE);
dest_offset := 1;
src_offset := 1;
lang_context := 0;
dbms_lob.converttoblob(blb, c1rec.clb, dbms_lob.getlength(c1rec.clb),
dest_offset, src_offset, 0, lang_context, warning);
blb_len := dbms_lob.getlength(blb);
blb_offset := 1;
dbms_lob.read(blb, blb_len, blb_offset, buf);
dbms_output.put_line(blb_len);
dbms_output.put_line(buf);
end;
/
--------------
5
41 42 43 44 45
--------------
10. Check the encoded byte stream of first row in NCLOB table
declare
cursor c1 is select description clb from nclob_demo where id = 1;
c1rec c1%ROWTYPE;
dest_offset integer;
src_offset integer;
lang_context integer;
warning varchar2(1000);
blb blob;
buf raw(200);
blb_len integer;
blb_offset integer;
begin
open c1;
fetch c1 into c1rec;
close c1;
dbms_lob.createtemporary(blb, FALSE);
dest_offset := 1;
src_offset := 1;
lang_context := 0;
dbms_lob.converttoblob(blb, c1rec.clb, dbms_lob.getlength(c1rec.clb),
dest_offset, src_offset, 0, lang_context, warning);
blb_len := dbms_lob.getlength(blb);
blb_offset := 1;
dbms_lob.read(blb, blb_len, blb_offset, buf);
dbms_output.put_line(blb_len);
dbms_output.put_line(buf);
end;
/
-------------------------------------------------------
10
30 53 30 93 30 6B 30 61 30 6F
-------------------------------------------------------
11. Check the encoded byte stream of second row in NCLOB table
declare
cursor c1 is select description clb from nclob_demo where id = 2;
c1rec c1%ROWTYPE;
dest_offset integer;
src_offset integer;
lang_context integer;
warning varchar2(1000);
blb blob;
buf raw(200);
blb_len integer;
blb_offset integer;
begin
open c1;
fetch c1 into c1rec;
close c1;
dbms_lob.createtemporary(blb, FALSE);
dest_offset := 1;
src_offset := 1;
lang_context := 0;
dbms_lob.converttoblob(blb, c1rec.clb, dbms_lob.getlength(c1rec.clb),
dest_offset, src_offset, 0, lang_context, warning);
blb_len := dbms_lob.getlength(blb);
blb_offset := 1;
dbms_lob.read(blb, blb_len, blb_offset, buf);
dbms_output.put_line(blb_len);
dbms_output.put_line(buf);
end;
/
------------------------------------------
10
00 41 00 42 00 43 00 44 00 45
------------------------------------------
The output of step 8,9,10 & 11 show that each data row took 10 bytes for NCLOB, whereas first data row took 15 bytes whereas the second took 5 bytes for CLOB.
No comments:
Post a Comment