Wednesday 31 July 2019

How to display encoded byte stream of a varchar column in PostgreSQL

CREATE TABLE varchar_demo(description varchar(200));

INSERT INTO varchar_demo VALUES('こんにちは');
INSERT INTO varchar_demo VALUES('ABCDE');

SELECT * FROM varchar_demo;
 description 
-------------
 こんにちは
 ABCDE
(2 rows)


CREATE OR REPLACE FUNCTION dump(columnname TEXT, tablename TEXT,
                                rowid INT) RETURNS TEXT
LANGUAGE plpgsql
AS
$function$
DECLARE 
  hex_str TEXT;
  hex_byte TEXT;
  query TEXT;
  i INT;
  len INT;
BEGIN
  query := format('SELECT octet_length(%I) FROM %I offset %s
                   LIMIT 1', columnname, tablename, rowid);
  EXECUTE query INTO len;
  hex_str := 'Len=' || len || ' ';
  FOR i in 1..len LOOP
    query := format('SELECT to_hex(get_byte(%I::bytea, %s)) 
                     FROM %I offset %s LIMIT 1', 
                     columnname, i - 1, tablename, rowid);
    EXECUTE query INTO hex_byte;
    hex_str := hex_str || hex_byte || ',';
  END LOOP;
  RETURN hex_str;
END;
$function$;


select dump('description', 'varchar_demo', 0);
                         dump                         
------------------------------------------------------
 Len=15 e3,81,93,e3,82,93,e3,81,ab,e3,81,a1,e3,81,af,
(1 row)

select dump('description', 'varchar_demo', 1);
         dump          
-----------------------
 Len=5 41,42,43,44,45,
(1 row)

Comparing NVARCHAR2 with VARCHAR2 and NCLOB with CLOB in Oracle

In Oracle by default VARCHAR2/CLOB data is encoded using UTF-8, that can result in a character take up to 4 bytes of storage. However NVARCHAR/NCLOB data is encoded in UTF-16, that results in a character taking 2 bytes each.

First consider the relevant portion of the UTF-8 encoding table along with UNICODE

-----------------------------------------------------
Unicode  Character   UTF-8       Character Name
-----------------------------------------------------
U+3053     こ     e3 81 93 HIRAGANA LETTER KO
U+3093    ん     e3 82 93 HIRAGANA LETTER N
U+306B    に     e3 81 ab HIRAGANA LETTER NI
U+3061    ち     e3 81 a1 HIRAGANA LETTER TI
U+306F    は     e3 81 af HIRAGANA LETTER HA
-----------------------------------------------------

The following example provides the details for varchar2 & nvarchar2
1. Check the relevant database parameters

SELECT * FROM nls_database_parameters WHERE PARAMETER = 'NLS_NCHAR_CHARACTERSET';
-----------------------------------------------------
NLS_NCHAR_CHARACTERSET AL16UTF16
-----------------------------------------------------

SELECT * FROM nls_database_parameters WHERE parameter = 'NLS_CHARACTERSET';
-----------------------------------------------------
NLS_CHARACTERSET AL32UTF8
-----------------------------------------------------

2. Create the VARCHAR2 table

CREATE TABLE varchar2_demo (description VARCHAR2(50));
INSERT INTO varchar2_demo VALUES('こんにちは');
INSERT INTO varchar2_demo VALUES('ABCDE');
COMMIT;

3. Check the data and the encoded byte stream

SELECT description, DUMP(description,1016) FROM varchar2_demo;
--------------------------------------------------------------
こんにちは Typ=1 Len=15 CharacterSet=AL32UTF8: e3,81,93,e3,82,93,e3,81,ab,e3,81,a1,e3,81,af
ABCDE  Typ=1 Len=5 CharacterSet=AL32UTF8: 41,42,43,44,45
--------------------------------------------------------------

4. Create the NVARCHAR2 table

CREATE TABLE nvarchar2_demo (description NVARCHAR2(50));

INSERT INTO nvarchar2_demo VALUES('こんにちは');
INSERT INTO nvarchar2_demo VALUES('ABCDE');
COMMIT;

5. Check the data and the encoded byte stream

SELECT description, DUMP(description,1016) FROM nvarchar2_demo;
--------------------------------------------------------
こんにちは    Typ=1 Len=10 CharacterSet=AL16UTF16: 30,53,30,93,30,6b,30,61,30,6f
ABCDE     Typ=1 Len=10 CharacterSet=AL16UTF16: 0,41,0,42,0,43,0,44,0,45
---------------------------------------------------------

The results of step 4 & 5 show that each character took either 1 or 3 bytes for VARCHAR2 whereas each character took 2 bytes for NVARCHAR2.

The following example provides the details for CLOB & NCLOB


6. Create the CLOB & NCLOB tables

CREATE TABLE clob_demo (id integer, description clob);
INSERT INTO clob_demo VALUES(1, 'こんにちは');
INSERT INTO clob_demo VALUES(2, 'ABCDE');

CREATE TABLE nclob_demo (id integer, description nclob);
INSERT INTO nclob_demo VALUES(1, 'こんにちは');
INSERT INTO nclob_demo VALUES(2, 'ABCDE');

COMMIT;

7. Check the data in the tables

SELECT * FROM clob_demo;
---------------------
1 こんにちは
2 ABCDE
---------------------

SELECT * FROM nclob_demo;
---------------------
1 こんにちは
2 ABCDE
---------------------

8. Check the encoded byte stream of first row in CLOB table

declare
  cursor c1 is select description clb from clob_demo where id = 1;
  c1rec         c1%ROWTYPE;
  dest_offset   integer;
  src_offset    integer;
  lang_context  integer;
  warning       varchar2(1000);
  blb           blob;
  buf           raw(200);
  blb_len       integer;
  blb_offset    integer;
begin
  open c1;
  fetch c1 into c1rec;
  close c1;
  dbms_lob.createtemporary(blb, FALSE);
  dest_offset := 1;
  src_offset := 1;
  lang_context := 0;
  dbms_lob.converttoblob(blb, c1rec.clb, dbms_lob.getlength(c1rec.clb),
                    dest_offset, src_offset, 0, lang_context, warning);
  blb_len := dbms_lob.getlength(blb);
  blb_offset := 1;
  dbms_lob.read(blb, blb_len, blb_offset, buf);
  dbms_output.put_line(blb_len);
  dbms_output.put_line(buf);
end;
/
-------------------------------------------------------
15
E3 81 93 E3 82 93 E3 81 AB E3 81 A1 E3 81 AF
-------------------------------------------------------

9. Check the encoded byte stream of second row in CLOB table

declare
  cursor c1 is select description clb from clob_demo where id = 2;
  c1rec         c1%ROWTYPE;
  dest_offset   integer;
  src_offset    integer;
  lang_context  integer;
  warning       varchar2(1000);
  blb           blob;
  buf           raw(200);
  blb_len       integer;
  blb_offset    integer;
begin
  open c1;
  fetch c1 into c1rec;
  close c1;
  dbms_lob.createtemporary(blb, FALSE);
  dest_offset := 1;
  src_offset := 1;
  lang_context := 0;
  dbms_lob.converttoblob(blb, c1rec.clb, dbms_lob.getlength(c1rec.clb),
                    dest_offset, src_offset, 0, lang_context, warning);
  blb_len := dbms_lob.getlength(blb);
  blb_offset := 1;
  dbms_lob.read(blb, blb_len, blb_offset, buf);
  dbms_output.put_line(blb_len);
  dbms_output.put_line(buf);
end;
/
--------------
5
41 42 43 44 45
--------------

10. Check the encoded byte stream of first row in NCLOB table

declare
  cursor c1 is select description clb from nclob_demo where id = 1;
  c1rec         c1%ROWTYPE;
  dest_offset   integer;
  src_offset    integer;
  lang_context  integer;
  warning       varchar2(1000);
  blb           blob;
  buf           raw(200);
  blb_len       integer;
  blb_offset    integer;
begin
  open c1;
  fetch c1 into c1rec;
  close c1;
  dbms_lob.createtemporary(blb, FALSE);
  dest_offset := 1;
  src_offset := 1;
  lang_context := 0;
  dbms_lob.converttoblob(blb, c1rec.clb, dbms_lob.getlength(c1rec.clb),
                    dest_offset, src_offset, 0, lang_context, warning);
  blb_len := dbms_lob.getlength(blb);
  blb_offset := 1;
  dbms_lob.read(blb, blb_len, blb_offset, buf);
  dbms_output.put_line(blb_len);
  dbms_output.put_line(buf);
end;
/
-------------------------------------------------------
10
30 53 30 93 30 6B 30 61 30 6F
-------------------------------------------------------

11. Check the encoded byte stream of second row in NCLOB table

declare
  cursor c1 is select description clb from nclob_demo where id = 2;
  c1rec         c1%ROWTYPE;
  dest_offset   integer;
  src_offset    integer;
  lang_context  integer;
  warning       varchar2(1000);
  blb           blob;
  buf           raw(200);
  blb_len       integer;
  blb_offset    integer;
begin
  open c1;
  fetch c1 into c1rec;
  close c1;
  dbms_lob.createtemporary(blb, FALSE);
  dest_offset := 1;
  src_offset := 1;
  lang_context := 0;
  dbms_lob.converttoblob(blb, c1rec.clb, dbms_lob.getlength(c1rec.clb),
                    dest_offset, src_offset, 0, lang_context, warning);
  blb_len := dbms_lob.getlength(blb);
  blb_offset := 1;
  dbms_lob.read(blb, blb_len, blb_offset, buf);
  dbms_output.put_line(blb_len);
  dbms_output.put_line(buf);
end;
/
------------------------------------------
10
00 41 00 42 00 43 00 44 00 45
------------------------------------------

The output of step 8,9,10 & 11 show that each data row took 10 bytes for NCLOB, whereas first data row took 15 bytes whereas the second took 5 bytes for CLOB.

Wednesday 27 December 2017

PostgreSQL and Atomikos


Traditionally transaction managers have been server programs running as a separate process on a separate server machine. Atomikos thought otherwise. They were the first to move towards embeddable transaction management. This means that transaction manager becomes part of the application, and is used by instantiating a class and calling its methods. It is no more a separate process running separately on a server machine. This means that the problem of connection sharing as described in my earlier blog, is no more there. The application and the transaction manager are the same process so they can easily share the same connection. In order to demonstrate this capability we will implement the following example, using PostgreSQL as resource manager and Atomikos as Transaction Manager.

Assume a bank with two branches B1 and B2. Assume each branch uses its own PostgreSQL database server PG1 & PG2. Assume branch B1 has an account A1 & branch B2 has an account A2. Assume account A1 has balance USD 100 & account A2 has balance 0.
Assume a transfer of USD 100 is initiated from account A1 to A2 using an application which uses Atomikos as transaction manager.
The following diagram shows the detail of the implementation. 


 






The resource Manager:
I used PostgreSQL 9.6.2 on 64 bit CentOS 7. The main changes in the postgresql.conf are as follows:

max_prepared_transactions = 100
log_connections = on
log_disconnections = on
log_line_prefix = '[%p][%c][%v][%r][%u][%a]'
log_statement = 'all'

In order to run the test application one has to create user atomikos on both the servers using:
./createuser -d -l -s -h localhost -p 5432 atomikos
./createuser -d -l -s -h localhost -p 9432 atomikos

Here are the details of the sourcecode. The source code consists of four files.
ResourceManagerSettings.java – contains the settings of the two PostgreSQL servers I used to test the application.
TransferApp.java – the main class that has the doTransfer() method that performs the balance transfer. In addition this class provides function to set up table in both the servers and populate them with data that is required to do the test. This class also provides a function that tests the transfer.
TestTransfer.java – test the balance transfer.
pom.xml – each maven project must have this file that lists all the dependencies. Finding the dependencies and their latest version was a challenge, and these two sites helped a lot:
To find jars I used : findjar.com
and to find the maven dependency I used : search.maven.org

To run the application use the following command
mvn install
Note : If this command gives you errors like JAVA_HOME not set etc please refer to this tutorial.

This command should result in the following output:

[INFO] Scanning for projects...
[INFO]
[INFO] ------------------------------------------------------------------------
[INFO] Building BalanceTransferApp 1.0.0
[INFO] ------------------------------------------------------------------------
[INFO]
[INFO] --- maven-resources-plugin:2.6:resources (default-resources) @ BalanceTransferApp ---
[WARNING] Using platform encoding (UTF-8 actually) to copy filtered resources, i.e. build is platform dependent!
[INFO] skip non existing resourceDirectory /home/user_name/Projects/atomikos/pg/TransferApp/src/main/resources
[INFO]
[INFO] --- maven-compiler-plugin:3.1:compile (default-compile) @ BalanceTransferApp ---
[INFO] Changes detected - recompiling the module!
[WARNING] File encoding has not been set, using platform encoding UTF-8, i.e. build is platform dependent!
[INFO] Compiling 2 source files to /home/user_name/Projects/atomikos/pg/TransferApp/target/classes
[INFO]
[INFO] --- maven-resources-plugin:2.6:testResources (default-testResources) @ BalanceTransferApp ---
[WARNING] Using platform encoding (UTF-8 actually) to copy filtered resources, i.e. build is platform dependent!
[INFO] skip non existing resourceDirectory /home/user_name/Projects/atomikos/pg/TransferApp/src/test/resources
[INFO]
[INFO] --- maven-compiler-plugin:3.1:testCompile (default-testCompile) @ BalanceTransferApp ---
[INFO] Changes detected - recompiling the module!
[WARNING] File encoding has not been set, using platform encoding UTF-8, i.e. build is platform dependent!
[INFO] Compiling 1 source file to /home/user_name/Projects/atomikos/pg/TransferApp/target/test-classes
[INFO]
[INFO] --- maven-surefire-plugin:2.12.4:test (default-test) @ BalanceTransferApp ---
[INFO] Surefire report directory: /home/user_name/Projects/atomikos/pg/TransferApp/target/surefire-reports

-------------------------------------------------------
T E S T S
-------------------------------------------------------
Running com.enterprisedb.transferapp.TestTransfer
log4j:WARN No appenders could be found for logger (com.atomikos.logging.LoggerFactory).
log4j:WARN Please initialize the log4j system properly.
Thanks for using Atomikos! Evaluate http://www.atomikos.com/Main/ExtremeTransactions for advanced features and professional support
or register at http://www.atomikos.com/Main/RegisterYourDownload to disable this message and receive FREE tips & advice
Tests run: 1, Failures: 0, Errors: 0, Skipped: 0, Time elapsed: 0.777 sec

Results :

Tests run: 1, Failures: 0, Errors: 0, Skipped: 0

[INFO]
[INFO] --- maven-jar-plugin:2.4:jar (default-jar) @ BalanceTransferApp ---
[INFO] Building jar: /home/user_name/Projects/atomikos/pg/TransferApp/target/BalanceTransferApp-1.0.0.jar
[INFO]
[INFO] --- maven-install-plugin:2.4:install (default-install) @ BalanceTransferApp ---
[INFO] Installing /home/user_name/Projects/atomikos/pg/TransferApp/target/BalanceTransferApp-1.0.0.jar to /home/user_name/.m2/repository/com/enterprisedb/transferapp/BalanceTransferApp/1.0.0/BalanceTransferApp-1.0.0.jar
[INFO] Installing /home/user_name/Projects/atomikos/pg/TransferApp/pom.xml to /home/user_name/.m2/repository/com/enterprisedb/transferapp/BalanceTransferApp/1.0.0/BalanceTransferApp-1.0.0.pom
[INFO] ------------------------------------------------------------------------
[INFO] BUILD SUCCESS
[INFO] ------------------------------------------------------------------------
[INFO] Total time: 3.336 s
[INFO] Finished at: 2017-12-27T17:29:10+05:00
[INFO] Final Memory: 18M/331M
[INFO] ------------------------------------------------------------------------




The PostgreSQL log for the first server:



[47837][5a439215.badd][][127.0.0.1(41024)][[unknown]][[unknown]]LOG: connection received: host=127.0.0.1 port=41024
[47837][5a439215.badd][2/1328][127.0.0.1(41024)][atomikos][[unknown]]LOG: connection authorized: user=atomikos database=postgres
[47837][5a439215.badd][2/1329][127.0.0.1(41024)][atomikos][[unknown]]LOG: execute <unnamed>: SET extra_float_digits = 3
[47837][5a439215.badd][2/1330][127.0.0.1(41024)][atomikos][[unknown]]LOG: execute <unnamed>: SELECT gid FROM pg_prepared_xacts where database = current_database()
[47838][5a439215.bade][][127.0.0.1(41026)][[unknown]][[unknown]]LOG: connection received: host=127.0.0.1 port=41026
[47838][5a439215.bade][3/1030][127.0.0.1(41026)][atomikos][[unknown]]LOG: connection authorized: user=atomikos database=postgres
[47838][5a439215.bade][3/1031][127.0.0.1(41026)][atomikos][[unknown]]LOG: execute <unnamed>: SET extra_float_digits = 3
[47838][5a439215.bade][3/1032][127.0.0.1(41026)][atomikos][[unknown]]LOG: execute <unnamed>: DROP TABLE IF EXISTS ACCOUNTS
[47838][5a439215.bade][3/1033][127.0.0.1(41026)][atomikos][[unknown]]LOG: execute <unnamed>: create table ACCOUNTS(id int, title varchar(255), balance int)
[47838][5a439215.bade][3/1034][127.0.0.1(41026)][atomikos][[unknown]]LOG: execute <unnamed>: insert into ACCOUNTS values (1, 'Account A1', 100)
[47844][5a439215.bae4][][127.0.0.1(41032)][[unknown]][[unknown]]LOG: connection received: host=127.0.0.1 port=41032
[47844][5a439215.bae4][4/282][127.0.0.1(41032)][atomikos][[unknown]]LOG: connection authorized: user=atomikos database=postgres
[47844][5a439215.bae4][4/283][127.0.0.1(41032)][atomikos][[unknown]]LOG: execute <unnamed>: SET extra_float_digits = 3
[47844][5a439215.bae4][4/284][127.0.0.1(41032)][atomikos][[unknown]]LOG: execute <unnamed>: SELECT gid FROM pg_prepared_xacts where database = current_database()
[47844][5a439215.bae4][4/285][127.0.0.1(41032)][atomikos][[unknown]]LOG: execute <unnamed>: BEGIN
[47844][5a439215.bae4][4/285][127.0.0.1(41032)][atomikos][[unknown]]LOG: execute <unnamed>: update ACCOUNTS set balance = 0 where id = 1
[47844][5a439215.bae4][4/285][127.0.0.1(41032)][atomikos][[unknown]]LOG: execute <unnamed>: PREPARE TRANSACTION '1096044365_NTIuNzEuMTg1LjEyNS50bTE1MTQzNzc3NDk2OTAwMDAwMQ==_NTIuNzEuMTg1LjEyNS50bTE='
[47844][5a439215.bae4][4/286][127.0.0.1(41032)][atomikos][[unknown]]LOG: execute <unnamed>: COMMIT PREPARED '1096044365_NTIuNzEuMTg1LjEyNS50bTE1MTQzNzc3NDk2OTAwMDAwMQ==_NTIuNzEuMTg1LjEyNS50bTE='
[47838][5a439215.bade][3/1035][127.0.0.1(41026)][atomikos][[unknown]]LOG: execute <unnamed>: select balance from accounts where id = 1
[47838][5a439215.bade][][127.0.0.1(41026)][atomikos][[unknown]]LOG: disconnection: session time: 0:00:00.166 user=atomikos database=postgres host=127.0.0.1 port=41026
[47837][5a439215.badd][][127.0.0.1(41024)][atomikos][[unknown]]LOG: disconnection: session time: 0:00:00.238 user=atomikos database=postgres host=127.0.0.1 port=41024
[47844][5a439215.bae4][][127.0.0.1(41032)][atomikos][[unknown]]LOG: disconnection: session time: 0:00:00.058 user=atomikos database=postgres host=127.0.0.1 port=41032
 


The PostgreSQL log for the second server:




[47840][5a439215.bae0][][127.0.0.1(51134)][[unknown]][[unknown]]LOG: connection received: host=127.0.0.1 port=51134
[47840][5a439215.bae0][2/607][127.0.0.1(51134)][atomikos][[unknown]]LOG: connection authorized: user=atomikos database=postgres
[47840][5a439215.bae0][2/608][127.0.0.1(51134)][atomikos][[unknown]]LOG: execute <unnamed>: SET extra_float_digits = 3
[47840][5a439215.bae0][2/609][127.0.0.1(51134)][atomikos][[unknown]]LOG: execute <unnamed>: SELECT gid FROM pg_prepared_xacts where database = current_database()
[47841][5a439215.bae1][][127.0.0.1(51136)][[unknown]][[unknown]]LOG: connection received: host=127.0.0.1 port=51136
[47841][5a439215.bae1][3/257][127.0.0.1(51136)][atomikos][[unknown]]LOG: connection authorized: user=atomikos database=postgres
[47841][5a439215.bae1][3/258][127.0.0.1(51136)][atomikos][[unknown]]LOG: execute <unnamed>: SET extra_float_digits = 3
[47841][5a439215.bae1][3/259][127.0.0.1(51136)][atomikos][[unknown]]LOG: execute <unnamed>: DROP TABLE IF EXISTS ACCOUNTS
[47841][5a439215.bae1][3/260][127.0.0.1(51136)][atomikos][[unknown]]LOG: execute <unnamed>: create table ACCOUNTS(id int, title varchar(255), balance int)
[47841][5a439215.bae1][3/261][127.0.0.1(51136)][atomikos][[unknown]]LOG: execute <unnamed>: insert into ACCOUNTS values (2, 'Account A2', 0)
[47845][5a439215.bae5][][127.0.0.1(51140)][[unknown]][[unknown]]LOG: connection received: host=127.0.0.1 port=51140
[47845][5a439215.bae5][4/174][127.0.0.1(51140)][atomikos][[unknown]]LOG: connection authorized: user=atomikos database=postgres
[47845][5a439215.bae5][4/175][127.0.0.1(51140)][atomikos][[unknown]]LOG: execute <unnamed>: SET extra_float_digits = 3
[47845][5a439215.bae5][4/176][127.0.0.1(51140)][atomikos][[unknown]]LOG: execute <unnamed>: SELECT gid FROM pg_prepared_xacts where database = current_database()
[47845][5a439215.bae5][4/177][127.0.0.1(51140)][atomikos][[unknown]]LOG: execute <unnamed>: BEGIN
[47845][5a439215.bae5][4/177][127.0.0.1(51140)][atomikos][[unknown]]LOG: execute <unnamed>: update ACCOUNTS set balance = 100 where id = 2
[47845][5a439215.bae5][4/177][127.0.0.1(51140)][atomikos][[unknown]]LOG: execute <unnamed>: PREPARE TRANSACTION '1096044365_NTIuNzEuMTg1LjEyNS50bTE1MTQzNzc3NDk2OTAwMDAwMQ==_NTIuNzEuMTg1LjEyNS50bTI='
[47845][5a439215.bae5][4/178][127.0.0.1(51140)][atomikos][[unknown]]LOG: execute <unnamed>: COMMIT PREPARED '1096044365_NTIuNzEuMTg1LjEyNS50bTE1MTQzNzc3NDk2OTAwMDAwMQ==_NTIuNzEuMTg1LjEyNS50bTI='
[47841][5a439215.bae1][3/262][127.0.0.1(51136)][atomikos][[unknown]]LOG: execute <unnamed>: select balance from accounts where id = 2
[47840][5a439215.bae0][][127.0.0.1(51134)][atomikos][[unknown]]LOG: disconnection: session time: 0:00:00.071 user=atomikos database=postgres host=127.0.0.1 port=51134
[47841][5a439215.bae1][][127.0.0.1(51136)][atomikos][[unknown]]LOG: disconnection: session time: 0:00:00.066 user=atomikos database=postgres host=127.0.0.1 port=51136
[47845][5a439215.bae5][][127.0.0.1(51140)][atomikos][[unknown]]LOG: disconnection: session time: 0:00:00.040 user=atomikos database=postgres host=127.0.0.1 port=51140