Wednesday 31 July 2019

How to display encoded byte stream of a varchar column in PostgreSQL

CREATE TABLE varchar_demo(description varchar(200));

INSERT INTO varchar_demo VALUES('こんにちは');
INSERT INTO varchar_demo VALUES('ABCDE');

SELECT * FROM varchar_demo;
 description 
-------------
 こんにちは
 ABCDE
(2 rows)


CREATE OR REPLACE FUNCTION dump(columnname TEXT, tablename TEXT,
                                rowid INT) RETURNS TEXT
LANGUAGE plpgsql
AS
$function$
DECLARE 
  hex_str TEXT;
  hex_byte TEXT;
  query TEXT;
  i INT;
  len INT;
BEGIN
  query := format('SELECT octet_length(%I) FROM %I offset %s
                   LIMIT 1', columnname, tablename, rowid);
  EXECUTE query INTO len;
  hex_str := 'Len=' || len || ' ';
  FOR i in 1..len LOOP
    query := format('SELECT to_hex(get_byte(%I::bytea, %s)) 
                     FROM %I offset %s LIMIT 1', 
                     columnname, i - 1, tablename, rowid);
    EXECUTE query INTO hex_byte;
    hex_str := hex_str || hex_byte || ',';
  END LOOP;
  RETURN hex_str;
END;
$function$;


select dump('description', 'varchar_demo', 0);
                         dump                         
------------------------------------------------------
 Len=15 e3,81,93,e3,82,93,e3,81,ab,e3,81,a1,e3,81,af,
(1 row)

select dump('description', 'varchar_demo', 1);
         dump          
-----------------------
 Len=5 41,42,43,44,45,
(1 row)

2 comments:

  1. \set ON_ERROR_STOP

    DROP TABLE IF EXISTS varchar_demo;
    CREATE TABLE varchar_demo (
    description varchar
    );

    INSERT INTO varchar_demo VALUES('こんにちは');
    INSERT INTO varchar_demo VALUES('ABCDE');

    SELECT * FROM varchar_demo;

    CREATE OR REPLACE FUNCTION dump_bytea (
    value bytea
    ) RETURNS text
    LANGUAGE SQL IMMUTABLE
    AS $$
    SELECT format ('Len=%s %s', octet_length (value), string_agg (to_hex (get_byte (value, i)) || ',', ''))
    FROM generate_series (0, octet_length (value) - 1) AS t (i)
    $$;

    CREATE OR REPLACE FUNCTION dump (
    columnname text,
    tablename text,
    rowid integer
    ) RETURNS text
    LANGUAGE PLPGSQL STABLE
    AS $$
    DECLARE
    value bytea;
    BEGIN
    EXECUTE format ('SELECT %I::bytea FROM %I OFFSET %L', columnname, tablename, rowid) INTO value;
    RETURN dump_bytea (value);
    END;
    $$;

    SELECT dump ('description', 'varchar_demo', 0);
    SELECT dump ('description', 'varchar_demo', 1);

    Or to get rid of the extra comma at the end of the dump:

    CREATE OR REPLACE FUNCTION dump_bytea (
    value bytea
    ) RETURNS text
    LANGUAGE SQL IMMUTABLE
    AS $$
    SELECT format ('Len=%s %s', octet_length (value), string_agg (to_hex (get_byte (value, i)), ','))
    FROM generate_series (0, octet_length (value) - 1) AS t (i)
    $$;

    Or if you don't care about the commas at all:

    CREATE OR REPLACE FUNCTION dump_bytea (
    value bytea
    ) RETURNS text
    LANGUAGE SQL IMMUTABLE
    AS $$
    SELECT format ('Len=%s %s', octet_length (value), encode (value, 'hex'))
    $$;

    ReplyDelete
    Replies
    1. Thanks for the suggested improvements.

      Delete