2、DML相关操作

2.1、INSERT

  • 插入多条记录
  1. CREATE TABLE public.t1_insert_mul
  2. (
  3. f1 int not null,
  4. f2 varchar(20),
  5. primary key(f1)
  6. ) distribute by shard(f1) to group default_group;
  7. postgres=# INSERT INTO t1_insert_mul VALUES(1,'Tbase'),(2,'pg');
  8. INSERT 0 2
  • 插入更新
  1. create table public.t1_conflict
  2. (
  3. f1 int not null,
  4. f2 varchar(20),
  5. primary key(f1)
  6. ) distribute by shard(f1) to group default_group;
  7. insert into t1_conflict values(1,'tbase') ON CONFLICT (f1) DO UPDATE SET f2 = 'tbase';
  8. create table public.t1_conflict
  9. (
  10. f1 int not null,
  11. f2 varchar(20) not null,
  12. f3 int ,
  13. primary key(f1,f2)
  14. ) distribute by shard(f1) to group default_group;
  15. insert into t1_conflict values(1,'tbase',2) ON CONFLICT (f1,f2) DO UPDATE SET f3 = 2;
  • 插入返回
  1. create table public.t1_insert_return
  2. (
  3. f1 int not null,
  4. f2 varchar(20) not null default 'tbase',
  5. primary key(f1)
  6. ) distribute by shard(f1) to group default_group;
  7. postgres=# insert into t1_insert_return values(1) returning *;
  8. f1 | f2
  9. ----+-------
  10. 1 | tbase
  11. (1 row)
  12. INSERT 0 1
  • INSERT更多的使用方法请参考Postgresql用法
  1. http://www.postgres.cn/docs/10/sql-insert.html

2.2、UPDATE

  • 基于分布键条件更新
  1. create table public.t1_update_pkey
  2. (
  3. f1 int not null,
  4. f2 varchar(20) not null default 'tbase',
  5. f3 varchar(32),
  6. primary key(f1)
  7. ) distribute by shard(f1) to group default_group;
  8. postgres=# explain UPDATE t1_update_pkey SET f2='tbase' where f1=1;
  9. QUERY PLAN
  10. ----------------------------------------------------------------------------------
  11. Remote Fast Query Execution (cost=0.00..0.00 rows=0 width=0)
  12. Node/s: dn001
  13. -> Update on t1_update_pkey (cost=0.15..4.17 rows=1 width=154)
  14. -> Index Scan using t1_update_pkey_pkey on t1_update_pkey (cost=0.15..4.17 rows=1 width=154)
  15. Index Cond: (f1 = 1)

性能最优,扩展性好

  • 非分布键更新
  1. postgres=# explain UPDATE t1_update_pkey SET f2='tbase' where f3='pg'; QUERY PLAN
  2. ----------------------------------------------------------------------------------
  3. Remote Fast Query Execution (cost=0.00..0.00 rows=0 width=0)
  4. Node/s: dn001, dn002
  5. -> Update on t1_update_pkey (cost=0.00..15.12 rows=2 width=154)
  6. -> Seq Scan on t1_update_pkey (cost=0.00..15.12 rows=2 width=154)
  7. Filter: ((f3)::text = 'pg'::text)
  8. (5 rows)

更新语句发往所有节点

  • 分区表带分区条件更新
  1. create table public.t1_pt_update
  2. ( f1 int not null,f2 timestamp not null,f3 varchar(20),primary key(f1) )
  3. partition by range (f2) begin (timestamp without time zone '2019-01-01 0:0:0') step (interval '1 month') partitions (2) distribute by shard(f1) to group default_group;
  4. postgres=# explain update t1_pt_update set f3='tbase' where f1=1 and f2>'2019-01-01' and f2<'2019-02-01'; QUERY PLAN
  5. -----------------------------------------------------------------------------------
  6. Remote Fast Query Execution (cost=0.00..0.00 rows=0 width=0)
  7. Node/s: dn001
  8. -> Update on t1_pt_update_part_0 (cost=0.15..4.17 rows=1 width=80)
  9. -> Index Scan using t1_pt_update_pkey_part_0 on t1_pt_update_part_0 (cost=0.15..4.17 rows=1 width=80)
  10. Index Cond: (f1 = 1)
  11. Filter: ((f2 > '2019-01-01 00:00:00'::timestamp without time zone) AND (f2 < '2019-02-01 00:00:00'::timestamp without time zone))

带分区条件更新,性能最优,扩展性好

  • 分区表不带分区条件更新
  1. postgres=# explain update t1_pt_update set f3='tbase' where f1=1; QUERY PLAN
  2. ------------------------------------------------------------------------------------
  3. Remote Fast Query Execution (cost=0.00..0.00 rows=0 width=0)
  4. Node/s: dn001
  5. -> Update on t1_pt_update (cost=0.15..4.17 rows=1 width=80)
  6. -> Index Scan using t1_pt_update_pkey_part_0 on t1_pt_update (partition sequence: 0, name: t1_pt_update_part_0) (cost=0.15..2.08 rows=0 width=80)
  7. Index Cond: (f1 = 1)
  8. -> Index Scan using t1_pt_update_pkey_part_1 on t1_pt_update (partition sequence: 1, name: t1_pt_update_part_1) (cost=0.15..2.08 rows=0 width=80)
  9. Index Cond: (f1 = 1)
  10. (7 rows)

需要扫描所有分区子表

  • 关联表更新
  1. create table public.t1_update_join1
  2. (
  3. f1 int not null,f2 varchar(20) not null default 'tbase',primary key(f1)
  4. )
  5. distribute by shard(f1) to group default_group;
  6. create table public.t1_update_join2
  7. (
  8. f1 int not null,f2 varchar(20) not null default 'tbase',primary key(f1)
  9. )
  10. distribute by shard(f1) to group default_group;
  11. update t1_update_join1 set f2='pg' from t1_update_join2 where t1_update_join1.f1=t1_update_join2.f1;

表关联更新只能是基于分布键关联

  • 分布键,分区键不能更新
  1. create table public.t1_update_pkey
  2. (
  3. f1 int not null,f2 varchar(20) not null default 'tbase', primary key(f1)
  4. ) distribute by shard(f1) to group default_group;
  5. postgres=# update t1_update_pkey set f1=2 where f1=1;
  6. ERROR: Distributed column or partition column "f1" can't be updated in current version
  7. Time: 0.910 ms.

目前的解决办法“删除旧记录,再新增记录”

  • 更多的UPDATE使用方法请参考Postgresql用法
  1. http://www.postgres.cn/docs/10/sql-update.html

2.3、DELETE

  • 删除返回记录
  1. create table public.t1_delete_return
  2. (
  3. f1 int not null,f2 varchar(20) not null default 'tbase',primary key(f1)
  4. )
  5. distribute by shard(f1) to group default_group;
  6. postgres=# insert into t1_delete_return values(1,'tbase');
  7. INSERT 0 1
  8. postgres=# delete from t1_delete_return where f1=1 returning *;
  9. f1 | f2
  10. ----+-------
  11. 1 | tbase
  12. (1 row)
  • UPDATE最优使用方法同样适合于DELETE

  • DELETE更多的使用方法见

  1. http://www.postgres.cn/docs/10/sql-delete.html

2.4、SELECT

  • 基于分布键查询
  1. create table public.t1_select
  2. (
  3. f1 int not null,f2 varchar(20) not null default 'tbase',f3 varchar(32), primary key(f1)
  4. )
  5. distribute by shard(f1) to group default_group;
  6. postgres=# explain select * from t1_select where f1=1; QUERY PLAN
  7. ----------------------------------------------------------------------------------
  8. Remote Fast Query Execution (cost=0.00..0.00 rows=0 width=0)
  9. Node/s: dn001
  10. -> Index Scan using t1_select_pkey on t1_select (cost=0.15..4.17 rows=1 width=144)
  11. Index Cond: (f1 = 1)

性能最优,扩展性好

  • 非分布键查询
  1. postgres=# explain select * from t1_select where f1<3;
  2. QUERY PLAN
  3. -------------------------------------------------------------------------------------
  4. Remote Fast Query Execution (cost=0.00..0.00 rows=0 width=0)
  5. Node/s: dn001, dn002
  6. -> Bitmap Heap Scan on t1_select (cost=3.21..14.92 rows=137 width=144)
  7. Recheck Cond: (f1 < 3)
  8. -> Bitmap Index Scan on t1_select_pkey (cost=0.00..3.17 rows=137 width=0)
  9. Index Cond: (f1 < 3)

查询语句发往所有节点,然后在CN汇总

  • 分布键JOIN查询
  1. create table public.t1_select_join1
  2. ( f1 int not null,f2 int,primary key(f1) )
  3. distribute by shard(f1) to group default_group;
  4. create index t1_select_join1_f2_idx on t1_select_join1(f2);
  5. create table public.t1_select_join2
  6. ( f1 int not null,f2 int,primary key(f1) )
  7. distribute by shard(f1) to group default_group;
  8. create index t1_select_join2_f2_idx on t1_select_join2(f2);
  9. postgres=# explain select t1_select_join1.* from t1_select_join1,t1_select_join2 where t1_select_join1.f1=t1_select_join2.f1 and t1_select_join1.f1=1;
  10. QUERY PLAN --------------------------------------------------------------------------------------
  11. Remote Fast Query Execution (cost=0.00..0.00 rows=0 width=0)
  12. Node/s: dn001
  13. -> Nested Loop (cost=0.30..8.35 rows=1 width=8)
  14. -> Index Scan using t1_select_join1_pkey on t1_select_join1 (cost=0.15..4.17 rows=1 width=8)
  15. Index Cond: (f1 = 1)
  16. -> Index Only Scan using t1_select_join2_pkey on t1_select_join2 (cost=0.15..4.17 rows=1 width=4)
  17. Index Cond: (f1 = 1)

性能最优,扩展性好

  • 非分布键JOIN查询
  1. postgres=# explain select * from t1_select_join1,t1_select_join2 where t1_select_join1.f1=t1_select_join2.f2 and t1_select_join1.f2=1 ;
  2. QUERY PLAN ------------------------------------------------------------------------------------------
  3. Remote Subquery Scan on all (dn001,dn002) (cost=2.26..33.48 rows=7 width=16)
  4. -> Nested Loop (cost=2.26..33.48 rows=7 width=16)
  5. -> Bitmap Heap Scan on t1_select_join1 (cost=2.13..9.57 rows=7 width=8) Recheck Cond: (f2 = 1)
  6. -> Bitmap Index Scan on t1_select_join1_f2_idx (cost=0.00..2.13 rows=7 width=0)
  7. Index Cond: (f2 = 1)
  8. -> Materialize (cost=100.12..103.45 rows=7 width=8)
  9. -> Remote Subquery Scan on all (dn001,dn002) (cost=100.12..103.44 rows=7 width=8)
  10. Distribute results by S: f2
  11. -> Index Scan using t1_select_join2_f2_idx on t1_select_join2 (cost=0.12..3.35 rows=7 width=8)
  12. Index Cond: (f2 = t1_select_join1.f1)

需要在DN做数据重分布

2.5、TRUNCATE

  • 普通表truncate
  1. create table public.t1_delete_truncate
  2. ( f1 int not null,f2 varchar(20) not null default 'tbase',primary key(f1) )
  3. distribute by shard(f1) to group default_group;
  4. insert into t1_delete_truncate select t,t::text from generate_series(1,1000000) as t;
  5. truncate table t1_delete_truncate;
  • 分区表truncate
  1. postgres=# create table public.t1_pt
  2. (
  3. f1 int not null,
  4. f2 timestamp not null,
  5. f3 varchar(20),
  6. primary key(f1)
  7. )
  8. partition by range (f2)
  9. begin (timestamp without time zone '2019-01-01 0:0:0')
  10. step (interval '1 month') partitions (3)
  11. distribute by shard(f1)
  12. to group default_group;
  13. truncate public.t1_pt partition for ('2019-01-01' ::timestamp without time zone);