完成SSB测试

SSB星型模式基准测试是OLAP数据库性能测试的常用场景,通过本篇教程,您可以了解到如何在MatrixOne中实现SSB测试。

准备工作

确保你已经安装了单机版MatrixOne连接到MatrixOne服务.

1. 编译dbgen

  1. $ git clone [email protected]:vadimtk/ssb-dbgen.git
  2. $ cd ssb-dbgen
  3. $ make

2. 生成数据

当使用-s 1dbgen命令会生产近600万行数据(670MB),当使用-s 10时会生产近6000万行数据,会耗费大量时间。

  1. $ ./dbgen -s 1 -T c
  2. $ ./dbgen -s 1 -T l
  3. $ ./dbgen -s 1 -T p
  4. $ ./dbgen -s 1 -T s
  5. $ ./dbgen -s 1 -T d

3. 在MatrixOne中建表

  1. create database if not exists ssb;
  2. use ssb;
  3. drop table if exists lineorder;
  4. drop table if exists part;
  5. drop table if exists supplier;
  6. drop table if exists customer;
  7. drop table if exists dates;
  8. drop table if exists lineorder_flat;
  9. create table lineorder (
  10. lo_orderkey bigint,
  11. lo_linenumber int,
  12. lo_custkey int,
  13. lo_partkey int,
  14. lo_suppkey int,
  15. lo_orderdate date,
  16. lo_orderpriority char (15),
  17. lo_shippriority tinyint,
  18. lo_quantity double,
  19. lo_extendedprice double,
  20. lo_ordtotalprice double,
  21. lo_discount double,
  22. lo_revenue double,
  23. lo_supplycost double,
  24. lo_tax double,
  25. lo_commitdate date,
  26. lo_shipmode char (10)
  27. ) ;
  28. create table part (
  29. p_partkey int,
  30. p_name varchar (22),
  31. p_mfgr char (6),
  32. p_category char (7),
  33. p_brand char (9),
  34. p_color varchar (11),
  35. p_type varchar (25),
  36. p_size int,
  37. p_container char (10)
  38. ) ;
  39. create table supplier (
  40. s_suppkey int,
  41. s_name char (25),
  42. s_address varchar (25),
  43. s_city char (10),
  44. s_nation char (15),
  45. s_region char (12),
  46. s_phone char (15)
  47. ) ;
  48. create table customer (
  49. c_custkey int,
  50. c_name varchar (25),
  51. c_address varchar (25),
  52. c_city char (10),
  53. c_nation char (15),
  54. c_region char (12),
  55. c_phone char (15),
  56. c_mktsegment char (10)
  57. ) ;
  58. create table dates (
  59. d_datekey date,
  60. d_date char (18),
  61. d_dayofweek char (9),
  62. d_month char (9),
  63. d_yearmonthnum int,
  64. d_yearmonth char (7),
  65. d_daynuminweek varchar(12),
  66. d_daynuminmonth int,
  67. d_daynuminyear int,
  68. d_monthnuminyear int,
  69. d_weeknuminyear int,
  70. d_sellingseason varchar (12),
  71. d_lastdayinweekfl varchar (1),
  72. d_lastdayinmonthfl varchar (1),
  73. d_holidayfl varchar (1),
  74. d_weekdayfl varchar (1)
  75. ) ;
  76. CREATE TABLE lineorder_flat(
  77. LO_ORDERKEY bigint primary key,
  78. LO_LINENUMBER int,
  79. LO_CUSTKEY int,
  80. LO_PARTKEY int,
  81. LO_SUPPKEY int,
  82. LO_ORDERDATE date,
  83. LO_ORDERPRIORITY char(15),
  84. LO_SHIPPRIORITY tinyint,
  85. LO_QUANTITY double,
  86. LO_EXTENDEDPRICE double,
  87. LO_ORDTOTALPRICE double,
  88. LO_DISCOUNT double,
  89. LO_REVENUE int unsigned,
  90. LO_SUPPLYCOST int unsigned,
  91. LO_TAX double,
  92. LO_COMMITDATE date,
  93. LO_SHIPMODE char(10),
  94. C_NAME varchar(25),
  95. C_ADDRESS varchar(25),
  96. C_CITY char(10),
  97. C_NATION char(15),
  98. C_REGION char(12),
  99. C_PHONE char(15),
  100. C_MKTSEGMENT char(10),
  101. S_NAME char(25),
  102. S_ADDRESS varchar(25),
  103. S_CITY char(10),
  104. S_NATION char(15),
  105. S_REGION char(12),
  106. S_PHONE char(15),
  107. P_NAME varchar(22),
  108. P_MFGR char(6),
  109. P_CATEGORY char(7),
  110. P_BRAND char(9),
  111. P_COLOR varchar(11),
  112. P_TYPE varchar(25),
  113. P_SIZE int,
  114. P_CONTAINER char(10)
  115. );

4. 导入数据

system_vars_config.toml参数增大,例如10GB,然后重新启动MatrixOne服务。

  1. max-entry-bytes = "10GB"

使用如下命令将数据导入相关表:

  1. load data infile '/ssb-dbgen-path/supplier.tbl' into table supplier FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' LINES TERMINATED BY '\n';
  2. load data infile '/ssb-dbgen-path/customer.tbl' into table customer FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' LINES TERMINATED BY '\n';
  3. load data infile '/ssb-dbgen-path/date.tbl' into table dates FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' LINES TERMINATED BY '\n';
  4. load data infile '/ssb-dbgen-path/part.tbl' into table part FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' LINES TERMINATED BY '\n';
  5. load data infile '/ssb-dbgen-path/lineorder.tbl into table lineorder FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' LINES TERMINATED BY '\n';

接着你可以在MatrixOne中进行查询操作:
如果你想要运行SSB宽表测试,你还需要lineorder_flat表数据,你可以从以下链接获取数据:

https://pan.baidu.com/s/1dCpcKsygdVuHzd-H-RWHFA
code: k1rs

运行以下命令将数据导入lineorder_flat

  1. load data infile '/ssb-dbgen-path/lineorder_flat.tbl ' into table lineorder_flat FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' LINES TERMINATED BY '\n';

5. 运行SSB测试命令

单表查询

  1. --Q1.1
  2. SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue FROM lineorder_flat WHERE year(LO_ORDERDATE)=1993 AND LO_DISCOUNT BETWEEN 1 AND 3 AND LO_QUANTITY < 25;
  3. --Q1.2
  4. SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue FROM lineorder_flat WHERE year(LO_ORDERDATE)=1994 AND LO_DISCOUNT BETWEEN 4 AND 6 AND LO_QUANTITY BETWEEN 26 AND 35;
  5. --Q1.3
  6. SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue FROM lineorder_flat WHERE d_yearmonthnum=1994 AND LO_DISCOUNT BETWEEN 5 AND 7 AND LO_QUANTITY BETWEEN 26 AND 35;
  7. --Q2.1
  8. SELECT sum(LO_REVENUE),year(LO_ORDERDATE) AS year,P_BRAND FROM lineorder_flat WHERE P_CATEGORY = 'MFGR#12' AND S_REGION = 'AMERICA' GROUP BY year,P_BRAND ORDER BY year,P_BRAND;
  9. --Q2.2
  10. SELECT sum(LO_REVENUE), year(LO_ORDERDATE) AS year, P_BRAND FROM lineorder_flat WHERE P_BRAND BETWEEN 'MFGR#2221' AND 'MFGR#2228' AND S_REGION = 'ASIA' GROUP BY year, P_BRAND ORDER BY year, P_BRAND;
  11. --Q2.3
  12. SELECT sum(LO_REVENUE), year(LO_ORDERDATE) AS year, P_BRAND FROM lineorder_flat WHERE P_BRAND = 'MFGR#2239' AND S_REGION = 'EUROPE' GROUP BY year, P_BRAND ORDER BY year, P_BRAND;
  13. --Q3.1
  14. SELECT C_NATION, S_NATION, year(LO_ORDERDATE) AS year, sum(LO_REVENUE) AS revenue FROM lineorder_flat WHERE C_REGION = 'ASIA' AND S_REGION = 'ASIA' AND year(LO_ORDERDATE) between 1992 AND 1997 GROUP BY C_NATION, S_NATION, year(LO_ORDERDATE) ORDER BY year asc, revenue desc;
  15. --Q3.2
  16. SELECT C_CITY, S_CITY, year(LO_ORDERDATE) AS year, sum(LO_REVENUE) AS revenue FROM lineorder_flat WHERE C_NATION = 'CHINA' AND S_NATION = 'CHINA' AND year(LO_ORDERDATE) between 1992 AND 1997 GROUP BY C_CITY, S_CITY, year(LO_ORDERDATE) ORDER BY year asc, revenue desc;
  17. --Q3.3
  18. SELECT C_CITY, S_CITY, year(LO_ORDERDATE) AS year, sum(LO_REVENUE) AS revenue FROM lineorder_flat WHERE (C_CITY = 'UNITED KI0' OR C_CITY = 'UNITED KI7') AND (S_CITY = 'UNITED KI0' OR S_CITY = 'UNITED KI7') AND year(LO_ORDERDATE) between 1992 AND 1997 GROUP BY C_CITY, S_CITY, year(LO_ORDERDATE) ORDER BY year asc, revenue desc;
  19. --Q3.4
  20. SELECT C_CITY, S_CITY, year(LO_ORDERDATE) AS year, sum(LO_REVENUE) AS revenue FROM lineorder_flat WHERE (C_CITY = 'UNITED KI0' OR C_CITY = 'UNITED KI7') AND (S_CITY = 'MOZAMBIQU1' OR S_CITY = 'KENYA 4') AND year(LO_ORDERDATE)= 1997 GROUP BY C_CITY, S_CITY, year(LO_ORDERDATE) ORDER BY year asc, revenue desc;
  21. --Q4.1
  22. SELECT year(LO_ORDERDATE) AS year, C_NATION, sum(LO_REVENUE - LO_SUPPLYCOST) AS profit FROM lineorder_flat WHERE C_REGION = 'AMERICA' AND S_REGION = 'AMERICA' AND (P_MFGR = 'MFGR#1' OR P_MFGR = 'MFGR#2') GROUP BY year(LO_ORDERDATE), C_NATION ORDER BY year, C_NATION;
  23. --Q4.2
  24. SELECT year(LO_ORDERDATE) AS year, S_NATION, P_CATEGORY, sum(LO_REVENUE - LO_SUPPLYCOST) AS profit FROM lineorder_flat WHERE C_REGION = 'AMERICA' AND S_REGION = 'AMERICA' AND (year(LO_ORDERDATE) = 1997 OR year(LO_ORDERDATE) = 1998) AND (P_MFGR = 'MFGR#1' OR P_MFGR = 'MFGR#2') GROUP BY year(LO_ORDERDATE), S_NATION, P_CATEGORY ORDER BY year, S_NATION, P_CATEGORY;
  25. --Q4.3
  26. SELECT year(LO_ORDERDATE) AS year, S_CITY, P_BRAND, sum(LO_REVENUE - LO_SUPPLYCOST) AS profit FROM lineorder_flat WHERE S_NATION = 'UNITED STATES' AND (year(LO_ORDERDATE) = 1997 OR year(LO_ORDERDATE) = 1998) AND P_CATEGORY = 'MFGR#14' GROUP BY year(LO_ORDERDATE), S_CITY, P_BRAND ORDER BY year, S_CITY, P_BRAND;

多表查询

  1. --Q1.1
  2. select sum(lo_revenue) as revenue
  3. from lineorder join dates on lo_orderdate = d_datekey
  4. where year(d_datekey) = 1993 and lo_discount between 1 and 3 and lo_quantity < 25;
  5. --Q1.2
  6. select sum(lo_revenue) as revenue
  7. from lineorder
  8. join dates on lo_orderdate = d_datekey
  9. where d_yearmonthnum = 199401
  10. and lo_discount between 4 and 6
  11. and lo_quantity between 26 and 35;
  12. --Q1.3
  13. select sum(lo_revenue) as revenue
  14. from lineorder
  15. join dates on lo_orderdate = d_datekey
  16. where d_weeknuminyear = 6 and year(d_datekey) = 1994
  17. and lo_discount between 5 and 7
  18. and lo_quantity between 26 and 35;
  19. --Q2.1
  20. select sum(lo_revenue) as lo_revenue, year(d_datekey) as year, p_brand
  21. from lineorder
  22. join dates on lo_orderdate = d_datekey
  23. join part on lo_partkey = p_partkey
  24. join supplier on lo_suppkey = s_suppkey
  25. where p_category = 'MFGR#12' and s_region = 'AMERICA'
  26. group by year, p_brand
  27. order by year, p_brand;
  28. --Q2.2
  29. select sum(lo_revenue) as lo_revenue, year(d_datekey) as year, p_brand
  30. from lineorder
  31. join dates on lo_orderdate = d_datekey
  32. join part on lo_partkey = p_partkey
  33. join supplier on lo_suppkey = s_suppkey
  34. where p_brand between 'MFGR#2221' and 'MFGR#2228' and s_region = 'ASIA'
  35. group by year, p_brand
  36. order by year, p_brand;
  37. --Q2.3
  38. select sum(lo_revenue) as lo_revenue, year(d_datekey) as year, p_brand
  39. from lineorder
  40. join dates on lo_orderdate = d_datekey
  41. join part on lo_partkey = p_partkey
  42. join supplier on lo_suppkey = s_suppkey
  43. where p_brand = 'MFGR#2239' and s_region = 'EUROPE'
  44. group by year, p_brand
  45. order by year, p_brand;
  46. --Q3.1
  47. select c_nation, s_nation, year(d_datekey) as year, sum(lo_revenue) as lo_revenue
  48. from lineorder
  49. join dates on lo_orderdate = d_datekey
  50. join customer on lo_custkey = c_custkey
  51. join supplier on lo_suppkey = s_suppkey
  52. where c_region = 'ASIA' and s_region = 'ASIA' and year(d_datekey) between 1992 and 1997
  53. group by c_nation, s_nation, year
  54. order by year asc, lo_revenue desc;
  55. --Q3.2
  56. select c_city, s_city, year(d_datekey) as year, sum(lo_revenue) as lo_revenue
  57. from lineorder
  58. join dates on lo_orderdate = d_datekey
  59. join customer on lo_custkey = c_custkey
  60. join supplier on lo_suppkey = s_suppkey
  61. where c_nation = 'UNITED STATES' and s_nation = 'UNITED STATES'
  62. and year(d_datekey) between 1992 and 1997
  63. group by c_city, s_city, year
  64. order by year asc, lo_revenue desc;
  65. --Q3.3
  66. select c_city, s_city, year(d_datekey) as year, sum(lo_revenue) as lo_revenue
  67. from lineorder
  68. join dates on lo_orderdate = d_datekey
  69. join customer on lo_custkey = c_custkey
  70. join supplier on lo_suppkey = s_suppkey
  71. where (c_city='UNITED KI1' or c_city='UNITED KI5')
  72. and (s_city='UNITED KI1' or s_city='UNITED KI5')
  73. and year(d_datekey) between 1992 and 1997
  74. group by c_city, s_city, year
  75. order by year asc, lo_revenue desc;
  76. --Q3.4
  77. select c_city, s_city, year(d_datekey) as year, sum(lo_revenue) as lo_revenue
  78. from lineorder
  79. join dates on lo_orderdate = d_datekey
  80. join customer on lo_custkey = c_custkey
  81. join supplier on lo_suppkey = s_suppkey
  82. where (c_city='UNITED KI1' or c_city='UNITED KI5') and (s_city='UNITED KI1' or s_city='UNITED KI5') and d_yearmonth = 'Dec1997'
  83. group by c_city, s_city, year
  84. order by year asc, lo_revenue desc;
  85. --Q4.1
  86. select year(d_datekey) as year, c_nation, sum(lo_revenue) - sum(lo_supplycost) as profit
  87. from lineorder
  88. join dates on lo_orderdate = d_datekey
  89. join customer on lo_custkey = c_custkey
  90. join supplier on lo_suppkey = s_suppkey
  91. join part on lo_partkey = p_partkey
  92. where c_region = 'AMERICA' and s_region = 'AMERICA' and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2')
  93. group by year, c_nation
  94. order by year, c_nation;
  95. --Q4.2
  96. select year(d_datekey) as year, s_nation, p_category, sum(lo_revenue) - sum(lo_supplycost) as profit
  97. from lineorder
  98. join dates on lo_orderdate = d_datekey
  99. join customer on lo_custkey = c_custkey
  100. join supplier on lo_suppkey = s_suppkey
  101. join part on lo_partkey = p_partkey
  102. where c_region = 'AMERICA'and s_region = 'AMERICA'
  103. and (year(d_datekey) = 1997 or year(d_datekey) = 1998)
  104. and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2')
  105. group by year, s_nation, p_category
  106. order by year, s_nation, p_category;
  107. --Q4.3
  108. select year(d_datekey) as year, s_city, p_brand, sum(lo_revenue) - sum(lo_supplycost) as profit,c_region , s_nation, p_category
  109. from lineorder
  110. join dates on lo_orderdate = d_datekey
  111. join customer on lo_custkey = c_custkey
  112. join supplier on lo_suppkey = s_suppkey
  113. join part on lo_partkey = p_partkey
  114. where
  115. (year(d_datekey) = 1997 or year(d_datekey) = 1998)
  116. and s_nation='ALGERIA'
  117. group by year, s_city, p_brand
  118. order by year, s_city, p_brand;