7
clickhouse 20.x 三分片两副本部署与本地表的压力测试(一)
source link: https://blog.51cto.com/flyfish225/5470312
Go to the source link to view the article. You can view the picture content, updated content and better typesetting reading experience. If the link is broken, please click the button below to view the snapshot at that time.
clickhouse 20.x 三分片两副本部署与本地表的压力测试(一)
推荐 原创 标签(空格分隔):clickhouse 系列
一:系统环境准备
1.1 系统初始化
系统:
CentOS7.9x64
主机名:
cat /etc/hosts
----
192.168.100.141 node01
192.168.100.142 node02
192.168.100.143 node03
192.168.100.144 node04
192.168.100.145 node05
192.168.100.146 node06
-----
本次部署6台服务器
推荐配置每台:
内存:64G+ CPU:32core+ 硬盘500G+
1.2 系统部署jdk
在node01,node02,node03,node04,node05 上面部署
tar -zxvf jdk-8u301-linux-x64.tar.gz
mv jdk1.8.0_301/ /usr/local/jdk
vim /etc/profile
----
### jdk
export JAVA_HOME=/usr/local/jdk
export CLASSPATH=.:$JAVA_HOME/jre/lib:$JAVA_HOME/lib:$JAVA_HOME/lib/tools.jar
PATH=$PATH:$HOME/bin:$JAVA_HOME/bin
----
source /etc/profile
java -version
1.3 配置zookeeper 集群
在node01,node02,node03,node04,node05上面部署
tar -zxvf apache-zookeeper-3.7.0-bin.tar.gz
mv apache-zookeeper-3.7.0-bin /usr/local/zookeeper
mkdir -p /usr/local/zookeeper/data
mkdir -p /usr/local/zookeeper/log
cd /usr/local/zookeeper/data/
echo 1 > myid
----
cd /usr/local/zookeeper/conf
cp zoo_sample.cfg zoo.cfg
vim zoo.cfg
----
# 心跳时间
tickTime=2000
# follow连接leader的初始化连接时间,表示tickTime的倍数
initLimit=10
# syncLimit配置表示leader与follower之间发送消息,请求和应答时间长度。如果follower在设置的时间内不能与leader进行通信,那么此follower将被丢弃,tickTime的倍数
syncLimit=5
# 客户端连接端口
clientPort=2181
# 节点数据存储目录,需要提前创建,注意myid添加,用于标识服务器节点
dataDir=/usr/local/zookeeper/data
dataLogDir=/usr/local/zookeeper/log
server.1=192.168.100.141:2888:3888
server.2=192.168.100.142:2888:3888
server.3=192.168.100.143:2888:3888
server.4=192.168.100.144:2888:3888
server.5=192.168.100.145:2888:3888
-----
同步 zookeeper 目录到其它节点:
scp -r zookeeper root@node02:/usr/local/
scp -r zookeeper root@node03:/usr/local/
scp -r zookeeper root@node04:/usr/local/
scp -r zookeeper root@node05:/usr/local/
修改node02 节点 myid
cd /usr/local/zookeeper/data/
echo 2 > myid
修改node03 节点 myid
cd /usr/local/zookeeper/data/
echo 3 > myid
修改node04 节点 myid
cd /usr/local/zookeeper/data/
echo 4 > myid
修改node05 节点 myid
cd /usr/local/zookeeper/data/
echo 5 > myid
启动zookeeper
cd /usr/local/zookeeper/bin/
./zkServer.sh start
二:安装clickhouse20.x [所有节点都安装]
2.1 安装clickhouse
unzip clickhouse20-9-7-11.zip
cd clickhouse20-9-7-11
rpm -ivh clickhouse-*
2.2 修改配置文件
2.2.2.1 修改config.xml
cd /etc/clickhouse-server/
vim config.xml
------
<database_atomic_delay_before_drop_table_sec>0</database_atomic_delay_before_drop_table_sec>
<include_from>/etc/clickhouse-server/config.d/metrika.xml</include_from>
----------
----
<listen_host>::</listen_host>
----
注释掉默认的 localhost 本地 shard 分片配置,不做显示:
<remote_servers incl="clickhouse_remote_servers" >
...... #### 全部注释掉
</remote_servers>
vim users.xml
-----
<password>tzck123.com</password>
<ip>::/0</ip>
-----
2.2.2.2 修改metrika.xml
cd /etc/clickhouse-server/config.d
vim metrika.xml
----------------------
<yandex>
<clickhouse_remote_servers>
<!--定义分片节点,这里我指定 3 个分片,每个分片有 2 个副本,01+02,03+04,05+06-->
<tzcluster3s2r02>
<shard>
<weight>1</weight>
<internal_replication>true</internal_replication>
<replica>
<host>node01</host>
<port>9000</port>
<user>default</user>
<password>tzck123.com</password>
</replica>
<replica>
<host>node02</host>
<port>9000</port>
<user>default</user>
<password>tzck123.com</password>
</replica>
</shard>
<shard>
<weight>1</weight>
<internal_replication>true</internal_replication>
<replica>
<host>node03</host>
<port>9000</port>
<user>default</user>
<password>tzck123.com</password>
</replica>
<replica>
<host>node04</host>
<port>9000</port>
<user>default</user>
<password>tzck123.com</password>
</replica>
</shard>
<shard>
<weight>1</weight>
<internal_replication>true</internal_replication>
<replica>
<host>node05</host>
<port>9000</port>
<user>default</user>
<password>tzck123.com</password>
</replica>
<replica>
<host>node06</host>
<port>9000</port>
<user>default</user>
<password>tzck123.com</password>
</replica>
</shard>
</tzcluster3s2r02>
</clickhouse_remote_servers>
<zookeeper-servers>
<!-- zookeeper 集群部署在 node01 ~ node05(见 1.3 节),此处主机列表需与之保持一致 -->
<node index="1">
<host>node01</host>
<port>2181</port>
</node>
<node index="2">
<host>node02</host>
<port>2181</port>
</node>
<node index="3">
<host>node03</host>
<port>2181</port>
</node>
<node index="4">
<host>node04</host>
<port>2181</port>
</node>
<node index="5">
<host>node05</host>
<port>2181</port>
</node>
</zookeeper-servers>
<!--定义宏变量,后面需要用-->
<!-- 以下的配置根据节点的 IP/域名具体配置。配置文件中 macros 若省略,则建复制表时每个分片需指定 zookeeper 路径及副本名称:
同一分片上路径相同,副本名称不同;若不省略,则每个节点需配置不同的 {layer}-{shard}-{replica} -->
<macros>
<!-- <replica>node01.02</replica> -->
<layer>02</layer>
<shard>01</shard>
<replica>tzcluster3s2r02_01_01</replica>
</macros>
<!--不限制访问来源 ip 地址-->
<networks>
<ip>::/0</ip>
</networks>
<!--数据压缩方式,默认为 lz4-->
<clickhouse_compression>
<case>
<min_part_size>10000000000</min_part_size>
<min_part_size_ratio>0.01</min_part_size_ratio>
<method>lz4</method>
</case>
</clickhouse_compression>
</yandex>
-----------------------
同步: 所有主机
scp metrika.xml root@node02:/etc/clickhouse-server/config.d/
scp metrika.xml root@node03:/etc/clickhouse-server/config.d/
scp metrika.xml root@node04:/etc/clickhouse-server/config.d/
scp metrika.xml root@node05:/etc/clickhouse-server/config.d/
scp metrika.xml root@node06:/etc/clickhouse-server/config.d/
其它主机依次修改 macros 定义宏
node02:
-----
<macros>
<!-- <replica>node01.02</replica> -->
<layer>02</layer>
<shard>01</shard>
<replica>tzcluster3s2r02_01_02</replica>
</macros>
----
node03:
----
<macros>
<!-- <replica>node03.04</replica> -->
<layer>02</layer>
<shard>02</shard>
<replica>tzcluster3s2r02_02_01</replica>
</macros>
----
node04:
----
<macros>
<!-- <replica>node03.04</replica> -->
<layer>02</layer>
<shard>02</shard>
<replica>tzcluster3s2r02_02_02</replica>
</macros>
----
node05:
-----
<macros>
<!-- <replica>node05.06</replica> -->
<layer>02</layer>
<shard>03</shard>
<replica>tzcluster3s2r02_03_01</replica>
</macros>
-----
node06:
-----
<macros>
<!-- <replica>node05.06</replica> -->
<layer>02</layer>
<shard>03</shard>
<replica>tzcluster3s2r02_03_02</replica>
</macros>
----
2.2.2.3 启动clickhouse
systemctl stop clickhouse-server.service 停机所有集群
systemctl start clickhouse-server.service 所有节点全部启动
systemctl status clickhouse-server.service 所有节点查看clickhouse 节点状态
三:验证集群
3.1 验证集群状态
验证:
clickhouse-client -h node01
查看集群:
select * from system.clusters;
select cluster,host_name from system.clusters;
3.2 压测数据
参考官网压测数据:
https://clickhouse.com/docs/en/getting-started/example-datasets/brown-benchmark
https://datasets.clickhouse.com/mgbench{1..3}.csv.xz
解压:
xz -v -d mgbench{1..3}.csv.xz
建库:
create database mgbench on cluster tzcluster3s2r02 ;
USE mgbench;
CREATE TABLE mgbench.logs1 (
log_time DateTime,
machine_name LowCardinality(String),
machine_group LowCardinality(String),
cpu_idle Nullable(Float32),
cpu_nice Nullable(Float32),
cpu_system Nullable(Float32),
cpu_user Nullable(Float32),
cpu_wio Nullable(Float32),
disk_free Nullable(Float32),
disk_total Nullable(Float32),
part_max_used Nullable(Float32),
load_fifteen Nullable(Float32),
load_five Nullable(Float32),
load_one Nullable(Float32),
mem_buffers Nullable(Float32),
mem_cached Nullable(Float32),
mem_free Nullable(Float32),
mem_shared Nullable(Float32),
swap_free Nullable(Float32),
bytes_in Nullable(Float32),
bytes_out Nullable(Float32)
)
ENGINE = MergeTree()
ORDER BY (machine_group, machine_name, log_time);
CREATE TABLE mgbench.logs2 (
log_time DateTime,
client_ip IPv4,
request String,
status_code UInt16,
object_size UInt64
)
ENGINE = MergeTree()
ORDER BY log_time;
CREATE TABLE mgbench.logs3 (
log_time DateTime64,
device_id FixedString(15),
device_name LowCardinality(String),
device_type LowCardinality(String),
device_floor UInt8,
event_type LowCardinality(String),
event_unit FixedString(1),
event_value Nullable(Float32)
)
ENGINE = MergeTree()
ORDER BY (event_type, log_time);
数据导入
clickhouse-client --query "INSERT INTO mgbench.logs1 FORMAT CSVWithNames" < mgbench1.csv
clickhouse-client --query "INSERT INTO mgbench.logs2 FORMAT CSVWithNames" < mgbench2.csv
clickhouse-client --query "INSERT INTO mgbench.logs3 FORMAT CSVWithNames" < mgbench3.csv
带用户密码导入
clickhouse-client -h node01 -u default -d mgbench --password tzck123.com --query "INSERT INTO mgbench.logs1 FORMAT CSVWithNames" < mgbench1.csv
clickhouse-client -h node01 -u default -d mgbench --password tzck123.com --query "INSERT INTO mgbench.logs2 FORMAT CSVWithNames" < mgbench2.csv
clickhouse-client -h node01 -u default -d mgbench --password tzck123.com --query "INSERT INTO mgbench.logs3 FORMAT CSVWithNames" < mgbench3.csv
-- Q1.1: What is the CPU/network utilization for each web server since midnight?
SELECT machine_name,
MIN(cpu) AS cpu_min,
MAX(cpu) AS cpu_max,
AVG(cpu) AS cpu_avg,
MIN(net_in) AS net_in_min,
MAX(net_in) AS net_in_max,
AVG(net_in) AS net_in_avg,
MIN(net_out) AS net_out_min,
MAX(net_out) AS net_out_max,
AVG(net_out) AS net_out_avg
FROM (
SELECT machine_name,
COALESCE(cpu_user, 0.0) AS cpu,
COALESCE(bytes_in, 0.0) AS net_in,
COALESCE(bytes_out, 0.0) AS net_out
FROM logs1
WHERE machine_name IN ('anansi','aragog','urd')
AND log_time >= TIMESTAMP '2017-01-11 00:00:00'
) AS r
GROUP BY machine_name;
-- Q1.2: Which computer lab machines have been offline in the past day?
SELECT machine_name,
log_time
FROM logs1
WHERE (machine_name LIKE 'cslab%' OR
machine_name LIKE 'mslab%')
AND load_one IS NULL
AND log_time >= TIMESTAMP '2017-01-10 00:00:00'
ORDER BY machine_name,
log_time;
-- Q1.3: What are the hourly average metrics during the past 10 days for a specific workstation?
SELECT dt,
hr,
AVG(load_fifteen) AS load_fifteen_avg,
AVG(load_five) AS load_five_avg,
AVG(load_one) AS load_one_avg,
AVG(mem_free) AS mem_free_avg,
AVG(swap_free) AS swap_free_avg
FROM (
SELECT CAST(log_time AS DATE) AS dt,
EXTRACT(HOUR FROM log_time) AS hr,
load_fifteen,
load_five,
load_one,
mem_free,
swap_free
FROM logs1
WHERE machine_name = 'babbage'
AND load_fifteen IS NOT NULL
AND load_five IS NOT NULL
AND load_one IS NOT NULL
AND mem_free IS NOT NULL
AND swap_free IS NOT NULL
AND log_time >= TIMESTAMP '2017-01-01 00:00:00'
) AS r
GROUP BY dt,
hr
ORDER BY dt,
hr;
-- Q1.4: Over 1 month, how often was each server blocked on disk I/O?
SELECT machine_name,
COUNT(*) AS spikes
FROM logs1
WHERE machine_group = 'Servers'
AND cpu_wio > 0.99
AND log_time >= TIMESTAMP '2016-12-01 00:00:00'
AND log_time < TIMESTAMP '2017-01-01 00:00:00'
GROUP BY machine_name
ORDER BY spikes DESC
LIMIT 10;
-- Q1.5: Which externally reachable VMs have run low on memory?
SELECT machine_name,
dt,
MIN(mem_free) AS mem_free_min
FROM (
SELECT machine_name,
CAST(log_time AS DATE) AS dt,
mem_free
FROM logs1
WHERE machine_group = 'DMZ'
AND mem_free IS NOT NULL
) AS r
GROUP BY machine_name,
dt
HAVING MIN(mem_free) < 10000
ORDER BY machine_name,
dt;
-- Q1.6: What is the total hourly network traffic across all file servers?
SELECT dt,
hr,
SUM(net_in) AS net_in_sum,
SUM(net_out) AS net_out_sum,
SUM(net_in) + SUM(net_out) AS both_sum
FROM (
SELECT CAST(log_time AS DATE) AS dt,
EXTRACT(HOUR FROM log_time) AS hr,
COALESCE(bytes_in, 0.0) / 1000000000.0 AS net_in,
COALESCE(bytes_out, 0.0) / 1000000000.0 AS net_out
FROM logs1
WHERE machine_name IN ('allsorts','andes','bigred','blackjack','bonbon',
'cadbury','chiclets','cotton','crows','dove','fireball','hearts','huey',
'lindt','milkduds','milkyway','mnm','necco','nerds','orbit','peeps',
'poprocks','razzles','runts','smarties','smuggler','spree','stride',
'tootsie','trident','wrigley','york')
) AS r
GROUP BY dt,
hr
ORDER BY both_sum DESC
LIMIT 10;
Recommend
About Joyk
Aggregate valuable and interesting links.
Joyk means Joy of geeK