Work notes
# File corruption after an abnormal server power loss
ClickHouse fails to start: `Detaching Broken Part`, `DB::Exception: Suspiciously many (12) broken parts to remove`
**Cause**
ClickHouse is an open-source columnar database. If the server loses power while data is being written, the on-disk data can end up inconsistent with its metadata. When the service restarts, ClickHouse reloads the parts of every MergeTree table, and some of those parts may be corrupted.
ClickHouse has a setting called max_suspicious_broken_parts: if the number of broken parts in a single partition exceeds this value, ClickHouse refuses to automatically repair or drop the broken parts and instead aborts at startup with the error above.
**Fix**: raise max_suspicious_broken_parts to 1000 in /etc/clickhouse-server/config.xml, then restart ClickHouse.
```
<merge_tree>
<max_suspicious_broken_parts>1000</max_suspicious_broken_parts>
</merge_tree>
```
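After the restart, the effective value can be verified from inside ClickHouse with a quick check against the system.merge_tree_settings table:
```
SELECT name, value
FROM system.merge_tree_settings
WHERE name = 'max_suspicious_broken_parts';
```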
## SQL
```
-- List unfinished (failed) mutations
select * from system.mutations where is_done = 0\G
-- Kill the failed mutation
KILL MUTATION WHERE database = 'easyviews' AND table = 'bpm_base_local';
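-- Before killing, the failure reason can be read from system.mutations
-- (latest_fail_reason / latest_fail_time are standard columns there):
SELECT mutation_id, command, latest_fail_reason, latest_fail_time
FROM system.mutations
WHERE database = 'easyviews' AND table = 'bpm_base_local' AND is_done = 0;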
-- What secondIndex corresponds to a given time of day
select dateDiff('second', toStartOfDay(toDateTime('2023-10-20 09:40:00')), toDateTime('2023-10-20 09:40:00')) secondIndex
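-- Cross-check in the other direction: start-of-day plus secondIndex
-- reproduces the timestamp (34800 = 9*3600 + 40*60):
SELECT toStartOfDay(toDateTime('2023-10-20 09:40:00')) + 34800 AS ts;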
-- Check whether these function codes appear in the database
SELECT
    bb.componentId,
    bb.componentName,
    count(*)
FROM easyviews.bpm_base bb
WHERE
    date = today()
    AND bb.dimension7 IN ('270998', '213998')
GROUP BY
    bb.componentId,
    bb.componentName
-- Peak transaction count in a single time granule
SELECT datetime ,SUM(allTransCount) as num FROM easyviews.bpm_indicator
WHERE `date` = today() and `level` =3
GROUP by datetime order by num desc limit 1;
-- Peak transaction count in a single granule for one event; set levelUuid to the event id
SELECT datetime,SUM(allTransCount) as num FROM easyviews.bpm_indicator WHERE
`date` = today() and levelUuid = 267 AND `level` = 4
GROUP by datetime order by num desc limit 1;
-- Group by function code and second, count requests per group; then sort
-- descending and take the top second for each function code (a shorter
-- LIMIT BY variant follows this query)
WITH max_counts AS (
SELECT dimension7,
MAX(count) AS max_count
FROM (
SELECT dimension7,
secondIndex,
COUNT(*) AS count
FROM easyviews.bpm_base
WHERE `date` = today() and `datetime` < '2024-03-14 09:35:00' and `datetime` > '2024-03-14 09:26:00'
GROUP BY dimension7, secondIndex
)
GROUP BY dimension7
)
SELECT b.dimension7,
b.secondIndex,
a.max_count
FROM max_counts a
JOIN (
SELECT dimension7,
secondIndex,
COUNT(*) AS count
FROM easyviews.bpm_base
WHERE `date` = today() and `datetime` < '2024-03-14 09:35:00' and `datetime` > '2024-03-14 09:26:00'
GROUP BY dimension7, secondIndex
) b ON a.dimension7 = b.dimension7 AND a.max_count = b.count
ORDER BY a.max_count DESC;
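-- A shorter sketch of the same "top second per function code", using
-- ClickHouse's LIMIT ... BY over the same table and window:
SELECT dimension7, secondIndex, count(*) AS cnt
FROM easyviews.bpm_base
WHERE `date` = today() AND `datetime` > '2024-03-14 09:26:00' AND `datetime` < '2024-03-14 09:35:00'
GROUP BY dimension7, secondIndex
ORDER BY dimension7, cnt DESC
LIMIT 1 BY dimension7;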
# For a batch of SQL statements, or very long ones, put them in a file and run it in one shot:
clickhouse-client --user <user> --password <password> -d <database> --multiquery < /root/temp.sql
# Inspect a query's execution plan via trace logs:
clickhouse-client -h localhost --send_logs_level=trace <<<"<SQL statement>" >/dev/null
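# Recent ClickHouse versions also support EXPLAIN, which shows the plan
# without executing the query:
clickhouse-client --query "EXPLAIN SELECT count() FROM system.one"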
# Export query results to a CSV file:
clickhouse-client --query="select * from default.t_city" > city.csv
# Or:
# echo 'select * from default.t_city' | curl localhost:8123?database=default -udefault:password -d @- > table_name.sql
# Import a CSV file:
cat city.csv | clickhouse-client --query "insert into city FORMAT CSV"
# For small datasets, export to CSV with column names, then import:
clickhouse> select * from default.t_city INTO OUTFILE '/data/t_city.csv' FORMAT CSVWithNames;
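# A matching import that consumes the header row (assumes the CSV was
# produced by the CSVWithNames export above):
clickhouse-client --query="INSERT INTO default.t_city FORMAT CSVWithNames" < /data/t_city.csv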
```
[gitee ck other](https://gitee.com/zhouguanyulovejiadanyang/learnnodel/tree/master)
Ports 31123/31124 are NodePorts; 38123/39000 are hostPorts.
```
-- Top 10 slowest queries in a time window
SELECT
query,
query_duration_ms,
event_time
FROM system.query_log AS ql
WHERE (event_time >= '2023-04-20 16:00:00') AND (event_time < '2023-04-20 16:15:00')
ORDER BY query_duration_ms DESC
LIMIT 10
-- Slowest query patterns (normalized) that ran more than 10 times
SELECT normalizeQuery(query) AS q,
count(),
any(query),
avg(read_rows) / 10000000 AS read_rows_kw,
avg(query_duration_ms),
max(query_duration_ms)
FROM system.query_log
WHERE (event_date = '2023-10-25')
AND (event_time > '2023-10-25 14:00:00')
AND (event_time < '2023-10-25 18:30:00')
AND (type = 2)
GROUP BY q HAVING count() > 10
ORDER BY avg(query_duration_ms) DESC LIMIT 10
select count() from system.processes
select query from system.processes
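-- A runaway query found in system.processes can be cancelled by id
-- (substitute a real query_id):
KILL QUERY WHERE query_id = '<query_id>';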
-- Total size of each database
SELECT
d.name AS database,
sum(p.bytes) AS total_bytes,
formatReadableSize(sum(p.bytes)) AS total_size
FROM system.parts AS p
JOIN system.databases AS d ON p.database = d.name
GROUP BY d.name;
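-- Note: system.parts also lists outdated parts; add "WHERE p.active"
-- to count only live data, as the monthly-growth query below does.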
SELECT
table AS table_name,
sum(rows) AS row_num,
formatReadableSize(sum(data_uncompressed_bytes)) AS org_size,
formatReadableSize(sum(data_compressed_bytes)) AS compress_size,
round((sum(data_compressed_bytes) / sum(data_uncompressed_bytes)) * 100, 0) AS compress_ratio
FROM system.parts
WHERE database='easyviews'
GROUP BY table order by sum(rows) desc;
-- How much disk space was added over one month
SELECT
sum(data_uncompressed_bytes) / 1024 /1024 /1024 AS uncompressed_gb,
sum(data_compressed_bytes) / 1024 /1024 /1024 AS compressed_gb
FROM system.parts
WHERE
database = 'easyviews' AND
active = 1 AND
(modification_time >= '2024-04-01 00:00:00' AND modification_time <= '2024-04-30 23:59:59');
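-- Caveat: modification_time is per part and changes when parts merge,
-- so this only approximates "data added during the period".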
-- Size of each table in the database (run on every node):
select table,sum(bytes_on_disk)/1000/1000/1000 as GB from system.parts group by table order by GB desc
-- Size and row count of every partition across all tables (run on every node):
select
`partition` ,
sum(rows) as row,
formatReadableSize(sum(bytes_on_disk)) as used_disk
from
system.parts
group by
`partition`
order by
partition
-- Size and row count of each partition of a given table (run on every node):
select
`partition` ,
sum(rows) as row,
formatReadableSize(sum(bytes_on_disk)) as used_disk
from system.parts
where `table` = 'bpm_base_local'
group by `partition`
order by partition
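-- Once an old partition has been identified it can be dropped; the value
-- below is hypothetical and must match the table's PARTITION BY key:
ALTER TABLE easyviews.bpm_base_local ON CLUSTER '{cluster}' DROP PARTITION '2023-06-01';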
-- Truncate a table across the whole cluster
truncate table easyviews.packet_base_local on cluster '{cluster}'
-- Manually compute the five-day average
select `datetime` ,`date` , levelUuid , sumMerge(allTransCount) as sm from easyviews.mv_bpm_indicator_y mbiy where
`level` = 4 and levelUuid = 6042 and `date` in ('2023-06-26','2023-06-27','2023-06-25','2023-06-20','2023-06-21') and
timeIndex = 15*60*60/5 GROUP by levelUuid , `datetime` ,centerId ,linkId ,datasetId ,`date` ,`level` ORDER by `date` ;
-- 4878 + 4497 + 330 + 4636 + 5164 = 19505
-- 19505 / 5 = 3901
-- Query the stored five-day average for event 6042
SELECT yDataAvgValueBy5day FROM easyviews.daily_statistics where `date` = today() and timeIndex = 15*60*60/5 and levelUuid = 6042 ;
-- Query historical peak values
SELECT timeIndex ,yDataHistoryMaxValue ,yDataHistoryMaxValueDate,insertTime FROM easyviews.daily_statistics WHERE date=today() ORDER BY insertTime desc LIMIT 100;
-- Fetch request and response payloads paired by uuid
SELECT a.`time` , a.`uuid` as rid,
max(case when a.direction = 0 then a.`data` else null end) as request ,
max(case when a.direction = 1 then a.`data` else null end) as response
from easyviews.bpm_original a
where a.applicationId = 6061
and a.`date` BETWEEN '2023-07-14' and '2023-07-18'
group by a.`time` , a.`uuid`
```
```
-- Compute response time, response rate, and success rate
SELECT
sumMerge(allTransCount) AS allTransCount,
round(if(isNaN(sumMerge(allStart2ends) / sumMerge(responseTransCount)), null,
sumMerge(allStart2ends) / sumMerge(responseTransCount)), 2) AS responseTime,
round(if(isNaN(sumMerge(responseTransCount) / allTransCount), null, sumMerge(responseTransCount) / allTransCount) * 100,
2) AS responseRate,
round(if(isNaN(sumMerge(successTransCount) / sumMerge(responseTransCount)), null,
sumMerge(successTransCount) / sumMerge(responseTransCount)) * 100, 2) AS successRate
FROM mv_bpm_indicator_hour
WHERE date = '2023-04-12'
AND time >= toUnixTimestamp('2023-04-12 00:00:00', 'Asia/Shanghai')*1000
AND time <= toUnixTimestamp('2023-04-13 00:00:00','Asia/Shanghai')*1000
AND levelUuid = 5939
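-- Note: sumMerge() finalizes AggregateFunction states, so this query only
-- works against AggregatingMergeTree tables such as mv_bpm_indicator_hour.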
-- Transaction counts for several components, broken down by function code
SELECT
    bb.dimension7,
    count(*) AS ct
FROM easyviews.bpm_base bb
WHERE
    -- date >= '2024-01-04' AND date <= '2024-01-10'
    date IN ('2024-01-04', '2024-01-05')
    AND bb.componentId IN (209, 210, 211)
GROUP BY bb.dimension7
ORDER BY ct DESC
```
```
-- time         full-precision timestamp; kept to preserve data precision
-- datetime     data timestamp (request time)
-- probeTime    probe output timestamp: when reassembled data was written to Kafka
-- decodeTime   decoder output timestamp
-- pairingTime  pairing output timestamp
-- responseTime response timestamp
-- insertTime   time the row was inserted into ClickHouse
-- secondIndex  second of the day; kept to make second-level filtering easy
-- Filter out rows whose responseTime or pairingTime is 0
SELECT
    concat(toString(toDateTime(intDiv(time, 1000000000), 'Asia/Shanghai')), '.', toString(time % 1000000000)) AS request_time,
    concat(toString(toDateTime(intDiv(responseTime, 1000000000), 'Asia/Shanghai')), '.', toString(responseTime % 1000000000)) AS respon_Time,
    concat(toString(toDateTime(intDiv(probeTime, 1000000000), 'Asia/Shanghai')), '.', toString(probeTime % 1000000000)) AS pro_Time,
    concat(toString(toDateTime(intDiv(decodeTime, 1000000000), 'Asia/Shanghai')), '.', toString(decodeTime % 1000000000)) AS dec_Time,
    concat(toString(toDateTime(intDiv(pairingTime, 1000000000), 'Asia/Shanghai')), '.', toString(pairingTime % 1000000000)) AS pair_Time,
    insertTime
FROM easyviews.bpm_base
WHERE
    date = today()
    AND datetime > '2023-09-11 13:38:00'
    AND responseTime != 0
    AND pairingTime != 0
LIMIT 10;
select datetime,insertTime,
toDateTime(probeTime/1000000000,'Asia/Shanghai') as probeT,
toDateTime(decodeTime/1000000000,'Asia/Shanghai') as decodeT,
toDateTime(pairingTime/1000000000,'Asia/Shanghai') as pairT,
toDateTime(consumerTime /1000000000,'Asia/Shanghai') as conInsertT,
toDateTime(JSONExtract(consumerInfo,'consumerPullTime','UInt64')/1000000000,'Asia/Shanghai') as conPullT,
toDateTime(JSONExtract(consumerInfo,'eventPushTime','UInt64')/1000000000,'Asia/Shanghai') as eventT,
(probeT - datetime) as delay,
componentId,componentName
from easyviews.bpm_base
where date = today() and pairingTime != 0 and
`datetime` >= '2023-11-09 17:00:00' and `datetime` <= '2023-11-09 18:40:00' order by delay desc limit 10;
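-- Stage-by-stage pipeline latency from the same timestamps (a sketch;
-- assumes every *Time column is a nanosecond epoch):
SELECT
    avg((decodeTime - probeTime) / 1e9)   AS probe_to_decode_s,
    avg((pairingTime - decodeTime) / 1e9) AS decode_to_pair_s
FROM easyviews.bpm_base
WHERE date = today() AND probeTime != 0 AND decodeTime != 0 AND pairingTime != 0;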
```