# CSV
$ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FORMAT CSV" < data.csv

# Parquet
$ cat {filename} | clickhouse-client --query="INSERT INTO {some_table} FORMAT Parquet"
$ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_file.pq}

# ORC
$ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT ORC"
$ clickhouse-client --query="SELECT * FROM {some_table} FORMAT ORC" > {filename.orc}
Primary key
Primary keys: not unique; they determine how the data is sorted => think of them as CarbonData's sort columns.
Every 8,192 rows or 10 MB of data (referred to as the index granularity) creates an entry in the primary key index file. This produces a sparse index that easily fits in memory, and each granule is the smallest stripe of column data that gets processed during SELECT queries.
The primary key can be defined using the PRIMARY KEY parameter. If you define a table without specifying a PRIMARY KEY, the key defaults to the tuple in the ORDER BY clause. If you specify both a PRIMARY KEY and an ORDER BY, the primary key must be a prefix of the sort order.
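A minimal DDL sketch of these rules (the table and columns are invented for illustration): PRIMARY KEY keeps only the leading columns of the sorting key in the sparse index, and index_granularity sets how many rows each index entry covers.

```sql
-- Hypothetical table: the primary key (user_id, event_date)
-- is a prefix of the sorting key (user_id, event_date, event_type).
CREATE TABLE test.events
(
    user_id    UInt64,
    event_date Date,
    event_type String,
    payload    String
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(event_date)
ORDER BY (user_id, event_date, event_type)
PRIMARY KEY (user_id, event_date)
SETTINGS index_granularity = 8192;  -- one sparse-index entry per 8,192 rows
```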
How it works
Millisecond-level OLAP
Prototype: to do just a single task well: to filter and aggregate data as fast as possible.
- Columnar storage
- Indexes: keep only the necessary columns' information in memory (see the EXPLAIN sketch below)
- Compression
- Vectorized queries / columnar processing
- Scalability
- Attention to low-level detail: for each query, pick the most suitable data structure implementation from many variants
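To see the index point in action, EXPLAIN with indexes enabled reports how much of a table the sparse primary index lets a query skip (using the hypothetical test.events table sketched above; the exact plan output varies by ClickHouse version):

```sql
-- The PrimaryKey section of the plan shows how many granules remain
-- after pruning with the sparse index, i.e. how little data is read.
EXPLAIN indexes = 1
SELECT count()
FROM test.events
WHERE user_id = 42;
```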
CREATE DATABASE IF NOT EXISTS bmp ON CLUSTER analysis;

DROP TABLE bmp.t_bms_service_sub_local ON CLUSTER analysis;

CREATE TABLE IF NOT EXISTS bmp.t_bms_service_sub_local ON CLUSTER analysis …;

CREATE TABLE bmp.t_bms_service_sub ON CLUSTER analysis (…)
ENGINE = Distributed('analysis', 'bmp', 't_bms_service_sub_local', hiveHash(seqid));
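Once both tables exist, clients read from and write to the Distributed table rather than the *_local one; for example, a simple count fans out to every shard and merges the partial results:

```sql
-- Each shard scans its own t_bms_service_sub_local data;
-- the initiating node merges the per-shard counts.
SELECT count() FROM bmp.t_bms_service_sub;
```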
Table metadata
select database, name, engine, create_table_query from system.tables where database != 'system' and database != 'information_schema' and database != 'INFORMATION_SCHEMA';
Ingesting Kafka into ClickHouse
Create tables on the cluster to receive the Kafka data
CREATE TABLE wx_log_kafka.kafka_queue_drs_log ON CLUSTER cluster_other
(
    id Int64,
    database String,
    schema String,
    table String,
    opType String,
    es Int64,
    data Array(String),
    old Array(String)
)
ENGINE = Kafka
SETTINGS
    kafka_broker_list = 'xxx:9092,xxx:9092',
    kafka_topic_list = 't1',
    kafka_group_name = 'CMCC_BI_DBSyn',
    kafka_format = 'JSONEachRow',
    kafka_num_consumers = 10,
    kafka_skip_broken_messages = 1000;
CREATE TABLE IF NOT EXISTS wx_log_kafka.ods_drs_log_local ON CLUSTER cluster_other
(
    id Int64,
    database String,
    schema String,
    table String,
    opType String,
    es String,
    data Array(String),
    old Array(String),
    kafka_offset Int64,
    datetime DateTime,
    daytime Date
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/wx_log_kafka/ods_drs_log_local', '{replica}')
PARTITION BY daytime
ORDER BY (daytime, id)
TTL datetime + toIntervalDay(20)
SETTINGS index_granularity = 8192;
CREATE TABLE wx_log_kafka.ods_drs_log ON CLUSTER cluster_other
AS wx_log_kafka.ods_drs_log_local
ENGINE = Distributed(cluster_other, wx_log_kafka, ods_drs_log_local, rand())
SETTINGS fsync_after_insert = 1, fsync_directories = 1;
Create the MV to write data into the target table
CREATE MATERIALIZED VIEW wx_log_kafka.kafka_consumer_drs_log ON CLUSTER cluster_other
TO wx_log_kafka.ods_drs_log
AS SELECT
    id,
    database,
    schema,
    table,
    opType,
    es,
    data,
    old,
    _offset AS kafka_offset,
    toDateTime(_timestamp) AS datetime,
    toDate(_timestamp) AS daytime
FROM wx_log_kafka.kafka_queue_drs_log;
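One operational detail worth noting (standard Kafka-engine behavior, not from the original snippets): messages are consumed only while the materialized view is attached, so the pipeline can be paused and resumed by detaching and re-attaching it:

```sql
-- Pause consumption from the topic.
DETACH TABLE wx_log_kafka.kafka_consumer_drs_log ON CLUSTER cluster_other;

-- Resume; the consumer group continues from its committed offsets.
ATTACH TABLE wx_log_kafka.kafka_consumer_drs_log ON CLUSTER cluster_other;
```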
-- Table changes
select name, engine, metadata_modification_time from system.tables where database = '' and name = '';
-- Partition changes
select max(modification_time) as last_mtime from system.parts where database = '' and table = '' and active = 1;
-- Queries
select query_kind, event_time, tables, query
from system.query_log
where has(tables, 'ods.xxx')
  and query_kind in ('Select', 'Insert')
  and type = 'QueryFinish'
order by event_time desc
limit 100;
-- Last access/insert per table, excluding system tables
select tablename, max(event_time) as last_access
from
(
    SELECT replaceAll(arrayJoin(tables), '`', '') AS tablename, event_time
    FROM system.query_log
    WHERE query_kind = 'Select' -- or 'Insert'
      AND type = 'QueryFinish'
      AND tables != []
) t
WHERE NOT startsWith(tablename, 'system.')
GROUP BY tablename
ORDER BY last_access DESC;