// Hudi table name for the trips copy-on-write example.
val tableName = "hudi_trips_cow"
// Base path on HDFS where the Hudi table data is stored.
val basePath = "hdfs://hadoop-hadoop-hdfs-nn:9000/tmp/hudi_trips_cow"
// Alternative: store the table on the local filesystem instead.
// val basePath = "file:///tmp/hudi_trips_cow"
模拟生成Trip乘车数据
构建DataGenerator对象，用于模拟生成10条Trip乘车数据：
// Build a DataGenerator and turn 10 mock Trip records into JSON strings.
val dataGen = new DataGenerator
val generated = dataGen.generateInserts(10)
val inserts = convertToStringList(generated)
-- Set the result mode to tableau to show the results directly in the CLI.
SET 'sql-client.execution.result-mode' = 'tableau';

CREATE TABLE t1 (
  uuid VARCHAR(20) PRIMARY KEY NOT ENFORCED,
  name VARCHAR(10),
  age INT,
  ts TIMESTAMP(3),
  `partition` VARCHAR(20)
)
PARTITIONED BY (`partition`)
WITH (
  'connector' = 'hudi',
  'path' = 'hdfs://hadoop-hadoop-hdfs-nn:9000/tmp/flink-hudi-t1',
  -- this creates a MERGE_ON_READ table; the default is COPY_ON_WRITE
  'table.type' = 'MERGE_ON_READ'
);

-- Insert a single row using VALUES.
INSERT INTO t1 VALUES ('id1','Danny',23,TIMESTAMP '1970-01-01 00:00:01','par1');

-- Insert a batch of rows using VALUES.
INSERT INTO t1 VALUES
  ('id1','Danny',23,TIMESTAMP '1970-01-01 00:00:01','par1'),
  ('id2','Stephen',33,TIMESTAMP '1970-01-01 00:00:02','par1'),
  ('id3','Julian',53,TIMESTAMP '1970-01-01 00:00:03','par2'),
  ('id4','Fabian',31,TIMESTAMP '1970-01-01 00:00:04','par2'),
  ('id5','Sophia',18,TIMESTAMP '1970-01-01 00:00:05','par3'),
  ('id6','Emma',20,TIMESTAMP '1970-01-01 00:00:06','par3'),
  ('id7','Bob',44,TIMESTAMP '1970-01-01 00:00:07','par4'),
  ('id8','Han',56,TIMESTAMP '1970-01-01 00:00:08','par4');
HDFS上查看
4)查询数据(批式查询)
-- Batch query over the snapshot of t1.
SELECT * FROM t1;
5)更新数据
-- This upsert updates the existing record with key 'id1' (age 23 -> 27).
INSERT INTO t1 VALUES ('id1','Danny',27,TIMESTAMP '1970-01-01 00:00:01','par1');
-- Streaming-read table over the same Hudi path as t1, configured so new
-- commits are picked up continuously instead of reading a single snapshot.
CREATE TABLE t2 (
  uuid VARCHAR(20) PRIMARY KEY NOT ENFORCED,
  name VARCHAR(10),
  age INT,
  ts TIMESTAMP(3),
  `partition` VARCHAR(20)
)
PARTITIONED BY (`partition`)
WITH (
  'connector' = 'hudi',
  'path' = 'hdfs://hadoop-hadoop-hdfs-nn:9000/tmp/flink-hudi-t1',
  'table.type' = 'MERGE_ON_READ',
  'read.streaming.enabled' = 'true',        -- turn on streaming read
  'read.start-commit' = '20210316134557',   -- commit instant time to start from
  'read.streaming.check-interval' = '4'     -- seconds between checks for new commits (default 60s)
);

-- Query the table in streaming mode.
SELECT * FROM t2;