zl程序教程

您现在的位置是:首页 >  云平台

当前栏目

Hudi-Flink SQL实时读取Hudi表数据

实时flinkSQL数据 读取 hudi
时间:2023-09-11 14:14:34

代码如下(Hudi 表的实时写入请参考上一篇《Hudi-Flink 消费 Kafka 将增量数据实时写入 Hudi》):

package com.zhen.hudi;

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

/**
 * Demo built on the Flink SQL connector: registers a Hudi table as a source
 * and runs a SQL query against it.
 *
 * @Author FengZhen
 * @Date 3/10/22 8:33 PM
 */
public class FlinkSQLReadDemo {

    /**
     * DDL that registers {@code order_hudi} on top of the Hudi table stored at the
     * configured HDFS path. The WITH clause selects the {@code hudi} connector,
     * declares the table type as MERGE_ON_READ and turns on
     * {@code read.streaming.enabled} with a check interval of {@code 4}.
     */
    private static final String CREATE_ORDER_HUDI_DDL = String.join("\n",
            "CREATE TABLE order_hudi(",
            "    `orderId` STRING PRIMARY KEY NOT ENFORCED,",
            "    `userId` STRING,",
            "    `orderTime` STRING,",
            "    `ip` STRING,",
            "    `orderMoney` DOUBLE,",
            "    `orderStatus` INT,",
            "    `ts` STRING,",
            "    `partition_day` STRING",
            ")",
            "PARTITIONED BY (partition_day)",
            "WITH(",
            "    'connector' = 'hudi',",
            "    'path'='hdfs://localhost:9000/hudi-warehouse/flink_hudi_order',",
            "    'table.type' = 'MERGE_ON_READ',",
            "    'read.streaming.enabled' = 'true',",
            "    'read.streaming.check-interval' = '4'",
            ")");

    /** Query that selects every declared column of {@code order_hudi}. */
    private static final String SELECT_ORDERS_SQL =
            "SELECT orderId, userId, orderTime, ip, orderMoney, orderStatus, ts, partition_day FROM order_hudi";

    /**
     * Entry point: creates a streaming-mode table environment, registers the
     * Hudi-backed table, then executes the query and prints its result.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        // Step 1: build a table environment configured for streaming mode.
        final EnvironmentSettings streamingSettings =
                EnvironmentSettings.newInstance().inStreamingMode().build();
        final TableEnvironment tEnv = TableEnvironment.create(streamingSettings);

        // Step 2: create the input table that loads data from the Hudi table.
        tEnv.executeSql(CREATE_ORDER_HUDI_DDL);

        // Step 3: run the query against the Hudi table and print the rows.
        tEnv.executeSql(SELECT_ORDERS_SQL).print();
    }

}