Hudi-通过Hive查询hudi表数据
数据 查询 通过 hive hudi
2023-09-11 14:14:34 时间
环境准备
集成jar包:hudi-hadoop-mr-bundle-0.10.1.jar,放入$HIVE_HOME/lib目录下
建外部表
create database db_hudi; use db_hudi; CREATE EXTERNAL TABLE IF NOT EXISTS tbl_hudi_didi( order_id BIGINT, product_id INT, city_id INT, district INT, county INT, type INT, combo_type INT, traffic_type INT, passenger_count INT, driver_product_id INT, start_dest_distance INT, arrive_time STRING, departure_time STRING, pre_total_fee DOUBLE, normal_time STRING, bubble_trace_id STRING, product_1level INT, dest_lng DOUBLE, dest_lat DOUBLE, starting_lng DOUBLE, starting_lat DOUBLE, ts BIGINT, partitionpath STRING ) PARTITIONED BY( date_str string ) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hudi.hadoop.HoodieParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' LOCATION '/hudi-warehouse/tbl_didi_haikou';
手动加入分区
--手动添加分区 ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-22') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-22'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-23') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-23'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-24') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-24'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-25') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-25'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-26') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-26'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-27') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-27'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-28') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-28'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-29') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-29'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-30') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-30'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-31') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-31'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-1') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-1'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-2') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-2'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-3') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-3'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-4') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-4'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-5') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-5'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-6') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-6'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-7') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-7'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-8') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-8'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-9') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-9'; ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-10') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-10';
查看分区
SHOW PARTITIONS db_hudi.tbl_hudi_didi;
![](https://img2022.cnblogs.com/blog/1109757/202203/1109757-20220303214859470-1975490309.png)
指标统计
-- 开发测试,设置运行模式为本地模式 set hive.exec.mode.local.auto = true; set hive.exec.mode.local.auto.tasks.max = 10; set hive.exec.mode.local.auto.inputbytes.max=88801103; set hive.exec.mode.local.auto.input.files.max=50; SET hive.mapred.mode=nonstrict; -- 指标一:订单类型统计 WITH tmp as ( SELECT product_id, COUNT(1) AS total FROM db_hudi.tbl_hudi_didi GROUP BY product_id ) SELECT CASE product_id WHEN 1 THEN "滴滴专车" WHEN 2 THEN "滴滴企业专车" WHEN 3 THEN "滴滴快车" WHEN 4 THEN "滴滴企业快车" ELSE "未知" END AS order_type, total FROM tmp ; -- 指标二:订单时效性统计 WITH tmp as ( SELECT type, COUNT(1) AS total FROM db_hudi.tbl_hudi_didi GROUP BY type ) SELECT CASE type WHEN 0 THEN "实时" WHEN 1 THEN "预约" ELSE "未知" END AS order_type, total FROM tmp ; --指标三:订单交通类型统计 SELECT traffic_type, COUNT(1) AS total FROM db_hudi.tbl_hudi_didi GROUP BY traffic_type; -- 指标五:订单价格统计,先将价格划分区间,再统计,此处使用WHEN函数和SUM函数 SELECT SUM( CASE WHEN pre_total_fee BETWEEN 0 AND 15 THEN 1 ELSE 0 END ) AS 0_15, SUM( CASE WHEN pre_total_fee BETWEEN 16 AND 30 THEN 1 ELSE 0 END ) AS 16_30, SUM( CASE WHEN pre_total_fee BETWEEN 31 AND 50 THEN 1 ELSE 0 END ) AS 31_50, SUM( CASE WHEN pre_total_fee BETWEEN 51 AND 100 THEN 1 ELSE 0 END ) AS 51_100, SUM( CASE WHEN pre_total_fee > 100 THEN 1 ELSE 0 END ) AS 100_ FROM db_hudi.tbl_hudi_didi;
相关文章
- Entity Framework 无法加载指定的元数据资源。
- 保存数据同一时候查询保存数据记录的ID
- Google Earth Engine ——MYD17A2H/A3H/GF V6总初级生产力(GPP)产品是一个具有500米分辨率的8天/16天累积综合数据。
- Google Earth Engine ——世界200000冰川面积、几何形状、表面速度和雪线高程current数据集
- 《迷人的8051单片机》----3.6 构造类型数据
- mysql数据查询 2.条件查询
- 《Python数据可视化编程实战》——5.5 用OpenGL制作动画
- Oracle取查询结果数据的第一条记录SQL:
- 在Hadoop集群中用PySpark处理数据的知识详解
- Excel VLOOKUP实用教程之 04 vlookup如何实现三变量查找,三个条件字段查询数据?(教程含数据excel)
- SQL Python教程之在 Python 环境中使用 SQL进校数据查询分析聚合过滤pandsql (教程含源码和数据集)
- 提高mysql千万级大数据SQL查询优化几条经验
- 阿里云大数据利器Maxcompute-使用mapjoin优化查询
- 7个影响数据分析的数据建模错误
- MySQL-比较两个表不同的数据
- 【HMS Core】华为分析服务通过REST方式上报用户行为,控制台为何无法查询到相关数据?
- golang自己定义数据类型查询与插入postgresql中point数据
- 寻找大数据变现的突破口
- docker(7):使用python 连接数据库,插入并查询数据--link
- elementUI table 显示所有数据,不显示纵向滚动条
- 工信部:大数据十三五规划年内将出台 助信息安全发展
- Oracle 通过子查询批量添加、修改表数据
- 【python】postgresql查询数据返回成字典格式