84 网站点击流数据分析案例(统计分析-PV统计)
2023-09-11 14:15:40 时间
1.多维度统计PV总量
1.时间维度
--计算指定的某个小时pvs
select count(*),month,day,hour from dw_click.ods_weblog_detail group by month,day,hour;
--计算该处理批次(一天)中的各小时pvs
drop table dw_pvs_hour;
create table dw_pvs_hour(month string,day string,hour string,pvs bigint) partitioned by(datestr string);
insert into table dw_pvs_hour partition(datestr='2016-03-18')
select a.month as month,a.day as day,a.hour as hour,count(1) as pvs from ods_weblog_detail a
where a.datestr='2016-03-18' group by a.month,a.day,a.hour;
或者用时间维表关联
维度:日
drop table dw_pvs_day;
create table dw_pvs_day(pvs bigint,month string,day string);
insert into table dw_pvs_day
select count(1) as pvs,a.month as month,a.day as day from dim_time a
join ods_weblog_detail b
on b.dd='18/Sep/2013' and a.month=b.month and a.day=b.day
group by a.month,a.day;
--或者,从之前算好的小时结果中统计
Insert into table dw_pvs_day
Select sum(pvs) as pvs,month,day from dw_pvs_hour group by month,day having day='18';
结果如下:
维度:月
drop table t_display_pv_month;
create table t_display_pv_month (pvs bigint,month string);
insert into table t_display_pv_month
select count(*) as pvs,a.month from t_dim_time a
join t_ods_detail_prt b on a.month=b.month group by a.month;
2. 按终端维度统计pv总量
注:探索数据中的终端类型
select distinct(http_user_agent) from ods_weblog_detail where http_user_agent like '%Mozilla%' limit 200;
终端维度:uc
drop table t_display_pv_terminal_uc;
create table t_display_pv_ terminal_uc (pvs bigint,mm string,dd string,hh string);
终端维度:chrome
drop table t_display_pv_terminal_chrome;
create table t_display_pv_ terminal_ chrome (pvs bigint,mm string,dd string,hh string);
终端维度:safari
drop table t_display_pv_terminal_safari;
create table t_display_pv_ terminal_ safari (pvs bigint,mm string,dd string,hh string);
3.按栏目维度统计pv总量
栏目维度:job
栏目维度:news
栏目维度:bargin
栏目维度:lane
2.人均浏览页数
需求描述:比如,今日所有来访者,平均请求的页面数
–总页面请求数/去重总人数
drop table dw_avgpv_user_d;
create table dw_avgpv_user_d(
day string,
avgpv string);
insert into table dw_avgpv_user_d
select '2013-09-18',sum(b.pvs)/count(b.remote_addr) from
(select remote_addr,count(1) as pvs from ods_weblog_detail where datestr='2013-09-18' group by remote_addr) b;
3.按referer维度统计pv总量
需求:按照来源及时间维度统计PVS,并按照PV大小倒序排序
– 按照小时粒度统计,查询结果存入:( “dw_pvs_referer_h” )
drop table dw_pvs_referer_h;
create table dw_pvs_referer_h(referer_url string,referer_host string,month string,day string,hour string,pv_referer_cnt bigint) partitioned by(datestr string);
insert into table dw_pvs_referer_h partition(datestr='2016-03-18')
select http_referer,ref_host,month,day,hour,count(1) as pv_referer_cnt
from ods_weblog_detail
group by http_referer,ref_host,month,day,hour
having ref_host is not null
order by hour asc,day asc,month asc,pv_referer_cnt desc;
按天粒度统计各来访域名的访问次数并排序
drop table dw_ref_host_visit_cnts_h;
create table dw_ref_host_visit_cnts_h(ref_host string,month string,day string,hour string,ref_host_cnts bigint) partitioned by(datestr string);
insert into table dw_ref_host_visit_cnts_h partition(datestr='2016-03-18')
select ref_host,month,day,hour,count(1) as ref_host_cnts
from ods_weblog_detail
group by ref_host,month,day,hour
having ref_host is not null
order by hour asc,day asc,month asc,ref_host_cnts desc;
注:还可以按来源地域维度、访客终端维度等计算
4.统计pv总量最大的来源TOPN
需求描述:按照时间维度,比如,统计一天内产生最多pvs的来源topN
需要用到row_number函数
以下语句对每个小时内的来访host次数倒序排序标号,
select ref_host,ref_host_cnts,concat(month,hour,day),
row_number() over (partition by concat(month,hour,day) order by ref_host_cnts desc) as od
from dw_ref_host_visit_cnts_h
效果如下:
根据上述row_number的功能,可编写Hql取各小时的ref_host访问次数topn
drop table dw_pvs_refhost_topn_h;
create table dw_pvs_refhost_topn_h(
hour string,
toporder string,
ref_host string,
ref_host_cnts string
) partitioned by(datestr string);
insert into table zs.dw_pvs_refhost_topn_h partition(datestr='2016-03-18')
select t.hour,t.od,t.ref_host,t.ref_host_cnts from
(select ref_host,ref_host_cnts,concat(month,day,hour) as hour,
row_number() over (partition by concat(month,day,hour) order by ref_host_cnts desc) as od
from zs.dw_ref_host_visit_cnts_h) t where od<=3;
结果如下:
注:还可以按来源地域维度、访客终端维度等计算
相关文章
- Spring Boot Hello World 基于 IDEA 案例详解
- AutoIT的语法和使用案例-小结
- mysql 重新整理——索引优化一个简单的案例 [十一]
- Android实训案例(二)——Android下的CMD命令之关机重启以及重启recovery
- Scala代码案例:统计三个班成绩情况,每个班有5名同学,求出各个班的平均分和所有班级的平均分
- 16.数组案例求成绩
- NLP:利用count函数或正则表达式compile、findall、finditer实现匹配统计(包括模糊匹配的贪婪匹配、懒惰匹配)、对多个字符串组成的列表进行多个模糊关键词进行模糊匹配案例
- NLP:N-Gram(gram窗口分段再统计)基于概率统计语言模型的简介(包括马尔可夫假设概述)、使用方法、案例应用之详细攻略
- Database之SQLSever:SQL命令实现的高级案例集合之单表/多表(筛选、统计个数)之详细攻略
- ML之LoR&Bagging&RF:依次利用LoR、Bagging、RF算法对titanic(泰坦尼克号)数据集 (Kaggle经典案例)获救人员进行二分类预测(最全)
- Hadoop快速入门——第三章、MapReduce案例(字符统计)
- 100天精通Python(数据分析篇)——第62天:pandas常用统计方法大全(含案例)
- 「github资料」40个Python可视化图表案例(附零基础学习资料)篇幅较长,建议收藏
- Scratch3.0——助力新进程序员理解程序(案例六、足球乱射)