问题
如下为某平台的商品促销数据,字段含义分别为品牌名称、打折开始日期、打折结束日期,现在要计算每个品牌的打折销售天数(注意其中的交叉日期)。比如vivo的打折销售天数就为17天。
brand start_date end_date
xiaomi 2021-06-05 2021-06-09
xiaomi 2021-06-11 2021-06-21
vivo 2021-06-05 2021-06-15
vivo 2021-06-09 2021-06-21
honor 2021-06-05 2021-06-21
honor 2021-06-09 2021-06-15
redmi 2021-06-17 2021-06-26
huawei 2021-06-05 2021-06-26
huawei 2021-06-09 2021-06-15
huawei 2021-06-17 2021-06-21
解答
方法一,对每行数据打折范围,枚举出所有打折日期,对品牌+打折日期去重后,得到每个品牌的打折日期都是哪几天,每天一行
with temp as (
select 'xiaomi' as brand ,'2021-06-05' as start_date,'2021-06-09' as end_date
union all
select 'xiaomi' as brand ,'2021-06-11' as start_date,'2021-06-21' as end_date
union all
select 'vivo' as brand ,'2021-06-05' as start_date,'2021-06-15' as end_date
union all
select 'vivo' as brand ,'2021-06-09' as start_date,'2021-06-21' as end_date
union all
select 'honor' as brand ,'2021-06-05' as start_date,'2021-06-21' as end_date
union all
select 'honor' as brand ,'2021-06-09' as start_date,'2021-06-15' as end_date
union all
select 'honor' as brand ,'2021-06-17' as start_date,'2021-06-26' as end_date
union all
select 'huawei' as brand ,'2021-06-05' as start_date,'2021-06-26' as end_date
union all
select 'huawei' as brand ,'2021-06-09' as start_date,'2021-06-15' as end_date
union all
select 'huawei' as brand ,'2021-06-17' as start_date,'2021-06-21' as end_date
)
select
brand
,count(distinct dt) as dts
from (
select
brand
,start_date
,end_date
,date_add(start_date,tmp.col_idx) as dt
from temp
lateral VIEW posexplode(split(repeat("#,",datediff(date(end_date), date(start_date))),'#')) tmp AS col_idx,col_val
) tt
group by brand
;
方法二,每个品牌下,打折开始时间升序,当前开始时间 对比 上一行结束打折时间是否有交集(start_date<=max_date),有的话使用上一行结束打折日期后一天,来规避日期范围交集
with temp as (
select 'xiaomi' as brand ,'2021-06-05' as start_date,'2021-06-09' as end_date
union all
select 'xiaomi' as brand ,'2021-06-11' as start_date,'2021-06-21' as end_date
union all
select 'vivo' as brand ,'2021-06-05' as start_date,'2021-06-15' as end_date
union all
select 'vivo' as brand ,'2021-06-09' as start_date,'2021-06-21' as end_date
union all
select 'honor' as brand ,'2021-06-05' as start_date,'2021-06-21' as end_date
union all
select 'honor' as brand ,'2021-06-09' as start_date,'2021-06-15' as end_date
union all
select 'honor' as brand ,'2021-06-17' as start_date,'2021-06-26' as end_date
union all
select 'huawei' as brand ,'2021-06-05' as start_date,'2021-06-26' as end_date
union all
select 'huawei' as brand ,'2021-06-09' as start_date,'2021-06-15' as end_date
union all
select 'huawei' as brand ,'2021-06-17' as start_date,'2021-06-21' as end_date
)
select
brand
,sum(datediff(date(end_date),date(start_date))+1)
from
(
select
brand
,case
when start_date<=max_date then date_add(date(max_date),1)
else start_date end
as start_date
,end_date
from(
select
brand
,start_date
,end_date
,max(end_date) over(partition by brand order by start_date rows between UNBOUNDED PRECEDING and 1 PRECEDING ) as max_date --获取同一品牌内按开始日期排序后,取第一行到前一行的最大结束时间
from temp
)t1
)t1
where end_date>=start_date
group by brand
;
Comments NOTHING