单分区去重
set mapred.reduce.tasks = 10;
WITH temptable as (
SELECT *,
ROW_NUMBER() OVER (PARTITION BY aa, bb,cc
ORDER BY `timestamp` DESC) as row_num
FROM db.tablename where dt='2024-05-30'
)
INSERT OVERWRITE TABLE db.tablenam partition(dt='2024-05-30')
SELECT
`aa`,
`bb`,
`cc`
FROM temptable
WHERE row_num = 1;
动态分区去重
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrick;
set mapred.reduce.tasks = 5;
WITH temptable as (
SELECT *,
ROW_NUMBER() OVER (PARTITION BY dt,aa, bb,cc,dd
ORDER BY `timestamp` DESC) as row_num
FROM dbname.tablename where dt>'2024-05-30'
)
INSERT OVERWRITE TABLE dbname.tablename partition(dt)
SELECT
`aa`,
`bb`,
`cc`,
`dd`,
`dt`
FROM temptable
WHERE row_num = 1;
评论区