I have a calender table for everyday in 2021. Also i have an entry table for workers everyday entries. It includes id and date columns. Calender just have date column. How can i find the days when the workers did not come? I thought searching days for every id would work
Advertisement
Answer
Left join the worker entries to all possible combinations of date & id.
Then the unmatched entries will be seen as NULL.
select cal.cal_date, wrkr.worker_id from (select distinct workingday as cal_date from worker) cal cross join (select distinct id as worker_id from worker) wrkr left join worker as ent on ent.workingday = cal.cal_date and ent.id = wrkr.worker_id where ent.id is null group by cal.cal_date, wrkr.worker_id
Extra:
Demo with a calendar and holidays
create table calendar ( cal_date date not null primary key ); create table holiday ( worker_id int not null, date_of_holiday date not null, primary key (worker_id, date_of_holiday) ); insert into holiday (worker_id, date_of_holiday) values (1, '2021-10-15'); create table worker ( id int not null, workingday date not null, primary key (id, workingday) );
-- -- filling the calendar with the current and next year -- with recursive rcte_calendar as ( select date_trunc('year', cast(current_date as date)) as day , date_trunc('year', cast(current_date as date)) + interval '2 year' - interval '1 day' as maxday union all select day + interval '1 day', maxday from rcte_calendar where day < maxday ) insert into calendar (cal_date) select day from rcte_calendar cte where not exists ( select 1 from calendar cal where cal.cal_date = cte.day ); -- what range does the calendar have select date_part('year', cal_date) as year , min(cal_date) , max(cal_date) from calendar group by date_part('year', cal_date) order by year;730 rows affected year | min | max :--- | :--------- | :--------- 2021 | 2021-01-01 | 2021-12-31 2022 | 2022-01-01 | 2022-12-31
-- -- putting test data in worker -- insert into worker (id, workingday) select wrkr.worker_id, cal.cal_date from calendar as cal cross join (values (1),(2),(3)) wrkr(worker_id) where cal.cal_date between cast('2021-10-10' as date) and cast('2021-10-22' as date) and extract(dow from cal.cal_date) not in (0, 6) -- no weekends and not exists ( select 1 from holiday as hol where hol.worker_id = wrkr.worker_id and hol.date_of_holiday = cal.cal_date ) and not exists ( select 1 from worker as wrkr2 where wrkr2.id = wrkr.worker_id and wrkr2.workingday = cal.cal_date ); -- -- worker 2 wasn't working on some days, so remove those -- delete from worker where id = 2 and workingday in ('2021-10-15','2021-10-18','2021-10-19');29 rows affected 3 rows affected
-- -- how does it look so far -- select * , extract(dow from workingday) as dayofweek from workerid | workingday | dayofweek -: | :--------- | --------: 1 | 2021-10-11 | 1 2 | 2021-10-11 | 1 3 | 2021-10-11 | 1 1 | 2021-10-12 | 2 2 | 2021-10-12 | 2 3 | 2021-10-12 | 2 1 | 2021-10-13 | 3 2 | 2021-10-13 | 3 3 | 2021-10-13 | 3 1 | 2021-10-14 | 4 2 | 2021-10-14 | 4 3 | 2021-10-14 | 4 3 | 2021-10-15 | 5 1 | 2021-10-18 | 1 3 | 2021-10-18 | 1 1 | 2021-10-19 | 2 3 | 2021-10-19 | 2 1 | 2021-10-20 | 3 2 | 2021-10-20 | 3 3 | 2021-10-20 | 3 1 | 2021-10-21 | 4 2 | 2021-10-21 | 4 3 | 2021-10-21 | 4 1 | 2021-10-22 | 5 2 | 2021-10-22 | 5 3 | 2021-10-22 | 5
-- -- The missing days, based on days other workers filled in -- But without considering the holidays -- select d.days, wrkr.worker_id from (select distinct workingday as days from worker) d cross join (select distinct id as worker_id from worker) wrkr left join worker as ent on ent.workingday = d.days and ent.id = wrkr.worker_id where ent.id is null group by d.days, wrkr.worker_iddays | worker_id :--------- | --------: 2021-10-15 | 1 2021-10-15 | 2 2021-10-18 | 2 2021-10-19 | 2
-- -- Missing days, based on period and holidays -- select cal.cal_date, wrkr.id as worker_id , count(case when ent.id is null then 1 end) over (partition by wrkr.id order by cal.cal_date) as rolling_count from calendar as cal cross join (select distinct id from worker) wrkr left join worker as ent on ent.workingday = cal.cal_date and ent.id = wrkr.id where cal.cal_date between cast('2021-10-10' as date) and cast('2021-10-22' as date) and extract(dow from cal.cal_date) not in (0, 6) -- no weekends and not exists ( -- workers may be on holiday select 1 from holiday as hol where hol.worker_id = wrkr.id and hol.date_of_holiday = cal.cal_date ) and ent.id is null -- the unmatched group by cal.cal_date, wrkr.id, ent.id order by cal.cal_date, wrkr.idcal_date | worker_id | rolling_count :--------- | --------: | ------------: 2021-10-15 | 2 | 1 2021-10-18 | 2 | 2 2021-10-19 | 2 | 3
-- -- Missing day ranges, based on period and holidays -- with cte_missed_work as ( select cal.cal_date , wrkr.id as worker_id , lag(cal_date) over (partition by wrkr.id order by cal.cal_date) as prev_date from calendar as cal cross join (select distinct id from worker) wrkr left join worker as ent on ent.workingday = cal.cal_date and ent.id = wrkr.id where cal.cal_date between cast('2021-10-10' as date) and cast('2021-10-22' as date) and extract(dow from cal.cal_date) not in (0, 6) -- no weekends and not exists ( -- workers may be on holiday select 1 from holiday as hol where hol.worker_id = wrkr.id and hol.date_of_holiday = cal.cal_date ) and ent.id is null -- the unmatched group by cal.cal_date, wrkr.id ), cte_missed_work2 as ( select * , sum(case when date_part('day', cal_date::timestamp - prev_date::timestamp) = 1 then 0 else 1 end) over (partition by worker_id order by cal_date) as Rnk from cte_missed_work ) select worker_id , min(cal_date) as firstDate , max(cal_date) as lastDate , date_part('day', max(cal_date)::timestamp - min(cal_date)::timestamp)+1 as missing_days from cte_missed_work2 group by worker_id, Rnk order by min(cal_date), worker_idworker_id | firstdate | lastdate | missing_days --------: | :--------- | :--------- | :----------- 2 | 2021-10-15 | 2021-10-15 | 1 2 | 2021-10-18 | 2021-10-19 | 2
Demo on db<>fiddle here