I have a calender table for everyday in 2021. Also i have an entry table for workers everyday entries. It includes id and date columns. Calender just have date column. How can i find the days when the workers did not come? I thought searching days for every id would work
Advertisement
Answer
Left join the worker entries to all possible combinations of date & id.
Then the unmatched entries will be seen as NULL.
x
select cal.cal_date, wrkr.worker_id
from (select distinct workingday as cal_date from worker) cal
cross join (select distinct id as worker_id from worker) wrkr
left join worker as ent
on ent.workingday = cal.cal_date
and ent.id = wrkr.worker_id
where ent.id is null
group by cal.cal_date, wrkr.worker_id
Extra:
Demo with a calendar and holidays
create table calendar (
cal_date date not null primary key
);
create table holiday (
worker_id int not null,
date_of_holiday date not null,
primary key (worker_id, date_of_holiday)
);
insert into holiday
(worker_id, date_of_holiday) values
(1, '2021-10-15');
create table worker (
id int not null,
workingday date not null,
primary key (id, workingday)
);
--
-- filling the calendar with the current and next year
--
with recursive rcte_calendar as
(
select date_trunc('year', cast(current_date as date)) as day
, date_trunc('year', cast(current_date as date))
+ interval '2 year'
- interval '1 day' as maxday
union all
select day + interval '1 day', maxday
from rcte_calendar
where day < maxday
)
insert into calendar (cal_date)
select day
from rcte_calendar cte
where not exists (
select 1
from calendar cal
where cal.cal_date = cte.day
);
-- what range does the calendar have
select
date_part('year', cal_date) as year
, min(cal_date)
, max(cal_date)
from calendar
group by date_part('year', cal_date)
order by year;
730 rows affected year | min | max :--- | :--------- | :--------- 2021 | 2021-01-01 | 2021-12-31 2022 | 2022-01-01 | 2022-12-31
--
-- putting test data in worker
--
insert into worker (id, workingday)
select wrkr.worker_id, cal.cal_date
from calendar as cal
cross join (values (1),(2),(3)) wrkr(worker_id)
where cal.cal_date between cast('2021-10-10' as date)
and cast('2021-10-22' as date)
and extract(dow from cal.cal_date) not in (0, 6) -- no weekends
and not exists (
select 1
from holiday as hol
where hol.worker_id = wrkr.worker_id
and hol.date_of_holiday = cal.cal_date
)
and not exists (
select 1
from worker as wrkr2
where wrkr2.id = wrkr.worker_id
and wrkr2.workingday = cal.cal_date
);
--
-- worker 2 wasn't working on some days, so remove those
--
delete from worker
where id = 2
and workingday in ('2021-10-15','2021-10-18','2021-10-19');
29 rows affected 3 rows affected
--
-- how does it look so far
--
select *
, extract(dow from workingday) as dayofweek
from worker
id | workingday | dayofweek -: | :--------- | --------: 1 | 2021-10-11 | 1 2 | 2021-10-11 | 1 3 | 2021-10-11 | 1 1 | 2021-10-12 | 2 2 | 2021-10-12 | 2 3 | 2021-10-12 | 2 1 | 2021-10-13 | 3 2 | 2021-10-13 | 3 3 | 2021-10-13 | 3 1 | 2021-10-14 | 4 2 | 2021-10-14 | 4 3 | 2021-10-14 | 4 3 | 2021-10-15 | 5 1 | 2021-10-18 | 1 3 | 2021-10-18 | 1 1 | 2021-10-19 | 2 3 | 2021-10-19 | 2 1 | 2021-10-20 | 3 2 | 2021-10-20 | 3 3 | 2021-10-20 | 3 1 | 2021-10-21 | 4 2 | 2021-10-21 | 4 3 | 2021-10-21 | 4 1 | 2021-10-22 | 5 2 | 2021-10-22 | 5 3 | 2021-10-22 | 5
--
-- The missing days, based on days other workers filled in
-- But without considering the holidays
--
select d.days, wrkr.worker_id
from (select distinct workingday as days from worker) d
cross join (select distinct id as worker_id from worker) wrkr
left join worker as ent
on ent.workingday = d.days
and ent.id = wrkr.worker_id
where ent.id is null
group by d.days, wrkr.worker_id
days | worker_id :--------- | --------: 2021-10-15 | 1 2021-10-15 | 2 2021-10-18 | 2 2021-10-19 | 2
--
-- Missing days, based on period and holidays
--
select cal.cal_date, wrkr.id as worker_id
, count(case when ent.id is null then 1 end)
over (partition by wrkr.id order by cal.cal_date) as rolling_count
from calendar as cal
cross join (select distinct id from worker) wrkr
left join worker as ent
on ent.workingday = cal.cal_date
and ent.id = wrkr.id
where cal.cal_date between cast('2021-10-10' as date)
and cast('2021-10-22' as date)
and extract(dow from cal.cal_date) not in (0, 6) -- no weekends
and not exists ( -- workers may be on holiday
select 1
from holiday as hol
where hol.worker_id = wrkr.id
and hol.date_of_holiday = cal.cal_date
)
and ent.id is null -- the unmatched
group by cal.cal_date, wrkr.id, ent.id
order by cal.cal_date, wrkr.id
cal_date | worker_id | rolling_count :--------- | --------: | ------------: 2021-10-15 | 2 | 1 2021-10-18 | 2 | 2 2021-10-19 | 2 | 3
--
-- Missing day ranges, based on period and holidays
--
with cte_missed_work as
(
select
cal.cal_date
, wrkr.id as worker_id
, lag(cal_date) over (partition by wrkr.id
order by cal.cal_date) as prev_date
from calendar as cal
cross join (select distinct id from worker) wrkr
left join worker as ent
on ent.workingday = cal.cal_date
and ent.id = wrkr.id
where cal.cal_date between cast('2021-10-10' as date)
and cast('2021-10-22' as date)
and extract(dow from cal.cal_date) not in (0, 6) -- no weekends
and not exists ( -- workers may be on holiday
select 1
from holiday as hol
where hol.worker_id = wrkr.id
and hol.date_of_holiday = cal.cal_date
)
and ent.id is null -- the unmatched
group by cal.cal_date, wrkr.id
),
cte_missed_work2 as (
select *
, sum(case when date_part('day', cal_date::timestamp - prev_date::timestamp) = 1 then 0 else 1 end)
over (partition by worker_id order by cal_date) as Rnk
from cte_missed_work
)
select worker_id
, min(cal_date) as firstDate
, max(cal_date) as lastDate
, date_part('day', max(cal_date)::timestamp - min(cal_date)::timestamp)+1 as missing_days
from cte_missed_work2
group by worker_id, Rnk
order by min(cal_date), worker_id
worker_id | firstdate | lastdate | missing_days --------: | :--------- | :--------- | :----------- 2 | 2021-10-15 | 2021-10-15 | 1 2 | 2021-10-18 | 2021-10-19 | 2
Demo on db<>fiddle here