Skip to content
Advertisement

How to find days when workers don’t show up for work

I have a calender table for everyday in 2021. Also i have an entry table for workers everyday entries. It includes id and date columns. Calender just have date column. How can i find the days when the workers did not come? I thought searching days for every id would work

Advertisement

Answer

Left join the worker entries to all possible combinations of date & id.
Then the unmatched entries will be seen as NULL.

select cal.cal_date, wrkr.worker_id
from (select distinct workingday as cal_date from worker) cal
cross join (select distinct id as worker_id from worker) wrkr
left join worker as ent
  on ent.workingday = cal.cal_date
 and ent.id = wrkr.worker_id
where ent.id is null
group by cal.cal_date, wrkr.worker_id

Extra:

Demo with a calendar and holidays

create table calendar (
 cal_date date not null primary key
);

create table holiday (
 worker_id int not null, 
 date_of_holiday date not null, 
 primary key (worker_id, date_of_holiday)
);

insert into holiday 
(worker_id, date_of_holiday) values
(1, '2021-10-15');

create table worker (
 id int not null, 
 workingday date not null, 
 primary key (id, workingday)
);
--
-- filling the calendar with the current and next year
--
with recursive rcte_calendar as
(
  select date_trunc('year', cast(current_date as date)) as day
  , date_trunc('year', cast(current_date as date)) 
    + interval '2 year' 
    - interval '1 day' as maxday
  union all
  select day + interval '1 day', maxday
  from rcte_calendar
  where day < maxday
)
insert into calendar (cal_date)
select day
from rcte_calendar cte
where not exists (
  select 1
  from calendar cal
  where cal.cal_date = cte.day
);

-- what range does the calendar have
select
  date_part('year', cal_date) as year
, min(cal_date)
, max(cal_date) 
from calendar
group by date_part('year', cal_date)
order by year;
730 rows affected

year | min        | max       
:--- | :--------- | :---------
2021 | 2021-01-01 | 2021-12-31
2022 | 2022-01-01 | 2022-12-31
--
-- putting test data in worker
--
insert into worker (id, workingday)
select wrkr.worker_id, cal.cal_date 
from calendar as cal
cross join (values (1),(2),(3)) wrkr(worker_id)
where cal.cal_date between cast('2021-10-10' as date) 
                       and cast('2021-10-22' as date) 
  and extract(dow from cal.cal_date) not in (0, 6) -- no weekends
  and not exists (
      select 1 
      from holiday as hol
      where hol.worker_id = wrkr.worker_id
        and hol.date_of_holiday = cal.cal_date
  )
  and not exists (
      select 1 
      from worker as wrkr2
      where wrkr2.id = wrkr.worker_id
        and wrkr2.workingday = cal.cal_date
  );

--
-- worker 2 wasn't working on some days, so remove those
--
delete from worker 
where id = 2 
  and workingday in ('2021-10-15','2021-10-18','2021-10-19');
29 rows affected

3 rows affected
--
-- how does it look so far
--
select *
, extract(dow from workingday) as dayofweek
from worker
id | workingday | dayofweek
-: | :--------- | --------:
 1 | 2021-10-11 |         1
 2 | 2021-10-11 |         1
 3 | 2021-10-11 |         1
 1 | 2021-10-12 |         2
 2 | 2021-10-12 |         2
 3 | 2021-10-12 |         2
 1 | 2021-10-13 |         3
 2 | 2021-10-13 |         3
 3 | 2021-10-13 |         3
 1 | 2021-10-14 |         4
 2 | 2021-10-14 |         4
 3 | 2021-10-14 |         4
 3 | 2021-10-15 |         5
 1 | 2021-10-18 |         1
 3 | 2021-10-18 |         1
 1 | 2021-10-19 |         2
 3 | 2021-10-19 |         2
 1 | 2021-10-20 |         3
 2 | 2021-10-20 |         3
 3 | 2021-10-20 |         3
 1 | 2021-10-21 |         4
 2 | 2021-10-21 |         4
 3 | 2021-10-21 |         4
 1 | 2021-10-22 |         5
 2 | 2021-10-22 |         5
 3 | 2021-10-22 |         5
--
-- The missing days, based on days other workers filled in
-- But without considering the holidays
-- 
select d.days, wrkr.worker_id
from (select distinct workingday as days from worker) d
cross join (select distinct id as worker_id from worker) wrkr
left join worker as ent
  on ent.workingday = d.days
 and ent.id = wrkr.worker_id
where ent.id is null
group by d.days, wrkr.worker_id
days       | worker_id
:--------- | --------:
2021-10-15 |         1
2021-10-15 |         2
2021-10-18 |         2
2021-10-19 |         2
--
-- Missing days, based on period and holidays
--
select cal.cal_date, wrkr.id as worker_id
, count(case when ent.id is null then 1 end) 
  over (partition by wrkr.id order by cal.cal_date) as rolling_count
from calendar as cal
cross join (select distinct id from worker) wrkr
left join worker as ent
  on ent.workingday = cal.cal_date
 and ent.id = wrkr.id
where cal.cal_date between cast('2021-10-10' as date) 
                       and cast('2021-10-22' as date)
  and extract(dow from cal.cal_date) not in (0, 6) -- no weekends
  and not exists ( -- workers may be on holiday
      select 1 
      from holiday as hol
      where hol.worker_id = wrkr.id
        and hol.date_of_holiday = cal.cal_date
  )
 and ent.id is null -- the unmatched
group by cal.cal_date, wrkr.id, ent.id
order by cal.cal_date, wrkr.id
cal_date   | worker_id | rolling_count
:--------- | --------: | ------------:
2021-10-15 |         2 |             1
2021-10-18 |         2 |             2
2021-10-19 |         2 |             3
--
-- Missing day ranges, based on period and holidays
--
with cte_missed_work as
(
    select 
      cal.cal_date
    , wrkr.id as worker_id
    , lag(cal_date) over (partition by wrkr.id 
                          order by cal.cal_date) as prev_date
    from calendar as cal
    cross join (select distinct id from worker) wrkr
    left join worker as ent
      on ent.workingday = cal.cal_date
     and ent.id = wrkr.id
    where cal.cal_date between cast('2021-10-10' as date) 
                           and cast('2021-10-22' as date)
      and extract(dow from cal.cal_date) not in (0, 6) -- no weekends
      and not exists ( -- workers may be on holiday
          select 1 
          from holiday as hol
          where hol.worker_id = wrkr.id
            and hol.date_of_holiday = cal.cal_date
      )
      and ent.id is null -- the unmatched
      group by cal.cal_date, wrkr.id
), 
cte_missed_work2 as (
  select *
  , sum(case when date_part('day', cal_date::timestamp - prev_date::timestamp) = 1 then 0 else 1 end)
    over (partition by worker_id order by cal_date) as Rnk
  from cte_missed_work
)
select worker_id
, min(cal_date) as firstDate
, max(cal_date) as lastDate
, date_part('day', max(cal_date)::timestamp - min(cal_date)::timestamp)+1 as missing_days
from cte_missed_work2
group by worker_id, Rnk
order by min(cal_date), worker_id
worker_id | firstdate  | lastdate   | missing_days
--------: | :--------- | :--------- | :-----------
        2 | 2021-10-15 | 2021-10-15 | 1           
        2 | 2021-10-18 | 2021-10-19 | 2           

Demo on db<>fiddle here

User contributions licensed under: CC BY-SA
3 People found this is helpful
Advertisement