Skip to content
Advertisement

Gaps and Islands Months only

I am working with a data set of clients and their date records. I am trying to apply a gaps and island problem only using MONTHS,( currently var char ‘YYYYMM’). I need to take individual records and group by gaps in months(irregardless of year). I cant figure out how to go from:

Current

drop table RUNNING_LOG;

create table running_log ( 
  run_date          date not null,  
  time_in_seconds   int  not null, 
  distance_in_miles int  not null,
  SERV_YRMO         VARCHAR2(6)
);

insert into running_log values (date'2018-01-01', 420, 1,'201801');  
insert into running_log values (date'2018-01-02', 2400, 5,'201801');  
insert into running_log values (date'2018-01-03', 2430, 5,'201801');  
insert into running_log values (date'2018-01-06', 2350, 5,'201801');  
insert into running_log values (date'2018-02-07', 410, 1,'201802');  
insert into running_log values (date'2018-02-10', 400, 1,'201802');  
insert into running_log values (date'2018-02-13', 2300, 5,'201802');  
insert into running_log values (date'2018-12-31', 425, 1,'201803');  
insert into running_log values (date'2019-01-01', 422, 1,'201901');  
insert into running_log values (date'2019-01-06', 2350, 5,'201901');  
insert into running_log values (date'2019-02-07', 410, 1,'201902');  
insert into running_log values (date'2019-02-10', 400, 1,'201902');  
insert into running_log values (date'2019-02-13', 2300, 5,'201902');  
insert into running_log values (date'2019-03-14', 425, 1,'201903');  
insert into running_log values (date'2019-03-15', 422, 1,'201903');
insert into running_log values (date'2020-03-01', 425, 1,'202003');  
insert into running_log values (date'2021-03-31', 422, 1,'202103');   

commit;  

select * from running_log;

To: Final Result

Advertisement

Answer

One method is uses dense_rank() and truncating dates to months:

select to_char(min(run_date), 'YYYY-MM'), to_char(max(run_date), 'YYYY-MM'), sum(distance)
from (select t.*,
             dense_rank() over (order by trunc(run_date, 'Month')) as seqnum
      from t
     ) t
group by trunc(run_date, 'Month') - seqnum * interval '1' month
order by min(run_date);
User contributions licensed under: CC BY-SA
7 People found this is helpful
Advertisement