Need to group records by consecutive date SQL

I’m building a query in order to build consecutive dates which is

create table #consecutivedates (
    sgid nvarchar(max), 
    metric nvarchar(max), 
    targetvalue nvarchar(max), 
    startdate datetime, 
    enddate datetime
)

insert into #consecutivedates values 
    ('2177', '515818', '18', '2019-09-01',  '2019-09-30'),
    ('2177', '515818', '125', '2019-08-01',  '2019-08-31'),
    ('2177', '515818', '15', '2019-07-01',  '2019-07-31')

SELECT  sgid,metric, CAST(startdate AS DATE) startdate, CAST(enddate AS DATE) enddate,
        ROW_NUMBER() OVER ( ORDER BY sgid, metric, startdate ) rn
INTO    #temp
FROM    #consecutivedates

-- GroupingColumn in cte used to identify and group consecutive dates
;WITH    cte
          AS ( SELECT   sgid ,
          metric , 
                        startdate ,
                        enddate ,
                        1 AS GroupingColumn ,
                        rn
               FROM     #temp
               WHERE    rn = 1
               UNION ALL
               SELECT   t2.sgid ,
               t2.metric,
                        t2.startdate,
                        t2.enddate ,
                        CASE WHEN t2.startdate = DATEADD(day, 1, cte.enddate) 
                                  AND cte.sgid = t2.sgid AND cte.metric=t2.metric
                             THEN cte.GroupingColumn
                             ELSE cte.GroupingColumn + 1
                        END AS GroupingColumn ,
                        t2.rn
               FROM     #temp t2
                        INNER JOIN cte ON t2.rn = cte.rn + 1
             )
    SELECT  sgid,metric, MIN(startdate) AS startdate, MAX(enddate) AS enddate
    FROM    cte
    GROUP BY sgid,metric, GroupingColumn

DROP TABLE #temp
DROP TABLE #consecutivedates

​x
 
create table #consecutivedates (    sgid nvarchar(max),     metric nvarchar(max),     targetvalue nvarchar(max),     startdate datetime,     enddate datetime)​insert into #consecutivedates values     ('2177', '515818', '18', '2019-09-01',  '2019-09-30'),    ('2177', '515818', '125', '2019-08-01',  '2019-08-31'),    ('2177', '515818', '15', '2019-07-01',  '2019-07-31')​SELECT  sgid,metric, CAST(startdate AS DATE) startdate, CAST(enddate AS DATE) enddate,        ROW_NUMBER() OVER ( ORDER BY sgid, metric, startdate ) rnINTO    #tempFROM    #consecutivedates​-- GroupingColumn in cte used to identify and group consecutive dates;WITH    cte          AS ( SELECT   sgid ,          metric ,                         startdate ,                        enddate ,                        1 AS GroupingColumn ,                        rn               FROM     #temp               WHERE    rn = 1               UNION ALL               SELECT   t2.sgid ,               t2.metric,                        t2.startdate,                        t2.enddate ,                        CASE WHEN t2.startdate = DATEADD(day, 1, cte.enddate)                                   AND cte.sgid = t2.sgid AND cte.metric=t2.metric                             THEN cte.GroupingColumn                             ELSE cte.GroupingColumn + 1                        END AS GroupingColumn ,                        t2.rn               FROM     #temp t2                        INNER JOIN cte ON t2.rn = cte.rn + 1             )    SELECT  sgid,metric, MIN(startdate) AS startdate, MAX(enddate) AS enddate    FROM    cte    GROUP BY sgid,metric, GroupingColumn​DROP TABLE #tempDROP TABLE #consecutivedates​

but I’m facing a problem with two things.

query is too expensive when files are big.
if the dates are something like

startdate enddate ‘2019-08-01’ ‘2019-09-30’ ‘2019-10-01’ ‘2019-10-31’

Then the query will not group the dates and I need it to be smart enough to solve this kind of cases.

Any help would be nice,

Thanks.

Answer

I undersand that you want to group together records that have the same sgid and metric and that are adjacent (ie next record starts one day after the end of the current record).

Here is a gaps and island solution that uses window sum to define the groups:

select sgid, metric, min(startdate) startdate, max(enddate) enddate
from (
    select
        t.*,
        sum(case when startdate = dateadd(day, 1, lag_enddate) then 0 else 1 end)
            over(partition by sgid, metric order by startdate) grp
    from (
        select 
            t.*, 
            lag(enddate) over(partition by sgid, metric order by startdate) lag_enddate
        from  #consecutivedates t
    ) t
) t
group by sgid, metric, grp

 
select sgid, metric, min(startdate) startdate, max(enddate) enddatefrom (    select        t.*,        sum(case when startdate = dateadd(day, 1, lag_enddate) then 0 else 1 end)            over(partition by sgid, metric order by startdate) grp    from (        select             t.*,             lag(enddate) over(partition by sgid, metric order by startdate) lag_enddate        from  #consecutivedates t    ) t) tgroup by sgid, metric, grp​

For your sample data, where all three records are adjacent, this produces:

sgid | metric | startdate               | enddate                
:--- | :----- | :---------------------- | :----------------------
2177 | 515818 | 2019-07-01 00:00:00.000 | 2019-09-30 00:00:00.000

Demo on DB Fiddle

Note that the query uses SQL Server date functions (which I suspect that you are using): alternatives exists in other databases.

Advertisement

Answer