Skip to content
Advertisement

How to get all rows from a SQL table before a change in column string?

I have these 2 tables and their inserts:

create table #JobFailureConfig (
    JobId int not null,
    JobName varchar(128) not null);
go

insert into #JobFailureConfig values (2, 'CopyOverFileJob');
insert into #JobFailureConfig values (3, 'ExcludeFromWarehouseJob');
go

create table #Job(
    id int,
    JobId int,
    CreateDtTm datetimeoffset,
    StatusDtTm datetimeoffset,
    StatusName varchar(128));
go

insert into #Job values (47022087, 3, '2020-05-07 18:44:35.7327 -07:00', '2020-05-07 18:44:36.6599 -07:00','Failed');
insert into #Job values (47022092, 3, '2020-05-07 18:44:47.1431 -07:00', '2020-05-07 18:44:47.2599 -07:00','Failed');
insert into #Job values (47022097, 3, '2020-05-07 18:45:36.6888 -07:00', '2020-05-07 18:45:36.8439 -07:00','Failed');
insert into #Job values (47022088, 3, '2020-05-07 18:46:35.7327 -07:00', '2020-05-07 18:47:36.6599 -07:00','Finished');
insert into #Job values (47022087, 3, '2020-05-07 18:44:35.7327 -07:00', '2020-05-13 18:44:36.6599 -07:00','Failed');
insert into #Job values (47022092, 3, '2020-05-07 18:44:47.1431 -07:00', '2020-05-13 18:44:47.2599 -07:00','Failed');

insert into #Job values (98234237, 2, '2020-05-01 18:44:35.7327 -07:00', '2020-05-01 18:44:36.6599 -07:00','Finished');
insert into #Job values (98234238, 2, '2020-05-03 18:44:35.7327 -07:00', '2020-05-03 18:44:36.6599 -07:00','Failed');
insert into #Job values (98234238, 2, '2020-05-03 18:44:35.7327 -07:00', '2020-04-03 18:44:36.6599 -07:00','Failed');
go

I have this query to partition them by JobId and order them by JobId and StatusDtTm:

select
    j.Id,
    j.JobId,
    jfc.JobName,
    j.StatusName,
    CreateDtTm = format(cast(CreateDtTm as datetime), 'yyyy-MM-dd HH:mm:ss'),
    StatusDtTm = format(cast(StatusDtTm as datetime), 'yyyy-MM-dd HH:mm:ss'),
    row_number() over (Partition by j.JobId order by j.JobId, j.StatusDtTm desc) as JobFailureId
from #Job j
join #JobFailureConfig jfc
    on j.JobId = jfc.JobId;
go

enter image description here

I want to fetch only the rows where the Job last finished per job. Basically get all the records per job before it last finished (highlighted in green). I can’t seem to figure out what to add in the where clause here to get the highlighted result. Can someone help please?

Advertisement

Answer

One approach is to add the following column to your query:

count(case when statusname = 'Finished' then 1 end)
   over(partition by j.jobid
        order by j.StatusDtTm
        rows between unbounded preceding and current row) sel

This new field sel will have a nonzero value for the highlighted rows, where a current or a previous StatusName for that job ID has the value ‘Finished’; otherwise, the value will be zero.

The full query to output only the highlighted records is then:

with cte as
(
    select
       j.Id,
       j.JobId,
       jfc.JobName,
       j.StatusName,
       format(cast(CreateDtTm as datetime), 'yyyy-MM-dd HH:mm:ss') as CreateDtTm,
       format(cast(StatusDtTm as datetime), 'yyyy-MM-dd HH:mm:ss') as StatusDtTm,
       row_number() over (Partition by j.JobId order by j.JobId, j.StatusDtTm desc) as JobFailureId,
       count(case when statusname = 'Finished' then 1 end)
         over(partition by j.jobid
              order by j.StatusDtTm
              rows between unbounded preceding and current row) sel
   from #Job j
   join #JobFailureConfig jfc
       on j.JobId = jfc.JobId
)
select Id, JobId, JobName, StatusName, CreateDtTm, StatusDtTm, JobFailureId
from cte
where sel=1
order by jobid, jobfailureid
User contributions licensed under: CC BY-SA
2 People found this is helpful
Advertisement