How to accurately pull minimum date of each time a payment method was changed

I need to get the minimum date for each time a payment method changes between the method id, account number, or routing number. I’m bouncing between a cursor and between windows functions. Doing the method below seems to work fine, but when i use this method with a larger dataset, with different pay ids involved, it doesn’t seem to be doing things in order by person so some will have duplicate entrys.

Duplicates would be 2 payments of the same kind within the same date range of being used. I only want the min date.

With windows functions this is what i’ve tried so far. I’m looking for suggestions on how to make this more accurate for larger datasets. Do i even go this route, or do i turn to using a stored procedure that takes in one person at a time?

create table #payments (pay_ID uniqueidentifier, pay_type int, pay_account varchar(max), pay_routing varchar(max),  pay_date datetime);
create table #ordered_payments (pay_ID uniqueidentifier,pay_type int, pay_account varchar(max), pay_routing varchar(max), pay_date datetime, rn int);

DECLARE @payID uniqueidentifier = newid();

--Dummy record so that when grabbing the ordered payments we get the first payment. Without this the last result set on line 30 will not pick up the first one.
INSERT INTO #payments (pay_ID, pay_type, pay_account, pay_routing, pay_date) VALUES
(newid(),0, '', '', '1/1/1900')  

--Actual payments made
insert into #payments (pay_ID, pay_type, pay_account, pay_routing, pay_date) VALUES
(@payID, 1, 'e121', '0101', '09/18/2020'),
(@payID, 1, 'e121', '0101', '09/19/2020'),
(@payID, 1, 'e121', '0101', '09/20/2020'),
(@payID, 2, 'e122', '0102', '09/21/2020'),
(@payID, 2, 'e122', '0102', '09/22/2020'),
(@payID, 1, 'e121', '0101', '09/23/2020'),
(@payID, 1, 'e121', '0101', '09/24/2020'),
(@payID, 1, 'e121', '0101', '09/25/2020'),
(@payID, 2, 'e122', '0102', '09/26/2020'),
(@payID, 2, 'e122', '0102', '09/27/2020'),
(@payID, 3, 'e123', '0103', '09/28/2020'),
(@payID, 1, 'e121', '0101', '09/29/2020'),
(@payID, 1, 'e121', '0101', '09/30/2020'),
(@payID, 1, 'e121', '0101', '10/01/2020'),
(@payID, 1, 'e121', '0101', '10/02/2020')



insert into #ordered_payments
select pay_ID, pay_type, pay_account, pay_routing, pay_date, row_number() over(order by pay_date) rn
from #payments

select p.pay_ID ,p.pay_type, p.pay_date, p.rn 
from #ordered_payments p
inner join #ordered_payments pp 
on (
    (
    p.pay_type <> pp.pay_type 
    OR p.pay_account <> pp.pay_account
    OR p.pay_routing <> pp.pay_routing
    ) 
        and p.rn = pp.rn + 1
   )

​x
 
create table #payments (pay_ID uniqueidentifier, pay_type int, pay_account varchar(max), pay_routing varchar(max),  pay_date datetime);create table #ordered_payments (pay_ID uniqueidentifier,pay_type int, pay_account varchar(max), pay_routing varchar(max), pay_date datetime, rn int);​DECLARE @payID uniqueidentifier = newid();​--Dummy record so that when grabbing the ordered payments we get the first payment. Without this the last result set on line 30 will not pick up the first one.INSERT INTO #payments (pay_ID, pay_type, pay_account, pay_routing, pay_date) VALUES(newid(),0, '', '', '1/1/1900')  ​--Actual payments madeinsert into #payments (pay_ID, pay_type, pay_account, pay_routing, pay_date) VALUES(@payID, 1, 'e121', '0101', '09/18/2020'),(@payID, 1, 'e121', '0101', '09/19/2020'),(@payID, 1, 'e121', '0101', '09/20/2020'),(@payID, 2, 'e122', '0102', '09/21/2020'),(@payID, 2, 'e122', '0102', '09/22/2020'),(@payID, 1, 'e121', '0101', '09/23/2020'),(@payID, 1, 'e121', '0101', '09/24/2020'),(@payID, 1, 'e121', '0101', '09/25/2020'),(@payID, 2, 'e122', '0102', '09/26/2020'),(@payID, 2, 'e122', '0102', '09/27/2020'),(@payID, 3, 'e123', '0103', '09/28/2020'),(@payID, 1, 'e121', '0101', '09/29/2020'),(@payID, 1, 'e121', '0101', '09/30/2020'),(@payID, 1, 'e121', '0101', '10/01/2020'),(@payID, 1, 'e121', '0101', '10/02/2020')​​​insert into #ordered_paymentsselect pay_ID, pay_type, pay_account, pay_routing, pay_date, row_number() over(order by pay_date) rnfrom #payments​select p.pay_ID ,p.pay_type, p.pay_date, p.rn from #ordered_payments pinner join #ordered_payments pp on (    (    p.pay_type <> pp.pay_type     OR p.pay_account <> pp.pay_account    OR p.pay_routing <> pp.pay_routing    )         and p.rn = pp.rn + 1   )​

Desired result would be

1   2020-09-18 00:00:00.000
2   2020-09-21 00:00:00.000
1   2020-09-23 00:00:00.000
2   2020-09-26 00:00:00.000
3   2020-09-28 00:00:00.000
1   2020-09-29 00:00:00.000

 
1   2020-09-18 00:00:00.0002   2020-09-21 00:00:00.0001   2020-09-23 00:00:00.0002   2020-09-26 00:00:00.0003   2020-09-28 00:00:00.0001   2020-09-29 00:00:00.000​

Answer

I think you can use lag() to get the first record after things change:

select p.*
from (select p.*,
             lag(pay_date) over (partition by pay_id, order by pay_date) as prev_pd,
             lag(pay_date) over (partition by pay_id, pay_account, pay_type, pay_routing order by pay_date) as prev_pd_grp
      from #payments p
     ) p
where prev_pd_grp is null or prev_pd_grp <> prev_pd;

 
select p.*from (select p.*,             lag(pay_date) over (partition by pay_id, order by pay_date) as prev_pd,             lag(pay_date) over (partition by pay_id, pay_account, pay_type, pay_routing order by pay_date) as prev_pd_grp      from #payments p     ) pwhere prev_pd_grp is null or prev_pd_grp <> prev_pd;​

Here is a db<>fiddle.

Advertisement

Answer