(I’m not sure if I’m using the correct title… please let me know if you think it should be changed to something else)
Hi, I’m trying to select substring with different starting location.
I have a table like this,
Req:
x
WITH
Req (COURSE_NUM, MAJOR) AS
(
VALUES
('A001', 'CS1 ')
, ('A002', 'CS2 CS1 CS3 CS4')
, ('A003', 'CS2 ')
, ('B001', 'CS3 CS1 ')
, ('B002', 'CS2 ')
)
SELECT * FROM Req
COURSE_NUM MAJOR
---------- -------
A001 CS1
A002 CS2 CS1 CS3 CS4
A003 CS2
B001 CS3 CS1
B002 CS2
I’m trying to get a table like this:
MAJOR COURSE_NUM
----- -----------
CS1 A001
CS1 A002
CS1 B001
CS2 A002
CS2 A003
CS2 B002
CS3 A002
CS3 B002
CS4 A002
After googling around, I found CHARINDEX() might be a solution. So I tried DB2 equivalent LOCATE():
Select SUBSTR(MAJOR, LOCATE('CS1', MAJOR), 3)
,COURSE_NUM
FROM Req
WHERE MAJOR LIKE '%CS1%'
The above query would give me a result like this:
MAJOR COURSE_NUM
----- -----------
CS1 A001
CS1 A002
CS1 B001
This is okay for one major. What if I want all majors? I also tried using a host variable in LOCATE() and in LIKE clause. But it raised an error:
SQLCODE = -171, ERROR: THE DATA TYPE, LENGTH, OR VALUE OF
ARGUMENT 1 OF LOCATE IS INVALID
I’m not sure if the error was caused by my syntax or other restrictions.
Is there any way or workaround to accomplish this? Please let me know if you need more information. Thanks!
Advertisement
Answer
Db2 for LUW
WITH
Req (COURSE_NUM, MAJOR) AS
(
VALUES
('A001', 'CS1 ')
, ('A002', 'CS2 CS1 CS3 CS4')
, ('A003', 'CS2 ')
, ('B001', 'CS3 CS1 ')
, ('B002', 'CS2 ')
)
SELECT T.TOKEN, R.COURSE_NUM
FROM Req R, XMLTABLE
(
'for $id in tokenize($s, " +") return <i>{string($id)}</i>'
PASSING TRIM (R.MAJOR) as "s"
COLUMNS
TOKEN VARCHAR (10) PATH '.'
) T
ORDER BY T.TOKEN, R.COURSE_NUM;
DB2 for all pratforms using the REGEXP_SUBSTR function
WITH
Req (COURSE_NUM, MAJOR) AS
(
SELECT 'A001', 'CS1 ' FROM SYSIBM.SYSDUMMY1
UNION ALL SELECT 'A002', 'CS2 CS1 CS3 CS4' FROM SYSIBM.SYSDUMMY1
UNION ALL SELECT 'A003', 'CS2 ' FROM SYSIBM.SYSDUMMY1
UNION ALL SELECT 'B001', 'CS3 CS1 ' FROM SYSIBM.SYSDUMMY1
UNION ALL SELECT 'B002', 'CS2 ' FROM SYSIBM.SYSDUMMY1
)
, T (OCCURENCE, COURSE_NUM, MAJOR, TOKEN) AS
(
SELECT
1 AS OCCURENCE, COURSE_NUM, MAJOR AS MAJOR
, REGEXP_SUBSTR (MAJOR, '[^ ]+', 1, 1) TOKEN
FROM Req
UNION ALL
SELECT
OCCURENCE + 1, COURSE_NUM, MAJOR
, REGEXP_SUBSTR (MAJOR, '[^ ]+', 1, OCCURENCE + 1) TOKEN
FROM T
WHERE REGEXP_SUBSTR (MAJOR, '[^ ]+', 1, OCCURENCE + 1) IS NOT NULL
)
SELECT TOKEN, COURSE_NUM
FROM T
ORDER BY TOKEN, COURSE_NUM;
DB2 for all pratforms using the LOCATE function
WITH
Req (COURSE_NUM, MAJOR) AS
(
SELECT 'A001', 'CS1 ' FROM SYSIBM.SYSDUMMY1
UNION ALL SELECT 'A002', 'CS2 CS1 CS3 CS4' FROM SYSIBM.SYSDUMMY1
UNION ALL SELECT 'A003', 'CS2 ' FROM SYSIBM.SYSDUMMY1
UNION ALL SELECT 'B001', 'CS3 CS1 ' FROM SYSIBM.SYSDUMMY1
UNION ALL SELECT 'B002', 'CS2 ' FROM SYSIBM.SYSDUMMY1
)
, T (COURSE_NUM, MAJOR, TOKEN) AS
(
SELECT
COURSE_NUM
, LTRIM (SUBSTR (MAJOR, NULLIF (LOCATE (' ', MAJOR), 0) + 1)) MAJOR
, SUBSTR (MAJOR, 1, COALESCE (NULLIF (LOCATE (' ', MAJOR), 0) - 1, LENGTH (MAJOR))) TOKEN
FROM (SELECT COURSE_NUM, TRIM (MAJOR) MAJOR FROM Req) R
UNION ALL
SELECT
COURSE_NUM
, LTRIM (SUBSTR (MAJOR, NULLIF (LOCATE (' ', MAJOR), 0) + 1)) MAJOR
, SUBSTR (MAJOR, 1, COALESCE (NULLIF (LOCATE (' ', MAJOR), 0) - 1, LENGTH (MAJOR))) TOKEN
FROM T
WHERE MAJOR IS NOT NULL
)
SELECT TOKEN, COURSE_NUM
FROM T
ORDER BY TOKEN, COURSE_NUM;