Skip to content
Advertisement

Split text into multiline chunks based on fixed character length using SQL

I am looking for SQL that takes a string of text (coming in as up to 3 lines, but usually 1) and breaks it into at most 3 lines with a max of 30 characters per line. (The background is I’m trying to insert text into address line fields and sometimes the incoming values are longer than 30 characters and need to split into smaller chunks and bumped down to another line).

I’m not even sure where to start. Since it’s address text there’s quite a bit of variance and ideally it shouldn’t split in the middle of a word, if possible. This would be in SQL Server 2012. Any suggestions?

Advertisement

Answer

This should accomplish what you want using recursive CTEs:

CREATE PROCEDURE REDISTRIBUTE_TEXT 
     @IN_LINE_1         VARCHAR(200)
    ,@IN_LINE_2         VARCHAR(200)
    ,@IN_LINE_3         VARCHAR(200)

    ,@MAX_LINE_LENGTH   INTEGER

    ,@OUT_LINE_1        VARCHAR(200) OUT
    ,@OUT_LINE_2        VARCHAR(200) OUT
    ,@OUT_LINE_3        VARCHAR(200) OUT
AS
BEGIN
    SET NOCOUNT ON;
        DECLARE @s VARCHAR(1000);

        SET @s = @IN_LINE_1 + ' ' + @IN_LINE_2 + ' ' + @IN_LINE_3;

        WITH CHAR_ARRAY AS (
            SELECT   v.number + 1                   Char_ID
                    ,SUBSTRING(a.b, v.number+1, 1)  Char_Value
            FROM    (SELECT @s b) a
            JOIN    master..spt_values v 
            ON      v.number < len(a.b)
            WHERE   v.type = 'P'
        )
        , WORD_ARRAY AS (
            SELECT   S.ID           AS Word_ID
                    ,S.Value        AS Word_Value
                    ,LEN(S.VALUE)   AS Word_Length
            FROM    dbo.String_Split_1(@s, ' ') S   -- This could be replaced with built-in STRING_SPLIT function in MSSQL 2016+
        )
        , WORD_EXT AS (
            SELECT  x.*
                    ,( 
                        SELECT   SUM(y.Word_Length) + COUNT(*)
                        FROM      WORD_ARRAY y
                        WHERE     y.Word_ID <= x.Word_ID
                    ) AS RunningLength
            FROM    WORD_ARRAY x
        )
        , CHAR_EXT AS (
            SELECT   C.*
                    ,W2.Word_ID     AS Next_Word_ID
                    ,W2.Word_Length AS Next_Word_Length
            FROM    (
                SELECT  C.*
                        ,W.*
                        ,ROW_NUMBER() OVER (PARTITION BY C.Char_ID ORDER BY RunningLength ASC) RN
                FROM    CHAR_ARRAY  C
                CROSS JOIN  WORD_EXT W
                WHERE   C.Char_ID <= W.RunningLength
            ) C
            LEFT JOIN   WORD_ARRAY W2
            ON      W2.Word_ID = C.Word_ID + 1
            WHERE RN = 1
        )
        , RECUR (CURR_CHAR_ID, START_WORD_ID, CURR_WORD_ID, LINE_NUM, LINE_TEXT, LINE_LENGTH) AS (
            SELECT   C.Char_ID
                    ,W.Word_ID
                    ,W.Word_ID
                    ,1
                    ,CAST(C.Char_Value AS VARCHAR)
                    ,LEN(C.Char_Value)
            FROM    CHAR_EXT C
            JOIN    WORD_EXT W
            ON      C.Word_ID = W.Word_ID
            WHERE   C.Char_ID = 1
            
            UNION ALL

            SELECT   CHAR_ID                                                                                    CURR_CHAR_ID
                    ,CASE WHEN WRAP_END_WORD = 1 THEN Next_Word_ID ELSE START_WORD_ID END                       START_WORD_ID
                    ,CASE WHEN WRAP_END_WORD = 1 THEN Next_Word_ID ELSE Word_ID END                             CURR_WORD_ID
                    ,CASE WHEN WRAP_END_WORD = 1 OR WRAP_MID_WORD = 1 THEN LINE_NUM + 1 ELSE LINE_NUM END       LINE_NUM
                    ,CAST(
                        CASE    WHEN WRAP_END_WORD = 1 OR WRAP_MID_WORD = 1 
                                THEN (CASE WHEN C.Char_Value = ' ' THEN '' ELSE C.Char_Value END )
                                ELSE LINE_TEXT + C.Char_Value 
                                END AS VARCHAR)                                                                 LINE_TEXT
                    ,CASE WHEN WRAP_END_WORD = 1 OR WRAP_MID_WORD = 1 THEN 1 ELSE LINE_LENGTH + 1 END           LINE_LENGTH
            FROM    (
                SELECT   R.*
                        ,C.*
                        ,CASE WHEN R.START_WORD_ID = C.WORD_ID AND LINE_LENGTH > @MAX_LINE_LENGTH 
                            THEN 1 ELSE 0 
                            END AS WRAP_MID_WORD
                        ,CASE WHEN C.Char_Value = ' ' AND (
                                    LEN(R.LINE_TEXT) + 1 + C.Next_Word_Length > @MAX_LINE_LENGTH
                                )
                            THEN 1 ELSE 0 
                            END AS WRAP_END_WORD
                FROM    RECUR R
                JOIN    CHAR_EXT C
                ON      C.Char_ID = R.CURR_CHAR_ID + 1
            ) C
        )

        SELECT  * 
        INTO    #LINES
        FROM (
            SELECT  * 
                    ,ROW_NUMBER() OVER (PARTITION BY LINE_NUM ORDER BY LEN(LINE_TEXT) DESC) RN
            FROM    RECUR R
        ) R
        WHERE RN = 1
        ;

        SELECT  @OUT_LINE_1 = LINE_TEXT FROM    #LINES WHERE    LINE_NUM = 1;
        SELECT  @OUT_LINE_2 = LINE_TEXT FROM    #LINES WHERE    LINE_NUM = 2;
        SELECT  @OUT_LINE_3 = LINE_TEXT FROM    #LINES WHERE    LINE_NUM = 3;

        DROP TABLE #LINES;
END
GO


CREATE Function [String_Split_1]( @string varchar(4000), @delimeter varchar(10))
Returns
@Result Table (ID int identity(1,1) not null, value varchar(100))
As
Begin
    declare @len int, @loc int = 1
    While @loc <= len(@string) 
    Begin
        Set @len = CHARINDEX(@delimeter, @string, @loc) - @loc
        If @Len < 0 Set @Len = len(@string)
        Insert Into @Result Values (SUBSTRING(@string,@loc,@len))
        Set @loc = @loc + @len + 1
    End
    Return
End

enter image description here

User contributions licensed under: CC BY-SA
2 People found this is helpful
Advertisement