Skip to content
Advertisement

How to automate parameters passed into pandas.read_sql?

I am trying to create a methodology for passing parameters automatically through something like locals(), similarly to how f-strings work.

How it currently works

import pandas as pd

def my_func(conn, string_id, date, integer_ids):
    sql = f"""    
    select * from RISK a
    where STRING_ID = '{string_id}'
    and DATE = {date}
    and INTEGER_ID in ({','.join(map(str, integer_ids))})"""
    df = pd.read_sql(sql, conn)
    return df

However, this approach means I cannot copy-paste the SQL into SQL developer or similar, and run it from there. So I would like an approach that makes use of parameters instead.

There seems to be two problems with that

  1. Parameters must be literals, so its not possible to pass along lists
  2. I need to create a dictionary manually, and cannot simply pass something like locals()

How I would like it to work would be something like the example below (which obviously doesn’t work)

import pandas as pd

def my_func(conn, string_id, date, integer_ids):
    sql = """    
    select * from RISK
    where STRING_ID = :string_id
    and DATE = :date
    and INTEGER_ID in :integer_ids"""
    df = pd.read_sql(sql, conn, params=locals())
    return df

EDIT: After testing a bit, maybe I could use a regex to find all instances of :param and replace them with the parameter value, e.g.

import re
pattern = '[nr].*:s*([^nr]*)'
matches = re.findall(pattern,sql)
for match in matches:
    sql = sql.replace(':'+match, eval(match))

It’s just not very pretty, and it introduces issues related to object types. E.g. the string_id should be encapsulated by ', and the date needs to be converted to a string object as well

Final edit: Thanks to perl, I now have a working solution to my problem

def read_sql(sql, conn, params):
    # Finds all words following a ":" sign in the sql
    for p in re.findall(':(w+)', sql):
        if isinstance(params.get(p), (tuple, list)):
            ext_params = {f'{p}_{i:03d}': p_i for i, p_i in enumerate(params.get(p))}
            sql = sql.replace(f':{p}', f"(:{', :'.join(ext_params)})")
            params.update(ext_params)

    sql_text = sqlalchemy.text(sql)
    return pd.read_sql(sql_text, conn, params=params)


def my_func(conn, string_id, date, integer_ids):
    sql = """    
    select * from RISK
    where STRING_ID = :string_id
    and DATE = :date
    and INTEGER_ID in :integer_ids"""
    df = read_sql(sql, conn, locals())
    return df

Advertisement

Answer

You can use parametrized queries by wrapping the query in sqlalchemy.text and converting lists to tuples. For example:

def my_func(conn, min_number, letters):
    # convert lists to tuples
    letters = tuple(letters)
    
    # wrap sql in sqlalchemy.text
    sql = sqlalchemy.text("""    
        SELECT *
        FROM letters
        WHERE
            number >= :min_number AND
            letter in :letters""")
    
    # read and return the resulting dataframe
    df = pd.read_sql(sql, conn, params=locals())
    return df

my_func(conn, 10, ['a', 'b', 'c', 'x', 'y', 'z'])

Output:

  letter  number
0      x      23
1      y      24
2      z      25

For completeness of the example, the following was used as a test table:

df = pd.DataFrame({
    'letter': list(string.ascii_lowercase),
    'number': range(len(string.ascii_lowercase))})
df.to_sql('letters', conn, index=False)

Update: Here’s a possible workaround for Oracle to make it work with lists:

def get_query(sql, **kwargs):
    for k, v in kwargs.items():
        vs = "','".join(v)
        sql = sql.replace(f':{k}', f"('{vs}')")
    return sql

def my_func(conn, min_number, letters):
    sql_template = """    
        SELECT *
        FROM letters
        WHERE
            number >= :min_number AND
            letter in :letters
    """
    # pass list variables to `get_query` function as named parameters
    # to get parameters replaced with ('value1', 'value2', ..., 'valueN')
    sql = sqlalchemy.text(
        get_query(sql_template, letters=letters))
    
    df = pd.read_sql(sql, conn, params=locals())
    return df

my_func(conn, 10, ['a', 'b', 'c', 'x', 'y', 'z'])

Update 2: Here’s the get_query function that works with both strings and numbers (enclosing in quotes strings, but not numbers):

def get_query(sql, **kwargs):
    # enclose in quotes strings, but not numbers
    def q(x):
        q = '' if isinstance(x, (int, float)) else "'"
        return f'{q}{x}{q}'
    
    # replace with values
    for k, v in kwargs.items():
        sql = sql.replace(f':{k}', f"({','.join([q(x) for x in v])})")

    return sql

For example:

sql = """    
SELECT *
FROM letters
WHERE
    number in :numbers AND
    letters in :letters
"""

get_query(sql,
          numbers=[1, 2, 3],
          letters=['A', 'B', 'C'])

Output:

SELECT *
FROM letters
WHERE
    number in (1,2,3) AND
    letters in ('A','B','C')
User contributions licensed under: CC BY-SA
10 People found this is helpful
Advertisement