I have a query in SAS where I use a Macro variable to repeat a query to Teradata with a different variable. We have 5 databases, one for each state, where I run the same query, but use the variable to update the state, then mend all data sets. I’m looking for help in how I can do this in python.
loop through {state1, state2, state3, state4, state5} and save each query as {stateX}_df then merge all
x
import teradata as td
import pandas as pd
from teradata import tdodbc
udaExec = td.UdaExec(appConfigFile="udaexec.ini")
with udaExec.connect("${dataSourceName}", LoginTimeout=120) as session:
query1 = """database my_db_{state1};"""
query2 = """
select distinct
{state1}, item_a, item_b
from table
"""
session.execute(query1)
session.execute(query2)
{stateX}_df = pd.read_sql(query2), session)
Advertisement
Answer
Here is an improved version with volatile table use: Python SQL loop variables through multiple queries
udaExec = td.UdaExec(appConfigFile="udaexec.ini")
with udaExec.connect("${dataSourceName}") as session:
state_dataframes = []
STATES = ["state1", "state2", "state3", "state4", "state5"]
for state in STATES:
query1 = """database my_db_{};"""
query2 = """
create set volatile table v_table
,no fallback, no before journal, no after journal as
(
select top 10
'{}' as state
,t.*
from table t
)
with data
primary index (dw_key)
on commit preserve rows;
"""
query3 = """
create set volatile table v_table_2
,no fallback, no before journal, no after journal as
(
select t.*
from v_table t
)
with data
primary index (dw_clm_key)
on commit preserve rows;
"""
query4 = """
select t.*
from v_table_2 t
"""
session.execute(query1.format(state))
session.execute(query2.format(state))
session.execute(query3)
session.execute(query4)
state_dataframes.append(pd.read_sql(query4, session))
session.execute("DROP TABLE v_table")
session.execute("DROP TABLE v_table_2")
all_states_df = pd.concat(state_dataframes)