Skip to content
Advertisement

Passing tuples to SQL query while unnesting

I need to query a large BigQuery table to retrieve data from certain dates about barcodes in a store. There is a specified date for each of these barcodes (there are thousands of dates for each barcode in the BigQuery table, which makes it unpractical to only query on barcode). I have therefore created a list of tuples containing the barcodes and specific dates (only a very small subset):

date_and_barcode = [('A4630411929016393', datetime.date(2022, 10, 9)),
 ('A4630411929716390', datetime.date(2022, 10, 9)),
 ('A4630462735016271', datetime.date(2022, 10, 9)),
 ('A4070460677116273', datetime.date(2022, 10, 9)),
 ('A4070460701616276', datetime.date(2022, 10, 9)),
 ('A4630460194116279', datetime.date(2022, 10, 9)),
 ('A4630460205516276', datetime.date(2022, 10, 7)),
 ('A4630460214016271', datetime.date(2022, 10, 9)),
 ('A4630460280316277', datetime.date(2022, 10, 9)),
 ('A4630460281616271', datetime.date(2022, 10, 9)),
 ('A4630450353216276', datetime.date(2022, 10, 11)),
 ('A4220452268816274', datetime.date(2022, 10, 9))]

My query today looks like this:

query="""
select   barcode,
         storeinfo1,
         storeinfo2,
         item1

         from `project.dataset.table`
         where barcode IN UNNEST(@label_list) and date in UNNEST(@date_list)
"""

job_config = bigquery.QueryJobConfig(
query_parameters=[
         bigquery.ArrayQueryParameter("label_list", "STRING", label_list),
         bigquery.ArrayQueryParameter("date_list", "STRING", date_list),
            ]
        )
DATA = client.query(query, job_config=job_config).to_dataframe()

This obviously doesn’t work since it will take all the possible combinations of barcodes and dates. I only want the combinations that match the list I have.

I tried this

query="""
select   barcode,
         storeinfo1,
         storeinfo2,
         item1

         from `project.dataset.table`
        where barcode in {} and Date in {} 
                                )

            """.format(UNNEST(date_and_barcode)[0], UNNEST(date_and_barcode)[1])

job_config = bigquery.QueryJobConfig(
query_parameters=[
         bigquery.ArrayQueryParameter("date_and_barcode", "STRING", date_and_barcode),

            ]
        )
DATA = client.query(query, job_config=job_config).to_dataframe()

and

query="""
select   barcode,
         storeinfo1,
         storeinfo2,
         item1

         from `project.dataset.table`
        where barcode in UNNEST(@{}) and Date in UNNEST(@{})
                                )

            """.format(list(zip(*date_and_labels))[0], list(zip(*date_and_labels))[1])

job_config = bigquery.QueryJobConfig(
query_parameters=[
         bigquery.ArrayQueryParameter("date_and_barcode", "STRING", date_and_barcode),

            ]
        )
DATA = client.query(query, job_config=job_config).to_dataframe()

None worked!

I would be grateful for hints on how to solve this.

Advertisement

Answer

You might consider using tuple syntax in your query.

(barcode, date) IN UNNEST(date_and_barcode)

DECLARE date_and_barcode DEFAULT [('A4630411929016393', date(2022, 10, 9)),
 ('A4630411929716390', date(2022, 10, 9)),
 ('A4630462735016271', date(2022, 10, 9)),
 ('A4070460677116273', date(2022, 10, 9)),
 ('A4070460701616276', date(2022, 10, 9)),
 ('A4630460194116279', date(2022, 10, 9)),
 ('A4630460205516276', date(2022, 10, 7)),
 ('A4630460214016271', date(2022, 10, 9)),
 ('A4630460280316277', date(2022, 10, 9)),
 ('A4630460281616271', date(2022, 10, 9)),
 ('A4630450353216276', date(2022, 10, 11)),
 ('A4220452268816274', date(2022, 10, 9))];

WITH sample_data AS (
    SELECT 'A4630460194116279' barcode, DATE '2022-10-09' date
)
SELECT * FROM sample_data WHERE (barcode, date) IN UNNEST(date_and_barcode);

Query results

enter image description here


Python Example

date_and_barcode = [('A4630411929016393', datetime.date(2022, 10, 9)),
 ('A4630411929716390', datetime.date(2022, 10, 9)),
 # ...  same as list of tuples in your question
 ('A4630450353216276', datetime.date(2022, 10, 11)),
 ('A4220452268816274', datetime.date(2022, 10, 9))
]

date_and_barcode = ",".join("('%s','%s')" % tup for tup in date_and_barcode)

query=f"""
  select barcode,
         storeinfo1,
         storeinfo2,
         item1
   from `project.dataset.table`
  where (barcode, date) IN UNNEST([{date_and_barcode}])
"""
print(query)
User contributions licensed under: CC BY-SA
1 People found this is helpful
Advertisement