I need to query a large BigQuery table to retrieve data from certain dates about barcodes in a store. Each of these barcodes has one specified date (the BigQuery table holds thousands of dates for each barcode, which makes it impractical to query on barcode alone). I have therefore created a list of tuples containing the barcodes and their specific dates (only a very small subset is shown):
# One (barcode, date) pair per entry: each barcode is paired with the single
# date it should be looked up for.
date_and_barcode = [
    ('A4630411929016393', datetime.date(2022, 10, 9)),
    ('A4630411929716390', datetime.date(2022, 10, 9)),
    ('A4630462735016271', datetime.date(2022, 10, 9)),
    ('A4070460677116273', datetime.date(2022, 10, 9)),
    ('A4070460701616276', datetime.date(2022, 10, 9)),
    ('A4630460194116279', datetime.date(2022, 10, 9)),
    ('A4630460205516276', datetime.date(2022, 10, 7)),
    ('A4630460214016271', datetime.date(2022, 10, 9)),
    ('A4630460280316277', datetime.date(2022, 10, 9)),
    ('A4630460281616271', datetime.date(2022, 10, 9)),
    ('A4630450353216276', datetime.date(2022, 10, 11)),
    ('A4220452268816274', datetime.date(2022, 10, 9)),
]
My query today looks like this:
# Current query: barcodes and dates are bound as two separate array
# parameters, and each column is filtered against its own list independently.
query = (
    " select barcode, storeinfo1, storeinfo2, item1"
    " from `project.dataset.table`"
    " where barcode IN UNNEST(@label_list)"
    " and date in UNNEST(@date_list) "
)

job_config = bigquery.QueryJobConfig(
    query_parameters=[
        bigquery.ArrayQueryParameter("label_list", "STRING", label_list),
        bigquery.ArrayQueryParameter("date_list", "STRING", date_list),
    ]
)

# Run the query and load the result into a DataFrame.
DATA = client.query(query, job_config=job_config).to_dataframe()
This obviously doesn’t work since it will take all the possible combinations of barcodes and dates. I only want the combinations that match the list I have.
I tried this
# Attempt 1: substitute pieces of the Python list straight into the SQL text.
# NOTE(review): UNNEST is invoked here as a Python name rather than written as
# SQL text; presumably no such Python function exists, so the .format()
# arguments fail before any query is sent - confirm.
sql_template = (
    " select barcode, storeinfo1, storeinfo2, item1"
    " from `project.dataset.table`"
    " where barcode in {} and Date in {} ) "
)
first_value = UNNEST(date_and_barcode)[0]
second_value = UNNEST(date_and_barcode)[1]
query = sql_template.format(first_value, second_value)

job_config = bigquery.QueryJobConfig(
    query_parameters=[
        bigquery.ArrayQueryParameter("date_and_barcode", "STRING", date_and_barcode),
    ]
)

DATA = client.query(query, job_config=job_config).to_dataframe()
and
# Attempt 2: split the pairs into a barcode tuple and a date tuple and format
# them into the SQL after the '@' markers.
# NOTE(review): this reads `date_and_labels`, while the parameter below uses
# `date_and_barcode` - verify which name is intended.
# The text substituted after each '@' is the tuple's string form, not the name
# of a bound query parameter.
sql_template = (
    " select barcode, storeinfo1, storeinfo2, item1"
    " from `project.dataset.table`"
    " where barcode in UNNEST(@{}) and Date in UNNEST(@{}) ) "
)
first_column = list(zip(*date_and_labels))[0]
second_column = list(zip(*date_and_labels))[1]
query = sql_template.format(first_column, second_column)

job_config = bigquery.QueryJobConfig(
    query_parameters=[
        bigquery.ArrayQueryParameter("date_and_barcode", "STRING", date_and_barcode),
    ]
)

DATA = client.query(query, job_config=job_config).to_dataframe()
Neither attempt worked!
I would be grateful for hints on how to solve this.
Advertisement
Answer
You might consider using tuple syntax in your query.
(barcode, date) IN UNNEST(date_and_barcode)
-- The exact (barcode, date) combinations to look up, one tuple per pair.
DECLARE date_and_barcode DEFAULT [
    ('A4630411929016393', DATE(2022, 10, 9)),
    ('A4630411929716390', DATE(2022, 10, 9)),
    ('A4630462735016271', DATE(2022, 10, 9)),
    ('A4070460677116273', DATE(2022, 10, 9)),
    ('A4070460701616276', DATE(2022, 10, 9)),
    ('A4630460194116279', DATE(2022, 10, 9)),
    ('A4630460205516276', DATE(2022, 10, 7)),
    ('A4630460214016271', DATE(2022, 10, 9)),
    ('A4630460280316277', DATE(2022, 10, 9)),
    ('A4630460281616271', DATE(2022, 10, 9)),
    ('A4630450353216276', DATE(2022, 10, 11)),
    ('A4220452268816274', DATE(2022, 10, 9))
];

-- sample_data stands in for the real table: a single row whose
-- (barcode, date) pair appears in the list above.
-- The tuple is compared as a whole, so a row is kept only when its barcode
-- and its date occur together as one listed pair.
WITH sample_data AS (
    SELECT
        'A4630460194116279' AS barcode,
        DATE '2022-10-09' AS date
)
SELECT
    barcode,
    date
FROM sample_data
WHERE (barcode, date) IN UNNEST(date_and_barcode);
Query results
Python Example
# (barcode, date) pairs to look up; each barcode is matched only with its own date.
date_and_barcode = [
    ('A4630411929016393', datetime.date(2022, 10, 9)),
    ('A4630411929716390', datetime.date(2022, 10, 9)),
    # ... same as list of tuples in your question
    ('A4630450353216276', datetime.date(2022, 10, 11)),
    ('A4220452268816274', datetime.date(2022, 10, 9)),
]

# Render every pair as a SQL tuple. The date is written as an explicit
# DATE literal so the comparison with the `date` column does not depend on
# implicit string-to-DATE coercion. A separate name is used so the original
# list is not overwritten by its SQL text.
# NOTE(review): the values are interpolated into the SQL text; if they can
# ever come from untrusted input, bind them as query parameters instead.
pairs_sql = ", ".join(
    f"('{barcode}', DATE '{day.isoformat()}')"
    for barcode, day in date_and_barcode
)

# Comparing the (barcode, date) tuple as a whole keeps only the listed
# combinations rather than every barcode crossed with every date.
query = f"""
select barcode, storeinfo1, storeinfo2, item1
from `project.dataset.table`
where (barcode, date) IN UNNEST([{pairs_sql}])
"""
print(query)