Skip to content
This repository was archived by the owner on Mar 6, 2026. It is now read-only.

Commit d8c25ac

Browse files
authored
test: retry getting rows after streaming them in test_insert_rows_from_dataframe (#832)
1 parent 48e8a35 commit d8c25ac

1 file changed

Lines changed: 26 additions & 14 deletions

File tree

tests/system/test_pandas.py

Lines changed: 26 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@
2121
import io
2222
import operator
2323

24+
import google.api_core.retry
2425
import pkg_resources
2526
import pytest
2627
import pytz
@@ -41,6 +42,10 @@
4142
PANDAS_INT64_VERSION = pkg_resources.parse_version("1.0.0")
4243

4344

45+
class MissingDataError(Exception):
46+
pass
47+
48+
4449
def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_id):
4550
"""Test that a DataFrame with dtypes that map well to BigQuery types
4651
can be uploaded without specifying a schema.
@@ -666,27 +671,34 @@ def test_insert_rows_from_dataframe(bigquery_client, dataset_id):
666671
)
667672
for errors in chunk_errors:
668673
assert not errors
669-
670-
# Use query to fetch rows instead of listing directly from the table so
671-
# that we get values from the streaming buffer.
672-
rows = list(
673-
bigquery_client.query(
674-
"SELECT * FROM `{}.{}.{}`".format(
675-
table.project, table.dataset_id, table.table_id
676-
)
677-
)
678-
)
679-
680-
sorted_rows = sorted(rows, key=operator.attrgetter("int_col"))
681-
row_tuples = [r.values() for r in sorted_rows]
682674
expected = [
683675
# Pandas often represents NULL values as NaN. Convert to None for
684676
# easier comparison.
685677
tuple(None if col != col else col for col in data_row)
686678
for data_row in dataframe.itertuples(index=False)
687679
]
688680

689-
assert len(row_tuples) == len(expected)
681+
# Use query to fetch rows instead of listing directly from the table so
682+
# that we get values from the streaming buffer "within a few seconds".
683+
# https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability
684+
@google.api_core.retry.Retry(
685+
predicate=google.api_core.retry.if_exception_type(MissingDataError)
686+
)
687+
def get_rows():
688+
rows = list(
689+
bigquery_client.query(
690+
"SELECT * FROM `{}.{}.{}`".format(
691+
table.project, table.dataset_id, table.table_id
692+
)
693+
)
694+
)
695+
if len(rows) != len(expected):
696+
raise MissingDataError()
697+
return rows
698+
699+
rows = get_rows()
700+
sorted_rows = sorted(rows, key=operator.attrgetter("int_col"))
701+
row_tuples = [r.values() for r in sorted_rows]
690702

691703
for row, expected_row in zip(row_tuples, expected):
692704
assert (

0 commit comments

Comments (0)