@@ -7307,6 +7307,134 @@ def test_load_table_from_dataframe_struct_fields(self):
73077307 assert sent_config .source_format == job .SourceFormat .PARQUET
73087308 assert sent_config .schema == schema
73097309
@unittest.skipIf(pandas is None, "Requires `pandas`")
@unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
def test_load_table_from_dataframe_array_fields(self):
    """Load a DataFrame holding a list-valued column with an explicit schema.

    The job config passed in declares the list column as a REPEATED
    INTEGER field. ``load_table_from_file`` is patched out, and the test
    checks both the arguments it receives and that the forwarded job
    config uses the PARQUET source format with the schema left unchanged.

    See: https://github.com/googleapis/python-bigquery/issues/19
    """
    from google.cloud.bigquery import job
    from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
    from google.cloud.bigquery.schema import SchemaField

    client = self._make_client()

    # A single row: one scalar float and one list of integers.
    column_names = ["float_column", "array_column"]
    rows = [(3.14, [1, 2])]
    dataframe = pandas.DataFrame(data=rows, columns=column_names)

    schema = [
        SchemaField("float_column", "FLOAT"),
        SchemaField("array_column", "INTEGER", mode="REPEATED"),
    ]
    job_config = job.LoadJobConfig(schema=schema)

    # Keyword arguments load_table_from_file is expected to receive.
    expected_kwargs = dict(
        num_retries=_DEFAULT_NUM_RETRIES,
        rewind=True,
        size=mock.ANY,
        job_id=mock.ANY,
        job_id_prefix=None,
        location=self.LOCATION,
        project=None,
        job_config=mock.ANY,
        timeout=DEFAULT_TIMEOUT,
    )

    # get_table is patched to raise NotFound for the destination table.
    table_not_found = google.api_core.exceptions.NotFound("Table not found")

    with mock.patch(
        "google.cloud.bigquery.client.Client.load_table_from_file",
        autospec=True,
    ) as load_table_from_file, mock.patch(
        "google.cloud.bigquery.client.Client.get_table",
        autospec=True,
        side_effect=table_not_found,
    ):
        client.load_table_from_dataframe(
            dataframe,
            self.TABLE_REF,
            job_config=job_config,
            location=self.LOCATION,
        )

    load_table_from_file.assert_called_once_with(
        client, mock.ANY, self.TABLE_REF, **expected_kwargs
    )

    # Each recorded call is a (name, args, kwargs) triple.
    _, _, sent_kwargs = load_table_from_file.mock_calls[0]
    sent_config = sent_kwargs["job_config"]
    assert sent_config.source_format == job.SourceFormat.PARQUET
    assert sent_config.schema == schema

@unittest.skipIf(pandas is None, "Requires `pandas`")
@unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
def test_load_table_from_dataframe_array_fields_w_auto_schema(self):
    """Load a DataFrame holding a list-valued column without a job config.

    No schema is supplied by the caller here. ``load_table_from_file``
    is patched out, and the test checks that the job config it receives
    uses the PARQUET source format and carries a schema with a FLOAT
    field plus a REPEATED INT64 field for the list column.

    See: https://github.com/googleapis/python-bigquery/issues/19
    """
    from google.cloud.bigquery import job
    from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
    from google.cloud.bigquery.schema import SchemaField

    client = self._make_client()

    # A single row: one scalar float and one list of integers.
    column_names = ["float_column", "array_column"]
    rows = [(3.14, [1, 2])]
    dataframe = pandas.DataFrame(data=rows, columns=column_names)

    expected_schema = [
        SchemaField("float_column", "FLOAT"),
        SchemaField("array_column", "INT64", mode="REPEATED"),
    ]

    # Keyword arguments load_table_from_file is expected to receive.
    expected_kwargs = dict(
        num_retries=_DEFAULT_NUM_RETRIES,
        rewind=True,
        size=mock.ANY,
        job_id=mock.ANY,
        job_id_prefix=None,
        location=self.LOCATION,
        project=None,
        job_config=mock.ANY,
        timeout=DEFAULT_TIMEOUT,
    )

    # get_table is patched to raise NotFound for the destination table.
    table_not_found = google.api_core.exceptions.NotFound("Table not found")

    with mock.patch(
        "google.cloud.bigquery.client.Client.load_table_from_file",
        autospec=True,
    ) as load_table_from_file, mock.patch(
        "google.cloud.bigquery.client.Client.get_table",
        autospec=True,
        side_effect=table_not_found,
    ):
        client.load_table_from_dataframe(
            dataframe,
            self.TABLE_REF,
            location=self.LOCATION,
        )

    load_table_from_file.assert_called_once_with(
        client, mock.ANY, self.TABLE_REF, **expected_kwargs
    )

    # Each recorded call is a (name, args, kwargs) triple.
    _, _, sent_kwargs = load_table_from_file.mock_calls[0]
    sent_config = sent_kwargs["job_config"]
    assert sent_config.source_format == job.SourceFormat.PARQUET
    assert sent_config.schema == expected_schema

73107438 @unittest .skipIf (pandas is None , "Requires `pandas`" )
73117439 @unittest .skipIf (pyarrow is None , "Requires `pyarrow`" )
73127440 def test_load_table_from_dataframe_w_partial_schema (self ):
0 commit comments