Skip to content

Commit 2491570

Browse files
cleop-googlecopybara-github
authored and committed
feat: GenAI SDK client(multimodal) - Add to_batch_job_source and get_batch_job_destination to MultimodalDataset
PiperOrigin-RevId: 906352851
1 parent 762d20c commit 2491570

4 files changed

Lines changed: 69 additions & 0 deletions

File tree

tests/unit/vertexai/genai/replays/test_create_multimodal_datasets.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -81,6 +81,15 @@ def mock_generate_multimodal_dataset_display_name():
8181
yield mock_generate
8282

8383

84+
@pytest.fixture
85+
def mock_get_batch_job_unique_name():
86+
with mock.patch.object(
87+
_datasets_utils, "get_batch_job_unique_name"
88+
) as mock_unique_name:
89+
mock_unique_name.return_value = "12345678901234_abcde"
90+
yield mock_unique_name
91+
92+
8493
def test_create_dataset(client):
8594
create_dataset_operation = client.datasets._create_multimodal_dataset(
8695
name="projects/vertex-sdk-dev/locations/us-central1",

tests/unit/vertexai/genai/test_multimodal_datasets_genai.py

Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,15 @@ def mock_import_bigframes():
3636
yield mock_import_bigframes
3737

3838

39+
@pytest.fixture
40+
def mock_get_batch_job_unique_name():
41+
with mock.patch.object(
42+
_datasets_utils, "get_batch_job_unique_name"
43+
) as mock_unique_name:
44+
mock_unique_name.return_value = "12345678901234_abcde"
45+
yield mock_unique_name
46+
47+
3948
class TestMultimodalDataset:
4049

4150
def test_read_config(self):
@@ -157,6 +166,28 @@ def test_to_bigframes(self, mock_import_bigframes):
157166
"project.dataset.table"
158167
)
159168

169+
def test_get_batch_job_destination(self, mock_get_batch_job_unique_name):
170+
dataset = types.MultimodalDataset(
171+
name="projects/vertex-sdk-dev/locations/us-central1/datasets/12345",
172+
display_name="test_multimodal_dataset",
173+
metadata={
174+
"inputConfig": {
175+
"bigquerySource": {
176+
"uri": "bq://target_project.target_dataset.target_table"
177+
},
178+
},
179+
},
180+
)
181+
destination = dataset.get_batch_job_destination()
182+
assert (
183+
destination.vertex_dataset.display_name
184+
== "test_multimodal_dataset_batch_output_12345678901234_abcde"
185+
)
186+
assert (
187+
destination.vertex_dataset.bigquery_destination
188+
== "bq://target_project.target_dataset.target_table_batch_output_12345678901234_abcde"
189+
)
190+
160191

161192
class TestGeminiRequestReadConfig:
162193
def test_single_turn_template(self):

vertexai/_genai/_datasets_utils.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -242,6 +242,13 @@ def generate_multimodal_dataset_display_name() -> str:
242242
return f"MultimodalDataset {datetime.datetime.now().isoformat(sep=' ')}"
243243

244244

245+
def get_batch_job_unique_name() -> str:
246+
"""Generates a unique name suffix for a batch job destination."""
247+
timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
248+
unique_id = uuid.uuid4().hex[0:5]
249+
return f"{timestamp}_{unique_id}"
250+
251+
245252
def save_dataframe_to_bigquery(
246253
dataframe: "bigframes.pandas.DataFrame", # type: ignore # noqa: F821
247254
target_table_id: str,

vertexai/_genai/types/common.py

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15334,6 +15334,28 @@ def to_bigframes(
1533415334
raise ValueError("Multimodal dataset bigquery source uri is not set.")
1533515335
return bigframes.pandas.read_gbq_table(self.bigquery_uri.removeprefix("bq://"))
1533615336

15337+
def to_batch_job_source(self) -> "genai_types.BatchJobSource":
15338+
"""Converts the dataset to a BatchJobSource."""
15339+
return genai_types.BatchJobSource(
15340+
vertex_dataset_name=self.name,
15341+
)
15342+
15343+
def get_batch_job_destination(self) -> "genai_types.BatchJobDestination":
15344+
"""Converts the dataset to a BatchJobDestination."""
15345+
from .. import _datasets_utils
15346+
15347+
unique_name = _datasets_utils.get_batch_job_unique_name()
15348+
bigquery_uri = self.bigquery_uri
15349+
if bigquery_uri is None:
15350+
raise ValueError("Multimodal dataset bigquery source uri is not set.")
15351+
curr_display_name = self.display_name or "genai_batch_job"
15352+
return genai_types.BatchJobDestination(
15353+
vertex_dataset=genai_types.VertexMultimodalDatasetDestination(
15354+
display_name=f"{curr_display_name}_batch_output_{unique_name}",
15355+
bigquery_destination=f"{bigquery_uri}_batch_output_{unique_name}",
15356+
)
15357+
)
15358+
1533715359

1533815360
class MultimodalDatasetDict(TypedDict, total=False):
1533915361
"""Represents a multimodal dataset."""

0 commit comments

Comments
 (0)