Skip to content

Commit 84c92f9

Browse files
kmjung and chingor13
authored and committed
BigQuery: Update resumption strategy to use format-independent row count. (#5658)
* Update resumption strategy to use format-independent row count.

  This change modifies the ReadRowsResumptionStrategy helper class in the BigQuery storage client to use the new format-independent row count value in the ReadRowsResponse message in order to track stream position. It also modifies various test files to use the new row count value.

* Fix checkstyle errors
1 parent 3288c73 commit 84c92f9

4 files changed

Lines changed: 30 additions & 62 deletions

File tree

google-cloud-clients/google-cloud-bigquerystorage/src/main/java/com/google/cloud/bigquery/storage/v1beta1/stub/readrows/ReadRowsResumptionStrategy.java

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
1919
import com.google.api.gax.retrying.StreamResumptionStrategy;
2020
import com.google.cloud.bigquery.storage.v1beta1.Storage.ReadRowsRequest;
2121
import com.google.cloud.bigquery.storage.v1beta1.Storage.ReadRowsResponse;
22+
import javax.annotation.Nonnull;
2223

2324
/**
2425
* An implementation of a {@link StreamResumptionStrategy} for the ReadRows API. This class tracks
@@ -36,13 +37,15 @@ public class ReadRowsResumptionStrategy
3637
private long rowsProcessed = 0;
3738

3839
@Override
40+
@Nonnull
3941
public StreamResumptionStrategy<ReadRowsRequest, ReadRowsResponse> createNew() {
4042
return new ReadRowsResumptionStrategy();
4143
}
4244

4345
@Override
46+
@Nonnull
4447
public ReadRowsResponse processResponse(ReadRowsResponse response) {
45-
rowsProcessed += response.getAvroRows().getRowCount();
48+
rowsProcessed += response.getRowCount();
4649
return response;
4750
}
4851

google-cloud-clients/google-cloud-bigquerystorage/src/test/java/com/google/cloud/bigquery/storage/v1beta1/it/ITBigQueryStorageLongRunningTest.java

Lines changed: 7 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,6 @@
1717
package com.google.cloud.bigquery.storage.v1beta1.it;
1818

1919
import static org.junit.Assert.assertEquals;
20-
import static org.junit.Assert.assertTrue;
2120

2221
import com.google.api.gax.rpc.ServerStream;
2322
import com.google.cloud.ServiceOptions;
@@ -121,12 +120,12 @@ public Long call() throws Exception {
121120
ExecutorService executor = Executors.newFixedThreadPool(tasks.size());
122121
List<Future<Long>> results = executor.invokeAll(tasks);
123122

124-
long avroRowCount = 0;
123+
long rowCount = 0;
125124
for (Future<Long> result : results) {
126-
avroRowCount += result.get();
125+
rowCount += result.get();
127126
}
128127

129-
assertEquals(313_797_035, avroRowCount);
128+
assertEquals(313_797_035, rowCount);
130129
}
131130

132131
private long readAllRowsFromStream(Stream stream) {
@@ -135,19 +134,13 @@ private long readAllRowsFromStream(Stream stream) {
135134
ReadRowsRequest readRowsRequest =
136135
ReadRowsRequest.newBuilder().setReadPosition(readPosition).build();
137136

138-
long avroRowCount = 0;
137+
long rowCount = 0;
139138
ServerStream<ReadRowsResponse> serverStream = client.readRowsCallable().call(readRowsRequest);
140139
for (ReadRowsResponse response : serverStream) {
141-
assertTrue(
142-
String.format(
143-
"Response is missing 'avro_rows'. Read %d rows so far from stream '%s'. ReadRows response:%n%s",
144-
avroRowCount, stream.getName(), response.toString()),
145-
response.hasAvroRows());
146-
avroRowCount += response.getAvroRows().getRowCount();
140+
rowCount += response.getRowCount();
147141
}
148142

149-
LOG.info(
150-
String.format("Read total of %d rows from stream '%s'.", avroRowCount, stream.getName()));
151-
return avroRowCount;
143+
LOG.info(String.format("Read total of %d rows from stream '%s'.", rowCount, stream.getName()));
144+
return rowCount;
152145
}
153146
}

google-cloud-clients/google-cloud-bigquerystorage/src/test/java/com/google/cloud/bigquery/storage/v1beta1/it/ITBigQueryStorageTest.java

Lines changed: 17 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,6 @@
2121
import static org.junit.Assert.assertEquals;
2222
import static org.junit.Assert.assertNotNull;
2323
import static org.junit.Assert.assertNull;
24-
import static org.junit.Assert.assertTrue;
2524

2625
import com.google.api.gax.rpc.ServerStream;
2726
import com.google.cloud.RetryOption;
@@ -149,18 +148,13 @@ public void testSimpleRead() {
149148
ReadRowsRequest readRowsRequest =
150149
ReadRowsRequest.newBuilder().setReadPosition(readPosition).build();
151150

152-
long avroRowCount = 0;
151+
long rowCount = 0;
153152
ServerStream<ReadRowsResponse> stream = client.readRowsCallable().call(readRowsRequest);
154153
for (ReadRowsResponse response : stream) {
155-
assertTrue(
156-
String.format(
157-
"Response is missing 'avro_rows'. Read %d rows so far. ReadRows response:%n%s",
158-
avroRowCount, response.toString()),
159-
response.hasAvroRows());
160-
avroRowCount += response.getAvroRows().getRowCount();
154+
rowCount += response.getRowCount();
161155
}
162156

163-
assertEquals(164_656, avroRowCount);
157+
assertEquals(164_656, rowCount);
164158
}
165159

166160
@Test
@@ -187,31 +181,23 @@ public void testSimpleReadAndResume() {
187181
// We have to read some number of rows in order to be able to resume. More details:
188182
// https://cloud.google.com/bigquery/docs/reference/storage/rpc/google.cloud.bigquery.storage.v1beta1#google.cloud.bigquery.storage.v1beta1.ReadRowsRequest
189183

190-
long avroRowCount = ReadStreamToOffset(session.getStreams(0), /* rowOffset = */ 34_846);
184+
long rowCount = ReadStreamToOffset(session.getStreams(0), /* rowOffset = */ 34_846);
191185

192186
StreamPosition readPosition =
193-
StreamPosition.newBuilder()
194-
.setStream(session.getStreams(0))
195-
.setOffset(avroRowCount)
196-
.build();
187+
StreamPosition.newBuilder().setStream(session.getStreams(0)).setOffset(rowCount).build();
197188

198189
ReadRowsRequest readRowsRequest =
199190
ReadRowsRequest.newBuilder().setReadPosition(readPosition).build();
200191

201192
ServerStream<ReadRowsResponse> stream = client.readRowsCallable().call(readRowsRequest);
202193

203194
for (ReadRowsResponse response : stream) {
204-
assertTrue(
205-
String.format(
206-
"Response is missing 'avro_rows'. Read %d rows so far. ReadRows response:%n%s",
207-
avroRowCount, response.toString()),
208-
response.hasAvroRows());
209-
avroRowCount += response.getAvroRows().getRowCount();
195+
rowCount += response.getRowCount();
210196
}
211197

212198
// Verifies that the number of rows skipped and read equals to the total number of rows in the
213199
// table.
214-
assertEquals(164_656, avroRowCount);
200+
assertEquals(164_656, rowCount);
215201
}
216202

217203
@Test
@@ -252,17 +238,11 @@ public void testFilter() throws IOException {
252238
SimpleRowReader reader =
253239
new SimpleRowReader(new Schema.Parser().parse(session.getAvroSchema().getSchema()));
254240

255-
long avroRowCount = 0;
241+
long rowCount = 0;
256242

257243
ServerStream<ReadRowsResponse> stream = client.readRowsCallable().call(readRowsRequest);
258244
for (ReadRowsResponse response : stream) {
259-
assertTrue(
260-
String.format(
261-
"Response is missing 'avro_rows'. Read %d rows so far. ReadRows response:%n%s",
262-
avroRowCount, response.toString()),
263-
response.hasAvroRows());
264-
avroRowCount += response.getAvroRows().getRowCount();
265-
245+
rowCount += response.getRowCount();
266246
reader.processRows(
267247
response.getAvroRows(),
268248
new SimpleRowReader.AvroRowConsumer() {
@@ -276,7 +256,7 @@ public void accept(GenericData.Record record) {
276256
});
277257
}
278258

279-
assertEquals(1_333, avroRowCount);
259+
assertEquals(1_333, rowCount);
280260
}
281261

282262
@Test
@@ -336,15 +316,10 @@ public void testColumnSelection() throws IOException {
336316

337317
SimpleRowReader reader = new SimpleRowReader(avroSchema);
338318

339-
long avroRowCount = 0;
319+
long rowCount = 0;
340320
ServerStream<ReadRowsResponse> stream = client.readRowsCallable().call(readRowsRequest);
341321
for (ReadRowsResponse response : stream) {
342-
assertTrue(
343-
String.format(
344-
"Response is missing 'avro_rows'. Read %d rows so far. ReadRows response:%n%s",
345-
avroRowCount, response.toString()),
346-
response.hasAvroRows());
347-
avroRowCount += response.getAvroRows().getRowCount();
322+
rowCount += response.getRowCount();
348323
reader.processRows(
349324
response.getAvroRows(),
350325
new SimpleRowReader.AvroRowConsumer() {
@@ -362,7 +337,7 @@ public void accept(GenericData.Record record) {
362337
});
363338
}
364339

365-
assertEquals(1_333, avroRowCount);
340+
assertEquals(1_333, rowCount);
366341
}
367342

368343
@Test
@@ -864,19 +839,19 @@ private long ReadStreamToOffset(Stream stream, long rowOffset) {
864839
ReadRowsRequest readRowsRequest =
865840
ReadRowsRequest.newBuilder().setReadPosition(readPosition).build();
866841

867-
long avroRowCount = 0;
842+
long rowCount = 0;
868843
ServerStream<ReadRowsResponse> serverStream = client.readRowsCallable().call(readRowsRequest);
869844
Iterator<ReadRowsResponse> responseIterator = serverStream.iterator();
870845

871846
while (responseIterator.hasNext()) {
872847
ReadRowsResponse response = responseIterator.next();
873-
avroRowCount += response.getAvroRows().getRowCount();
874-
if (avroRowCount >= rowOffset) {
848+
rowCount += response.getRowCount();
849+
if (rowCount >= rowOffset) {
875850
return rowOffset;
876851
}
877852
}
878853

879-
return avroRowCount;
854+
return rowCount;
880855
}
881856

882857
/**

google-cloud-clients/google-cloud-bigquerystorage/src/test/java/com/google/cloud/bigquery/storage/v1beta1/stub/readrows/ReadRowsRetryTest.java

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,6 @@
1919
import com.google.api.gax.grpc.GrpcTransportChannel;
2020
import com.google.api.gax.rpc.FixedTransportChannelProvider;
2121
import com.google.api.gax.rpc.ServerStream;
22-
import com.google.cloud.bigquery.storage.v1beta1.AvroProto.AvroRows;
2322
import com.google.cloud.bigquery.storage.v1beta1.BigQueryStorageClient;
2423
import com.google.cloud.bigquery.storage.v1beta1.BigQueryStorageGrpc.BigQueryStorageImplBase;
2524
import com.google.cloud.bigquery.storage.v1beta1.BigQueryStorageSettings;
@@ -167,7 +166,7 @@ private int getRowCount(ReadRowsRequest request) {
167166
ServerStream<ReadRowsResponse> serverStream = client.readRowsCallable().call(request);
168167
int rowCount = 0;
169168
for (ReadRowsResponse readRowsResponse : serverStream) {
170-
rowCount += readRowsResponse.getAvroRows().getRowCount();
169+
rowCount += readRowsResponse.getRowCount();
171170
}
172171
return rowCount;
173172
}
@@ -232,9 +231,7 @@ static ReadRowsRequest createRequest(String streamName, long offset) {
232231
}
233232

234233
static ReadRowsResponse createResponse(int numberOfRows) {
235-
return ReadRowsResponse.newBuilder()
236-
.setAvroRows(AvroRows.newBuilder().setRowCount(numberOfRows))
237-
.build();
234+
return ReadRowsResponse.newBuilder().setRowCount(numberOfRows).build();
238235
}
239236

240237
RpcExpectation expectRequest(String streamName, long offset) {

0 commit comments

Comments
 (0)