Example usage of ui.ex1.entity in the DataflowTemplates project by GoogleCloudPlatform: class BigQueryConvertersTest, method testAvroToEntityAllFieldTypes.
/**
 * Tests that {@link BigQueryConverters.AvroToEntity} creates an Entity with all field types.
 */
@Test
public void testAvroToEntityAllFieldTypes() throws Exception {
// Build a BigQuery schema covering every field type the converter handles.
List<TableFieldSchema> fieldSchemas = new ArrayList<>();
fieldSchemas.add(new TableFieldSchema().setName(idField).setType("STRING"));
fieldSchemas.add(new TableFieldSchema().setName(shortStringField).setType("STRING"));
fieldSchemas.add(new TableFieldSchema().setName(longStringField).setType("STRING"));
fieldSchemas.add(new TableFieldSchema().setName(integerField).setType("INTEGER"));
fieldSchemas.add(new TableFieldSchema().setName(int64Field).setType("INT64"));
fieldSchemas.add(new TableFieldSchema().setName(floatField).setType("FLOAT"));
fieldSchemas.add(new TableFieldSchema().setName(float64Field).setType("FLOAT64"));
fieldSchemas.add(new TableFieldSchema().setName(booleanField).setType("BOOLEAN"));
fieldSchemas.add(new TableFieldSchema().setName(boolField).setType("BOOL"));
fieldSchemas.add(new TableFieldSchema().setName(validTimestampField).setType("TIMESTAMP"));
fieldSchemas.add(new TableFieldSchema().setName(dateField).setType("DATE"));
fieldSchemas.add(new TableFieldSchema().setName(timeField).setType("TIME"));
fieldSchemas.add(new TableFieldSchema().setName(dateTimeField).setType("DATETIME"));
fieldSchemas.add(new TableFieldSchema().setName(nullField).setType("STRING"));
TableSchema tableSchema = new TableSchema().setFields(fieldSchemas);
// Build the matching Avro schema: one comma-joined field declaration per column.
String avroFields =
    String.join(
        ",",
        String.format(avroFieldTemplate, idField, "string", idFieldDesc),
        generateShortStringField(),
        generateLongStringField(),
        String.format(avroFieldTemplate, integerField, "int", integerFieldDesc),
        String.format(avroFieldTemplate, int64Field, "int", int64FieldDesc),
        String.format(avroFieldTemplate, floatField, "float", floatFieldDesc),
        String.format(avroFieldTemplate, float64Field, "float", float64FieldDesc),
        String.format(avroFieldTemplate, booleanField, "boolean", booleanFieldDesc),
        String.format(avroFieldTemplate, boolField, "boolean", boolFieldDesc),
        String.format(avroFieldTemplate, validTimestampField, "long", validTimestampFieldDesc),
        String.format(avroFieldTemplate, dateField, "string", dateFieldDesc),
        String.format(avroFieldTemplate, timeField, "string", timeFieldDesc),
        String.format(avroFieldTemplate, dateTimeField, "string", dateTimeFieldDesc),
        String.format(avroFieldTemplate, nullField, "null", nullFieldDesc));
Schema avroSchema = new Schema.Parser().parse(String.format(avroSchemaTemplate, avroFields));
// Populate one Avro record with a value for every field.
GenericRecordBuilder recordBuilder = new GenericRecordBuilder(avroSchema);
recordBuilder.set(idField, idFieldValueStr);
recordBuilder.set(shortStringField, shortStringFieldValue);
recordBuilder.set(longStringField, longStringFieldValue);
recordBuilder.set(integerField, integerFieldValue);
recordBuilder.set(int64Field, int64FieldValue);
recordBuilder.set(floatField, floatFieldValue);
recordBuilder.set(float64Field, float64FieldValue);
recordBuilder.set(booleanField, booleanFieldValue);
recordBuilder.set(boolField, boolFieldValue);
recordBuilder.set(validTimestampField, validTimestampFieldValueMicros);
recordBuilder.set(dateField, dateFieldValue);
recordBuilder.set(timeField, timeFieldValue);
recordBuilder.set(dateTimeField, dateTimeFieldValue);
recordBuilder.set(nullField, null);
Record avroRecord = recordBuilder.build();
SchemaAndRecord input = new SchemaAndRecord(avroRecord, tableSchema);
// Run the converter under test.
Entity entity = converter.apply(input);
Map<String, Value> props = entity.getPropertiesMap();
// Verify the key, metadata, and every converted property.
assertTrue(entity.hasKey());
assertEquals(idFieldValueStr, entity.getKey().getPath(0).getName());
validateMetadata(entity);
assertTrue(entity.containsProperties(shortStringField));
assertEquals(shortStringFieldValue, props.get(shortStringField).getStringValue());
// Short strings remain indexed; long strings are excluded from indexes.
assertFalse(props.get(shortStringField).getExcludeFromIndexes());
assertTrue(entity.containsProperties(longStringField));
assertEquals(longStringFieldValue, props.get(longStringField).getStringValue());
assertTrue(props.get(longStringField).getExcludeFromIndexes());
assertTrue(entity.containsProperties(integerField));
assertEquals(integerFieldValue, props.get(integerField).getIntegerValue());
assertTrue(entity.containsProperties(int64Field));
assertEquals(int64FieldValue, props.get(int64Field).getIntegerValue());
assertTrue(entity.containsProperties(floatField));
assertEquals(floatFieldValue, props.get(floatField).getDoubleValue(), 0.001);
assertTrue(entity.containsProperties(float64Field));
assertEquals(float64FieldValue, props.get(float64Field).getDoubleValue(), 0.001);
assertTrue(entity.containsProperties(booleanField));
assertEquals(booleanFieldValue, props.get(booleanField).getBooleanValue());
assertTrue(entity.containsProperties(boolField));
assertEquals(boolFieldValue, props.get(boolField).getBooleanValue());
assertTrue(entity.containsProperties(validTimestampField));
// Micros in the Avro record are converted down to millis precision.
assertEquals(
    Timestamps.fromMillis(validTimestampFieldValueMillis),
    props.get(validTimestampField).getTimestampValue());
assertTrue(entity.containsProperties(dateField));
assertEquals(dateFieldValue, props.get(dateField).getStringValue());
assertTrue(entity.containsProperties(timeField));
assertEquals(timeFieldValue, props.get(timeField).getStringValue());
assertTrue(entity.containsProperties(dateTimeField));
assertEquals(dateTimeFieldValue, props.get(dateTimeField).getStringValue());
assertTrue(entity.containsProperties(nullField));
assertEquals(NullValue.NULL_VALUE, props.get(nullField).getNullValue());
}
Example usage of ui.ex1.entity in the DataflowTemplates project by GoogleCloudPlatform: class BigQueryConvertersTest, method testAvroToEntityNoIdColumn.
/**
 * Tests that {@link BigQueryConverters.AvroToEntity} creates an Entity without a key when the
 * unique name column is missing.
 */
@Test
public void testAvroToEntityNoIdColumn() throws Exception {
// Create test data: a schema with only a short string column and no id column.
List<TableFieldSchema> fields = new ArrayList<>();
fields.add(new TableFieldSchema().setName(shortStringField).setType("STRING"));
TableSchema bqSchema = new TableSchema().setFields(fields);
Record record = generateSingleFieldAvroRecord(shortStringField, "string", shortStringFieldDesc, shortStringFieldValue);
SchemaAndRecord inputBqData = new SchemaAndRecord(record, bqSchema);
// Run the test: with no unique-name column, no key should be attached.
Entity outputEntity = converter.apply(inputBqData);
// Use assertFalse directly instead of assertTrue on a negated condition,
// matching the assertion style used elsewhere in this test class.
assertFalse(outputEntity.hasKey());
}
Example usage of ui.ex1.entity in the DataflowTemplates project by GoogleCloudPlatform: class BigQueryConvertersTest, method testAvroToEntityDefaultNamespace.
/**
 * Tests that {@link BigQueryConverters.AvroToEntity} creates an Entity with a default namespace
 * when the namespace is not specified.
 */
@Test
public void testAvroToEntityDefaultNamespace() throws Exception {
// Build a two-column BigQuery schema: an id column and a short string column.
List<TableFieldSchema> fieldSchemas = new ArrayList<>();
fieldSchemas.add(new TableFieldSchema().setName(idField).setType("STRING"));
fieldSchemas.add(new TableFieldSchema().setName(shortStringField).setType("STRING"));
TableSchema tableSchema = new TableSchema().setFields(fieldSchemas);
// Build the matching Avro schema with the same two fields.
String avroFields =
    String.join(
        ",",
        String.format(avroFieldTemplate, idField, "int", idFieldDesc),
        generateShortStringField());
Schema avroSchema = new Schema.Parser().parse(String.format(avroSchemaTemplate, avroFields));
GenericRecordBuilder recordBuilder = new GenericRecordBuilder(avroSchema);
recordBuilder.set(idField, 1);
recordBuilder.set(shortStringField, shortStringFieldValue);
Record avroRecord = recordBuilder.build();
SchemaAndRecord input = new SchemaAndRecord(avroRecord, tableSchema);
// Run a converter built without a namespace; the entity key should fall back
// to the default (empty) namespace.
AvroToEntity noNamespaceConverter =
    AvroToEntity.newBuilder()
        .setEntityKind(entityKind)
        .setUniqueNameColumn(uniqueNameColumn)
        .build();
Entity entity = noNamespaceConverter.apply(input);
// Assess results
assertTrue(entity.hasKey());
assertEquals("", entity.getKey().getPartitionId().getNamespaceId());
}
Example usage of ui.ex1.entity in the DataflowTemplates project by GoogleCloudPlatform: class BigQueryConvertersTest, method testAvroToEntityNullIdColumn.
/**
 * Tests that {@link BigQueryConverters.AvroToEntity} creates an Entity without a key when the
 * unique name column is null.
 */
@Test
public void testAvroToEntityNullIdColumn() throws Exception {
// Create test data: an id column whose Avro type is "null" so its value is always null.
List<TableFieldSchema> fields = new ArrayList<>();
fields.add(new TableFieldSchema().setName(idField).setType("STRING"));
fields.add(new TableFieldSchema().setName(shortStringField).setType("STRING"));
TableSchema bqSchema = new TableSchema().setFields(fields);
Schema avroSchema = new Schema.Parser().parse(String.format(avroSchemaTemplate, new StringBuilder().append(String.format(avroFieldTemplate, idField, "null", idFieldDesc)).append(",").append(generateShortStringField()).toString()));
GenericRecordBuilder builder = new GenericRecordBuilder(avroSchema);
builder.set(idField, null);
builder.set(shortStringField, shortStringFieldValue);
Record record = builder.build();
SchemaAndRecord inputBqData = new SchemaAndRecord(record, bqSchema);
// Run the test: a null unique-name value means no key can be built.
Entity outputEntity = converter.apply(inputBqData);
// Use assertFalse directly instead of assertTrue on a negated condition,
// matching the assertion style used elsewhere in this test class.
assertFalse(outputEntity.hasKey());
}
Example usage of ui.ex1.entity in the DataflowTemplates project by GoogleCloudPlatform: class BigQueryToDatastore, method main.
/**
 * Runs a pipeline which reads data from BigQuery and writes it to Datastore.
 *
 * @param args arguments to the pipeline
 */
public static void main(String[] args) {
BigQueryToDatastoreOptions options =
    PipelineOptionsFactory.fromArgs(args)
        .withValidation()
        .as(BigQueryToDatastoreOptions.class);
Pipeline pipeline = Pipeline.create(options);
// Tags for the two conversion outcomes: entities whose keys could be built
// from the BigQuery data, and rows that failed key construction. The anonymous
// subclasses capture the generic type parameter at runtime.
TupleTag<Entity> successTag = new TupleTag<Entity>() {};
TupleTag<String> failureTag = new TupleTag<String>("failures") {};
TupleTag<String> errorTag = new TupleTag<String>("errors") {};
// Read from BigQuery and convert each row to a Datastore Entity, splitting
// output between successTag and failureTag.
PCollectionTuple converted =
    pipeline.apply(
        BigQueryToEntity.newBuilder()
            .setQuery(options.getReadQuery())
            .setUniqueNameColumn(options.getReadIdColumn())
            .setEntityKind(options.getDatastoreWriteEntityKind())
            .setNamespace(options.getDatastoreWriteNamespace())
            .setSuccessTag(successTag)
            .setFailureTag(failureTag)
            .build());
// Persist rows that could not be converted into valid Datastore entities to GCS.
converted.apply(
    LogErrors.newBuilder()
        .setErrorWritePath(options.getInvalidOutputPath())
        .setErrorTag(failureTag)
        .build());
// Write the valid entities to Datastore, logging any write errors to GCS.
converted
    .get(successTag)
    .apply(
        WriteEntities.newBuilder()
            .setProjectId(options.getDatastoreWriteProjectId())
            .setHintNumWorkers(options.getDatastoreHintNumWorkers())
            .setErrorTag(errorTag)
            .build())
    .apply(
        LogErrors.newBuilder()
            .setErrorWritePath(options.getErrorWritePath())
            .setErrorTag(errorTag)
            .build());
pipeline.run();
}
Aggregations