Use of org.apache.avro.Schema.Type in project knime-cloud by KNIME.
From the class AbstractAmazonPersonalizeDataUploadNodeModel, method checkAlreadyExistingDataset.
/**
 * Checks whether a dataset of the configured type already exists in the given dataset group
 * and, depending on the overwrite policy, either aborts or deletes the existing dataset.
 *
 * @param personalizeClient client used to list and delete datasets
 * @param datasetGroupArn ARN of the dataset group to inspect
 * @param exec execution context used for progress/status reporting
 * @throws InterruptedException if deletion of the existing dataset is interrupted
 * @throws IllegalStateException if a dataset of this type exists and the policy is ABORT
 */
private void checkAlreadyExistingDataset(final AmazonPersonalize personalizeClient, final String datasetGroupArn, final ExecutionContext exec) throws InterruptedException {
    exec.setMessage("Checking already existing datasets");
    final ListDatasetsRequest listRequest = new ListDatasetsRequest().withDatasetGroupArn(datasetGroupArn);
    final ListDatasetsResult listResult = personalizeClient.listDatasets(listRequest);
    final Optional<DatasetSummary> existingDataset = listResult.getDatasets().stream()
        .filter(summary -> summary.getDatasetType().equals(m_datasetType))
        .findFirst();
    if (existingDataset.isPresent()) {
        final boolean abortOnExisting =
            m_settings.getOverwriteDatasetPolicy().equals(OverwritePolicy.ABORT.toString());
        if (abortOnExisting) {
            // Policy says abort: surface the conflict to the user instead of touching the dataset.
            throw new IllegalStateException("A dataset of type '" + getDatasetType() + "' already exists. Either choose a different dataset group or select to overwrite the existing " + "dataset.");
        }
        // Policy says overwrite: remove the existing dataset before the upload proceeds.
        exec.setMessage("Deleting existing dataset");
        deleteDataset(personalizeClient, datasetGroupArn, existingDataset.get().getDatasetArn());
    }
    exec.setProgress(1);
}
Use of org.apache.avro.Schema.Type in project hudi by Apache.
From the class DebeziumSource, method toDataset.
/**
 * Converts a Kafka Topic offset into a Spark dataset.
 *
 * @param offsetRanges Offset ranges
 * @param offsetGen KafkaOffsetGen
 * @param schemaStr Avro schema as a JSON string; used both to build the DataFrame
 *                  and for the date/array column conversions applied afterwards
 * @return Spark dataset
 */
private Dataset<Row> toDataset(OffsetRange[] offsetRanges, KafkaOffsetGen offsetGen, String schemaStr) {
AvroConvertor convertor = new AvroConvertor(schemaStr);
Dataset<Row> kafkaData;
// String deserializer: each Kafka value is a JSON string that must first be converted
// into an Avro record before a DataFrame can be built from it.
if (deserializerClassName.equals(StringDeserializer.class.getName())) {
kafkaData = AvroConversionUtils.createDataFrame(KafkaUtils.<String, String>createRDD(sparkContext, offsetGen.getKafkaParams(), offsetRanges, LocationStrategies.PreferConsistent()).map(obj -> convertor.fromJson(obj.value())).rdd(), schemaStr, sparkSession);
} else {
// Otherwise the configured deserializer already yields Avro records; cast the value directly.
kafkaData = AvroConversionUtils.createDataFrame(KafkaUtils.createRDD(sparkContext, offsetGen.getKafkaParams(), offsetRanges, LocationStrategies.PreferConsistent()).map(obj -> (GenericRecord) obj.value()).rdd(), schemaStr, sparkSession);
}
// Flatten debezium payload, specific to each DB type (postgres/ mysql/ etc..)
Dataset<Row> debeziumDataset = processDataset(kafkaData);
// Some required transformations to ensure debezium data types are converted to spark supported types.
return convertArrayColumnsToString(convertColumnToNullable(sparkSession, convertDateColumns(debeziumDataset, new Schema.Parser().parse(schemaStr))));
}
Use of org.apache.avro.Schema.Type in project quick by bakdata.
From the class GraphQLToAvroConverterTest, method shouldConvertAllScalars.
@Test
void shouldConvertAllScalars() {
    // Every supported GraphQL scalar must be mapped to its corresponding Avro primitive type.
    final Schema parsedSchema = this.graphQLToAvroConverter.convertToSchema(scalarSchema);
    assertThat(parsedSchema.getName()).isEqualTo("Scalars");
    // Expected Avro type per GraphQL field name.
    final Map<String, Type> expectedTypeForField = Map.ofEntries(
        Map.entry("int", Type.INT),
        Map.entry("float", Type.FLOAT),
        Map.entry("string", Type.STRING),
        Map.entry("bool", Type.BOOLEAN),
        Map.entry("id", Type.STRING),
        Map.entry("long", Type.LONG),
        Map.entry("short", Type.INT),
        Map.entry("char", Type.STRING));
    expectedTypeForField.forEach((fieldName, expectedType) ->
        assertThat(parsedSchema.getField(fieldName))
            .isNotNull()
            .extracting(field -> field.schema().getType())
            .isEqualTo(expectedType));
}
Use of org.apache.avro.Schema.Type in project quick by bakdata.
From the class GraphQLToAvroConverterTest, method shouldConvertGraphQLSchema.
@Test
void shouldConvertGraphQLSchema() {
// Converts the product GraphQL schema and verifies the resulting Avro record structure.
final Schema parsedSchema = this.graphQLToAvroConverter.convertToSchema(productSchema);
assertThat(parsedSchema.getName()).isEqualTo("Product");
// "productId": nullable int, i.e. union(null, int) in exactly that order.
assertThat(parsedSchema.getField("productId")).isNotNull().extracting(Field::schema).satisfies(schema -> assertThat(schema.getType()).isEqualTo(Type.UNION)).extracting(Schema::getTypes, InstanceOfAssertFactories.list(Schema.class)).extracting(Schema::getType).containsExactly(Type.NULL, Type.INT);
// "name" and "description": nullable strings — union containing null and string.
assertThat(parsedSchema.getField("name")).isNotNull().extracting(Field::schema).satisfies(schema -> assertThat(schema.getType()).isEqualTo(Type.UNION)).extracting(Schema::getTypes, InstanceOfAssertFactories.list(Schema.class)).extracting(Schema::getType).contains(Type.NULL, Type.STRING);
assertThat(parsedSchema.getField("description")).isNotNull().extracting(Field::schema).satisfies(schema -> assertThat(schema.getType()).isEqualTo(Type.UNION)).extracting(Schema::getTypes, InstanceOfAssertFactories.list(Schema.class)).extracting(Schema::getType).contains(Type.NULL, Type.STRING);
// "price": nullable nested record "Price" with fields (value: nullable float, currency: nullable string).
// unwrapSchemaType flattens each field's union types, hence the interleaved NULLs in containsExactly.
assertThat(parsedSchema.getField("price")).isNotNull().extracting(Field::schema).satisfies(schema -> assertThat(schema.getType()).isEqualTo(Type.UNION)).extracting(Schema::getTypes, InstanceOfAssertFactories.list(Schema.class)).satisfies(types -> assertThat(types).extracting(Schema::getType).containsExactly(Type.NULL, Type.RECORD)).last(InstanceOfAssertFactories.type(Schema.class)).hasFieldOrPropertyWithValue("name", "Price").hasFieldOrPropertyWithValue("type", Type.RECORD).extracting(Schema::getFields, InstanceOfAssertFactories.list(Field.class)).hasSize(2).satisfies(fields -> assertThat(fields).extracting(Field::name).containsExactly("value", "currency")).satisfies(fields -> assertThat(fields).flatExtracting(field -> unwrapSchemaType(field.schema())).containsExactly(Type.NULL, Type.FLOAT, Type.NULL, Type.STRING));
// "metadata": nullable nested record "Metadata" with fields (created_at: nullable int, source: nullable string).
assertThat(parsedSchema.getField("metadata")).isNotNull().extracting(Field::schema).satisfies(schema -> assertThat(schema.getType()).isEqualTo(Type.UNION)).extracting(Schema::getTypes, InstanceOfAssertFactories.list(Schema.class)).satisfies(types -> assertThat(types).extracting(Schema::getType).containsExactly(Type.NULL, Type.RECORD)).last(InstanceOfAssertFactories.type(Schema.class)).hasFieldOrPropertyWithValue("name", "Metadata").hasFieldOrPropertyWithValue("type", Type.RECORD).extracting(Schema::getFields, InstanceOfAssertFactories.list(Field.class)).hasSize(2).satisfies(fields -> assertThat(fields).extracting(Field::name).containsExactly("created_at", "source")).satisfies(fields -> assertThat(fields).flatExtracting(field -> unwrapSchemaType(field.schema())).containsExactly(Type.NULL, Type.INT, Type.NULL, Type.STRING));
}
Use of org.apache.avro.Schema.Type in project quick by bakdata.
From the class GraphQLToAvroConverterTest, method shouldConvertGraphQLSchemaWithLists.
@Test
void shouldConvertGraphQLSchemaWithLists() {
    // Converts the contract GraphQL schema and verifies that list fields become
    // Avro unions of null and an array of nullable records.
    final Schema parsedSchema = this.graphQLToAvroConverter.convertToSchema(contractSchema);
    assertThat(parsedSchema.getName()).isEqualTo("Contract");
    // "_id" is mandatory, so it maps to a plain (non-union) string.
    assertThat(parsedSchema.getField("_id")).isNotNull().extracting(field -> field.schema().getType()).isEqualTo(Type.STRING);
    // The four list fields share an identical shape — union(null, array(union(null, record))) —
    // differing only in which grain-value record the array elements use.
    this.assertNullableListOfGrainRecords(parsedSchema, "policyHolderId", "PersonGrainValue");
    this.assertNullableListOfGrainRecords(parsedSchema, "insuredPersonId", "PersonGrainValue");
    this.assertNullableListOfGrainRecords(parsedSchema, "term", "GrainValue");
    this.assertNullableListOfGrainRecords(parsedSchema, "value", "GrainValue");
}

/**
 * Asserts that {@code fieldName} in {@code parsedSchema} is a nullable array of nullable
 * records named {@code recordName}, where the record has exactly the three fields
 * {@code _in_utc} (string), {@code _v} (string) and {@code _c} (nullable float).
 *
 * @param parsedSchema the converted Avro schema under test
 * @param fieldName name of the list field to check
 * @param recordName expected name of the array's element record type
 */
private void assertNullableListOfGrainRecords(final Schema parsedSchema, final String fieldName, final String recordName) {
    assertThat(parsedSchema.getField(fieldName)).isNotNull()
        .extracting(Field::schema)
        .hasFieldOrPropertyWithValue("type", Type.UNION)
        .extracting(Schema::getTypes, InstanceOfAssertFactories.list(Schema.class))
        // Outer union is exactly (null, array).
        .satisfies(types -> assertThat(types).extracting(Schema::getType).containsExactly(Type.NULL, Type.ARRAY))
        .last(InstanceOfAssertFactories.type(Schema.class))
        .hasFieldOrPropertyWithValue("type", Type.ARRAY)
        .extracting(Schema::getElementType)
        // Element type is itself a union of exactly (null, record).
        .satisfies(schema -> assertThat(schema.getType()).isEqualTo(Type.UNION))
        .extracting(Schema::getTypes, InstanceOfAssertFactories.list(Schema.class))
        .satisfies(types -> assertThat(types).extracting(Schema::getType).containsExactly(Type.NULL, Type.RECORD))
        .last(InstanceOfAssertFactories.type(Schema.class))
        .hasFieldOrPropertyWithValue("name", recordName)
        .hasFieldOrPropertyWithValue("type", Type.RECORD)
        .extracting(Schema::getFields, InstanceOfAssertFactories.list(Field.class))
        .hasSize(3)
        .satisfies(fields -> assertThat(fields).extracting(Field::name).containsExactly("_in_utc", "_v", "_c"))
        // unwrapSchemaType flattens each field's union types, hence the trailing NULL before FLOAT.
        .satisfies(fields -> assertThat(fields).flatExtracting(field -> unwrapSchemaType(field.schema())).containsExactly(Type.STRING, Type.STRING, Type.NULL, Type.FLOAT));
}
Aggregations