Example 76 with Type

use of org.apache.avro.Schema.Type in project knime-cloud by knime.

the class AbstractAmazonPersonalizeDataUploadNodeModel method checkAlreadyExistingDataset.

private void checkAlreadyExistingDataset(final AmazonPersonalize personalizeClient, final String datasetGroupArn, final ExecutionContext exec) throws InterruptedException {
    exec.setMessage("Checking already existing datasets");
    final ListDatasetsResult listDatasets = personalizeClient.listDatasets(new ListDatasetsRequest().withDatasetGroupArn(datasetGroupArn));
    final Optional<DatasetSummary> dataset = listDatasets.getDatasets().stream().filter(e -> e.getDatasetType().equals(m_datasetType)).findFirst();
    if (dataset.isPresent()) {
        if (m_settings.getOverwriteDatasetPolicy().equals(OverwritePolicy.ABORT.toString())) {
            // Abort if dataset already exists
            throw new IllegalStateException("A dataset of type '" + getDatasetType() + "' already exists. Either choose a different dataset group or select to overwrite the existing dataset.");
        } else {
            // Delete the existing dataset
            exec.setMessage("Deleting existing dataset");
            deleteDataset(personalizeClient, datasetGroupArn, dataset.get().getDatasetArn());
        }
    }
    exec.setProgress(1);
}
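
The node also has to register an Avro schema for the uploaded data, and the imports listed below (FieldAssembler, IntValue, LongValue, StringValue) hint at a mapping from KNIME column types to Avro types. A minimal hypothetical sketch of such a mapping with SchemaBuilder, not the actual KNIME implementation:

import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.SchemaBuilder.FieldAssembler;

// Hypothetical sketch: build a record schema from column names and Java types.
// The real node model would derive names and types from the KNIME DataTableSpec.
private static Schema buildSchema(final String[] names, final Class<?>[] types) {
    FieldAssembler<Schema> assembler = SchemaBuilder.record("Items").fields();
    for (int i = 0; i < names.length; i++) {
        if (types[i] == Integer.class) {
            assembler = assembler.requiredInt(names[i]);
        } else if (types[i] == Long.class) {
            assembler = assembler.requiredLong(names[i]);
        } else {
            assembler = assembler.requiredString(names[i]);
        }
    }
    return assembler.endRecord();
}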
Also used : ConnectionMonitor(org.knime.base.filehandling.remote.files.ConnectionMonitor) Arrays(java.util.Arrays) NodeSettingsRO(org.knime.core.node.NodeSettingsRO) AmazonConnectionInformationPortObject(org.knime.cloud.aws.util.AmazonConnectionInformationPortObject) CSVWriter(org.knime.base.node.io.csvwriter.CSVWriter) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) URISyntaxException(java.net.URISyntaxException) ListDatasetGroupsResult(com.amazonaws.services.personalize.model.ListDatasetGroupsResult) DescribeDatasetGroupResult(com.amazonaws.services.personalize.model.DescribeDatasetGroupResult) RemoteFile(org.knime.base.filehandling.remote.files.RemoteFile) CreateDatasetGroupResult(com.amazonaws.services.personalize.model.CreateDatasetGroupResult) CreateDatasetImportJobRequest(com.amazonaws.services.personalize.model.CreateDatasetImportJobRequest) InvalidInputException(com.amazonaws.services.personalize.model.InvalidInputException) Status(org.knime.cloud.aws.mlservices.utils.personalize.AmazonPersonalizeUtils.Status) DataColumnSpec(org.knime.core.data.DataColumnSpec) Map(java.util.Map) FieldAssembler(org.apache.avro.SchemaBuilder.FieldAssembler) URI(java.net.URI) DeleteDatasetGroupRequest(com.amazonaws.services.personalize.model.DeleteDatasetGroupRequest) DescribeDatasetImportJobRequest(com.amazonaws.services.personalize.model.DescribeDatasetImportJobRequest) PortType(org.knime.core.node.port.PortType) FileWriterSettings(org.knime.base.node.io.csvwriter.FileWriterSettings) IntValue(org.knime.core.data.IntValue) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) Schema(org.apache.avro.Schema) AmazonPersonalize(com.amazonaws.services.personalize.AmazonPersonalize) NodeModel(org.knime.core.node.NodeModel) Collectors(java.util.stream.Collectors) List(java.util.List) BufferedDataTable(org.knime.core.node.BufferedDataTable) RemoteFileFactory(org.knime.base.filehandling.remote.files.RemoteFileFactory) Optional(java.util.Optional) DataSource(com.amazonaws.services.personalize.model.DataSource) DescribeDatasetImportJobResult(com.amazonaws.services.personalize.model.DescribeDatasetImportJobResult) PortObject(org.knime.core.node.port.PortObject) LongValue(org.knime.core.data.LongValue) DataTableSpec(org.knime.core.data.DataTableSpec) DatasetGroupSummary(com.amazonaws.services.personalize.model.DatasetGroupSummary) DescribeDatasetGroupRequest(com.amazonaws.services.personalize.model.DescribeDatasetGroupRequest) HashMap(java.util.HashMap) DatasetSummary(com.amazonaws.services.personalize.model.DatasetSummary) BufferedOutputStream(java.io.BufferedOutputStream) ExecutionContext(org.knime.core.node.ExecutionContext) CloudConnectionInformation(org.knime.cloud.core.util.port.CloudConnectionInformation) Connection(org.knime.base.filehandling.remote.files.Connection) AmazonPersonalizeUtils(org.knime.cloud.aws.mlservices.utils.personalize.AmazonPersonalizeUtils) CreateSchemaRequest(com.amazonaws.services.personalize.model.CreateSchemaRequest) OutputStreamWriter(java.io.OutputStreamWriter) AmazonPersonalizeConnection(org.knime.cloud.aws.mlservices.nodes.personalize.AmazonPersonalizeConnection) DataCell(org.knime.core.data.DataCell) Type(org.apache.avro.Schema.Type) StringValue(org.knime.core.data.StringValue) CreateDatasetGroupRequest(com.amazonaws.services.personalize.model.CreateDatasetGroupRequest) ConnectionInformation(org.knime.base.filehandling.remote.connectioninformation.port.ConnectionInformation) CloseableRowIterator(org.knime.core.data.container.CloseableRowIterator) CreateDatasetRequest(com.amazonaws.services.personalize.model.CreateDatasetRequest) ListDatasetsResult(com.amazonaws.services.personalize.model.ListDatasetsResult) FileOutputStream(java.io.FileOutputStream) PortObjectSpec(org.knime.core.node.port.PortObjectSpec) IOException(java.io.IOException) DatasetSchemaSummary(com.amazonaws.services.personalize.model.DatasetSchemaSummary) DeleteDatasetRequest(com.amazonaws.services.personalize.model.DeleteDatasetRequest) File(java.io.File) DataRow(org.knime.core.data.DataRow) NodeSettingsWO(org.knime.core.node.NodeSettingsWO) ListDatasetGroupsRequest(com.amazonaws.services.personalize.model.ListDatasetGroupsRequest) ListDatasetsRequest(com.amazonaws.services.personalize.model.ListDatasetsRequest) StringUtils(com.amazonaws.util.StringUtils) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) FileUtil(org.knime.core.util.FileUtil)

Example 77 with Type

use of org.apache.avro.Schema.Type in project hudi by apache.

the class DebeziumSource method toDataset.

/**
 * Converts a range of Kafka topic offsets into a Spark dataset.
 *
 * @param offsetRanges Offset ranges to read from Kafka
 * @param offsetGen    KafkaOffsetGen
 * @param schemaStr    Source Avro schema as a string
 * @return Spark dataset
 */
private Dataset<Row> toDataset(OffsetRange[] offsetRanges, KafkaOffsetGen offsetGen, String schemaStr) {
    AvroConvertor convertor = new AvroConvertor(schemaStr);
    Dataset<Row> kafkaData;
    if (deserializerClassName.equals(StringDeserializer.class.getName())) {
        kafkaData = AvroConversionUtils.createDataFrame(KafkaUtils.<String, String>createRDD(sparkContext, offsetGen.getKafkaParams(), offsetRanges, LocationStrategies.PreferConsistent()).map(obj -> convertor.fromJson(obj.value())).rdd(), schemaStr, sparkSession);
    } else {
        kafkaData = AvroConversionUtils.createDataFrame(KafkaUtils.createRDD(sparkContext, offsetGen.getKafkaParams(), offsetRanges, LocationStrategies.PreferConsistent()).map(obj -> (GenericRecord) obj.value()).rdd(), schemaStr, sparkSession);
    }
    // Flatten the Debezium payload; the flattening is specific to each DB type (Postgres, MySQL, etc.)
    Dataset<Row> debeziumDataset = processDataset(kafkaData);
    // Apply the transformations required to convert Debezium data types to Spark-supported types.
    return convertArrayColumnsToString(convertColumnToNullable(sparkSession, convertDateColumns(debeziumDataset, new Schema.Parser().parse(schemaStr))));
}
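
The convertDateColumns call above has to locate date-typed fields in the parsed Avro schema. A hedged sketch of how such a lookup could unwrap nullable unions via Schema.Type and the "date" logical type; the actual Hudi helper is not shown here and may differ:

import java.util.List;
import java.util.stream.Collectors;
import org.apache.avro.LogicalTypes;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.avro.Schema.Type;

// Hypothetical sketch: collect the names of fields whose (possibly nullable)
// schema carries the Avro "date" logical type.
private static List<String> dateFieldNames(final Schema schema) {
    return schema.getFields().stream().filter(field -> {
        Schema fieldSchema = field.schema();
        if (fieldSchema.getType() == Type.UNION) {
            // Unwrap the non-null branch of a nullable union.
            fieldSchema = fieldSchema.getTypes().stream().filter(t -> t.getType() != Type.NULL).findFirst().orElse(fieldSchema);
        }
        return LogicalTypes.date().equals(fieldSchema.getLogicalType());
    }).map(Field::name).collect(Collectors.toList());
}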
Also used : Arrays(java.util.Arrays) Dataset(org.apache.spark.sql.Dataset) SchemaRegistryProvider(org.apache.hudi.utilities.schema.SchemaRegistryProvider) HoodieException(org.apache.hudi.exception.HoodieException) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) KafkaOffsetGen(org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen) AvroConversionUtils(org.apache.hudi.AvroConversionUtils) Option(org.apache.hudi.common.util.Option) HoodieDeltaStreamerMetrics(org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamerMetrics) DataSourceWriteOptions(org.apache.hudi.DataSourceWriteOptions) RowSource(org.apache.hudi.utilities.sources.RowSource) Logger(org.apache.log4j.Logger) LocationStrategies(org.apache.spark.streaming.kafka010.LocationStrategies) AvroConvertor(org.apache.hudi.utilities.sources.helpers.AvroConvertor) StringDeserializer(org.apache.kafka.common.serialization.StringDeserializer) OffsetRange(org.apache.spark.streaming.kafka010.OffsetRange) CheckpointUtils(org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen.CheckpointUtils) Type(org.apache.avro.Schema.Type) SparkSession(org.apache.spark.sql.SparkSession) KafkaUtils(org.apache.spark.streaming.kafka010.KafkaUtils) DataTypes(org.apache.spark.sql.types.DataTypes) StructField(org.apache.spark.sql.types.StructField) StructType(org.apache.spark.sql.types.StructType) SchemaProvider(org.apache.hudi.utilities.schema.SchemaProvider) GenericRecord(org.apache.avro.generic.GenericRecord) Schema(org.apache.avro.Schema) Field(org.apache.avro.Schema.Field) TypedProperties(org.apache.hudi.common.config.TypedProperties) IOException(java.io.IOException) Row(org.apache.spark.sql.Row) Collectors(java.util.stream.Collectors) List(java.util.List) LogManager(org.apache.log4j.LogManager) org.apache.spark.sql.functions(org.apache.spark.sql.functions) Pair(org.apache.hudi.common.util.collection.Pair)

Example 78 with Type

use of org.apache.avro.Schema.Type in project quick by bakdata.

the class GraphQLToAvroConverterTest method shouldConvertAllScalars.

@Test
void shouldConvertAllScalars() {
    final Schema parsedSchema = this.graphQLToAvroConverter.convertToSchema(scalarSchema);
    assertThat(parsedSchema.getName()).isEqualTo("Scalars");
    final Map<String, Type> expectedTypeForField = Map.ofEntries(
        Map.entry("int", Type.INT),
        Map.entry("float", Type.FLOAT),
        Map.entry("string", Type.STRING),
        Map.entry("bool", Type.BOOLEAN),
        Map.entry("id", Type.STRING),
        Map.entry("long", Type.LONG),
        Map.entry("short", Type.INT),
        Map.entry("char", Type.STRING));
    for (final Entry<String, Type> typeEntry : expectedTypeForField.entrySet()) {
        assertThat(parsedSchema.getField(typeEntry.getKey())).isNotNull().extracting(field -> field.schema().getType()).isEqualTo(typeEntry.getValue());
    }
}
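
The expected mapping in this test suggests that the converter resolves GraphQL scalar names to Avro types through a simple lookup table. A hypothetical sketch of such a table, mirroring the assertions above; the real GraphQLToAvroConverter may be structured differently:

import java.util.Map;
import org.apache.avro.Schema.Type;

// Hypothetical lookup mirroring the expectations asserted above.
private static final Map<String, Type> SCALAR_TO_AVRO_TYPE = Map.ofEntries(
    Map.entry("Int", Type.INT),
    Map.entry("Float", Type.FLOAT),
    Map.entry("String", Type.STRING),
    Map.entry("Boolean", Type.BOOLEAN),
    Map.entry("ID", Type.STRING),
    Map.entry("Long", Type.LONG),
    Map.entry("Short", Type.INT),
    Map.entry("Char", Type.STRING));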
Also used : Schema(org.apache.avro.Schema) Field(org.apache.avro.Schema.Field) Files(java.nio.file.Files) InstanceOfAssertFactories(org.assertj.core.api.InstanceOfAssertFactories) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) Test(org.junit.jupiter.api.Test) List(java.util.List) BeforeAll(org.junit.jupiter.api.BeforeAll) Map(java.util.Map) Entry(java.util.Map.Entry) Path(java.nio.file.Path) Type(org.apache.avro.Schema.Type)

Example 79 with Type

use of org.apache.avro.Schema.Type in project quick by bakdata.

the class GraphQLToAvroConverterTest method shouldConvertGraphQLSchema.

@Test
void shouldConvertGraphQLSchema() {
    final Schema parsedSchema = this.graphQLToAvroConverter.convertToSchema(productSchema);
    assertThat(parsedSchema.getName()).isEqualTo("Product");
    assertThat(parsedSchema.getField("productId")).isNotNull().extracting(Field::schema).satisfies(schema -> assertThat(schema.getType()).isEqualTo(Type.UNION)).extracting(Schema::getTypes, InstanceOfAssertFactories.list(Schema.class)).extracting(Schema::getType).containsExactly(Type.NULL, Type.INT);
    assertThat(parsedSchema.getField("name")).isNotNull().extracting(Field::schema).satisfies(schema -> assertThat(schema.getType()).isEqualTo(Type.UNION)).extracting(Schema::getTypes, InstanceOfAssertFactories.list(Schema.class)).extracting(Schema::getType).contains(Type.NULL, Type.STRING);
    assertThat(parsedSchema.getField("description")).isNotNull().extracting(Field::schema).satisfies(schema -> assertThat(schema.getType()).isEqualTo(Type.UNION)).extracting(Schema::getTypes, InstanceOfAssertFactories.list(Schema.class)).extracting(Schema::getType).contains(Type.NULL, Type.STRING);
    assertThat(parsedSchema.getField("price")).isNotNull().extracting(Field::schema).satisfies(schema -> assertThat(schema.getType()).isEqualTo(Type.UNION)).extracting(Schema::getTypes, InstanceOfAssertFactories.list(Schema.class)).satisfies(types -> assertThat(types).extracting(Schema::getType).containsExactly(Type.NULL, Type.RECORD)).last(InstanceOfAssertFactories.type(Schema.class)).hasFieldOrPropertyWithValue("name", "Price").hasFieldOrPropertyWithValue("type", Type.RECORD).extracting(Schema::getFields, InstanceOfAssertFactories.list(Field.class)).hasSize(2).satisfies(fields -> assertThat(fields).extracting(Field::name).containsExactly("value", "currency")).satisfies(fields -> assertThat(fields).flatExtracting(field -> unwrapSchemaType(field.schema())).containsExactly(Type.NULL, Type.FLOAT, Type.NULL, Type.STRING));
    assertThat(parsedSchema.getField("metadata")).isNotNull().extracting(Field::schema).satisfies(schema -> assertThat(schema.getType()).isEqualTo(Type.UNION)).extracting(Schema::getTypes, InstanceOfAssertFactories.list(Schema.class)).satisfies(types -> assertThat(types).extracting(Schema::getType).containsExactly(Type.NULL, Type.RECORD)).last(InstanceOfAssertFactories.type(Schema.class)).hasFieldOrPropertyWithValue("name", "Metadata").hasFieldOrPropertyWithValue("type", Type.RECORD).extracting(Schema::getFields, InstanceOfAssertFactories.list(Field.class)).hasSize(2).satisfies(fields -> assertThat(fields).extracting(Field::name).containsExactly("created_at", "source")).satisfies(fields -> assertThat(fields).flatExtracting(field -> unwrapSchemaType(field.schema())).containsExactly(Type.NULL, Type.INT, Type.NULL, Type.STRING));
}
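
Both this test and the next rely on an unwrapSchemaType helper defined elsewhere in the test class. Judging from the flattened type sequences asserted above, it plausibly expands a union into its member types; a sketch consistent with those assertions:

import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Type;

// Plausible reconstruction: a union contributes each member's type,
// any other schema contributes just its own type.
private static List<Type> unwrapSchemaType(final Schema schema) {
    if (schema.getType() == Type.UNION) {
        return schema.getTypes().stream().map(Schema::getType).collect(Collectors.toList());
    }
    return Collections.singletonList(schema.getType());
}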
Also used : Schema(org.apache.avro.Schema) Field(org.apache.avro.Schema.Field) Files(java.nio.file.Files) InstanceOfAssertFactories(org.assertj.core.api.InstanceOfAssertFactories) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) Test(org.junit.jupiter.api.Test) List(java.util.List) BeforeAll(org.junit.jupiter.api.BeforeAll) Map(java.util.Map) Entry(java.util.Map.Entry) Path(java.nio.file.Path) Type(org.apache.avro.Schema.Type)

Example 80 with Type

use of org.apache.avro.Schema.Type in project quick by bakdata.

the class GraphQLToAvroConverterTest method shouldConvertGraphQLSchemaWithLists.

@Test
void shouldConvertGraphQLSchemaWithLists() {
    final Schema parsedSchema = this.graphQLToAvroConverter.convertToSchema(contractSchema);
    assertThat(parsedSchema.getName()).isEqualTo("Contract");
    assertThat(parsedSchema.getField("_id")).isNotNull().extracting(field -> field.schema().getType()).isEqualTo(Type.STRING);
    assertThat(parsedSchema.getField("policyHolderId")).isNotNull().extracting(Field::schema).hasFieldOrPropertyWithValue("type", Type.UNION).extracting(Schema::getTypes, InstanceOfAssertFactories.list(Schema.class)).satisfies(types -> assertThat(types).extracting(Schema::getType).containsExactly(Type.NULL, Type.ARRAY)).last(InstanceOfAssertFactories.type(Schema.class)).hasFieldOrPropertyWithValue("type", Type.ARRAY).extracting(Schema::getElementType).satisfies(schema -> assertThat(schema.getType()).isEqualTo(Type.UNION)).extracting(Schema::getTypes, InstanceOfAssertFactories.list(Schema.class)).satisfies(types -> assertThat(types).extracting(Schema::getType).containsExactly(Type.NULL, Type.RECORD)).last(InstanceOfAssertFactories.type(Schema.class)).hasFieldOrPropertyWithValue("name", "PersonGrainValue").hasFieldOrPropertyWithValue("type", Type.RECORD).extracting(Schema::getFields, InstanceOfAssertFactories.list(Field.class)).hasSize(3).satisfies(fields -> assertThat(fields).extracting(Field::name).containsExactly("_in_utc", "_v", "_c")).satisfies(fields -> assertThat(fields).flatExtracting(field -> unwrapSchemaType(field.schema())).containsExactly(Type.STRING, Type.STRING, Type.NULL, Type.FLOAT));
    assertThat(parsedSchema.getField("insuredPersonId")).isNotNull().extracting(Field::schema).hasFieldOrPropertyWithValue("type", Type.UNION).extracting(Schema::getTypes, InstanceOfAssertFactories.list(Schema.class)).satisfies(types -> assertThat(types).extracting(Schema::getType).containsExactly(Type.NULL, Type.ARRAY)).last(InstanceOfAssertFactories.type(Schema.class)).hasFieldOrPropertyWithValue("type", Type.ARRAY).extracting(Schema::getElementType).satisfies(schema -> assertThat(schema.getType()).isEqualTo(Type.UNION)).extracting(Schema::getTypes, InstanceOfAssertFactories.list(Schema.class)).satisfies(types -> assertThat(types).extracting(Schema::getType).containsExactly(Type.NULL, Type.RECORD)).last(InstanceOfAssertFactories.type(Schema.class)).hasFieldOrPropertyWithValue("name", "PersonGrainValue").hasFieldOrPropertyWithValue("type", Type.RECORD).extracting(Schema::getFields, InstanceOfAssertFactories.list(Field.class)).hasSize(3).satisfies(fields -> assertThat(fields).extracting(Field::name).containsExactly("_in_utc", "_v", "_c")).satisfies(fields -> assertThat(fields).flatExtracting(field -> unwrapSchemaType(field.schema())).containsExactly(Type.STRING, Type.STRING, Type.NULL, Type.FLOAT));
    assertThat(parsedSchema.getField("term")).isNotNull().extracting(Field::schema).hasFieldOrPropertyWithValue("type", Type.UNION).extracting(Schema::getTypes, InstanceOfAssertFactories.list(Schema.class)).satisfies(types -> assertThat(types).extracting(Schema::getType).containsExactly(Type.NULL, Type.ARRAY)).last(InstanceOfAssertFactories.type(Schema.class)).hasFieldOrPropertyWithValue("type", Type.ARRAY).extracting(Schema::getElementType).satisfies(schema -> assertThat(schema.getType()).isEqualTo(Type.UNION)).extracting(Schema::getTypes, InstanceOfAssertFactories.list(Schema.class)).satisfies(types -> assertThat(types).extracting(Schema::getType).containsExactly(Type.NULL, Type.RECORD)).last(InstanceOfAssertFactories.type(Schema.class)).hasFieldOrPropertyWithValue("name", "GrainValue").hasFieldOrPropertyWithValue("type", Type.RECORD).extracting(Schema::getFields, InstanceOfAssertFactories.list(Field.class)).hasSize(3).satisfies(fields -> assertThat(fields).extracting(Field::name).containsExactly("_in_utc", "_v", "_c")).satisfies(fields -> assertThat(fields).flatExtracting(field -> unwrapSchemaType(field.schema())).containsExactly(Type.STRING, Type.STRING, Type.NULL, Type.FLOAT));
    assertThat(parsedSchema.getField("value")).isNotNull().extracting(Field::schema).hasFieldOrPropertyWithValue("type", Type.UNION).extracting(Schema::getTypes, InstanceOfAssertFactories.list(Schema.class)).satisfies(types -> assertThat(types).extracting(Schema::getType).containsExactly(Type.NULL, Type.ARRAY)).last(InstanceOfAssertFactories.type(Schema.class)).hasFieldOrPropertyWithValue("type", Type.ARRAY).extracting(Schema::getElementType).satisfies(schema -> assertThat(schema.getType()).isEqualTo(Type.UNION)).extracting(Schema::getTypes, InstanceOfAssertFactories.list(Schema.class)).satisfies(types -> assertThat(types).extracting(Schema::getType).containsExactly(Type.NULL, Type.RECORD)).last(InstanceOfAssertFactories.type(Schema.class)).hasFieldOrPropertyWithValue("name", "GrainValue").hasFieldOrPropertyWithValue("type", Type.RECORD).extracting(Schema::getFields, InstanceOfAssertFactories.list(Field.class)).hasSize(3).satisfies(fields -> assertThat(fields).extracting(Field::name).containsExactly("_in_utc", "_v", "_c")).satisfies(fields -> assertThat(fields).flatExtracting(field -> unwrapSchemaType(field.schema())).containsExactly(Type.STRING, Type.STRING, Type.NULL, Type.FLOAT));
}
Also used : Schema(org.apache.avro.Schema) Field(org.apache.avro.Schema.Field) Files(java.nio.file.Files) InstanceOfAssertFactories(org.assertj.core.api.InstanceOfAssertFactories) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) Test(org.junit.jupiter.api.Test) List(java.util.List) BeforeAll(org.junit.jupiter.api.BeforeAll) Map(java.util.Map) Entry(java.util.Map.Entry) Path(java.nio.file.Path) Type(org.apache.avro.Schema.Type)

Aggregations

Type (org.apache.avro.Schema.Type): 80
Schema (org.apache.avro.Schema): 58
Field (org.apache.avro.Schema.Field): 32
Map (java.util.Map): 20
List (java.util.List): 16
HashMap (java.util.HashMap): 15
ArrayList (java.util.ArrayList): 13
ByteBuffer (java.nio.ByteBuffer): 11
Collectors (java.util.stream.Collectors): 11
IOException (java.io.IOException): 10
LogicalType (org.apache.avro.LogicalType): 8
LinkedHashMap (java.util.LinkedHashMap): 7
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 7
ImmutableMap (com.google.common.collect.ImmutableMap): 6
Arrays (java.util.Arrays): 5
PersistentBase (org.apache.gora.persistency.impl.PersistentBase): 5
Test (org.junit.Test): 5
BaseRuntimeChildDefinition (ca.uhn.fhir.context.BaseRuntimeChildDefinition): 4
BaseRuntimeElementDefinition (ca.uhn.fhir.context.BaseRuntimeElementDefinition): 4
DataType (com.linkedin.pinot.common.data.FieldSpec.DataType): 4