Search in sources :

Example 1 with CreateSchemaRequest

Usage of com.amazonaws.services.personalize.model.CreateSchemaRequest in the knime-cloud project by knime.

The method createSchema of the class AbstractAmazonPersonalizeDataUploadNodeModel.

/**
 * Resolves the ARN of the Amazon Personalize schema that corresponds to the metadata columns of the given
 * table spec, creating that schema if no schema with the derived name exists yet.
 * <p>
 * Only columns whose name starts with {@code PREFIX_METADATA_FIELD} contribute. Each of them is added as a
 * field to the Avro record, and its Avro type is appended to the schema name. The resulting name is the value
 * of {@code getSchemaNamePrefix()} followed by one {@code -TYPE} segment per metadata column.
 *
 * @param personalizeClient the client used to list the existing schemas and, if needed, to create a new one
 * @param spec the spec of the table whose metadata columns define the schema fields
 * @return the ARN of the reused or newly created schema
 */
private String createSchema(final AmazonPersonalize personalizeClient, final DataTableSpec spec) {
    final StringBuilder nameBuilder = new StringBuilder(getSchemaNamePrefix());
    FieldAssembler<Schema> fields = createFieldAssembler(SCHEMA_NAMESPACE);
    for (final String columnName : spec.getColumnNames()) {
        // Only metadata columns become part of the schema; everything else is skipped.
        if (columnName.startsWith(PREFIX_METADATA_FIELD)) {
            final DataColumnSpec columnSpec = spec.getColumnSpec(columnName);

            // Map the column type to an Avro type; anything that is not string/int/long falls back to DOUBLE.
            final Type avroType;
            if (columnSpec.getType().isCompatible(StringValue.class)) {
                avroType = Type.STRING;
            } else if (columnSpec.getType().isCompatible(IntValue.class)) {
                avroType = Type.INT;
            } else if (columnSpec.getType().isCompatible(LongValue.class)) {
                avroType = Type.LONG;
            } else {
                avroType = Type.DOUBLE;
            }
            // String columns are the only ones flagged as categorical.
            final boolean categorical = avroType == Type.STRING;

            nameBuilder.append('-').append(avroType);
            // 'categorical' must be set for metadata
            fields = fields.name(columnName)
                .prop("categorical", categorical)
                .type(Schema.create(avroType))
                .noDefault();
        }
    }
    final String schemaName = nameBuilder.toString();

    // Look for a previously created schema with the same name so that it can be reused.
    final Optional<DatasetSchemaSummary> existing = AmazonPersonalizeUtils.listAllSchemas(personalizeClient)
        .stream()
        .filter(summary -> summary.getName().equals(schemaName))
        .findAny();

    if (!existing.isPresent()) {
        // No schema with that name yet: finish the Avro record and register it.
        final String definition = fields.endRecord().toString();
        final CreateSchemaRequest request = new CreateSchemaRequest()
            .withName(schemaName)
            .withSchema(definition);
        return personalizeClient.createSchema(request).getSchemaArn();
    }
    return existing.get().getSchemaArn();
}
Also used : ConnectionMonitor(org.knime.base.filehandling.remote.files.ConnectionMonitor) Arrays(java.util.Arrays) NodeSettingsRO(org.knime.core.node.NodeSettingsRO) AmazonConnectionInformationPortObject(org.knime.cloud.aws.util.AmazonConnectionInformationPortObject) CSVWriter(org.knime.base.node.io.csvwriter.CSVWriter) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) URISyntaxException(java.net.URISyntaxException) ListDatasetGroupsResult(com.amazonaws.services.personalize.model.ListDatasetGroupsResult) DescribeDatasetGroupResult(com.amazonaws.services.personalize.model.DescribeDatasetGroupResult) RemoteFile(org.knime.base.filehandling.remote.files.RemoteFile) CreateDatasetGroupResult(com.amazonaws.services.personalize.model.CreateDatasetGroupResult) CreateDatasetImportJobRequest(com.amazonaws.services.personalize.model.CreateDatasetImportJobRequest) InvalidInputException(com.amazonaws.services.personalize.model.InvalidInputException) Status(org.knime.cloud.aws.mlservices.utils.personalize.AmazonPersonalizeUtils.Status) DataColumnSpec(org.knime.core.data.DataColumnSpec) Map(java.util.Map) FieldAssembler(org.apache.avro.SchemaBuilder.FieldAssembler) URI(java.net.URI) DeleteDatasetGroupRequest(com.amazonaws.services.personalize.model.DeleteDatasetGroupRequest) DescribeDatasetImportJobRequest(com.amazonaws.services.personalize.model.DescribeDatasetImportJobRequest) PortType(org.knime.core.node.port.PortType) FileWriterSettings(org.knime.base.node.io.csvwriter.FileWriterSettings) IntValue(org.knime.core.data.IntValue) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) Schema(org.apache.avro.Schema) AmazonPersonalize(com.amazonaws.services.personalize.AmazonPersonalize) NodeModel(org.knime.core.node.NodeModel) Collectors(java.util.stream.Collectors) List(java.util.List) BufferedDataTable(org.knime.core.node.BufferedDataTable) 
RemoteFileFactory(org.knime.base.filehandling.remote.files.RemoteFileFactory) Optional(java.util.Optional) DataSource(com.amazonaws.services.personalize.model.DataSource) DescribeDatasetImportJobResult(com.amazonaws.services.personalize.model.DescribeDatasetImportJobResult) PortObject(org.knime.core.node.port.PortObject) LongValue(org.knime.core.data.LongValue) DataTableSpec(org.knime.core.data.DataTableSpec) DatasetGroupSummary(com.amazonaws.services.personalize.model.DatasetGroupSummary) DescribeDatasetGroupRequest(com.amazonaws.services.personalize.model.DescribeDatasetGroupRequest) HashMap(java.util.HashMap) DatasetSummary(com.amazonaws.services.personalize.model.DatasetSummary) BufferedOutputStream(java.io.BufferedOutputStream) ExecutionContext(org.knime.core.node.ExecutionContext) CloudConnectionInformation(org.knime.cloud.core.util.port.CloudConnectionInformation) Connection(org.knime.base.filehandling.remote.files.Connection) AmazonPersonalizeUtils(org.knime.cloud.aws.mlservices.utils.personalize.AmazonPersonalizeUtils) CreateSchemaRequest(com.amazonaws.services.personalize.model.CreateSchemaRequest) OutputStreamWriter(java.io.OutputStreamWriter) AmazonPersonalizeConnection(org.knime.cloud.aws.mlservices.nodes.personalize.AmazonPersonalizeConnection) DataCell(org.knime.core.data.DataCell) Type(org.apache.avro.Schema.Type) StringValue(org.knime.core.data.StringValue) CreateDatasetGroupRequest(com.amazonaws.services.personalize.model.CreateDatasetGroupRequest) ConnectionInformation(org.knime.base.filehandling.remote.connectioninformation.port.ConnectionInformation) CloseableRowIterator(org.knime.core.data.container.CloseableRowIterator) CreateDatasetRequest(com.amazonaws.services.personalize.model.CreateDatasetRequest) ListDatasetsResult(com.amazonaws.services.personalize.model.ListDatasetsResult) FileOutputStream(java.io.FileOutputStream) PortObjectSpec(org.knime.core.node.port.PortObjectSpec) IOException(java.io.IOException) 
DatasetSchemaSummary(com.amazonaws.services.personalize.model.DatasetSchemaSummary) DeleteDatasetRequest(com.amazonaws.services.personalize.model.DeleteDatasetRequest) File(java.io.File) DataRow(org.knime.core.data.DataRow) NodeSettingsWO(org.knime.core.node.NodeSettingsWO) ListDatasetGroupsRequest(com.amazonaws.services.personalize.model.ListDatasetGroupsRequest) ListDatasetsRequest(com.amazonaws.services.personalize.model.ListDatasetsRequest) StringUtils(com.amazonaws.util.StringUtils) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) FileUtil(org.knime.core.util.FileUtil) CreateSchemaRequest(com.amazonaws.services.personalize.model.CreateSchemaRequest) Schema(org.apache.avro.Schema) PortType(org.knime.core.node.port.PortType) Type(org.apache.avro.Schema.Type) DatasetSchemaSummary(com.amazonaws.services.personalize.model.DatasetSchemaSummary) DataColumnSpec(org.knime.core.data.DataColumnSpec) IntValue(org.knime.core.data.IntValue)

Aggregations

AmazonPersonalize (com.amazonaws.services.personalize.AmazonPersonalize)1 CreateDatasetGroupRequest (com.amazonaws.services.personalize.model.CreateDatasetGroupRequest)1 CreateDatasetGroupResult (com.amazonaws.services.personalize.model.CreateDatasetGroupResult)1 CreateDatasetImportJobRequest (com.amazonaws.services.personalize.model.CreateDatasetImportJobRequest)1 CreateDatasetRequest (com.amazonaws.services.personalize.model.CreateDatasetRequest)1 CreateSchemaRequest (com.amazonaws.services.personalize.model.CreateSchemaRequest)1 DataSource (com.amazonaws.services.personalize.model.DataSource)1 DatasetGroupSummary (com.amazonaws.services.personalize.model.DatasetGroupSummary)1 DatasetSchemaSummary (com.amazonaws.services.personalize.model.DatasetSchemaSummary)1 DatasetSummary (com.amazonaws.services.personalize.model.DatasetSummary)1 DeleteDatasetGroupRequest (com.amazonaws.services.personalize.model.DeleteDatasetGroupRequest)1 DeleteDatasetRequest (com.amazonaws.services.personalize.model.DeleteDatasetRequest)1 DescribeDatasetGroupRequest (com.amazonaws.services.personalize.model.DescribeDatasetGroupRequest)1 DescribeDatasetGroupResult (com.amazonaws.services.personalize.model.DescribeDatasetGroupResult)1 DescribeDatasetImportJobRequest (com.amazonaws.services.personalize.model.DescribeDatasetImportJobRequest)1 DescribeDatasetImportJobResult (com.amazonaws.services.personalize.model.DescribeDatasetImportJobResult)1 InvalidInputException (com.amazonaws.services.personalize.model.InvalidInputException)1 ListDatasetGroupsRequest (com.amazonaws.services.personalize.model.ListDatasetGroupsRequest)1 ListDatasetGroupsResult (com.amazonaws.services.personalize.model.ListDatasetGroupsResult)1 ListDatasetsRequest (com.amazonaws.services.personalize.model.ListDatasetsRequest)1