Search in sources :

Example 11 with AmazonPersonalize

Use of com.amazonaws.services.personalize.AmazonPersonalize in the knime-cloud project by knime.

Method createSchema of the class AbstractAmazonPersonalizeDataUploadNodeModel.

/**
 * Returns the ARN of a Personalize schema describing the metadata columns of the given table spec.
 *
 * <p>Only columns whose name starts with {@code PREFIX_METADATA_FIELD} contribute a field. The schema
 * name is {@code getSchemaNamePrefix()} followed by {@code "-<avro type>"} for each such column, in
 * column order. If a schema with exactly that name is already listed for the client, its ARN is
 * returned; otherwise a new schema is created and its ARN is returned.
 *
 * <p>NOTE(review): the lookup matches on the schema name only, and the name encodes column types but
 * not column names -- confirm that reusing a schema across differently named columns is intended.
 *
 * @param personalizeClient the client used to list existing schemas and to create a new one
 * @param spec the table spec whose metadata columns define the schema fields
 * @return the ARN of the reused or newly created schema
 */
private String createSchema(final AmazonPersonalize personalizeClient, final DataTableSpec spec) {
    final StringBuilder nameBuilder = new StringBuilder(getSchemaNamePrefix());
    FieldAssembler<Schema> fields = createFieldAssembler(SCHEMA_NAMESPACE);
    for (final String columnName : spec.getColumnNames()) {
        if (columnName.startsWith(PREFIX_METADATA_FIELD)) {
            final Type avroType = resolveAvroType(spec.getColumnSpec(columnName));
            // Only string-compatible columns are flagged as categorical.
            final boolean categorical = avroType == Type.STRING;
            nameBuilder.append("-").append(avroType);
            // 'categorical' must be set for metadata
            fields = fields.name(columnName).prop("categorical", categorical).type(Schema.create(avroType))
                .noDefault();
        }
    }
    final String schemaName = nameBuilder.toString();

    // Reuse a previously created schema that carries the same name.
    final List<DatasetSchemaSummary> knownSchemas = AmazonPersonalizeUtils.listAllSchemas(personalizeClient);
    final Optional<DatasetSchemaSummary> reusableSchema =
        knownSchemas.stream().filter(summary -> summary.getName().equals(schemaName)).findAny();
    if (reusableSchema.isPresent()) {
        return reusableSchema.get().getSchemaArn();
    }

    // No schema of that name exists yet: finish the record and register it.
    final String schemaJson = fields.endRecord().toString();
    final CreateSchemaRequest request = new CreateSchemaRequest().withName(schemaName).withSchema(schemaJson);
    return personalizeClient.createSchema(request).getSchemaArn();
}

/**
 * Maps a column to the Avro type used for its schema field.
 *
 * <p>The checks run in the order string, int, long; any column compatible with none of them is
 * mapped to {@code DOUBLE}.
 *
 * @param colSpec the column to map
 * @return the Avro type for the column
 */
private static Type resolveAvroType(final DataColumnSpec colSpec) {
    if (colSpec.getType().isCompatible(StringValue.class)) {
        return Type.STRING;
    }
    if (colSpec.getType().isCompatible(IntValue.class)) {
        return Type.INT;
    }
    if (colSpec.getType().isCompatible(LongValue.class)) {
        return Type.LONG;
    }
    return Type.DOUBLE;
}
Also used : ConnectionMonitor(org.knime.base.filehandling.remote.files.ConnectionMonitor) Arrays(java.util.Arrays) NodeSettingsRO(org.knime.core.node.NodeSettingsRO) AmazonConnectionInformationPortObject(org.knime.cloud.aws.util.AmazonConnectionInformationPortObject) CSVWriter(org.knime.base.node.io.csvwriter.CSVWriter) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) URISyntaxException(java.net.URISyntaxException) ListDatasetGroupsResult(com.amazonaws.services.personalize.model.ListDatasetGroupsResult) DescribeDatasetGroupResult(com.amazonaws.services.personalize.model.DescribeDatasetGroupResult) RemoteFile(org.knime.base.filehandling.remote.files.RemoteFile) CreateDatasetGroupResult(com.amazonaws.services.personalize.model.CreateDatasetGroupResult) CreateDatasetImportJobRequest(com.amazonaws.services.personalize.model.CreateDatasetImportJobRequest) InvalidInputException(com.amazonaws.services.personalize.model.InvalidInputException) Status(org.knime.cloud.aws.mlservices.utils.personalize.AmazonPersonalizeUtils.Status) DataColumnSpec(org.knime.core.data.DataColumnSpec) Map(java.util.Map) FieldAssembler(org.apache.avro.SchemaBuilder.FieldAssembler) URI(java.net.URI) DeleteDatasetGroupRequest(com.amazonaws.services.personalize.model.DeleteDatasetGroupRequest) DescribeDatasetImportJobRequest(com.amazonaws.services.personalize.model.DescribeDatasetImportJobRequest) PortType(org.knime.core.node.port.PortType) FileWriterSettings(org.knime.base.node.io.csvwriter.FileWriterSettings) IntValue(org.knime.core.data.IntValue) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) Schema(org.apache.avro.Schema) AmazonPersonalize(com.amazonaws.services.personalize.AmazonPersonalize) NodeModel(org.knime.core.node.NodeModel) Collectors(java.util.stream.Collectors) List(java.util.List) BufferedDataTable(org.knime.core.node.BufferedDataTable) 
RemoteFileFactory(org.knime.base.filehandling.remote.files.RemoteFileFactory) Optional(java.util.Optional) DataSource(com.amazonaws.services.personalize.model.DataSource) DescribeDatasetImportJobResult(com.amazonaws.services.personalize.model.DescribeDatasetImportJobResult) PortObject(org.knime.core.node.port.PortObject) LongValue(org.knime.core.data.LongValue) DataTableSpec(org.knime.core.data.DataTableSpec) DatasetGroupSummary(com.amazonaws.services.personalize.model.DatasetGroupSummary) DescribeDatasetGroupRequest(com.amazonaws.services.personalize.model.DescribeDatasetGroupRequest) HashMap(java.util.HashMap) DatasetSummary(com.amazonaws.services.personalize.model.DatasetSummary) BufferedOutputStream(java.io.BufferedOutputStream) ExecutionContext(org.knime.core.node.ExecutionContext) CloudConnectionInformation(org.knime.cloud.core.util.port.CloudConnectionInformation) Connection(org.knime.base.filehandling.remote.files.Connection) AmazonPersonalizeUtils(org.knime.cloud.aws.mlservices.utils.personalize.AmazonPersonalizeUtils) CreateSchemaRequest(com.amazonaws.services.personalize.model.CreateSchemaRequest) OutputStreamWriter(java.io.OutputStreamWriter) AmazonPersonalizeConnection(org.knime.cloud.aws.mlservices.nodes.personalize.AmazonPersonalizeConnection) DataCell(org.knime.core.data.DataCell) Type(org.apache.avro.Schema.Type) StringValue(org.knime.core.data.StringValue) CreateDatasetGroupRequest(com.amazonaws.services.personalize.model.CreateDatasetGroupRequest) ConnectionInformation(org.knime.base.filehandling.remote.connectioninformation.port.ConnectionInformation) CloseableRowIterator(org.knime.core.data.container.CloseableRowIterator) CreateDatasetRequest(com.amazonaws.services.personalize.model.CreateDatasetRequest) ListDatasetsResult(com.amazonaws.services.personalize.model.ListDatasetsResult) FileOutputStream(java.io.FileOutputStream) PortObjectSpec(org.knime.core.node.port.PortObjectSpec) IOException(java.io.IOException) 
DatasetSchemaSummary(com.amazonaws.services.personalize.model.DatasetSchemaSummary) DeleteDatasetRequest(com.amazonaws.services.personalize.model.DeleteDatasetRequest) File(java.io.File) DataRow(org.knime.core.data.DataRow) NodeSettingsWO(org.knime.core.node.NodeSettingsWO) ListDatasetGroupsRequest(com.amazonaws.services.personalize.model.ListDatasetGroupsRequest) ListDatasetsRequest(com.amazonaws.services.personalize.model.ListDatasetsRequest) StringUtils(com.amazonaws.util.StringUtils) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) FileUtil(org.knime.core.util.FileUtil) CreateSchemaRequest(com.amazonaws.services.personalize.model.CreateSchemaRequest) Schema(org.apache.avro.Schema) PortType(org.knime.core.node.port.PortType) Type(org.apache.avro.Schema.Type) DatasetSchemaSummary(com.amazonaws.services.personalize.model.DatasetSchemaSummary) DataColumnSpec(org.knime.core.data.DataColumnSpec) IntValue(org.knime.core.data.IntValue)

Example 12 with AmazonPersonalize

Use of com.amazonaws.services.personalize.AmazonPersonalize in the knime-cloud project by knime.

Method checkAlreadyExistingDataset of the class AbstractAmazonPersonalizeDataUploadNodeModel.

/**
 * Checks whether the dataset group already holds a dataset of this node's dataset type and, if so,
 * either aborts or deletes it, depending on the configured overwrite policy.
 *
 * <p>The datasets are taken from the result of a single {@code listDatasets} call for the given
 * dataset group. Progress is set to 1 once the check (and any deletion) has finished.
 *
 * @param personalizeClient the client used to list and delete datasets
 * @param datasetGroupArn the ARN of the dataset group to inspect
 * @param exec the execution context that receives status messages and progress
 * @throws IllegalStateException if a matching dataset exists and the overwrite policy is {@code ABORT}
 * @throws InterruptedException presumably propagated from {@code deleteDataset} -- confirm against
 *             that method
 */
private void checkAlreadyExistingDataset(final AmazonPersonalize personalizeClient, final String datasetGroupArn, final ExecutionContext exec) throws InterruptedException {
    exec.setMessage("Checking already existing datasets");

    final ListDatasetsRequest listRequest = new ListDatasetsRequest().withDatasetGroupArn(datasetGroupArn);
    final ListDatasetsResult listResult = personalizeClient.listDatasets(listRequest);
    final Optional<DatasetSummary> existing = listResult.getDatasets().stream()
        .filter(summary -> summary.getDatasetType().equals(m_datasetType))
        .findFirst();

    if (existing.isPresent()) {
        final boolean abortOnExisting =
            m_settings.getOverwriteDatasetPolicy().equals(OverwritePolicy.ABORT.toString());
        if (abortOnExisting) {
            // The user chose not to overwrite, so fail instead of touching the existing dataset.
            throw new IllegalStateException("A dataset of type '" + getDatasetType() + "' already exists. Either choose a different dataset group or select to overwrite the existing " + "dataset.");
        }
        // Any other policy: remove the existing dataset.
        exec.setMessage("Deleting existing dataset");
        deleteDataset(personalizeClient, datasetGroupArn, existing.get().getDatasetArn());
    }

    exec.setProgress(1);
}
Also used : ConnectionMonitor(org.knime.base.filehandling.remote.files.ConnectionMonitor) Arrays(java.util.Arrays) NodeSettingsRO(org.knime.core.node.NodeSettingsRO) AmazonConnectionInformationPortObject(org.knime.cloud.aws.util.AmazonConnectionInformationPortObject) CSVWriter(org.knime.base.node.io.csvwriter.CSVWriter) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) URISyntaxException(java.net.URISyntaxException) ListDatasetGroupsResult(com.amazonaws.services.personalize.model.ListDatasetGroupsResult) DescribeDatasetGroupResult(com.amazonaws.services.personalize.model.DescribeDatasetGroupResult) RemoteFile(org.knime.base.filehandling.remote.files.RemoteFile) CreateDatasetGroupResult(com.amazonaws.services.personalize.model.CreateDatasetGroupResult) CreateDatasetImportJobRequest(com.amazonaws.services.personalize.model.CreateDatasetImportJobRequest) InvalidInputException(com.amazonaws.services.personalize.model.InvalidInputException) Status(org.knime.cloud.aws.mlservices.utils.personalize.AmazonPersonalizeUtils.Status) DataColumnSpec(org.knime.core.data.DataColumnSpec) Map(java.util.Map) FieldAssembler(org.apache.avro.SchemaBuilder.FieldAssembler) URI(java.net.URI) DeleteDatasetGroupRequest(com.amazonaws.services.personalize.model.DeleteDatasetGroupRequest) DescribeDatasetImportJobRequest(com.amazonaws.services.personalize.model.DescribeDatasetImportJobRequest) PortType(org.knime.core.node.port.PortType) FileWriterSettings(org.knime.base.node.io.csvwriter.FileWriterSettings) IntValue(org.knime.core.data.IntValue) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) Schema(org.apache.avro.Schema) AmazonPersonalize(com.amazonaws.services.personalize.AmazonPersonalize) NodeModel(org.knime.core.node.NodeModel) Collectors(java.util.stream.Collectors) List(java.util.List) BufferedDataTable(org.knime.core.node.BufferedDataTable) 
RemoteFileFactory(org.knime.base.filehandling.remote.files.RemoteFileFactory) Optional(java.util.Optional) DataSource(com.amazonaws.services.personalize.model.DataSource) DescribeDatasetImportJobResult(com.amazonaws.services.personalize.model.DescribeDatasetImportJobResult) PortObject(org.knime.core.node.port.PortObject) LongValue(org.knime.core.data.LongValue) DataTableSpec(org.knime.core.data.DataTableSpec) DatasetGroupSummary(com.amazonaws.services.personalize.model.DatasetGroupSummary) DescribeDatasetGroupRequest(com.amazonaws.services.personalize.model.DescribeDatasetGroupRequest) HashMap(java.util.HashMap) DatasetSummary(com.amazonaws.services.personalize.model.DatasetSummary) BufferedOutputStream(java.io.BufferedOutputStream) ExecutionContext(org.knime.core.node.ExecutionContext) CloudConnectionInformation(org.knime.cloud.core.util.port.CloudConnectionInformation) Connection(org.knime.base.filehandling.remote.files.Connection) AmazonPersonalizeUtils(org.knime.cloud.aws.mlservices.utils.personalize.AmazonPersonalizeUtils) CreateSchemaRequest(com.amazonaws.services.personalize.model.CreateSchemaRequest) OutputStreamWriter(java.io.OutputStreamWriter) AmazonPersonalizeConnection(org.knime.cloud.aws.mlservices.nodes.personalize.AmazonPersonalizeConnection) DataCell(org.knime.core.data.DataCell) Type(org.apache.avro.Schema.Type) StringValue(org.knime.core.data.StringValue) CreateDatasetGroupRequest(com.amazonaws.services.personalize.model.CreateDatasetGroupRequest) ConnectionInformation(org.knime.base.filehandling.remote.connectioninformation.port.ConnectionInformation) CloseableRowIterator(org.knime.core.data.container.CloseableRowIterator) CreateDatasetRequest(com.amazonaws.services.personalize.model.CreateDatasetRequest) ListDatasetsResult(com.amazonaws.services.personalize.model.ListDatasetsResult) FileOutputStream(java.io.FileOutputStream) PortObjectSpec(org.knime.core.node.port.PortObjectSpec) IOException(java.io.IOException) 
DatasetSchemaSummary(com.amazonaws.services.personalize.model.DatasetSchemaSummary) DeleteDatasetRequest(com.amazonaws.services.personalize.model.DeleteDatasetRequest) File(java.io.File) DataRow(org.knime.core.data.DataRow) NodeSettingsWO(org.knime.core.node.NodeSettingsWO) ListDatasetGroupsRequest(com.amazonaws.services.personalize.model.ListDatasetGroupsRequest) ListDatasetsRequest(com.amazonaws.services.personalize.model.ListDatasetsRequest) StringUtils(com.amazonaws.util.StringUtils) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) FileUtil(org.knime.core.util.FileUtil) DatasetSummary(com.amazonaws.services.personalize.model.DatasetSummary) ListDatasetsResult(com.amazonaws.services.personalize.model.ListDatasetsResult) ListDatasetsRequest(com.amazonaws.services.personalize.model.ListDatasetsRequest)

Aggregations

AmazonPersonalize (com.amazonaws.services.personalize.AmazonPersonalize)12 AmazonPersonalizeConnection (org.knime.cloud.aws.mlservices.nodes.personalize.AmazonPersonalizeConnection)12 CloudConnectionInformation (org.knime.cloud.core.util.port.CloudConnectionInformation)11 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)9 AmazonConnectionInformationPortObject (org.knime.cloud.aws.util.AmazonConnectionInformationPortObject)8 AmazonPersonalizeUtils (org.knime.cloud.aws.mlservices.utils.personalize.AmazonPersonalizeUtils)6 DataTableSpec (org.knime.core.data.DataTableSpec)6 BufferedDataTable (org.knime.core.node.BufferedDataTable)6 NodeSettingsRO (org.knime.core.node.NodeSettingsRO)6 NodeSettingsWO (org.knime.core.node.NodeSettingsWO)6 PortObjectSpec (org.knime.core.node.port.PortObjectSpec)6 CreateDatasetRequest (com.amazonaws.services.personalize.model.CreateDatasetRequest)5 File (java.io.File)5 IOException (java.io.IOException)5 CreateDatasetGroupRequest (com.amazonaws.services.personalize.model.CreateDatasetGroupRequest)4 CreateDatasetGroupResult (com.amazonaws.services.personalize.model.CreateDatasetGroupResult)4 CreateDatasetImportJobRequest (com.amazonaws.services.personalize.model.CreateDatasetImportJobRequest)4 CreateSchemaRequest (com.amazonaws.services.personalize.model.CreateSchemaRequest)4 DatasetGroupSummary (com.amazonaws.services.personalize.model.DatasetGroupSummary)4 URI (java.net.URI)4