Search in sources :

Example 1 with CreateDatasetGroupRequest

Use of com.amazonaws.services.personalize.model.CreateDatasetGroupRequest in the project knime-cloud by knime.

The method createDatasetGroup of the class AbstractAmazonPersonalizeDataUploadNodeModel.

/**
 * Makes sure the dataset group selected in the node settings exists and is ACTIVE, creating it if no group
 * with that name is found.
 *
 * <p>
 * If the group ends up in the CREATE FAILED state, an {@link IllegalStateException} is thrown. When the group
 * was created by this call, it is deleted again first so that no broken group is left behind.
 * </p>
 *
 * @param personalizeClient the Amazon Personalize client used for all service calls
 * @param exec the execution context used to report messages and progress
 * @return the ARN of the existing or newly created dataset group
 * @throws InterruptedException if waiting for the dataset group to become ACTIVE is interrupted
 * @throws IllegalStateException if the dataset group is in the CREATE FAILED state
 */
private String createDatasetGroup(final AmazonPersonalize personalizeClient, final ExecutionContext exec) throws InterruptedException {
    exec.setMessage("Creating dataset group");
    final String datasetGroupName = m_settings.getSelectedDatasetGroup();
    // Single lookup (over all result pages) instead of an anyMatch followed by a second filter/findFirst
    final Optional<DatasetGroupSummary> existingGroup = findDatasetGroupByName(personalizeClient, datasetGroupName);
    final boolean existing = existingGroup.isPresent();
    final String datasetGroupArn;
    if (existing) {
        datasetGroupArn = existingGroup.get().getDatasetGroupArn();
    } else {
        // Create new dataset group
        final CreateDatasetGroupResult createDatasetGroup = personalizeClient.createDatasetGroup(new CreateDatasetGroupRequest().withName(datasetGroupName));
        datasetGroupArn = createDatasetGroup.getDatasetGroupArn();
    }
    // Wait until dataset group is created and ACTIVE (even if the group already existed, make sure it's ACTIVE)
    final DescribeDatasetGroupRequest describeDatasetGroupRequest = new DescribeDatasetGroupRequest();
    describeDatasetGroupRequest.setDatasetGroupArn(datasetGroupArn);
    // NOTE(review): the second argument of waitUntilActive is presumably a polling interval - confirm in AmazonPersonalizeUtils
    AmazonPersonalizeUtils.waitUntilActive(() -> {
        final DescribeDatasetGroupResult datasetGroupDescription = personalizeClient.describeDatasetGroup(describeDatasetGroupRequest);
        final String status = datasetGroupDescription.getDatasetGroup().getStatus();
        exec.setMessage("Creating dataset group (Status: " + status + ")");
        // Constant-first comparison so that a missing status does not cause a NullPointerException
        if (Status.CREATED_FAILED.getStatus().equals(status)) {
            if (!existing) {
                // Delete the dataset group that we tried to create
                personalizeClient.deleteDatasetGroup(new DeleteDatasetGroupRequest().withDatasetGroupArn(datasetGroupArn));
                // Wait until the dataset group is deleted (should usually be very quick but you never know...)
                try {
                    AmazonPersonalizeUtils.waitUntilActive(() -> !findDatasetGroupByName(personalizeClient, datasetGroupName).isPresent(), 50);
                } catch (InterruptedException ignored) {
                    // Unlikely case. The deletion will be further processed by Amazon, so we do not wait any
                    // longer, but the interrupt flag is restored so that callers can still observe it.
                    Thread.currentThread().interrupt();
                }
                throw new IllegalStateException("Dataset group creation failed. Reason: " + datasetGroupDescription.getDatasetGroup().getFailureReason());
            }
            throw new IllegalStateException("The selected dataset group is in an invalid state: " + Status.CREATED_FAILED.getStatus() + ". Reason: " + datasetGroupDescription.getDatasetGroup().getFailureReason());
        }
        return Status.ACTIVE.getStatus().equals(status);
    }, 500);
    exec.setProgress(1);
    return datasetGroupArn;
}

/**
 * Looks up a dataset group by name, following the service's pagination token so that groups beyond the first
 * result page are found as well.
 *
 * @param personalizeClient the Amazon Personalize client used to list the dataset groups
 * @param datasetGroupName the name of the dataset group to look for
 * @return the summary of the first dataset group with the given name, or an empty optional if there is none
 */
private static Optional<DatasetGroupSummary> findDatasetGroupByName(final AmazonPersonalize personalizeClient, final String datasetGroupName) {
    final ListDatasetGroupsRequest request = new ListDatasetGroupsRequest();
    String nextToken = null;
    do {
        final ListDatasetGroupsResult page = personalizeClient.listDatasetGroups(request.withNextToken(nextToken));
        final Optional<DatasetGroupSummary> match = page.getDatasetGroups().stream().filter(e -> e.getName().equals(datasetGroupName)).findFirst();
        if (match.isPresent()) {
            return match;
        }
        nextToken = page.getNextToken();
    } while (nextToken != null);
    return Optional.empty();
}
Also used : ConnectionMonitor(org.knime.base.filehandling.remote.files.ConnectionMonitor) Arrays(java.util.Arrays) NodeSettingsRO(org.knime.core.node.NodeSettingsRO) AmazonConnectionInformationPortObject(org.knime.cloud.aws.util.AmazonConnectionInformationPortObject) CSVWriter(org.knime.base.node.io.csvwriter.CSVWriter) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) URISyntaxException(java.net.URISyntaxException) ListDatasetGroupsResult(com.amazonaws.services.personalize.model.ListDatasetGroupsResult) DescribeDatasetGroupResult(com.amazonaws.services.personalize.model.DescribeDatasetGroupResult) RemoteFile(org.knime.base.filehandling.remote.files.RemoteFile) CreateDatasetGroupResult(com.amazonaws.services.personalize.model.CreateDatasetGroupResult) CreateDatasetImportJobRequest(com.amazonaws.services.personalize.model.CreateDatasetImportJobRequest) InvalidInputException(com.amazonaws.services.personalize.model.InvalidInputException) Status(org.knime.cloud.aws.mlservices.utils.personalize.AmazonPersonalizeUtils.Status) DataColumnSpec(org.knime.core.data.DataColumnSpec) Map(java.util.Map) FieldAssembler(org.apache.avro.SchemaBuilder.FieldAssembler) URI(java.net.URI) DeleteDatasetGroupRequest(com.amazonaws.services.personalize.model.DeleteDatasetGroupRequest) DescribeDatasetImportJobRequest(com.amazonaws.services.personalize.model.DescribeDatasetImportJobRequest) PortType(org.knime.core.node.port.PortType) FileWriterSettings(org.knime.base.node.io.csvwriter.FileWriterSettings) IntValue(org.knime.core.data.IntValue) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) Schema(org.apache.avro.Schema) AmazonPersonalize(com.amazonaws.services.personalize.AmazonPersonalize) NodeModel(org.knime.core.node.NodeModel) Collectors(java.util.stream.Collectors) List(java.util.List) BufferedDataTable(org.knime.core.node.BufferedDataTable) 
RemoteFileFactory(org.knime.base.filehandling.remote.files.RemoteFileFactory) Optional(java.util.Optional) DataSource(com.amazonaws.services.personalize.model.DataSource) DescribeDatasetImportJobResult(com.amazonaws.services.personalize.model.DescribeDatasetImportJobResult) PortObject(org.knime.core.node.port.PortObject) LongValue(org.knime.core.data.LongValue) DataTableSpec(org.knime.core.data.DataTableSpec) DatasetGroupSummary(com.amazonaws.services.personalize.model.DatasetGroupSummary) DescribeDatasetGroupRequest(com.amazonaws.services.personalize.model.DescribeDatasetGroupRequest) HashMap(java.util.HashMap) DatasetSummary(com.amazonaws.services.personalize.model.DatasetSummary) BufferedOutputStream(java.io.BufferedOutputStream) ExecutionContext(org.knime.core.node.ExecutionContext) CloudConnectionInformation(org.knime.cloud.core.util.port.CloudConnectionInformation) Connection(org.knime.base.filehandling.remote.files.Connection) AmazonPersonalizeUtils(org.knime.cloud.aws.mlservices.utils.personalize.AmazonPersonalizeUtils) CreateSchemaRequest(com.amazonaws.services.personalize.model.CreateSchemaRequest) OutputStreamWriter(java.io.OutputStreamWriter) AmazonPersonalizeConnection(org.knime.cloud.aws.mlservices.nodes.personalize.AmazonPersonalizeConnection) DataCell(org.knime.core.data.DataCell) Type(org.apache.avro.Schema.Type) StringValue(org.knime.core.data.StringValue) CreateDatasetGroupRequest(com.amazonaws.services.personalize.model.CreateDatasetGroupRequest) ConnectionInformation(org.knime.base.filehandling.remote.connectioninformation.port.ConnectionInformation) CloseableRowIterator(org.knime.core.data.container.CloseableRowIterator) CreateDatasetRequest(com.amazonaws.services.personalize.model.CreateDatasetRequest) ListDatasetsResult(com.amazonaws.services.personalize.model.ListDatasetsResult) FileOutputStream(java.io.FileOutputStream) PortObjectSpec(org.knime.core.node.port.PortObjectSpec) IOException(java.io.IOException) 
DatasetSchemaSummary(com.amazonaws.services.personalize.model.DatasetSchemaSummary) DeleteDatasetRequest(com.amazonaws.services.personalize.model.DeleteDatasetRequest) File(java.io.File) DataRow(org.knime.core.data.DataRow) NodeSettingsWO(org.knime.core.node.NodeSettingsWO) ListDatasetGroupsRequest(com.amazonaws.services.personalize.model.ListDatasetGroupsRequest) ListDatasetsRequest(com.amazonaws.services.personalize.model.ListDatasetsRequest) StringUtils(com.amazonaws.util.StringUtils) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) FileUtil(org.knime.core.util.FileUtil) DescribeDatasetGroupResult(com.amazonaws.services.personalize.model.DescribeDatasetGroupResult) DatasetGroupSummary(com.amazonaws.services.personalize.model.DatasetGroupSummary) CreateDatasetGroupResult(com.amazonaws.services.personalize.model.CreateDatasetGroupResult) CreateDatasetGroupRequest(com.amazonaws.services.personalize.model.CreateDatasetGroupRequest) ListDatasetGroupsRequest(com.amazonaws.services.personalize.model.ListDatasetGroupsRequest) ListDatasetGroupsResult(com.amazonaws.services.personalize.model.ListDatasetGroupsResult) DescribeDatasetGroupRequest(com.amazonaws.services.personalize.model.DescribeDatasetGroupRequest) DeleteDatasetGroupRequest(com.amazonaws.services.personalize.model.DeleteDatasetGroupRequest)

Aggregations

AmazonPersonalize (com.amazonaws.services.personalize.AmazonPersonalize)1 CreateDatasetGroupRequest (com.amazonaws.services.personalize.model.CreateDatasetGroupRequest)1 CreateDatasetGroupResult (com.amazonaws.services.personalize.model.CreateDatasetGroupResult)1 CreateDatasetImportJobRequest (com.amazonaws.services.personalize.model.CreateDatasetImportJobRequest)1 CreateDatasetRequest (com.amazonaws.services.personalize.model.CreateDatasetRequest)1 CreateSchemaRequest (com.amazonaws.services.personalize.model.CreateSchemaRequest)1 DataSource (com.amazonaws.services.personalize.model.DataSource)1 DatasetGroupSummary (com.amazonaws.services.personalize.model.DatasetGroupSummary)1 DatasetSchemaSummary (com.amazonaws.services.personalize.model.DatasetSchemaSummary)1 DatasetSummary (com.amazonaws.services.personalize.model.DatasetSummary)1 DeleteDatasetGroupRequest (com.amazonaws.services.personalize.model.DeleteDatasetGroupRequest)1 DeleteDatasetRequest (com.amazonaws.services.personalize.model.DeleteDatasetRequest)1 DescribeDatasetGroupRequest (com.amazonaws.services.personalize.model.DescribeDatasetGroupRequest)1 DescribeDatasetGroupResult (com.amazonaws.services.personalize.model.DescribeDatasetGroupResult)1 DescribeDatasetImportJobRequest (com.amazonaws.services.personalize.model.DescribeDatasetImportJobRequest)1 DescribeDatasetImportJobResult (com.amazonaws.services.personalize.model.DescribeDatasetImportJobResult)1 InvalidInputException (com.amazonaws.services.personalize.model.InvalidInputException)1 ListDatasetGroupsRequest (com.amazonaws.services.personalize.model.ListDatasetGroupsRequest)1 ListDatasetGroupsResult (com.amazonaws.services.personalize.model.ListDatasetGroupsResult)1 ListDatasetsRequest (com.amazonaws.services.personalize.model.ListDatasetsRequest)1