Search in sources :

Example 1 with CreateDatasetRequest

Use of com.amazonaws.services.personalize.model.CreateDatasetRequest in project knime-cloud by knime.

In the class AbstractAmazonPersonalizeDataUploadNodeModel, method execute.

/**
 * {@inheritDoc}
 *
 * <p>
 * Writes the filtered and renamed input table as a CSV file, uploads that file to S3 and imports it from there
 * into a newly created Amazon Personalize dataset. The temporary S3 file is removed afterwards, regardless of
 * whether the import succeeded.
 *
 * <p>
 * Failures raised by cleanup steps (deleting the dataset after a failed import, deleting the temporary S3 file)
 * never replace the failure that caused the cleanup; they are attached to it as suppressed exceptions. If the
 * import itself succeeded, a failure to delete the temporary S3 file is still propagated.
 *
 * @return always {@code null}
 */
@Override
protected PortObject[] execute(final PortObject[] inObjects, final ExecutionContext exec) throws Exception {
    // === Write table out as CSV ===  (TODO we may be able to write it directly to S3)
    // Filter included columns
    final BufferedDataTable filterTable = filterTable((BufferedDataTable) inObjects[TABLE_INPUT_PORT_IDX], exec);
    // Rename columns to fit the later created schema
    final BufferedDataTable adaptedTable = renameColumns(filterTable, exec);
    // Check if the input is valid (just a shallow check, there is no clear documentation by Amazon)
    exec.setMessage("Validating input");
    validateInputTableContent(adaptedTable);
    exec.setProgress(0.05);
    // Write the table as CSV to disc
    final URI sourceURI = writeCSV(adaptedTable, exec.createSubExecutionContext(0.1));
    // === Upload CSV to S3 ===
    final CloudConnectionInformation cxnInfo = ((AmazonConnectionInformationPortObject) inObjects[0]).getConnectionInformation();
    final String uniqueFilePath = m_settings.getTarget() + "KNIME-tmp-" + StringUtils.lowerCase(getDatasetType()) + "-file-" + System.currentTimeMillis() + ".csv";
    final RemoteFile<Connection> target = writeToS3(exec.createSubExecutionContext(0.1), sourceURI, cxnInfo, uniqueFilePath);
    // Remembers the failure of the import phase so that a failing cleanup in the finally block cannot mask it
    Throwable primaryFailure = null;
    // === Import data from S3 to Amazon Personalize service ===
    try (final AmazonPersonalizeConnection personalizeConnection = new AmazonPersonalizeConnection(cxnInfo)) {
        final AmazonPersonalize personalizeClient = personalizeConnection.getClient();
        // Create the dataset group ARN or use existing one
        final String datasetGroupArn = createDatasetGroup(personalizeClient, exec.createSubExecutionContext(0.2));
        // Check if the respective dataset already exists and either delete it or abort
        checkAlreadyExistingDataset(personalizeClient, datasetGroupArn, exec.createSubExecutionContext(0.1));
        exec.setProgress(0.5);
        // Create the data set (container)
        exec.setMessage("Importing dataset from S3");
        final String schemaArn = createSchema(personalizeClient, adaptedTable.getDataTableSpec());
        final String datasetArn = personalizeClient.createDataset(new CreateDatasetRequest().withDatasetGroupArn(datasetGroupArn).withDatasetType(m_datasetType).withName(m_settings.getDatasetName()).withSchemaArn(schemaArn)).getDatasetArn();
        try {
            // Import the dataset from S3
            importDataFromS3(personalizeClient, "s3:/" + uniqueFilePath, datasetArn, exec);
        } catch (RuntimeException | InterruptedException importFailure) {
            // The import failed or was interrupted: remove the dataset that was created just above
            try {
                deleteDataset(personalizeClient, datasetGroupArn, datasetArn);
            } catch (InterruptedException ignored) {
                // happens if user cancels node execution during deletion of dataset
                // do nothing, deletion will be further processed by amazon
            } catch (RuntimeException cleanupFailure) {
                // Keep the import failure as the reported cause; the cleanup problem stays visible as suppressed
                importFailure.addSuppressed(cleanupFailure);
            }
            throw importFailure;
        }
    } catch (Throwable t) {
        // TODO cancel import job, currently not supported but hopefully in future versions
        primaryFailure = t;
        throw t;
    } finally {
        // Remove temporary created S3 file
        try {
            target.delete();
        } catch (Exception deleteFailure) {
            if (primaryFailure == null) {
                // Nothing else went wrong, so the failed deletion is the error to report
                throw deleteFailure;
            }
            // Do not let the failed deletion hide the failure that is already propagating
            primaryFailure.addSuppressed(deleteFailure);
        }
    }
    return null;
}
Also used : AmazonPersonalizeConnection(org.knime.cloud.aws.mlservices.nodes.personalize.AmazonPersonalizeConnection) CreateDatasetRequest(com.amazonaws.services.personalize.model.CreateDatasetRequest) Connection(org.knime.base.filehandling.remote.files.Connection) AmazonPersonalizeConnection(org.knime.cloud.aws.mlservices.nodes.personalize.AmazonPersonalizeConnection) AmazonPersonalize(com.amazonaws.services.personalize.AmazonPersonalize) URI(java.net.URI) AmazonConnectionInformationPortObject(org.knime.cloud.aws.util.AmazonConnectionInformationPortObject) BufferedDataTable(org.knime.core.node.BufferedDataTable) CloudConnectionInformation(org.knime.cloud.core.util.port.CloudConnectionInformation)

Aggregations

AmazonPersonalize (com.amazonaws.services.personalize.AmazonPersonalize)1 CreateDatasetRequest (com.amazonaws.services.personalize.model.CreateDatasetRequest)1 URI (java.net.URI)1 Connection (org.knime.base.filehandling.remote.files.Connection)1 AmazonPersonalizeConnection (org.knime.cloud.aws.mlservices.nodes.personalize.AmazonPersonalizeConnection)1 AmazonConnectionInformationPortObject (org.knime.cloud.aws.util.AmazonConnectionInformationPortObject)1 CloudConnectionInformation (org.knime.cloud.core.util.port.CloudConnectionInformation)1 BufferedDataTable (org.knime.core.node.BufferedDataTable)1