Use of com.amazonaws.services.personalize.model.CreateDatasetRequest in project knime-cloud by knime:
the execute method of the class AbstractAmazonPersonalizeDataUploadNodeModel.
/**
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inObjects, final ExecutionContext exec) throws Exception {
    // === Write table out as CSV === (TODO we may be able to write it directly to S3)
    // Filter included columns
    final BufferedDataTable filterTable = filterTable((BufferedDataTable)inObjects[TABLE_INPUT_PORT_IDX], exec);
    // Rename columns to fit the later created schema
    final BufferedDataTable adaptedTable = renameColumns(filterTable, exec);
    // Check if the input is valid (just a shallow check, there is no clear documentation by Amazon)
    exec.setMessage("Validating input");
    validateInputTableContent(adaptedTable);
    exec.setProgress(0.05);
    // Write the table as CSV to disc
    final URI sourceURI = writeCSV(adaptedTable, exec.createSubExecutionContext(0.1));

    // === Upload CSV to S3 ===
    final CloudConnectionInformation cxnInfo =
        ((AmazonConnectionInformationPortObject)inObjects[0]).getConnectionInformation();
    final String uniqueFilePath = m_settings.getTarget() + "KNIME-tmp-" + StringUtils.lowerCase(getDatasetType())
        + "-file-" + System.currentTimeMillis() + ".csv";
    final RemoteFile<Connection> target =
        writeToS3(exec.createSubExecutionContext(0.1), sourceURI, cxnInfo, uniqueFilePath);

    // === Import data from S3 to Amazon Personalize service ===
    try (final AmazonPersonalizeConnection personalizeConnection = new AmazonPersonalizeConnection(cxnInfo)) {
        final AmazonPersonalize personalizeClient = personalizeConnection.getClient();
        // Create the dataset group ARN or use existing one
        final String datasetGroupArn = createDatasetGroup(personalizeClient, exec.createSubExecutionContext(0.2));
        // Check if the respective dataset already exists and either delete it or abort
        checkAlreadyExistingDataset(personalizeClient, datasetGroupArn, exec.createSubExecutionContext(0.1));
        exec.setProgress(0.5);

        // Create the dataset (container)
        exec.setMessage("Importing dataset from S3");
        final String schemaArn = createSchema(personalizeClient, adaptedTable.getDataTableSpec());
        final String datasetArn = personalizeClient.createDataset(new CreateDatasetRequest()
            .withDatasetGroupArn(datasetGroupArn)
            .withDatasetType(m_datasetType)
            .withName(m_settings.getDatasetName())
            .withSchemaArn(schemaArn)).getDatasetArn();
        try {
            // Import the dataset from S3
            importDataFromS3(personalizeClient, "s3:/" + uniqueFilePath, datasetArn, exec);
        } catch (RuntimeException | InterruptedException e1) {
            try {
                deleteDataset(personalizeClient, datasetGroupArn, datasetArn);
            } catch (InterruptedException e) {
                // Happens if the user cancels node execution during deletion of the dataset;
                // do nothing, deletion will be further processed by Amazon.
            }
            throw e1;
        }
    } catch (RuntimeException e) {
        // TODO cancel import job, currently not supported but hopefully in future versions
        throw e;
    } finally {
        // Remove the temporarily created S3 file
        target.delete();
    }
    return null;
}
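For reference, the same CreateDatasetRequest call can be issued against a plain AWS SDK for Java v1 Personalize client, outside of the KNIME node model. The following is a minimal sketch only: the client construction, dataset group ARN, schema ARN, and dataset name are placeholder assumptions and do not come from the knime-cloud source.

import com.amazonaws.services.personalize.AmazonPersonalize;
import com.amazonaws.services.personalize.AmazonPersonalizeClientBuilder;
import com.amazonaws.services.personalize.model.CreateDatasetRequest;

public class CreateDatasetExample {

    public static void main(final String[] args) {
        // Default client; picks up credentials and region from the default provider chain.
        final AmazonPersonalize client = AmazonPersonalizeClientBuilder.defaultClient();

        // Create an empty dataset (container) inside an existing dataset group, referencing a
        // previously registered schema. All ARNs and names below are placeholders.
        final String datasetArn = client.createDataset(new CreateDatasetRequest()
            .withDatasetGroupArn("arn:aws:personalize:us-east-1:123456789012:dataset-group/example-group")
            .withDatasetType("Interactions")
            .withName("example-dataset")
            .withSchemaArn("arn:aws:personalize:us-east-1:123456789012:schema/example-schema"))
            .getDatasetArn();

        // The returned ARN is what a subsequent dataset import job would refer to.
        System.out.println("Created dataset: " + datasetArn);
    }
}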