Use of co.cask.cdap.explore.table.CreateStatementBuilder in project cdap by caskdata.
Class ExploreTableManager, method generateFileSetCreateStatement.
/**
 * Generates a CREATE TABLE statement for a ((time-)partitioned) file set.
 *
 * @param datasetId the dataset id
 * @param dataset the instantiated dataset
 * @param properties the properties from the dataset specification
 * @param truncating whether this call to create() is part of a truncate() operation. The effect is:
 *                   if possessExisting is true, then the truncate() has just dropped this dataset,
 *                   which deleted the explore table, so we must recreate it.
 *
 * @return a CREATE TABLE statement, or null if the dataset is not explorable
 * @throws IllegalArgumentException if the schema cannot be parsed, or if a required property
 *                                  (such as the explore schema for a native format) is missing
 *                                  from the dataset properties
 * @throws ExploreException if the dataset is configured to use an existing explore table
 *                          that does not exist
 */
@Nullable
private String generateFileSetCreateStatement(DatasetId datasetId, Dataset dataset,
                                              Map<String, String> properties,
                                              boolean truncating) throws IllegalArgumentException, ExploreException {
  String tableName = tableNaming.getTableName(datasetId, properties);
  String databaseName = ExploreProperties.getExploreDatabaseName(properties);
  Map<String, String> tableProperties = FileSetProperties.getTableProperties(properties);
  // if this dataset reuses an existing table, do not attempt to create it
  if (FileSetProperties.isUseExisting(tableProperties)
      || (FileSetProperties.isPossessExisting(tableProperties) && !truncating)) {
    try {
      exploreService.getTableInfo(datasetId.getNamespace(), databaseName, tableName);
      // table exists: do not attempt to create
      return null;
    } catch (TableNotFoundException e) {
      throw new ExploreException(String.format(
        "Dataset '%s' is configured to use an existing explore table, but table '%s' does not exist in database '%s'.",
        datasetId.getDataset(), tableName, databaseName));
    }
  }
  Location baseLocation;
  Partitioning partitioning = null;
  if (dataset instanceof PartitionedFileSet) {
    partitioning = ((PartitionedFileSet) dataset).getPartitioning();
    baseLocation = ((PartitionedFileSet) dataset).getEmbeddedFileSet().getBaseLocation();
  } else {
    baseLocation = ((FileSet) dataset).getBaseLocation();
  }
  CreateStatementBuilder createStatementBuilder =
    new CreateStatementBuilder(datasetId.getDataset(), databaseName, tableName, shouldEscapeColumns)
      .setLocation(baseLocation)
      .setPartitioning(partitioning)
      .setTableProperties(tableProperties);
  String schema = FileSetProperties.getExploreSchema(properties);
  String format = FileSetProperties.getExploreFormat(properties);
  if (format != null) {
    if ("parquet".equals(format)) {
      return createStatementBuilder.setSchema(schema).buildWithFileFormat("parquet");
    }
    // for text and csv, we know what to do
    Preconditions.checkArgument("text".equals(format) || "csv".equals(format),
                                "Only text and csv are supported as native formats");
    Preconditions.checkNotNull(schema, "for native formats, explore schema must be given in dataset properties");
    String delimiter = null;
    if ("text".equals(format)) {
      delimiter = FileSetProperties.getExploreFormatProperties(properties).get("delimiter");
    } else if ("csv".equals(format)) {
      delimiter = ",";
    }
    return createStatementBuilder.setSchema(schema)
      .setRowFormatDelimited(delimiter, null)
      .buildWithFileFormat("TEXTFILE");
  } else {
    // the schema is optional here; for example, Avro tables can be created by
    // setting the avro.schema.literal table property
    if (schema != null) {
      createStatementBuilder.setSchema(schema);
    }
    // format not given, look for serde, input format, etc.
    String serde = FileSetProperties.getSerDe(properties);
    String inputFormat = FileSetProperties.getExploreInputFormat(properties);
    String outputFormat = FileSetProperties.getExploreOutputFormat(properties);
    Preconditions.checkArgument(serde != null && inputFormat != null && outputFormat != null,
                                "All of SerDe, InputFormat and OutputFormat must be given in dataset properties");
    return createStatementBuilder.setRowFormatSerde(serde)
      .buildWithFormats(inputFormat, outputFormat);
  }
}
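
For context, a file set usually opts into one of the branches above through its creation properties. A minimal sketch of properties that would drive the csv branch (the base path and explore schema are made up for illustration):

// Hypothetical example: FileSet properties that send generateFileSetCreateStatement
// down the native "csv" branch above. Path and schema are illustrative only.
DatasetProperties csvFileSetProps = FileSetProperties.builder()
  .setBasePath("trades")                // base location used for setLocation(...)
  .setEnableExploreOnCreate(true)       // FileSetProperties.isExploreEnabled(...) returns true
  .setExploreFormat("csv")              // delimiter becomes "," in setRowFormatDelimited
  .setExploreSchema("ticker string, num_traded int, price double")
  .build();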
Use of co.cask.cdap.explore.table.CreateStatementBuilder in project cdap by caskdata.
Class ExploreTableManager, method generateEnableStatement.
/**
 * Generates a Hive DDL statement to create a Hive table for the given dataset.
 *
 * @param dataset the instantiated dataset
 * @param spec the dataset specification
 * @param datasetId the dataset id
 * @param tableName the name of the Hive table to create
 * @param truncating whether this call to create() is part of a truncate() operation, which is in some
 *                   cases implemented using disableExplore() followed by enableExplore()
 *
 * @return a CREATE TABLE statement, or null if the dataset is not explorable
 * @throws UnsupportedTypeException if the dataset is a RecordScannable of a type that is not supported by Hive
 */
@Nullable
private String generateEnableStatement(Dataset dataset, DatasetSpecification spec, DatasetId datasetId,
                                       String tableName, boolean truncating)
  throws UnsupportedTypeException, ExploreException {
  String datasetName = datasetId.getDataset();
  Map<String, String> serdeProperties = ImmutableMap.of(
    Constants.Explore.DATASET_NAME, datasetId.getDataset(),
    Constants.Explore.DATASET_NAMESPACE, datasetId.getNamespace());
  // to be explorable, a dataset must either be record-scannable or record-writable,
  // or it must be a FileSet or a PartitionedFileSet with explore enabled in its properties.
  if (dataset instanceof Table) {
    // valid for a table not to have a schema property. this logic should really be in Table
    return generateCreateStatementFromSchemaProperty(spec, datasetId, tableName, serdeProperties, false);
  }
  if (dataset instanceof ObjectMappedTable) {
    return generateCreateStatementFromSchemaProperty(spec, datasetId, tableName, serdeProperties, true);
  }
  boolean isRecordScannable = dataset instanceof RecordScannable;
  boolean isRecordWritable = dataset instanceof RecordWritable;
  if (isRecordScannable || isRecordWritable) {
    Type recordType = isRecordScannable
      ? ((RecordScannable) dataset).getRecordType()
      : ((RecordWritable) dataset).getRecordType();
    // use == rather than instanceof: the record type must be exactly StructuredRecord
    if (StructuredRecord.class == recordType) {
      return generateCreateStatementFromSchemaProperty(spec, datasetId, tableName, serdeProperties, true);
    }
    // otherwise, derive the schema from the record type
    LOG.debug("Enabling explore for dataset instance {}", datasetName);
    String databaseName = ExploreProperties.getExploreDatabaseName(spec.getProperties());
    return new CreateStatementBuilder(datasetName, databaseName, tableName, shouldEscapeColumns)
      .setSchema(hiveSchemaFor(recordType))
      .setTableComment("CDAP Dataset")
      .buildWithStorageHandler(DatasetStorageHandler.class.getName(), serdeProperties);
  } else if (dataset instanceof FileSet || dataset instanceof PartitionedFileSet) {
    Map<String, String> properties = spec.getProperties();
    if (FileSetProperties.isExploreEnabled(properties)) {
      LOG.debug("Enabling explore for dataset instance {}", datasetName);
      return generateFileSetCreateStatement(datasetId, dataset, properties, truncating);
    }
  }
  // dataset is not explorable
  return null;
}
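
As a rough illustration of the hiveSchemaFor(recordType) path above, a RecordScannable whose record type is a plain POJO (this class is invented for the example) would have its Hive schema derived from the type's fields:

// Hypothetical record type for a RecordScannable<TradeRecord> dataset.
// hiveSchemaFor(TradeRecord.class) would derive a Hive schema along the lines of
// (ticker string, price double, trades int) from these fields.
public class TradeRecord {
  public String ticker;
  public double price;
  public int trades;
}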
Use of co.cask.cdap.explore.table.CreateStatementBuilder in project cdap by caskdata.
Class ExploreTableManager, method enableStream.
/**
* Enable exploration on a stream by creating a corresponding Hive table. Enabling exploration on a
* stream that has already been enabled is a no-op. Assumes the stream actually exists.
*
* @param tableName name of the Hive table to create
* @param streamId the ID of the stream
* @param formatSpec the format specification for the table
* @return query handle for creating the Hive table for the stream
* @throws UnsupportedTypeException if the stream schema is not compatible with Hive
* @throws ExploreException if there was an exception submitting the create table statement
* @throws SQLException if there was a problem with the create table statement
*/
public QueryHandle enableStream(String tableName, StreamId streamId, FormatSpecification formatSpec)
  throws UnsupportedTypeException, ExploreException, SQLException {
  String streamName = streamId.getStream();
  LOG.debug("Enabling explore for stream {} with table {}", streamId, tableName);
  // the schema of a stream is always timestamp, headers, and then the schema of the body.
  List<Schema.Field> fields = Lists.newArrayList(
    Schema.Field.of("ts", Schema.of(Schema.Type.LONG)),
    Schema.Field.of("headers", Schema.mapOf(Schema.of(Schema.Type.STRING), Schema.of(Schema.Type.STRING))));
  if (formatSpec.getSchema() != null) {
    fields.addAll(formatSpec.getSchema().getFields());
  }
  Schema schema = Schema.recordOf("streamEvent", fields);
  Map<String, String> serdeProperties = ImmutableMap.of(
    Constants.Explore.STREAM_NAME, streamName,
    Constants.Explore.STREAM_NAMESPACE, streamId.getNamespace(),
    Constants.Explore.FORMAT_SPEC, GSON.toJson(formatSpec));
  String createStatement = new CreateStatementBuilder(streamName, null, tableName, shouldEscapeColumns)
    .setSchema(schema)
    .setTableComment("CDAP Stream")
    .buildWithStorageHandler(StreamStorageHandler.class.getName(), serdeProperties);
  LOG.debug("Running create statement for stream {} with table {}: {}", streamName, tableName, createStatement);
  return exploreService.execute(streamId.getParent(), createStatement);
}
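
A hypothetical call site (the stream name, table name, and format are made up for illustration, and exploreTableManager is assumed to be an instance of this class):

// Illustrative only: enable explore for a stream "purchases" whose body is CSV
// with a single "item" field. The generated table gets ts and headers columns
// followed by the body schema, as built above.
FormatSpecification formatSpec = new FormatSpecification(
  Formats.CSV,
  Schema.recordOf("purchase", Schema.Field.of("item", Schema.of(Schema.Type.STRING))),
  ImmutableMap.<String, String>of());
QueryHandle handle = exploreTableManager.enableStream(
  "stream_purchases", new StreamId("default", "purchases"), formatSpec);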
Use of co.cask.cdap.explore.table.CreateStatementBuilder in project cdap by caskdata.
Class ExploreTableManager, method generateCreateStatementFromSchemaProperty.
/**
 * Generates a create statement from the "schema" property of the dataset specification. This is used for
 * Table, ObjectMappedTable and RecordScannables with record type StructuredRecord, all of which use the
 * {@link DatasetStorageHandler}.
 *
 * @param spec the dataset specification
 * @param datasetId the dataset id
 * @param tableName the name of the Hive table to create
 * @param serdeProperties properties to be passed to the {@link co.cask.cdap.hive.datasets.DatasetSerDe}
 * @param shouldErrorOnMissingSchema whether a schema is required
 * @return a CREATE TABLE statement, or null if the dataset is not explorable
 * @throws UnsupportedTypeException if the schema cannot be represented in Hive
 * @throws IllegalArgumentException if the schema cannot be parsed, or if shouldErrorOnMissingSchema is true and
 *                                  the dataset spec does not contain a schema
 */
@Nullable
private String generateCreateStatementFromSchemaProperty(DatasetSpecification spec, DatasetId datasetId,
                                                         String tableName, Map<String, String> serdeProperties,
                                                         boolean shouldErrorOnMissingSchema)
  throws UnsupportedTypeException {
  Schema schema = getSchemaFromProperty(spec, datasetId, shouldErrorOnMissingSchema);
  if (schema == null) {
    return null;
  }
  String databaseName = ExploreProperties.getExploreDatabaseName(spec.getProperties());
  return new CreateStatementBuilder(datasetId.getDataset(), databaseName, tableName, shouldEscapeColumns)
    .setSchema(schema)
    .setTableComment("CDAP Dataset")
    .buildWithStorageHandler(DatasetStorageHandler.class.getName(), serdeProperties);
}
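
For intuition, the statement built here has roughly the following shape (the table name, columns, and serde property keys and values are illustrative; the exact text is produced by CreateStatementBuilder):

// Rough shape of the generated DDL for a Table named "users" (illustrative only):
String exampleDdl =
  "CREATE EXTERNAL TABLE IF NOT EXISTS dataset_users (id string, name string) "
  + "COMMENT 'CDAP Dataset' "
  + "STORED BY 'co.cask.cdap.hive.datasets.DatasetStorageHandler' "
  + "WITH SERDEPROPERTIES ('explore.dataset.name'='users', 'explore.dataset.namespace'='default')";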