use of co.cask.cdap.hive.serde.ObjectDeserializer in project cdap by caskdata.
the class StreamSerDe method initialize.
// initialize gets called multiple times by Hive. It may seem like a good idea to put additional settings into
// the conf, but be very careful when doing so. If there are multiple hive tables involved in a query, initialize
// for each table is called before input splits are fetched for any table. It is therefore not safe to put anything
// the input format may need into conf in this method. Rather, use StorageHandler's method to place needed config
// into the properties map there, which will get passed here and also copied into the job conf for the input
// format to consume.
@Override
public void initialize(Configuration conf, Properties properties) throws SerDeException {
// The columns property comes from the Hive metastore, which has it from the create table statement
// It is therefore important that this schema be accurate and in the right order - the same order
// in which the object inspectors will reflect the columns.
String streamName = properties.getProperty(Constants.Explore.STREAM_NAME);
String streamNamespace = properties.getProperty(Constants.Explore.STREAM_NAMESPACE);
// to avoid a null pointer exception that prevents dropping a table, we handle the null namespace case here.
if (streamNamespace == null) {
// we also still need an ObjectInspector as Hive uses it to check what columns the table has.
this.inspector = new ObjectDeserializer(properties, null).getInspector();
return;
}
StreamId streamId = new StreamId(streamNamespace, streamName);
try (ContextManager.Context context = ContextManager.getContext(conf)) {
Schema schema = null;
// Context can be null when Hive calls initialize just to get the object inspector; in that case the schema stays null.
if (context != null) {
// Get the stream format from the stream config.
FormatSpecification formatSpec = getFormatSpec(properties, streamId, context);
this.streamFormat = (AbstractStreamEventRecordFormat) RecordFormats.createInitializedFormat(formatSpec);
schema = formatSpec.getSchema();
}
this.deserializer = new ObjectDeserializer(properties, schema, BODY_OFFSET);
this.inspector = deserializer.getInspector();
} catch (UnsupportedTypeException e) {
// this should have been validated up front when schema was set on the stream.
// if we hit this something went wrong much earlier.
LOG.error("Schema unsupported by format.", e);
throw new SerDeException("Schema unsupported by format.", e);
} catch (IOException e) {
LOG.error("Could not get the config for stream {}.", streamName, e);
throw new SerDeException("Could not get the config for stream " + streamName, e);
} catch (Exception e) {
LOG.error("Could not create the format for stream {}.", streamName, e);
throw new SerDeException("Could not create the format for stream " + streamName, e);
}
}
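The comment at the top of this method recommends routing any configuration the input format needs through the storage handler's job properties rather than mutating the conf inside initialize. A minimal sketch of that pattern, assuming Hive's DefaultStorageHandler hook; the class name and the import path for Constants are assumptions, and this is not the actual CDAP stream storage handler:

import co.cask.cdap.common.conf.Constants; // import path assumed
import org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import java.util.Map;
import java.util.Properties;

// Illustrative only: copies the stream identity from the table properties into the job
// properties map. Hive passes that map to the SerDe's initialize and also copies it into
// the job conf for the input format, so nothing has to be stuffed into conf in initialize.
public class SketchStreamStorageHandler extends DefaultStorageHandler {
  @Override
  public void configureTableJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
    Properties tableProperties = tableDesc.getProperties();
    jobProperties.put(Constants.Explore.STREAM_NAME,
                      tableProperties.getProperty(Constants.Explore.STREAM_NAME));
    jobProperties.put(Constants.Explore.STREAM_NAMESPACE,
                      tableProperties.getProperty(Constants.Explore.STREAM_NAMESPACE));
  }
}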
use of co.cask.cdap.hive.serde.ObjectDeserializer in project cdap by caskdata.
the class DatasetSerDe method initialize.
@Override
public void initialize(Configuration conf, Properties properties) throws SerDeException {
// The column names are saved because the inspector given to #serialize doesn't preserve them
// - maybe because it's an external table
// The columns property comes from the Hive metastore, which has it from the create table statement
// It is therefore important that this schema be accurate and in the right order - the same order
// in which the object inspectors will reflect the columns.
String datasetName = properties.getProperty(Constants.Explore.DATASET_NAME);
String namespace = properties.getProperty(Constants.Explore.DATASET_NAMESPACE);
// to avoid a null pointer exception that prevents dropping a table, we handle the null namespace case here.
if (namespace == null) {
// we also still need an ObjectInspector as Hive uses it to check what columns the table has.
this.objectInspector = new ObjectDeserializer(properties, null).getInspector();
return;
}
if (datasetName == null || datasetName.isEmpty()) {
throw new SerDeException("Dataset name not found in serde properties.");
}
// only look up the dataset schema if we don't already have it, since Hive may call initialize a bunch of times.
if (schema == null) {
DatasetId datasetId = new DatasetId(namespace, datasetName);
getDatasetSchema(conf, datasetId);
}
this.deserializer = new ObjectDeserializer(properties, schema);
ArrayList<String> columnNames = Lists.newArrayList(StringUtils.split(properties.getProperty("columns"), ","));
this.serializer = new ObjectSerializer(columnNames);
this.objectInspector = deserializer.getInspector();
}
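Both initialize methods are driven entirely by the serde properties Hive hands in. A rough usage sketch, assuming the standard Hive "columns"/"columns.types" serde keys plus the Constants.Explore keys read above; the dataset name, columns, and import paths are illustrative assumptions, and the full lookup path requires a running CDAP Explore environment:

import co.cask.cdap.common.conf.Constants;       // import path assumed
import co.cask.cdap.hive.datasets.DatasetSerDe;  // package assumed
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import java.util.Properties;

public class DatasetSerDeUsageSketch {
  public static void main(String[] args) throws Exception {
    Properties props = new Properties();
    // standard Hive serde keys describing the table columns, in create-table order
    props.setProperty("columns", "ts,customer,price");
    props.setProperty("columns.types", "bigint,string,double");
    // CDAP Explore keys the SerDe reads above; values are made up for illustration
    props.setProperty(Constants.Explore.DATASET_NAME, "purchases");
    props.setProperty(Constants.Explore.DATASET_NAMESPACE, "default");

    DatasetSerDe serDe = new DatasetSerDe();
    // with the namespace present, initialize looks up the dataset schema through
    // ContextManager; with the namespace absent it only builds an ObjectInspector from
    // the columns properties, which is enough for Hive to drop the table.
    serDe.initialize(new Configuration(), props);
    ObjectInspector inspector = serDe.getObjectInspector();
  }
}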