Use of org.apache.hadoop.hive.serde2.SerDeException in project cdap by caskdata.
The class DatasetSerDe, method deserialize.
@Override
public Object deserialize(Writable writable) throws SerDeException {
  ObjectWritable objectWritable = (ObjectWritable) writable;
  Object obj = objectWritable.get();
  try {
    return deserializer.deserialize(obj);
  } catch (Throwable t) {
    LOG.error("Unable to deserialize object {}.", obj, t);
    throw new SerDeException("Unable to deserialize an object.", t);
  }
}
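The method converts any failure from the wrapped CDAP deserializer into a SerDeException, the checked exception Hive expects from a SerDe. Below is a minimal caller-side sketch of how such an exception is typically surfaced as an IOException at the record-reader boundary; the class and method names (DeserializeCaller, readRow) are illustrative, not part of the CDAP code.

import java.io.IOException;

import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.io.Writable;

public final class DeserializeCaller {
  private DeserializeCaller() { }

  // Turns a serialized row back into an object, surfacing SerDe failures as
  // IOException, the usual pattern at the RecordReader/InputFormat boundary.
  static Object readRow(Deserializer deserializer, Writable rowWritable) throws IOException {
    try {
      // deserialize(Writable) is declared to throw SerDeException
      return deserializer.deserialize(rowWritable);
    } catch (SerDeException e) {
      throw new IOException("Unable to deserialize row", e);
    }
  }
}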
Use of org.apache.hadoop.hive.serde2.SerDeException in project cdap by caskdata.
The class DatasetSerDe, method getDatasetSchema.
private void getDatasetSchema(Configuration conf, DatasetId datasetId) throws SerDeException {
  try (ContextManager.Context hiveContext = ContextManager.getContext(conf)) {
    // hiveContext is null when Hive passes a null conf, which happens when it calls
    // initialize just to get the object inspector
    if (hiveContext == null) {
      LOG.info("Hive provided a null conf, will not be able to get dataset schema.");
      return;
    }
    // some datasets like Table and ObjectMappedTable have schema in the dataset properties
    try {
      DatasetSpecification datasetSpec = hiveContext.getDatasetSpec(datasetId);
      String schemaStr = datasetSpec.getProperty("schema");
      if (schemaStr != null) {
        schema = Schema.parseJson(schemaStr);
        return;
      }
    } catch (DatasetManagementException | ServiceUnavailableException e) {
      throw new SerDeException("Could not instantiate dataset " + datasetId, e);
    } catch (IOException e) {
      throw new SerDeException("Exception getting schema for dataset " + datasetId, e);
    }
    // other datasets must be instantiated to get their schema
    // conf is null if this is a query that writes to a dataset
    ClassLoader parentClassLoader = conf == null ? null : conf.getClassLoader();
    try (SystemDatasetInstantiator datasetInstantiator = hiveContext.createDatasetInstantiator(parentClassLoader)) {
      Dataset dataset = datasetInstantiator.getDataset(datasetId);
      if (dataset == null) {
        throw new SerDeException("Could not find dataset " + datasetId);
      }
      Type recordType;
      if (dataset instanceof RecordScannable) {
        recordType = ((RecordScannable) dataset).getRecordType();
      } else if (dataset instanceof RecordWritable) {
        recordType = ((RecordWritable) dataset).getRecordType();
      } else {
        throw new SerDeException("Dataset " + datasetId + " is not explorable.");
      }
      schema = schemaGenerator.generate(recordType);
    } catch (UnsupportedTypeException e) {
      throw new SerDeException("Dataset " + datasetId + " has an unsupported schema.", e);
    } catch (IOException e) {
      throw new SerDeException("Exception while trying to instantiate dataset " + datasetId, e);
    }
  } catch (IOException e) {
    throw new SerDeException("Could not get hive context from configuration.", e);
  }
}
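getDatasetSchema takes one of two paths: datasets such as Table and ObjectMappedTable carry their schema as a dataset property, while other datasets must be instantiated so a schema can be generated from their record type. A minimal sketch of the property fast path follows; the helper name schemaFromProperties is hypothetical and the co.cask.cdap import paths are assumptions based on the caskdata-era CDAP API.

import java.io.IOException;

import co.cask.cdap.api.data.schema.Schema;
import co.cask.cdap.api.dataset.DatasetSpecification;
import org.apache.hadoop.hive.serde2.SerDeException;

final class DatasetSchemaHelper {
  private DatasetSchemaHelper() { }

  // Returns the schema stored in the dataset properties, or null when the
  // dataset carries no "schema" property and must be instantiated instead.
  static Schema schemaFromProperties(DatasetSpecification spec) throws SerDeException {
    String schemaStr = spec.getProperty("schema");
    if (schemaStr == null) {
      return null;
    }
    try {
      return Schema.parseJson(schemaStr);
    } catch (IOException e) {
      throw new SerDeException("Exception parsing schema from dataset properties", e);
    }
  }
}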
Use of org.apache.hadoop.hive.serde2.SerDeException in project metacat by Netflix.
The class HiveTableUtil, method getTableStructFields.
/**
* getTableStructFields.
*
* @param table table
* @return all struct field refs
*/
public static List<? extends StructField> getTableStructFields(final Table table) {
  final Properties schema = MetaStoreUtils.getTableMetadata(table);
  final String name = schema.getProperty(serdeConstants.SERIALIZATION_LIB);
  if (name == null) {
    return Collections.emptyList();
  }
  final Deserializer deserializer = createDeserializer(getDeserializerClass(name));
  try {
    deserializer.initialize(new Configuration(false), schema);
  } catch (SerDeException e) {
    throw new RuntimeException("error initializing deserializer: " + deserializer.getClass().getName());
  }
  try {
    final ObjectInspector inspector = deserializer.getObjectInspector();
    Preconditions.checkArgument(inspector.getCategory() == ObjectInspector.Category.STRUCT,
        "expected STRUCT: %s", inspector.getCategory());
    return ((StructObjectInspector) inspector).getAllStructFieldRefs();
  } catch (SerDeException e) {
    throw Throwables.propagate(e);
  }
}
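A short usage sketch of the helper above: each returned StructField exposes the column name and its own ObjectInspector. The class and method names (StructFieldPrinter, printFields) are illustrative; the Table import assumes the metastore thrift Table that MetaStoreUtils.getTableMetadata accepts, and HiveTableUtil refers to the metacat class shown above with its package import omitted.

import java.util.List;

import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;

final class StructFieldPrinter {
  private StructFieldPrinter() { }

  // Prints each column name together with the type reported by its ObjectInspector.
  // HiveTableUtil is the metacat utility shown above (package import omitted).
  static void printFields(Table table) {
    List<? extends StructField> fields = HiveTableUtil.getTableStructFields(table);
    for (StructField field : fields) {
      System.out.println(field.getFieldName() + " : "
          + field.getFieldObjectInspector().getTypeName());
    }
  }
}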
Use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache.
The class HiveAccumuloTableInputFormat, method getSplits.
@Override
public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
  final AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(jobConf);
  final Instance instance = accumuloParams.getInstance();
  final ColumnMapper columnMapper;
  try {
    columnMapper = getColumnMapper(jobConf);
  } catch (TooManyAccumuloColumnsException e) {
    throw new IOException(e);
  }
  JobContext context = ShimLoader.getHadoopShims().newJobContext(Job.getInstance(jobConf));
  Path[] tablePaths = FileInputFormat.getInputPaths(context);
  try {
    Connector connector = null;
    // Need to get a Connector so we look up the user's authorizations if not otherwise specified
    if (accumuloParams.useSasl()) {
      log.info("Current user: " + UserGroupInformation.getCurrentUser());
      // In a YARN/Tez job we no longer have the Kerberos credentials, so use the delegation token instead
      AuthenticationToken token = ConfiguratorBase.getAuthenticationToken(AccumuloInputFormat.class, jobConf);
      if (null != token && !jobConf.getCredentials().getAllTokens().isEmpty()) {
        // Convert the stub from the configuration back into a normal Token
        log.info("Found authentication token in Configuration: " + token);
        log.info("Job credential tokens: " + jobConf.getCredentials().getAllTokens());
        AuthenticationToken unwrappedToken = ConfiguratorBase.unwrapAuthenticationToken(jobConf, token);
        log.info("Converted authentication token from Configuration into: " + unwrappedToken);
        // unwrapAuthenticationToken returns the original token when there is nothing to unwrap
        // (which we know is insufficient)
        if (unwrappedToken != token) {
          log.info("Creating Accumulo Connector with unwrapped delegation token");
          connector = instance.getConnector(accumuloParams.getAccumuloUserName(), unwrappedToken);
        } else {
          log.info("Job credentials did not contain delegation token, fetching new token");
        }
      }
      if (connector == null) {
        log.info("Obtaining Accumulo Connector using KerberosToken");
        // Construct a KerberosToken -- relies on ProxyUser configuration. Will be the client making
        // the request on top of the HS2's user. Accumulo will require proper proxy-user auth configs.
        connector = instance.getConnector(accumuloParams.getAccumuloUserName(), new KerberosToken(accumuloParams.getAccumuloUserName()));
      }
    } else {
      // Still in the local JVM, use the username+password or Kerberos credentials
      connector = accumuloParams.getConnector(instance);
    }
    final List<ColumnMapping> columnMappings = columnMapper.getColumnMappings();
    final List<IteratorSetting> iterators = predicateHandler.getIterators(jobConf, columnMapper);
    final Collection<Range> ranges = predicateHandler.getRanges(jobConf, columnMapper);
    // An empty (non-null) collection of ranges means the pushed-down predicates matched nothing,
    // so return no splits rather than scanning the whole table. We don't want that.
    if (null != ranges && ranges.isEmpty()) {
      return new InputSplit[0];
    }
    // Set the relevant information in the Configuration for the AccumuloInputFormat
    configure(jobConf, instance, connector, accumuloParams, columnMapper, iterators, ranges);
    int numColumns = columnMappings.size();
    List<Integer> readColIds = ColumnProjectionUtils.getReadColumnIDs(jobConf);
    // Sanity check
    if (numColumns < readColIds.size())
      throw new IOException("Number of column mappings (" + numColumns + ")" + " numbers less than the hive table columns. (" + readColIds.size() + ")");
    // get splits from Accumulo
    InputSplit[] splits = accumuloInputFormat.getSplits(jobConf, numSplits);
    HiveAccumuloSplit[] hiveSplits = new HiveAccumuloSplit[splits.length];
    for (int i = 0; i < splits.length; i++) {
      RangeInputSplit ris = (RangeInputSplit) splits[i];
      ris.setLogLevel(Level.DEBUG);
      hiveSplits[i] = new HiveAccumuloSplit(ris, tablePaths[0]);
    }
    return hiveSplits;
  } catch (AccumuloException e) {
    log.error("Could not configure AccumuloInputFormat", e);
    throw new IOException(StringUtils.stringifyException(e));
  } catch (AccumuloSecurityException e) {
    log.error("Could not configure AccumuloInputFormat", e);
    throw new IOException(StringUtils.stringifyException(e));
  } catch (SerDeException e) {
    log.error("Could not configure AccumuloInputFormat", e);
    throw new IOException(StringUtils.stringifyException(e));
  }
}
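A hedged sketch of calling getSplits directly, assuming the JobConf already carries the Accumulo connection and column-mapping properties that Hive's storage handler normally sets; the class and method names (SplitLister, listSplits) are illustrative, and the org.apache.hadoop.hive.accumulo.mr import path is an assumption.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.accumulo.mr.HiveAccumuloTableInputFormat;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;

final class SplitLister {
  private SplitLister() { }

  // Lists the splits the input format would hand to the job. The JobConf is assumed
  // to already carry Accumulo connection and column-mapping properties.
  static InputSplit[] listSplits(JobConf jobConf, Path tablePath) throws IOException {
    // getSplits reads FileInputFormat's input paths when building HiveAccumuloSplit
    // instances, so at least one path must be set.
    FileInputFormat.setInputPaths(jobConf, tablePath);
    return new HiveAccumuloTableInputFormat().getSplits(jobConf, 1);
  }
}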
Use of org.apache.hadoop.hive.serde2.SerDeException in project hive by apache.
The class DruidSerDe, method initialize.
@Override
public void initialize(Configuration configuration, Properties properties) throws SerDeException {
  tsTZTypeInfo = new TimestampLocalTZTypeInfo(configuration.get(HiveConf.ConfVars.HIVE_LOCAL_TIME_ZONE.varname));
  // Druid query
  final String druidQuery = properties.getProperty(Constants.DRUID_QUERY_JSON, null);
  if (druidQuery != null && !druidQuery.isEmpty()) {
    initFromDruidQueryPlan(properties, druidQuery);
  } else {
    // No query. Either it is a CTAS, or we need to create a Druid meta data Query
    if (!org.apache.commons.lang3.StringUtils.isEmpty(properties.getProperty(serdeConstants.LIST_COLUMNS))
        && !org.apache.commons.lang3.StringUtils.isEmpty(properties.getProperty(serdeConstants.LIST_COLUMN_TYPES))) {
      // CASE CTAS statement
      initFromProperties(properties);
    } else {
      // Segment Metadata query that retrieves all columns present in
      // the data source (dimensions and metrics).
      initFromMetaDataQuery(configuration, properties);
    }
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("DruidSerDe initialized with\n" + "\t columns: " + Arrays.toString(columns) + "\n\t types: " + Arrays.toString(types));
  }
}
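initialize chooses one of three paths: a pre-planned Druid query taken from DRUID_QUERY_JSON, a CTAS-style setup built from the column names and types in the table properties, or a segment metadata query against the data source. A minimal sketch of the properties that drive the CTAS branch follows; the class name and column values are illustrative, not taken from the Hive code.

import java.util.Properties;

import org.apache.hadoop.hive.serde.serdeConstants;

final class DruidSerDeInitExample {
  private DruidSerDeInitExample() { }

  // Builds the minimal property set that makes initialize take the CTAS branch:
  // both the column names and the column types must be non-empty, and the
  // Druid query JSON property is left unset so the query-plan branch is skipped.
  static Properties ctasProperties() {
    Properties properties = new Properties();
    properties.setProperty(serdeConstants.LIST_COLUMNS, "__time,page,added");
    properties.setProperty(serdeConstants.LIST_COLUMN_TYPES,
        "timestamp with local time zone,string,int");
    return properties;
  }
}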