Use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.
From the class HiveMetaStoreUtils, method getDeserializer:
public static Deserializer getDeserializer(Configuration conf,
    org.apache.hadoop.hive.metastore.api.Table table, String metaTable,
    boolean skipConfError, String lib) throws MetaException {
  AbstractSerDe deserializer;
  try {
    // Instantiate the SerDe class named by lib via reflection.
    deserializer = ReflectionUtil.newInstance(
        conf.getClassByName(lib).asSubclass(AbstractSerDe.class), conf);
  } catch (Exception e) {
    throw new MetaException(e.getClass().getName() + " " + e.getMessage());
  }
  try {
    // Initialize the SerDe with the table's metadata properties.
    Properties properties = MetaStoreUtils.getTableMetadata(table);
    if (metaTable != null) {
      properties.put("metaTable", metaTable);
    }
    deserializer.initialize(conf, properties, null);
  } catch (SerDeException e) {
    // Initialization failures are fatal unless the caller opted to skip them.
    if (!skipConfError) {
      LOG.error("error in initSerDe: " + e.getClass().getName() + " " + e.getMessage(), e);
      throw new MetaException(e.getClass().getName() + " " + e.getMessage());
    }
  }
  return deserializer;
}
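A minimal caller sketch for the method above. The Hive jars are assumed to be on the classpath, the import path for HiveMetaStoreUtils is an assumption, and the wrapper names (DeserializerExample, resolve) are illustrative, not Hive API:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.metastore.HiveMetaStoreUtils; // assumed package
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.serde2.Deserializer;

public class DeserializerExample {
  // Resolve the SerDe declared in the table's own storage descriptor;
  // skipConfError=false makes initialization failures surface as MetaException.
  static Deserializer resolve(Configuration conf, Table tbl) throws MetaException {
    String lib = tbl.getSd().getSerdeInfo().getSerializationLib();
    return HiveMetaStoreUtils.getDeserializer(conf, tbl, null, false, lib);
  }
}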
Use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.
From the class SerDeStorageSchemaReader, method readSchema:
@Override
public List<FieldSchema> readSchema(Table tbl, EnvironmentContext envContext,
    Configuration conf) throws MetaException {
  ClassLoader orgHiveLoader = null;
  try {
    if (envContext != null) {
      String addedJars = envContext.getProperties().get("hive.added.jars.path");
      if (org.apache.commons.lang3.StringUtils.isNotBlank(addedJars)) {
        // Swap in a class loader that can see the added jars; for thread
        // safety, the original loader is restored in the finally block.
        orgHiveLoader = conf.getClassLoader();
        ClassLoader loader = org.apache.hadoop.hive.metastore.utils.MetaStoreUtils
            .addToClassPath(orgHiveLoader, org.apache.commons.lang3.StringUtils.split(addedJars, ","));
        conf.setClassLoader(loader);
      }
    }
    Deserializer s = HiveMetaStoreUtils.getDeserializer(conf, tbl, null, false);
    return HiveMetaStoreUtils.getFieldsFromDeserializer(tbl.getTableName(), s);
  } catch (Exception e) {
    StringUtils.stringifyException(e);
    throw new MetaException(e.getMessage());
  } finally {
    if (orgHiveLoader != null) {
      conf.setClassLoader(orgHiveLoader);
    }
  }
}
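A hedged usage sketch for readSchema; it passes a null EnvironmentContext (the method null-checks it before reading properties). The import path for SerDeStorageSchemaReader and the wrapper name are assumptions:

import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.metastore.SerDeStorageSchemaReader; // assumed package
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Table;

public class SchemaReadExample {
  // Derive the column list from the table's SerDe rather than from the
  // columns stored in the metastore; a null envContext means no extra jars.
  static List<FieldSchema> read(Table tbl, Configuration conf) throws MetaException {
    return new SerDeStorageSchemaReader().readSchema(tbl, null, conf);
  }
}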
Use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.
From the class AlterTableSetSerdeOperation, method doAlteration:
@Override
protected void doAlteration(Table table, Partition partition) throws HiveException {
  StorageDescriptor sd = getStorageDescriptor(table, partition);
  String serdeName = desc.getSerdeName();
  String oldSerdeName = sd.getSerdeInfo().getSerializationLib();
  // For an ORC table, restrict changing the SerDe, as that can break schema evolution.
  if (AlterTableUtils.isSchemaEvolutionEnabled(table, context.getConf())
      && oldSerdeName.equalsIgnoreCase(OrcSerde.class.getName())
      && !serdeName.equalsIgnoreCase(OrcSerde.class.getName())) {
    throw new HiveException(ErrorMsg.CANNOT_CHANGE_SERDE, OrcSerde.class.getSimpleName(),
        desc.getDbTableName());
  }
  sd.getSerdeInfo().setSerializationLib(serdeName);
  if (MapUtils.isNotEmpty(desc.getProps())) {
    sd.getSerdeInfo().getParameters().putAll(desc.getProps());
  }
  if (partition == null) {
    if (Table.shouldStoreFieldsInMetastore(context.getConf(), serdeName, table.getParameters())
        && !Table.hasMetastoreBasedSchema(context.getConf(), oldSerdeName)) {
      // The new SerDe stores its fields in the metastore but the old one did not,
      // so save the old SerDe's fields now. This may fail if some fields
      // from the old SerDe are too long to be stored in the metastore, but there's nothing we can do.
      try {
        Deserializer oldSerde = HiveMetaStoreUtils.getDeserializer(
            context.getConf(), table.getTTable(), null, false, oldSerdeName);
        table.setFields(Hive.getFieldsFromDeserializer(table.getTableName(), oldSerde));
      } catch (MetaException ex) {
        throw new HiveException(ex);
      }
    }
  }
}
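This operation is what runs behind an ALTER TABLE ... SET SERDE statement. A hedged JDBC sketch that would exercise it; the endpoint, table name, and SerDe choice are illustrative:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.sql.Statement;

public class SetSerdeExample {
  public static void main(String[] args) throws SQLException {
    try (Connection c = DriverManager.getConnection("jdbc:hive2://localhost:10000/default");
         Statement s = c.createStatement()) {
      // Swapping the SerDe on a non-ORC table; on an ORC table with schema
      // evolution enabled, doAlteration above rejects this with CANNOT_CHANGE_SERDE.
      s.execute("ALTER TABLE page_views SET SERDE "
          + "'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' "
          + "WITH SERDEPROPERTIES ('field.delim' = '\\t')");
    }
  }
}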
Use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.
From the class Table, method getFields:
public ArrayList<StructField> getFields() {
  ArrayList<StructField> fields = new ArrayList<StructField>();
  try {
    Deserializer decoder = getDeserializer();
    // Expand out all the columns of the table.
    StructObjectInspector structObjectInspector = (StructObjectInspector) decoder.getObjectInspector();
    List<? extends StructField> fld_lst = structObjectInspector.getAllStructFieldRefs();
    for (StructField field : fld_lst) {
      fields.add(field);
    }
  } catch (SerDeException e) {
    throw new RuntimeException(e);
  }
  return fields;
}
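For example, given a loaded org.apache.hadoop.hive.ql.metadata.Table named tbl (an assumption for illustration), the returned StructFields expose each column's name and ObjectInspector type:

// Sketch: list each column name and the type reported by its ObjectInspector.
for (StructField f : tbl.getFields()) {
  System.out.println(f.getFieldName() + " : " + f.getFieldObjectInspector().getTypeName());
}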
Use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.
From the class SemanticAnalyzer, method genScriptPlan:
@SuppressWarnings("nls")
private Operator genScriptPlan(ASTNode trfm, QB qb, Operator input) throws SemanticException {
  // If there is no "AS" clause, the output schema will be "key,value".
  List<ColumnInfo> outputCols = new ArrayList<ColumnInfo>();
  int inputSerDeNum = 1, inputRecordWriterNum = 2;
  int outputSerDeNum = 4, outputRecordReaderNum = 5;
  int outputColsNum = 6;
  boolean outputColNames = false, outputColSchemas = false;
  int execPos = 3;
  boolean defaultOutputCols = false;
  // Go over all the children.
  if (trfm.getChildCount() > outputColsNum) {
    ASTNode outCols = (ASTNode) trfm.getChild(outputColsNum);
    if (outCols.getType() == HiveParser.TOK_ALIASLIST) {
      outputColNames = true;
    } else if (outCols.getType() == HiveParser.TOK_TABCOLLIST) {
      outputColSchemas = true;
    }
  }
  // If column types are not specified, default to string.
  if (!outputColNames && !outputColSchemas) {
    String intName = getColumnInternalName(0);
    ColumnInfo colInfo = new ColumnInfo(intName, TypeInfoFactory.stringTypeInfo, null, false);
    colInfo.setAlias("key");
    outputCols.add(colInfo);
    intName = getColumnInternalName(1);
    colInfo = new ColumnInfo(intName, TypeInfoFactory.stringTypeInfo, null, false);
    colInfo.setAlias("value");
    outputCols.add(colInfo);
    defaultOutputCols = true;
  } else {
    ASTNode collist = (ASTNode) trfm.getChild(outputColsNum);
    int ccount = collist.getChildCount();
    Set<String> colAliasNamesDuplicateCheck = new HashSet<String>();
    if (outputColNames) {
      for (int i = 0; i < ccount; ++i) {
        String colAlias = unescapeIdentifier(((ASTNode) collist.getChild(i)).getText()).toLowerCase();
        failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias);
        String intName = getColumnInternalName(i);
        ColumnInfo colInfo = new ColumnInfo(intName, TypeInfoFactory.stringTypeInfo, null, false);
        colInfo.setAlias(colAlias);
        outputCols.add(colInfo);
      }
    } else {
      for (int i = 0; i < ccount; ++i) {
        ASTNode child = (ASTNode) collist.getChild(i);
        assert child.getType() == HiveParser.TOK_TABCOL;
        String colAlias = unescapeIdentifier(((ASTNode) child.getChild(0)).getText()).toLowerCase();
        failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias);
        String intName = getColumnInternalName(i);
        ColumnInfo colInfo = new ColumnInfo(intName,
            TypeInfoUtils.getTypeInfoFromTypeString(getTypeStringFromAST((ASTNode) child.getChild(1))),
            null, false);
        colInfo.setAlias(colAlias);
        outputCols.add(colInfo);
      }
    }
  }
  RowResolver out_rwsch = new RowResolver();
  StringBuilder columns = new StringBuilder();
  StringBuilder columnTypes = new StringBuilder();
  for (int i = 0; i < outputCols.size(); ++i) {
    if (i != 0) {
      columns.append(",");
      columnTypes.append(",");
    }
    columns.append(outputCols.get(i).getInternalName());
    columnTypes.append(outputCols.get(i).getType().getTypeName());
    out_rwsch.put(qb.getParseInfo().getAlias(), outputCols.get(i).getAlias(), outputCols.get(i));
  }
  StringBuilder inpColumns = new StringBuilder();
  StringBuilder inpColumnTypes = new StringBuilder();
  List<ColumnInfo> inputSchema = opParseCtx.get(input).getRowResolver().getColumnInfos();
  for (int i = 0; i < inputSchema.size(); ++i) {
    if (i != 0) {
      inpColumns.append(",");
      inpColumnTypes.append(",");
    }
    inpColumns.append(inputSchema.get(i).getInternalName());
    inpColumnTypes.append(inputSchema.get(i).getType().getTypeName());
  }
  TableDesc outInfo;
  TableDesc errInfo;
  TableDesc inInfo;
  String defaultSerdeName = conf.getVar(HiveConf.ConfVars.HIVESCRIPTSERDE);
  Class<? extends Deserializer> serde;
  try {
    serde = (Class<? extends Deserializer>) Class.forName(defaultSerdeName, true,
        Utilities.getSessionSpecifiedClassLoader());
  } catch (ClassNotFoundException e) {
    throw new SemanticException(e);
  }
  int fieldSeparator = Utilities.tabCode;
  if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESCRIPTESCAPE)) {
    fieldSeparator = Utilities.ctrlaCode;
  }
  // Input and output SerDes.
  if (trfm.getChild(inputSerDeNum).getChildCount() > 0) {
    inInfo = getTableDescFromSerDe((ASTNode) (((ASTNode) trfm.getChild(inputSerDeNum))).getChild(0),
        inpColumns.toString(), inpColumnTypes.toString());
  } else {
    // This is not a very clean way and should be modified later. For
    // compatibility reasons, the user sees the results as JSON for custom
    // scripts and has no way of specifying otherwise. Right now it is
    // hard-coded to DelimitedJSONSerDe.
    inInfo = PlanUtils.getTableDesc(DelimitedJSONSerDe.class, Integer.toString(fieldSeparator),
        inpColumns.toString(), inpColumnTypes.toString(), null, false);
  }
  if (trfm.getChild(outputSerDeNum).getChildCount() > 0) {
    outInfo = getTableDescFromSerDe((ASTNode) (((ASTNode) trfm.getChild(outputSerDeNum))).getChild(0),
        columns.toString(), columnTypes.toString());
    // This is for backward compatibility. If the user did not specify the
    // output column list, we assume that there are 2 columns: key and value.
    // However, if the script outputs col1, col2, col3 separated by TAB, the
    // requirement is: key is col1 and value is (col2 TAB col3).
  } else {
    outInfo = PlanUtils.getTableDesc(serde, Integer.toString(fieldSeparator), columns.toString(),
        columnTypes.toString(), null, defaultOutputCols);
  }
  // The error stream always uses the default SerDe with a single column.
  errInfo = PlanUtils.getTableDesc(serde, Integer.toString(Utilities.tabCode), "KEY");
  // Output record readers.
  Class<? extends RecordReader> outRecordReader = getRecordReader((ASTNode) trfm.getChild(outputRecordReaderNum));
  Class<? extends RecordWriter> inRecordWriter = getRecordWriter((ASTNode) trfm.getChild(inputRecordWriterNum));
  Class<? extends RecordReader> errRecordReader = getDefaultRecordReader();
  Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
      new ScriptDesc(fetchFilesNotInLocalFilesystem(stripQuotes(trfm.getChild(execPos).getText())),
          inInfo, inRecordWriter, outInfo, outRecordReader, errRecordReader, errInfo),
      new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch);
  // Disable backtracking.
  output.setColumnExprMap(new HashMap<String, ExprNodeDesc>());
  // Add a URI entity for the transform script; the script is assumed to be local unless downloadable.
  if (conf.getBoolVar(ConfVars.HIVE_CAPTURE_TRANSFORM_ENTITY)) {
    String scriptCmd = getScriptProgName(stripQuotes(trfm.getChild(execPos).getText()));
    getInputs().add(new ReadEntity(new Path(scriptCmd), ResourceDownloader.isFileUri(scriptCmd)));
  }
  return output;
}
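genScriptPlan is the planner behind the TRANSFORM clause. A hedged end-to-end sketch that submits such a query over JDBC; the endpoint and the table src are illustrative. Because there is no AS clause, the output schema defaults to (key string, value string), exactly as built at the top of the method:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

public class TransformExample {
  public static void main(String[] args) throws SQLException {
    try (Connection c = DriverManager.getConnection("jdbc:hive2://localhost:10000/default");
         Statement s = c.createStatement();
         // /bin/cat echoes its input, so each input row comes back as a
         // tab-separated (key, value) pair.
         ResultSet rs = s.executeQuery("SELECT TRANSFORM(id, name) USING '/bin/cat' FROM src")) {
      while (rs.next()) {
        System.out.println(rs.getString(1) + "\t" + rs.getString(2));
      }
    }
  }
}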