Use of org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping in project hive by apache.
From the class HiveAccumuloTableInputFormat, the method getPairCollection.
/**
 * Create column family/qualifier pairs from the given column mappings (originally parsed from
 * pipe-separated values in the table properties). Ignores the rowID column.
 *
 * @param columnMappings
 *          The list of ColumnMappings for the given query
 * @return a Set of Pairs of column families and column qualifiers
 */
protected HashSet<Pair<Text, Text>> getPairCollection(List<ColumnMapping> columnMappings) {
  final HashSet<Pair<Text, Text>> pairs = new HashSet<Pair<Text, Text>>();
  for (ColumnMapping columnMapping : columnMappings) {
    if (columnMapping instanceof HiveAccumuloColumnMapping) {
      HiveAccumuloColumnMapping accumuloColumnMapping = (HiveAccumuloColumnMapping) columnMapping;
      Text cf = new Text(accumuloColumnMapping.getColumnFamily());
      Text cq = null;
      // A null cq implies an empty column qualifier
      if (null != accumuloColumnMapping.getColumnQualifier()) {
        cq = new Text(accumuloColumnMapping.getColumnQualifier());
      }
      pairs.add(new Pair<Text, Text>(cf, cq));
    } else if (columnMapping instanceof HiveAccumuloMapColumnMapping) {
      HiveAccumuloMapColumnMapping mapMapping = (HiveAccumuloMapColumnMapping) columnMapping;
      // Can't fetch by a prefix on the colqual; must pull the entire column family
      // TODO use an iterator to do the filtering server-side
      pairs.add(new Pair<Text, Text>(new Text(mapMapping.getColumnFamily()), null));
    }
  }
  log.info("Computed columns to fetch (" + pairs + ") from " + columnMappings);
  return pairs;
}
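For context, this is how a pair collection like the one above is typically applied on the Accumulo side: a pair with a null qualifier selects every qualifier in that family, while a non-null qualifier restricts the scan to that exact cf:cq. A minimal sketch against the standard Accumulo client API; the helper name applyFetchedColumns is ours, not Hive's.

import java.util.Collection;

import org.apache.accumulo.core.client.ScannerBase;
import org.apache.accumulo.core.util.Pair;
import org.apache.hadoop.io.Text;

public class FetchColumnsSketch {
  // Apply the pairs computed by getPairCollection to a scanner. A null
  // qualifier fetches the whole column family; otherwise only the exact cf:cq.
  static void applyFetchedColumns(ScannerBase scanner, Collection<Pair<Text, Text>> pairs) {
    for (Pair<Text, Text> pair : pairs) {
      if (null == pair.getSecond()) {
        scanner.fetchColumnFamily(pair.getFirst());
      } else {
        scanner.fetchColumn(pair.getFirst(), pair.getSecond());
      }
    }
  }
}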
Use of org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping in project hive by apache.
From the class AccumuloRowSerializer, the method serialize.
public Mutation serialize(Object obj, ObjectInspector objInspector) throws SerDeException, IOException {
  if (objInspector.getCategory() != ObjectInspector.Category.STRUCT) {
    throw new SerDeException(getClass().toString() + " can only serialize struct types, but we got: " + objInspector.getTypeName());
  }
  // Prepare the field ObjectInspectors
  StructObjectInspector soi = (StructObjectInspector) objInspector;
  List<? extends StructField> fields = soi.getAllStructFieldRefs();
  List<Object> columnValues = soi.getStructFieldsDataAsList(obj);
  // Fail if we try to access an offset out of bounds
  if (rowIdOffset >= fields.size()) {
    throw new IllegalStateException("Attempted to access field outside of definition for struct. Have " + fields.size() + " fields and tried to access offset " + rowIdOffset);
  }
  StructField field = fields.get(rowIdOffset);
  Object value = columnValues.get(rowIdOffset);
  // The ObjectInspector for the row ID
  ObjectInspector fieldObjectInspector = field.getFieldObjectInspector();
  // Serialize the row component using the RowIdFactory. In the normal case, this will just
  // delegate back to the "local" serializeRowId method
  byte[] data = rowIdFactory.serializeRowId(value, field, output);
  // Set that as the row id in the mutation
  Mutation mutation = new Mutation(data);
  // Each column in the row
  for (int i = 0; i < fields.size(); i++) {
    if (rowIdOffset == i) {
      continue;
    }
    // Get the relevant information for this column
    field = fields.get(i);
    value = columnValues.get(i);
    // Despite having a fixed schema from Hive, we have sparse columns in Accumulo
    if (null == value) {
      continue;
    }
    // The ObjectInspector for the current column
    fieldObjectInspector = field.getFieldObjectInspector();
    // Make sure we got the right implementation of a ColumnMapping
    ColumnMapping mapping = mappings.get(i);
    if (mapping instanceof HiveAccumuloColumnMapping) {
      serializeColumnMapping((HiveAccumuloColumnMapping) mapping, fieldObjectInspector, value, mutation);
    } else if (mapping instanceof HiveAccumuloMapColumnMapping) {
      serializeColumnMapping((HiveAccumuloMapColumnMapping) mapping, fieldObjectInspector, value, mutation);
    } else {
      throw new IllegalArgumentException("Mapping for " + field.getFieldName() + " was not a HiveColumnMapping, but was " + mapping.getClass());
    }
  }
  return mutation;
}
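As a usage illustration, here is a hedged sketch of writing the resulting Mutation to Accumulo with a BatchWriter. The table name hive_table is hypothetical, and construction of the Connector and AccumuloRowSerializer is omitted because both are driven by the table's SerDe configuration.

import org.apache.accumulo.core.client.BatchWriter;
import org.apache.accumulo.core.client.BatchWriterConfig;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.data.Mutation;
import org.apache.hadoop.hive.accumulo.serde.AccumuloRowSerializer;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

public class WriteRowSketch {
  // Serialize one Hive struct into a Mutation and flush it to the
  // (hypothetical) table "hive_table".
  static void writeRow(Connector connector, AccumuloRowSerializer serializer, Object hiveRow,
      ObjectInspector rowInspector) throws Exception {
    BatchWriter writer = connector.createBatchWriter("hive_table", new BatchWriterConfig());
    try {
      Mutation mutation = serializer.serialize(hiveRow, rowInspector);
      writer.addMutation(mutation);
    } finally {
      writer.close();
    }
  }
}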
Use of org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping in project hive by apache.
From the class AccumuloPredicateHandler, the method getIterators.
/**
 * Loop through the search conditions and build iterator settings for predicates involving
 * columns other than the rowID, if any.
 *
 * @param conf
 *          Configuration for the current job
 * @param columnMapper
 *          Mapping of Hive columns to Accumulo column families and qualifiers
 * @return one IteratorSetting per predicate that can be pushed down
 * @throws SerDeException
 */
public List<IteratorSetting> getIterators(Configuration conf, ColumnMapper columnMapper) throws SerDeException {
  List<IteratorSetting> itrs = Lists.newArrayList();
  boolean shouldPushdown = conf.getBoolean(AccumuloSerDeParameters.ITERATOR_PUSHDOWN_KEY, AccumuloSerDeParameters.ITERATOR_PUSHDOWN_DEFAULT);
  if (!shouldPushdown) {
    LOG.info("Iterator pushdown is disabled for this table");
    return itrs;
  }
  boolean binaryEncodedRow = ColumnEncoding.BINARY.getName().equalsIgnoreCase(conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE));
  int rowIdOffset = columnMapper.getRowIdOffset();
  String[] hiveColumnNamesArr = conf.getStrings(serdeConstants.LIST_COLUMNS);
  if (null == hiveColumnNamesArr) {
    throw new IllegalArgumentException("Could not find Hive columns in configuration");
  }
  String hiveRowIdColumnName = null;
  if (rowIdOffset >= 0 && rowIdOffset < hiveColumnNamesArr.length) {
    hiveRowIdColumnName = hiveColumnNamesArr[rowIdOffset];
  }
  List<String> hiveColumnNames = Arrays.asList(hiveColumnNamesArr);
  for (IndexSearchCondition sc : getSearchConditions(conf)) {
    String col = sc.getColumnDesc().getColumn();
    if (hiveRowIdColumnName == null || !hiveRowIdColumnName.equals(col)) {
      HiveAccumuloColumnMapping mapping = (HiveAccumuloColumnMapping) columnMapper.getColumnMappingForHiveColumn(hiveColumnNames, col);
      itrs.add(toSetting(mapping, sc, binaryEncodedRow));
    }
  }
  LOG.info("num iterators = " + itrs.size());
  return itrs;
}
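To make the output concrete, here is a rough sketch of the kind of IteratorSetting that toSetting builds for one pushed-down predicate, say name = 'alice'. The priority and iterator name are arbitrary for this sketch, the option keys are the PrimitiveComparisonFilter constants used in its init method (shown next), and the exact key and wire encoding of the comparison constant are assumptions here.

import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.hadoop.hive.accumulo.predicate.PrimitiveComparisonFilter;
import org.apache.hadoop.hive.accumulo.predicate.compare.Equal;
import org.apache.hadoop.hive.accumulo.predicate.compare.StringCompare;

public class PredicateSettingSketch {
  static IteratorSetting equalsPredicate() {
    // Priority and name are arbitrary for this sketch
    IteratorSetting setting = new IteratorSetting(50, "hive-pushdown-1", PrimitiveComparisonFilter.class);
    // Which comparison class decodes and compares the cell bytes
    setting.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, StringCompare.class.getName());
    // Which operator (=, !=, <, ...) is applied to the comparison
    setting.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, Equal.class.getName());
    // The cf:cq the predicate applies to
    setting.addOption(PrimitiveComparisonFilter.COLUMN, "person:name");
    // The comparison constant; its real encoding on the wire is an assumption here
    setting.addOption(PrimitiveComparisonFilter.CONST_VAL, "alice");
    return setting;
  }
}

Outside of MapReduce, such a setting would be attached to a plain scanner with scanner.addScanIterator(setting).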
Use of org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping in project hive by apache.
From the class PrimitiveComparisonFilter, the method init.
@Override
public void init(SortedKeyValueIterator<Key, Value> source, Map<String, String> options, IteratorEnvironment env) throws IOException {
  super.init(source, options, env);
  String serializedColumnMapping = options.get(COLUMN);
  Entry<String, String> pair = ColumnMappingFactory.parseMapping(serializedColumnMapping);
  // The ColumnEncoding, column name and type are all irrelevant at this point, just need the
  // cf:[cq]
  columnMapping = new HiveAccumuloColumnMapping(pair.getKey(), pair.getValue(), ColumnEncoding.STRING, "column", "string");
  columnMappingFamily = new Text(columnMapping.getColumnFamily());
  columnMappingQualifier = new Text(columnMapping.getColumnQualifier());
  cfHolder = new Text();
  cqHolder = new Text();
  try {
    Class<?> pClass = JavaUtils.loadClass(options.get(P_COMPARE_CLASS));
    Class<?> cClazz = JavaUtils.loadClass(options.get(COMPARE_OPT_CLASS));
    PrimitiveComparison pCompare = pClass.asSubclass(PrimitiveComparison.class).newInstance();
    compOpt = cClazz.asSubclass(CompareOp.class).newInstance();
    byte[] constant = getConstant(options);
    pCompare.init(constant);
    compOpt.setPrimitiveCompare(pCompare);
  } catch (ClassNotFoundException e) {
    throw new IOException(e);
  } catch (InstantiationException e) {
    throw new IOException(e);
  } catch (IllegalAccessException e) {
    throw new IOException(e);
  }
}
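The two classes loaded reflectively above form a small two-level model: a PrimitiveComparison decodes and compares raw cell bytes against the constant, and a CompareOp turns that comparison into the filter's boolean verdict. A minimal sketch using two concrete implementations from Hive's org.apache.hadoop.hive.accumulo.predicate.compare package; init and setPrimitiveCompare appear verbatim in the code above, while the accept(byte[]) call is our assumption about the CompareOp contract.

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.hive.accumulo.predicate.compare.Equal;
import org.apache.hadoop.hive.accumulo.predicate.compare.StringCompare;

public class CompareSketch {
  public static void main(String[] args) {
    // Primitive comparison initialized with the predicate constant, as in init()
    StringCompare pCompare = new StringCompare();
    pCompare.init("alice".getBytes(StandardCharsets.UTF_8));
    // The operator wraps the primitive comparison, again mirroring init()
    Equal equal = new Equal();
    equal.setPrimitiveCompare(pCompare);
    // accept(byte[]) is assumed to report whether a cell's value matches
    System.out.println(equal.accept("alice".getBytes(StandardCharsets.UTF_8))); // true
    System.out.println(equal.accept("bob".getBytes(StandardCharsets.UTF_8))); // false
  }
}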
Use of org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping in project hive by apache.
From the class TestHiveAccumuloTableInputFormat, the method testColumnMappingsToPairs.
@Test
public void testColumnMappingsToPairs() {
  List<ColumnMapping> mappings = new ArrayList<ColumnMapping>();
  Set<Pair<Text, Text>> columns = new HashSet<Pair<Text, Text>>();
  // Row ID, which getPairCollection ignores
  mappings.add(new HiveAccumuloRowIdColumnMapping(AccumuloHiveConstants.ROWID, ColumnEncoding.STRING, "row", TypeInfoFactory.stringTypeInfo.toString()));
  // Some cf:cq mappings
  mappings.add(new HiveAccumuloColumnMapping("person", "name", ColumnEncoding.STRING, "col1", TypeInfoFactory.stringTypeInfo.toString()));
  mappings.add(new HiveAccumuloColumnMapping("person", "age", ColumnEncoding.STRING, "col2", TypeInfoFactory.stringTypeInfo.toString()));
  mappings.add(new HiveAccumuloColumnMapping("person", "height", ColumnEncoding.STRING, "col3", TypeInfoFactory.stringTypeInfo.toString()));
  // A cf:cq in a different column family
  mappings.add(new HiveAccumuloColumnMapping("city", "name", ColumnEncoding.STRING, "col4", TypeInfoFactory.stringTypeInfo.toString()));
  columns.add(new Pair<Text, Text>(new Text("person"), new Text("name")));
  columns.add(new Pair<Text, Text>(new Text("person"), new Text("age")));
  columns.add(new Pair<Text, Text>(new Text("person"), new Text("height")));
  // Every mapping here has an explicit qualifier, so an exact cf:cq pair is expected;
  // a null qualifier would instead select all qualifiers in that family
  columns.add(new Pair<Text, Text>(new Text("city"), new Text("name")));
  assertEquals(columns, inputformat.getPairCollection(mappings));
}