
Example 11 with TypeInfoFactory.stringTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo in the Apache Hive project.

From the class TestAccumuloPredicateHandler, method testBinaryRangeGeneration.

@Test
public void testBinaryRangeGeneration() throws Exception {
    List<String> columnNames = Arrays.asList("key", "column");
    List<TypeInfo> columnTypes = Arrays.<TypeInfo>asList(TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo);
    conf.set(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columnNames));
    conf.set(serdeConstants.LIST_COLUMN_TYPES, "int,string");
    String columnMappingStr = ":rowID#b,cf:f1";
    conf.set(AccumuloSerDeParameters.COLUMN_MAPPINGS, columnMappingStr);
    columnMapper = new ColumnMapper(columnMappingStr, ColumnEncoding.STRING.getName(), columnNames, columnTypes);
    int intValue = 100;
    // Write the integer value into a byte array in its binary (big-endian) encoding
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    JavaIntObjectInspector intOI = (JavaIntObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.INT_TYPE_NAME));
    LazyUtils.writePrimitive(baos, intValue, intOI);
    // Build the predicate "100 < key" (the constant is the first child, so the column is on the right)
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "key", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, intValue);
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(constant);
    children.add(column);
    ExprNodeGenericFuncDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPLessThan(), children);
    assertNotNull(node);
    String filterExpr = SerializationUtilities.serializeExpression(node);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
    // Should produce a single range (100, +inf): exclusive start, unbounded end
    List<Range> ranges = handler.getRanges(conf, columnMapper);
    Assert.assertEquals(1, ranges.size());
    Assert.assertEquals(new Range(new Text(baos.toByteArray()), false, null, false), ranges.get(0));
}
Also used: ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc), JavaIntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaIntObjectInspector), ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc), Text (org.apache.hadoop.io.Text), ByteArrayOutputStream (java.io.ByteArrayOutputStream), Range (org.apache.accumulo.core.data.Range), TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo), GenericUDFOPLessThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan), ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), ColumnMapper (org.apache.hadoop.hive.accumulo.columns.ColumnMapper), Test (org.junit.Test)
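
For context, the expected Range above is built directly from the binary encoding of the constant 100. The standalone sketch below (plain java.io, no Hive or Accumulo classes; the class name is invented for illustration) shows the four big-endian bytes involved, assuming LazyUtils.writePrimitive delegates to DataOutputStream for an int column, which is what the test relies on:

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class BinaryIntEncodingSketch {
    public static void main(String[] args) throws IOException {
        // Encode 100 the same way the test builds its expected start row.
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        new DataOutputStream(baos).writeInt(100);
        for (byte b : baos.toByteArray()) {
            System.out.printf("0x%02x ", b);  // prints: 0x00 0x00 0x00 0x64
        }
        // "100 < key" then maps to Range(startRow = these bytes,
        // startInclusive = false, endRow = null, endInclusive = false),
        // i.e. the open interval (100, +inf) over binary-encoded row IDs.
    }
}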

Example 12 with TypeInfoFactory.stringTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo in the Apache Hive project.

From the class TestAccumuloPredicateHandler, method testIgnoreIteratorPushdown.

@Test
public void testIgnoreIteratorPushdown() throws TooManyAccumuloColumnsException {
    // Override what's placed in the Configuration by setup()
    conf = new JobConf();
    List<String> columnNames = Arrays.asList("field1", "field2", "rid");
    List<TypeInfo> columnTypes = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo);
    conf.set(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columnNames));
    conf.set(serdeConstants.LIST_COLUMN_TYPES, "string,int,string");
    String columnMappingStr = "cf:f1,cf:f2,:rowID";
    conf.set(AccumuloSerDeParameters.COLUMN_MAPPINGS, columnMappingStr);
    columnMapper = new ColumnMapper(columnMappingStr, ColumnEncoding.STRING.getName(), columnNames, columnTypes);
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "field1", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrLessThan(), children);
    assertNotNull(node);
    ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "field2", null, false);
    ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 5);
    List<ExprNodeDesc> children2 = Lists.newArrayList();
    children2.add(column2);
    children2.add(constant2);
    ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPGreaterThan(), children2);
    assertNotNull(node2);
    List<ExprNodeDesc> bothFilters = Lists.newArrayList();
    bothFilters.add(node);
    bothFilters.add(node2);
    ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), bothFilters);
    String filterExpr = SerializationUtilities.serializeExpression(both);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
    conf.setBoolean(AccumuloSerDeParameters.ITERATOR_PUSHDOWN_KEY, false);
    try {
        List<IteratorSetting> iterators = handler.getIterators(conf, columnMapper);
        assertEquals(0, iterators.size());
    } catch (Exception e) {
        fail(StringUtils.stringifyException(e));
    }
}
Also used: GenericUDFOPGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan), ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc), GenericUDFOPEqualOrLessThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan), ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc), TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo), TooManyAccumuloColumnsException (org.apache.hadoop.hive.accumulo.serde.TooManyAccumuloColumnsException), SerDeException (org.apache.hadoop.hive.serde2.SerDeException), IteratorSetting (org.apache.accumulo.core.client.IteratorSetting), ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), JobConf (org.apache.hadoop.mapred.JobConf), ColumnMapper (org.apache.hadoop.hive.accumulo.columns.ColumnMapper), GenericUDFOPAnd (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd), Test (org.junit.Test)
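
The handshake exercised here is Hive's standard filter-pushdown pattern: the planner serializes the predicate into the job configuration under TableScanDesc.FILTER_EXPR_CONF_STR, and the storage handler reads it back. A minimal sketch of that round trip, reusing the both and conf variables from the test above (imports and exception handling elided; assumes SerializationUtilities.deserializeExpression is the inverse of serializeExpression, as in Hive 2.x):

// Producer side (what the test does): serialize the AND predicate into
// the configuration that the storage handler will later see.
String serialized = SerializationUtilities.serializeExpression(both);
conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, serialized);

// Consumer side (what AccumuloPredicateHandler does internally): restore
// the expression tree before deciding on ranges and iterators.
ExprNodeGenericFuncDesc restored = SerializationUtilities.deserializeExpression(
    conf.get(TableScanDesc.FILTER_EXPR_CONF_STR));

// With ITERATOR_PUSHDOWN_KEY set to false, getIterators() skips iterator
// generation entirely and returns an empty list, which the test asserts.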

Example 13 with TypeInfoFactory.stringTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo in the Apache Hive project.

From the class TestAccumuloPredicateHandler, method testIteratorIgnoreRowIDFields.

@Test
public void testIteratorIgnoreRowIDFields() {
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrLessThan(), children);
    assertNotNull(node);
    ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "bbb");
    List<ExprNodeDesc> children2 = Lists.newArrayList();
    children2.add(column2);
    children2.add(constant2);
    ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPGreaterThan(), children2);
    assertNotNull(node2);
    List<ExprNodeDesc> bothFilters = Lists.newArrayList();
    bothFilters.add(node);
    bothFilters.add(node2);
    ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), bothFilters);
    String filterExpr = SerializationUtilities.serializeExpression(both);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
    try {
        List<IteratorSetting> iterators = handler.getIterators(conf, columnMapper);
        assertEquals(0, iterators.size());
    } catch (SerDeException e) {
        fail(StringUtils.stringifyException(e));
    }
}
Also used: GenericUDFOPGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan), ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc), GenericUDFOPEqualOrLessThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan), IteratorSetting (org.apache.accumulo.core.client.IteratorSetting), ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc), ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), SerDeException (org.apache.hadoop.hive.serde2.SerDeException), GenericUDFOPAnd (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd), Test (org.junit.Test)
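
No iterators are produced here because both predicates reference the row-ID column: the handler satisfies row-ID predicates by restricting the scan's Ranges rather than by attaching scan-time filter iterators (compare Example 11, which asserts on getRanges). A short sketch of the two complementary calls, using the same conf and columnMapper as above with exception handling elided:

// Predicates on ordinary columns become scan-time filter iterators;
// predicates on the :rowID column become Accumulo Ranges instead.
List<IteratorSetting> iterators = handler.getIterators(conf, columnMapper); // empty: "rid" is the row ID
List<Range> ranges = handler.getRanges(conf, columnMapper);                 // the "rid" predicates land here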

Example 14 with TypeInfoFactory.stringTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo in the Apache Hive project.

From the class TestAccumuloPredicateHandler, method setup.

@Before
public void setup() throws TooManyAccumuloColumnsException {
    FunctionRegistry.getFunctionNames();
    conf = new JobConf();
    List<String> columnNames = Arrays.asList("field1", "rid");
    List<TypeInfo> columnTypes = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
    conf.set(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columnNames));
    conf.set(serdeConstants.LIST_COLUMN_TYPES, "string,string");
    String columnMappingStr = "cf:f1,:rowID";
    conf.set(AccumuloSerDeParameters.COLUMN_MAPPINGS, columnMappingStr);
    columnMapper = new ColumnMapper(columnMappingStr, ColumnEncoding.STRING.getName(), columnNames, columnTypes);
}
Also used: JobConf (org.apache.hadoop.mapred.JobConf), TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo), ColumnMapper (org.apache.hadoop.hive.accumulo.columns.ColumnMapper), Before (org.junit.Before)
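
The column mapping string set up here is the contract the other tests build on. The following annotated snippet summarizes its syntax as it is actually used across these examples (a reading of the test strings, not a complete grammar):

// Column mapping syntax, one entry per Hive column, in column order:
//   :rowID      -> bind this Hive column to the Accumulo row ID
//   cf:f1       -> bind to column family "cf", qualifier "f1"
//   :rowID#b    -> the "#b" suffix requests binary encoding instead of the
//                  default string encoding (see Examples 11 and 15)
String columnMappingStr = "cf:f1,:rowID";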

Example 15 with TypeInfoFactory.stringTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo in the Apache Hive project.

From the class TestAccumuloRowSerializer, method testBinarySerialization.

@Test
public void testBinarySerialization() throws IOException, SerDeException {
    List<String> columns = Arrays.asList("row", "cq1", "cq2", "cq3");
    List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo);
    List<String> typeNames = new ArrayList<String>(types.size());
    for (TypeInfo type : types) {
        typeNames.add(type.getTypeName());
    }
    Properties tableProperties = new Properties();
    tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid,cf:cq1#b,cf:cq2#b,cf:cq3");
    tableProperties.setProperty(serdeConstants.FIELD_DELIM, " ");
    tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns));
    tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(typeNames));
    AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), tableProperties, AccumuloSerDe.class.getSimpleName());
    LazySerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters();
    LazySimpleStructObjectInspector oi = (LazySimpleStructObjectInspector) LazyFactory.createLazyStructInspector(columns, types, serDeParams.getSeparators(), serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
    AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, accumuloSerDeParams.getColumnMappings(), new ColumnVisibility(), accumuloSerDeParams.getRowIdFactory());
    // Create the LazyStruct from the LazySimpleStructObjectInspector
    LazyStruct obj = (LazyStruct) LazyFactory.createLazyObject(oi);
    ByteArrayRef byteRef = new ByteArrayRef();
    byteRef.setData(new byte[] { 'r', 'o', 'w', '1', ' ', '1', '0', ' ', '2', '0', ' ', 'v', 'a', 'l', 'u', 'e' });
    obj.init(byteRef, 0, byteRef.getData().length);
    Mutation m = (Mutation) serializer.serialize(obj, oi);
    Assert.assertArrayEquals("row1".getBytes(), m.getRow());
    List<ColumnUpdate> updates = m.getUpdates();
    Assert.assertEquals(3, updates.size());
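    // Re-encode the expected values the same way the serializer is expected to
    // (big-endian ints via DataOutputStream) for byte-for-byte comparison below.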
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(baos);
    ColumnUpdate update = updates.get(0);
    Assert.assertEquals("cf", new String(update.getColumnFamily()));
    Assert.assertEquals("cq1", new String(update.getColumnQualifier()));
    out.writeInt(10);
    Assert.assertArrayEquals(baos.toByteArray(), update.getValue());
    update = updates.get(1);
    Assert.assertEquals("cf", new String(update.getColumnFamily()));
    Assert.assertEquals("cq2", new String(update.getColumnQualifier()));
    baos.reset();
    out.writeInt(20);
    Assert.assertArrayEquals(baos.toByteArray(), update.getValue());
    update = updates.get(2);
    Assert.assertEquals("cf", new String(update.getColumnFamily()));
    Assert.assertEquals("cq3", new String(update.getColumnQualifier()));
    Assert.assertEquals("value", new String(update.getValue()));
}
Also used: LazySimpleStructObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector), ColumnUpdate (org.apache.accumulo.core.data.ColumnUpdate), Configuration (org.apache.hadoop.conf.Configuration), LazySerDeParameters (org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters), DataOutputStream (java.io.DataOutputStream), ArrayList (java.util.ArrayList), ByteArrayOutputStream (java.io.ByteArrayOutputStream), Properties (java.util.Properties), TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo), ByteArrayRef (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef), ColumnVisibility (org.apache.accumulo.core.security.ColumnVisibility), Mutation (org.apache.accumulo.core.data.Mutation), LazyStruct (org.apache.hadoop.hive.serde2.lazy.LazyStruct), Test (org.junit.Test)
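
Putting the assertions together, the Mutation produced from the input row "row1 10 20 value" under the mapping ":rowid,cf:cq1#b,cf:cq2#b,cf:cq3" has the layout below (a summary of what the test checks, with the binary values written out as the big-endian bytes DataOutputStream.writeInt produces):

// row     -> "row1"               (the :rowid column, UTF-8)
// cf:cq1  -> 0x00 0x00 0x00 0x0a  (10 as a 4-byte big-endian int, per #b)
// cf:cq2  -> 0x00 0x00 0x00 0x14  (20 as a 4-byte big-endian int, per #b)
// cf:cq3  -> "value"              (default string encoding, UTF-8)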

Aggregations

Test (org.junit.Test): 65
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 44
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 36
Text (org.apache.hadoop.io.Text): 34
ArrayList (java.util.ArrayList): 19
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 19
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 17
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 16
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc): 16
VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch): 14
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc): 14
SmallTableGenerationParameters (org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters): 13
DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo): 13
PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory): 12
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 12
Properties (java.util.Properties): 11
BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector): 11
LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector): 11
TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch): 11
Configuration (org.apache.hadoop.conf.Configuration): 10