
Example 1 with ColumnMapping

use of org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping in project hive by apache.

the class HBaseSerDeHelper method generateColumnTypes.

/**
   * Autogenerates the column types from the given serialization class.
   * 
   * @param tbl the hive table properties
   * @param columnsMapping the hbase columns mapping determining hbase column families and
   *          qualifiers
   * @param sb StringBuilder in which the generated list of column types is accumulated
   * @param conf configuration
   * @throws IllegalArgumentException if any of the given arguments is null
   * @throws SerDeException if there was an error generating the column types
   */
public static void generateColumnTypes(Properties tbl, List<ColumnMapping> columnsMapping, StringBuilder sb, Configuration conf) throws SerDeException {
    if (tbl == null) {
        throw new IllegalArgumentException("tbl cannot be null");
    }
    if (columnsMapping == null) {
        throw new IllegalArgumentException("columnsMapping cannot be null");
    }
    if (sb == null) {
        throw new IllegalArgumentException("StringBuilder cannot be null");
    }
    // Generate the columns according to the column mapping provided
    for (int i = 0; i < columnsMapping.size(); i++) {
        if (sb.length() > 0) {
            sb.append(":");
        }
        ColumnMapping colMap = columnsMapping.get(i);
        if (colMap.hbaseRowKey) {
            Map<String, String> compositeKeyParts = getCompositeKeyParts(tbl);
            StringBuilder keyStruct = new StringBuilder();
            if (compositeKeyParts == null || compositeKeyParts.isEmpty()) {
                String compKeyClass = tbl.getProperty(HBaseSerDe.HBASE_COMPOSITE_KEY_CLASS);
                String compKeyTypes = tbl.getProperty(HBaseSerDe.HBASE_COMPOSITE_KEY_TYPES);
                if (compKeyTypes == null) {
                    if (compKeyClass != null) {
                        // a composite key class was provided, but the key types property was
                        // not set and the getParts method was not overridden in that
                        // implementation. Flag an exception.
                        throw new SerDeException("Either the hbase.composite.key.types property should be set or the getParts method must be overridden in " + compKeyClass);
                    }
                    // the row key column becomes a STRING
                    sb.append(serdeConstants.STRING_TYPE_NAME);
                } else {
                    generateKeyStruct(compKeyTypes, keyStruct);
                }
            } else {
                generateKeyStruct(compositeKeyParts, keyStruct);
            }
            sb.append(keyStruct);
        } else if (colMap.qualifierName == null) {
            String serClassName = null;
            String serType = null;
            String schemaLiteral = null;
            String schemaUrl = null;
            if (colMap.qualifierPrefix != null) {
                serType = tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "." + HBaseSerDe.SERIALIZATION_TYPE);
                if (serType == null) {
                    throw new SerDeException(HBaseSerDe.SERIALIZATION_TYPE + " property not provided for column family [" + colMap.familyName + "] and prefix [" + colMap.qualifierPrefix + "]");
                }
                // we are provided with a prefix
                serClassName = tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "." + serdeConstants.SERIALIZATION_CLASS);
                if (serClassName == null) {
                    if (serType.equalsIgnoreCase(HBaseSerDeParameters.AVRO_SERIALIZATION_TYPE)) {
                        // for avro type, the serialization class parameter is optional
                        schemaLiteral = tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "." + AvroTableProperties.SCHEMA_LITERAL.getPropName());
                        schemaUrl = tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "." + AvroTableProperties.SCHEMA_URL.getPropName());
                        if (schemaLiteral == null && schemaUrl == null) {
                            // either the schema literal, the schema url or the
                            // serialization class must be provided
                            throw new SerDeException("For an avro schema, either " + AvroTableProperties.SCHEMA_LITERAL.getPropName() + ", " + AvroTableProperties.SCHEMA_URL.getPropName() + " or " + serdeConstants.SERIALIZATION_CLASS + " property must be set.");
                        }
                        if (schemaUrl != null) {
                            schemaLiteral = getSchemaFromFS(schemaUrl, conf).toString();
                        }
                    } else {
                        throw new SerDeException(serdeConstants.SERIALIZATION_CLASS + " property not provided for column family [" + colMap.familyName + "] and prefix [" + colMap.qualifierPrefix + "]");
                    }
                }
            } else {
                serType = tbl.getProperty(colMap.familyName + "." + HBaseSerDe.SERIALIZATION_TYPE);
                if (serType == null) {
                    throw new SerDeException(HBaseSerDe.SERIALIZATION_TYPE + " property not provided for column family [" + colMap.familyName + "]");
                }
                serClassName = tbl.getProperty(colMap.familyName + "." + serdeConstants.SERIALIZATION_CLASS);
                if (serClassName == null) {
                    if (serType.equalsIgnoreCase(AVRO_SERIALIZATION_TYPE)) {
                        // for avro type, the serialization class parameter is optional
                        schemaLiteral = tbl.getProperty(colMap.familyName + "." + AvroTableProperties.SCHEMA_LITERAL.getPropName());
                        schemaUrl = tbl.getProperty(colMap.familyName + "." + AvroTableProperties.SCHEMA_URL.getPropName());
                        if (schemaLiteral == null && schemaUrl == null) {
                            // either schema literal or serialization class must be provided
                            throw new SerDeException("For an avro schema, either " + AvroTableProperties.SCHEMA_LITERAL.getPropName() + " property or " + serdeConstants.SERIALIZATION_CLASS + " property must be set.");
                        }
                        if (schemaUrl != null) {
                            schemaLiteral = getSchemaFromFS(schemaUrl, conf).toString();
                        }
                    } else {
                        throw new SerDeException(serdeConstants.SERIALIZATION_CLASS + " property not provided for column family [" + colMap.familyName + "]");
                    }
                }
            }
            StringBuilder generatedStruct = new StringBuilder();
            // generate struct for each of the given prefixes
            generateColumnStruct(serType, serClassName, schemaLiteral, colMap, generatedStruct);
            // a column family becomes a MAP
            sb.append(serdeConstants.MAP_TYPE_NAME + "<" + serdeConstants.STRING_TYPE_NAME + "," + generatedStruct + ">");
        } else {
            String qualifierName = colMap.qualifierName;
            if (colMap.qualifierName.endsWith("*")) {
                // we are provided with a prefix
                qualifierName = colMap.qualifierName.substring(0, colMap.qualifierName.length() - 1);
            }
            String serType = tbl.getProperty(colMap.familyName + "." + qualifierName + "." + HBaseSerDe.SERIALIZATION_TYPE);
            if (serType == null) {
                throw new SerDeException(HBaseSerDe.SERIALIZATION_TYPE + " property not provided for column family [" + colMap.familyName + "] and qualifier [" + qualifierName + "]");
            }
            String serClassName = tbl.getProperty(colMap.familyName + "." + qualifierName + "." + serdeConstants.SERIALIZATION_CLASS);
            String schemaLiteral = null;
            String schemaUrl = null;
            if (serClassName == null) {
                if (serType.equalsIgnoreCase(AVRO_SERIALIZATION_TYPE)) {
                    // for avro type, the serialization class parameter is optional
                    schemaLiteral = tbl.getProperty(colMap.familyName + "." + qualifierName + "." + AvroTableProperties.SCHEMA_LITERAL.getPropName());
                    schemaUrl = tbl.getProperty(colMap.familyName + "." + qualifierName + "." + AvroTableProperties.SCHEMA_URL.getPropName());
                    if (schemaLiteral == null && schemaUrl == null) {
                        // either the schema literal, the schema url or the
                        // serialization class must be provided
                        throw new SerDeException("For an avro schema, either " + AvroTableProperties.SCHEMA_LITERAL.getPropName() + ", " + AvroTableProperties.SCHEMA_URL.getPropName() + " or " + serdeConstants.SERIALIZATION_CLASS + " property must be set.");
                    }
                    if (schemaUrl != null) {
                        schemaLiteral = getSchemaFromFS(schemaUrl, conf).toString();
                    }
                } else {
                    throw new SerDeException(serdeConstants.SERIALIZATION_CLASS + " property not provided for column family [" + colMap.familyName + "] and qualifier [" + qualifierName + "]");
                }
            }
            StringBuilder generatedStruct = new StringBuilder();
            generateColumnStruct(serType, serClassName, schemaLiteral, colMap, generatedStruct);
            sb.append(generatedStruct);
        }
    }
    // trim off ending ",", if any
    trim(sb);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Generated column types: [" + sb.toString() + "]");
    }
}
Also used : ColumnMapping(org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)
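
For context, here is a minimal driver sketch showing how the method above can be fed. This is hypothetical, not from the Hive sources: the mapping string, the toy Avro schema and the buildColumnTypes wrapper are invented for illustration, while every property key mirrors a lookup in generateColumnTypes. It assumes the imports already listed for this class plus java.util.Arrays, java.util.Properties and org.apache.hadoop.conf.Configuration.

public static String buildColumnTypes() throws SerDeException {
    Properties tbl = new Properties();
    // one row key column plus one Avro-serialized qualifier "cf:a"
    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cf:a");
    tbl.setProperty("cf.a." + HBaseSerDe.SERIALIZATION_TYPE, "avro");
    // toy schema; for the avro type, schema.literal stands in for the serialization class
    tbl.setProperty("cf.a." + AvroTableProperties.SCHEMA_LITERAL.getPropName(),
        "{\"type\":\"record\",\"name\":\"R\",\"fields\":[{\"name\":\"f1\",\"type\":\"string\"}]}");
    ColumnMappings mappings = HBaseSerDe.parseColumnsMapping(tbl.getProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING));
    StringBuilder sb = new StringBuilder();
    HBaseSerDeHelper.generateColumnTypes(tbl, Arrays.asList(mappings.getColumnsMapping()), sb, new Configuration());
    // the row key becomes "string" and the avro column a struct type,
    // joined with ':' as in the loop above
    return sb.toString();
}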

Example 2 with ColumnMapping

use of org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping in project hive by apache.

the class TestLazyHBaseObject method testLazyHBaseRow3.

/**
   * Test the LazyHBaseRow class with a one-to-one/onto mapping between Hive columns and
   * HBase column family/column qualifier pairs. The column types are primitive and fields
   * are stored in binary format in HBase.
   * @throws SerDeException
   */
public void testLazyHBaseRow3() throws SerDeException {
    List<TypeInfo> fieldTypeInfos = TypeInfoUtils.getTypeInfosFromTypeString("string,int,tinyint,smallint,bigint,float,double,string,boolean");
    List<String> fieldNames = Arrays.asList(new String[] { "key", "c_int", "c_byte", "c_short", "c_long", "c_float", "c_double", "c_string", "c_bool" });
    Text nullSequence = new Text("\\N");
    String hbaseColumnsMapping = ":key#str,cf-int:cq-int#bin,cf-byte:cq-byte#bin," + "cf-short:cq-short#bin,cf-long:cq-long#bin,cf-float:cq-float#bin,cf-double:cq-double#bin," + "cf-string:cq-string#str,cf-bool:cq-bool#bin";
    ColumnMappings columnMappings = null;
    try {
        columnMappings = HBaseSerDe.parseColumnsMapping(hbaseColumnsMapping);
    } catch (SerDeException e) {
        fail(e.toString());
    }
    ColumnMapping[] columnsMapping = columnMappings.getColumnsMapping();
    for (int i = 0; i < columnsMapping.length; i++) {
        ColumnMapping colMap = columnsMapping[i];
        if (i == 0 || i == 7) {
            colMap.binaryStorage.add(false);
        } else {
            colMap.binaryStorage.add(true);
        }
    }
    ObjectInspector oi = LazyFactory.createLazyStructInspector(fieldNames, fieldTypeInfos, new byte[] { ' ', ':', '=' }, nullSequence, false, false, (byte) 0);
    LazyHBaseRow o = new LazyHBaseRow((LazySimpleStructObjectInspector) oi, columnMappings);
    byte[] rowKey = "row-key".getBytes();
    List<KeyValue> kvs = new ArrayList<KeyValue>();
    byte[] value;
    for (int i = 1; i < columnsMapping.length; i++) {
        switch(i) {
            case 1:
                value = Bytes.toBytes(1);
                break;
            case 2:
                value = new byte[] { (byte) 1 };
                break;
            case 3:
                value = Bytes.toBytes((short) 1);
                break;
            case 4:
                value = Bytes.toBytes(1L);
                break;
            case 5:
                value = Bytes.toBytes(1.0F);
                break;
            case 6:
                value = Bytes.toBytes(1.0);
                break;
            case 7:
                value = "Hadoop, Hive, with HBase storage handler.".getBytes();
                break;
            case 8:
                value = Bytes.toBytes(true);
                break;
            default:
                throw new RuntimeException("Not expected: " + i);
        }
        ColumnMapping colMap = columnsMapping[i];
        kvs.add(new KeyValue(rowKey, colMap.familyNameBytes, colMap.qualifierNameBytes, value));
    }
    Collections.sort(kvs, KeyValue.COMPARATOR);
    Result result = new Result(kvs);
    o.init(result);
    List<? extends StructField> fieldRefs = ((StructObjectInspector) oi).getAllStructFieldRefs();
    for (int i = 0; i < fieldRefs.size(); i++) {
        Object fieldData = ((StructObjectInspector) oi).getStructFieldData(o, fieldRefs.get(i));
        assert (fieldData != null);
        assert (fieldData instanceof LazyPrimitive<?, ?>);
        Writable writable = ((LazyPrimitive<?, ?>) fieldData).getWritableObject();
        switch(i) {
            case 0:
                Text text = new Text("row-key");
                assertEquals(text, writable);
                break;
            case 1:
                IntWritable iw = new IntWritable(1);
                assertEquals(iw, writable);
                break;
            case 2:
                ByteWritable bw = new ByteWritable((byte) 1);
                assertEquals(bw, writable);
                break;
            case 3:
                ShortWritable sw = new ShortWritable((short) 1);
                assertEquals(sw, writable);
                break;
            case 4:
                LongWritable lw = new LongWritable(1);
                assertEquals(lw, writable);
                break;
            case 5:
                FloatWritable fw = new FloatWritable(1.0F);
                assertEquals(fw, writable);
                break;
            case 6:
                DoubleWritable dw = new DoubleWritable(1.0);
                assertEquals(dw, writable);
                break;
            case 7:
                Text t = new Text("Hadoop, Hive, with HBase storage handler.");
                assertEquals(t, writable);
                break;
            case 8:
                BooleanWritable boolWritable = new BooleanWritable(true);
                assertEquals(boolWritable, writable);
                break;
            default:
                fail("Error: Unanticipated value in deserializing fields for HBaseSerDe.");
                break;
        }
    }
}
Also used : KeyValue(org.apache.hadoop.hbase.KeyValue) ArrayList(java.util.ArrayList) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) Writable(org.apache.hadoop.io.Writable) LongWritable(org.apache.hadoop.io.LongWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) IntWritable(org.apache.hadoop.io.IntWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) FloatWritable(org.apache.hadoop.io.FloatWritable) LazyString(org.apache.hadoop.hive.serde2.lazy.LazyString) LazyPrimitive(org.apache.hadoop.hive.serde2.lazy.LazyPrimitive) Result(org.apache.hadoop.hbase.client.Result) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) ColumnMapping(org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Text(org.apache.hadoop.io.Text) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
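
As a quick orientation, a short sketch (not part of the test) of what parseColumnsMapping yields for such a mapping string. The assertions below hold for the fields the test reads; note that the test still populates colMap.binaryStorage by hand, which is what later tells the deserializer to decode a cell with Bytes rather than as text.

ColumnMappings mappings = HBaseSerDe.parseColumnsMapping(":key#str,cf-int:cq-int#bin");
ColumnMapping keyMap = mappings.getColumnsMapping()[0];
// ":key" denotes the HBase row key rather than a family:qualifier pair
assert keyMap.hbaseRowKey;
ColumnMapping intMap = mappings.getColumnsMapping()[1];
assert "cf-int".equals(intMap.familyName) && "cq-int".equals(intMap.qualifierName);
// declare binary storage for the int column, as the loop in the test does
intMap.binaryStorage.add(true);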

Example 3 with ColumnMapping

use of org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping in project hive by apache.

the class TestLazyHBaseObject method testLazyHBaseRow1.

/**
   * Test the LazyHBaseRow class with one-for-one mappings between
   * Hive fields and HBase columns.
   * @throws SerDeException
   */
public void testLazyHBaseRow1() throws SerDeException {
    List<TypeInfo> fieldTypeInfos = TypeInfoUtils.getTypeInfosFromTypeString("string,int,array<string>,map<string,string>,string");
    List<String> fieldNames = Arrays.asList("key", "a", "b", "c", "d");
    Text nullSequence = new Text("\\N");
    String hbaseColsMapping = ":key,cfa:a,cfa:b,cfb:c,cfb:d";
    ColumnMappings columnMappings = null;
    try {
        columnMappings = HBaseSerDe.parseColumnsMapping(hbaseColsMapping);
    } catch (SerDeException e) {
        fail(e.toString());
    }
    for (ColumnMapping colMap : columnMappings) {
        if (!colMap.hbaseRowKey && colMap.qualifierName == null) {
            colMap.binaryStorage.add(false);
            colMap.binaryStorage.add(false);
        } else {
            colMap.binaryStorage.add(false);
        }
    }
    ObjectInspector oi = LazyFactory.createLazyStructInspector(fieldNames, fieldTypeInfos, new byte[] { ' ', ':', '=' }, nullSequence, false, false, (byte) 0);
    LazyHBaseRow o = new LazyHBaseRow((LazySimpleStructObjectInspector) oi, columnMappings);
    List<KeyValue> kvs = new ArrayList<KeyValue>();
    kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfa"), Bytes.toBytes("a"), Bytes.toBytes("123")));
    kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfa"), Bytes.toBytes("b"), Bytes.toBytes("a:b:c")));
    kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfb"), Bytes.toBytes("c"), Bytes.toBytes("d=e:f=g")));
    kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfb"), Bytes.toBytes("d"), Bytes.toBytes("hi")));
    Result r = new Result(kvs);
    o.init(r);
    assertEquals(("{'key':'test-row','a':123,'b':['a','b','c']," + "'c':{'d':'e','f':'g'},'d':'hi'}").replace("'", "\""), SerDeUtils.getJSONString(o, oi));
    kvs.clear();
    kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfa"), Bytes.toBytes("a"), Bytes.toBytes("123")));
    kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfb"), Bytes.toBytes("c"), Bytes.toBytes("d=e:f=g")));
    r = new Result(kvs);
    o.init(r);
    assertEquals(("{'key':'test-row','a':123,'b':null," + "'c':{'d':'e','f':'g'},'d':null}").replace("'", "\""), SerDeUtils.getJSONString(o, oi));
    kvs.clear();
    kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfa"), Bytes.toBytes("b"), Bytes.toBytes("a")));
    kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfb"), Bytes.toBytes("c"), Bytes.toBytes("d=\\N:f=g:h")));
    kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfb"), Bytes.toBytes("d"), Bytes.toBytes("no")));
    r = new Result(kvs);
    o.init(r);
    assertEquals(("{'key':'test-row','a':null,'b':['a']," + "'c':{'d':null,'f':'g','h':null},'d':'no'}").replace("'", "\""), SerDeUtils.getJSONString(o, oi));
    kvs.clear();
    kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfa"), Bytes.toBytes("b"), Bytes.toBytes(":a::")));
    kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfb"), Bytes.toBytes("d"), Bytes.toBytes("no")));
    r = new Result(kvs);
    o.init(r);
    assertEquals(("{'key':'test-row','a':null,'b':['','a','','']," + "'c':null,'d':'no'}").replace("'", "\""), SerDeUtils.getJSONString(o, oi));
    // This is intentionally duplicated because of HIVE-3179
    assertEquals(("{'key':'test-row','a':null,'b':['','a','','']," + "'c':null,'d':'no'}").replace("'", "\""), SerDeUtils.getJSONString(o, oi));
    kvs.clear();
    kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfa"), Bytes.toBytes("a"), Bytes.toBytes("123")));
    kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfa"), Bytes.toBytes("b"), Bytes.toBytes("")));
    kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfb"), Bytes.toBytes("c"), Bytes.toBytes("")));
    kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfb"), Bytes.toBytes("d"), Bytes.toBytes("")));
    r = new Result(kvs);
    o.init(r);
    assertEquals("{'key':'test-row','a':123,'b':[],'c':{},'d':''}".replace("'", "\""), SerDeUtils.getJSONString(o, oi));
}
Also used : LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) KeyValue(org.apache.hadoop.hbase.KeyValue) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) LazyString(org.apache.hadoop.hive.serde2.lazy.LazyString) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) Result(org.apache.hadoop.hbase.client.Result) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) ColumnMapping(org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping)
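
The separator bytes { ' ', ':', '=' } handed to createLazyStructInspector explain every cell literal above: ':' splits collection items, '=' splits map keys from values, and the nullSequence "\\N" marks NULL. Below is a standalone decoding sketch for one map-typed cell; it illustrates the wire format only, not how LazyHBaseRow is actually implemented, and assumes java.util.Map/LinkedHashMap imports.

// decode "d=\\N:f=g:h" the way the third assertion above expects
String cell = "d=\\N:f=g:h";
Map<String, String> decoded = new LinkedHashMap<String, String>();
for (String entry : cell.split(":")) {
    String[] kv = entry.split("=", 2);
    // a missing value and the null sequence "\N" both decode to null
    decoded.put(kv[0], (kv.length < 2 || "\\N".equals(kv[1])) ? null : kv[1]);
}
// decoded -> {d=null, f=g, h=null}, matching 'c':{'d':null,'f':'g','h':null}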

Example 4 with ColumnMapping

use of org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping in project hive by apache.

the class HBaseStorageHandler method decomposePredicate.

public static DecomposedPredicate decomposePredicate(JobConf jobConf, HBaseSerDe hBaseSerDe, ExprNodeDesc predicate) {
    ColumnMapping keyMapping = hBaseSerDe.getHBaseSerdeParam().getKeyColumnMapping();
    ColumnMapping tsMapping = hBaseSerDe.getHBaseSerdeParam().getTimestampColumnMapping();
    IndexPredicateAnalyzer analyzer = HiveHBaseTableInputFormat.newIndexPredicateAnalyzer(keyMapping.columnName, keyMapping.isComparable(), tsMapping == null ? null : tsMapping.columnName);
    List<IndexSearchCondition> conditions = new ArrayList<IndexSearchCondition>();
    ExprNodeGenericFuncDesc pushedPredicate = null;
    ExprNodeGenericFuncDesc residualPredicate = (ExprNodeGenericFuncDesc) analyzer.analyzePredicate(predicate, conditions);
    for (List<IndexSearchCondition> searchConditions : HiveHBaseInputFormatUtil.decompose(conditions).values()) {
        int scSize = searchConditions.size();
        if (scSize < 1 || 2 < scSize) {
            // Either there was nothing which could be pushed down (size = 0),
            // or there were complex predicates which we don't support yet.
            // Currently supported predicates take one of the forms:
            // 1. key < 20                        (size = 1)
            // 2. key = 20                        (size = 1)
            // 3. key < 20 and key > 10           (size = 2)
            // Add to residual
            residualPredicate = extractResidualCondition(analyzer, searchConditions, residualPredicate);
            continue;
        }
        if (scSize == 2 && (searchConditions.get(0).getComparisonOp().equals(GenericUDFOPEqual.class.getName()) || searchConditions.get(1).getComparisonOp().equals(GenericUDFOPEqual.class.getName()))) {
            // If one of the predicates is =, then any other predicate with it is illegal.
            // Add to residual
            residualPredicate = extractResidualCondition(analyzer, searchConditions, residualPredicate);
            continue;
        }
        boolean sameType = sameTypeIndexSearchConditions(searchConditions);
        if (!sameType) {
            // If type for column and constant are different, we currently do not support pushing them
            residualPredicate = extractResidualCondition(analyzer, searchConditions, residualPredicate);
            continue;
        }
        TypeInfo typeInfo = searchConditions.get(0).getColumnDesc().getTypeInfo();
        if (typeInfo.getCategory() == Category.PRIMITIVE && PrimitiveObjectInspectorUtils.getPrimitiveGrouping(((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()) == PrimitiveGrouping.NUMERIC_GROUP) {
            // An open range on a numeric column (e.g. key < 20) is not pushed
            // down: negative values sort after positive values in HBase's
            // lexicographic byte order, so wrong rows would be returned.
            if (scSize == 2) {
                boolean lowerBound = false;
                boolean upperBound = false;
                if (searchConditions.get(0).getComparisonOp().equals(GenericUDFOPEqualOrLessThan.class.getName()) || searchConditions.get(0).getComparisonOp().equals(GenericUDFOPLessThan.class.getName())) {
                    lowerBound = true;
                } else {
                    upperBound = true;
                }
                if (searchConditions.get(1).getComparisonOp().equals(GenericUDFOPEqualOrGreaterThan.class.getName()) || searchConditions.get(1).getComparisonOp().equals(GenericUDFOPGreaterThan.class.getName())) {
                    upperBound = true;
                } else {
                    lowerBound = true;
                }
                if (!upperBound || !lowerBound) {
                    // Not valid range, add to residual
                    residualPredicate = extractResidualCondition(analyzer, searchConditions, residualPredicate);
                    continue;
                }
            } else {
                // scSize == 1
                if (!searchConditions.get(0).getComparisonOp().equals(GenericUDFOPEqual.class.getName())) {
                    // Not valid range, add to residual
                    residualPredicate = extractResidualCondition(analyzer, searchConditions, residualPredicate);
                    continue;
                }
            }
        }
        // This one can be pushed
        pushedPredicate = extractStorageHandlerCondition(analyzer, searchConditions, pushedPredicate);
    }
    DecomposedPredicate decomposedPredicate = new DecomposedPredicate();
    decomposedPredicate.pushedPredicate = pushedPredicate;
    decomposedPredicate.residualPredicate = residualPredicate;
    return decomposedPredicate;
}
Also used : IndexSearchCondition(org.apache.hadoop.hive.ql.index.IndexSearchCondition) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) ColumnMapping(org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping) IndexPredicateAnalyzer(org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer)
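
Stripped of the plan-level plumbing, the branching above reduces to a few rules per key column. The helper below is a loose, hypothetical restatement (not in the Hive sources): it operates on operator class names instead of IndexSearchCondition objects, omits the same-type check, and assumes only the four comparison UDFs used above ever reach the two-operator branch.

static boolean isPushable(List<String> ops, boolean numericColumn) {
    if (ops.size() < 1 || ops.size() > 2) {
        return false; // nothing simple enough to push down
    }
    boolean hasEquals = ops.contains(GenericUDFOPEqual.class.getName());
    if (ops.size() == 2 && hasEquals) {
        return false; // '=' may not be combined with any other predicate
    }
    if (!numericColumn) {
        return true; // lexicographic byte order matches the value order
    }
    if (ops.size() == 1) {
        return hasEquals; // an open numeric range would return wrong rows
    }
    // two numeric comparisons must bound the key from both sides,
    // i.e. one '<'-type operator and one '>'-type operator in either order
    boolean firstIsLess = ops.get(0).equals(GenericUDFOPLessThan.class.getName())
        || ops.get(0).equals(GenericUDFOPEqualOrLessThan.class.getName());
    boolean secondIsLess = ops.get(1).equals(GenericUDFOPLessThan.class.getName())
        || ops.get(1).equals(GenericUDFOPEqualOrLessThan.class.getName());
    return firstIsLess != secondIsLess;
}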

Example 5 with ColumnMapping

use of org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping in project hive by apache.

the class HBaseStorageHandler method preCreateTable.

@Override
public void preCreateTable(Table tbl) throws MetaException {
    boolean isExternal = MetaStoreUtils.isExternalTable(tbl);
    // We'd like to move this check to HiveMetaStore for any non-native table,
    // but first we need to support storing NULL for location on a table
    if (tbl.getSd().getLocation() != null) {
        throw new MetaException("LOCATION may not be specified for HBase.");
    }
    HTable htable = null;
    try {
        String tableName = getHBaseTableName(tbl);
        Map<String, String> serdeParam = tbl.getSd().getSerdeInfo().getParameters();
        String hbaseColumnsMapping = serdeParam.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);
        ColumnMappings columnMappings = HBaseSerDe.parseColumnsMapping(hbaseColumnsMapping);
        HTableDescriptor tableDesc;
        if (!getHBaseAdmin().tableExists(tableName)) {
            // if it is not an external table then create one
            if (!isExternal) {
                // Create the column descriptors
                tableDesc = new HTableDescriptor(tableName);
                Set<String> uniqueColumnFamilies = new HashSet<String>();
                for (ColumnMapping colMap : columnMappings) {
                    if (!colMap.hbaseRowKey && !colMap.hbaseTimestamp) {
                        uniqueColumnFamilies.add(colMap.familyName);
                    }
                }
                for (String columnFamily : uniqueColumnFamilies) {
                    tableDesc.addFamily(new HColumnDescriptor(Bytes.toBytes(columnFamily)));
                }
                getHBaseAdmin().createTable(tableDesc);
            } else {
                // an external table requires the underlying HBase table to exist
                throw new MetaException("HBase table " + tableName + " doesn't exist while the table is declared as an external table.");
            }
        } else {
            if (!isExternal) {
                throw new MetaException("Table " + tableName + " already exists" + " within HBase; use CREATE EXTERNAL TABLE instead to" + " register it in Hive.");
            }
            // make sure the schema mapping is right
            tableDesc = getHBaseAdmin().getTableDescriptor(Bytes.toBytes(tableName));
            for (ColumnMapping colMap : columnMappings) {
                if (colMap.hbaseRowKey || colMap.hbaseTimestamp) {
                    continue;
                }
                if (!tableDesc.hasFamily(colMap.familyNameBytes)) {
                    throw new MetaException("Column Family " + colMap.familyName + " is not defined in hbase table " + tableName);
                }
            }
        }
        // ensure the table is online
        htable = new HTable(hbaseConf, tableDesc.getName());
    } catch (Exception se) {
        throw new MetaException(StringUtils.stringifyException(se));
    } finally {
        if (htable != null) {
            IOUtils.closeQuietly(htable);
        }
    }
}
Also used : HColumnDescriptor(org.apache.hadoop.hbase.HColumnDescriptor) HTable(org.apache.hadoop.hbase.client.HTable) ColumnMapping(org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) IOException(java.io.IOException) HTableDescriptor(org.apache.hadoop.hbase.HTableDescriptor) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)
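
The family-derivation step in the create path can be exercised on its own. A minimal sketch: the mapping string is invented, SerDeException handling is left to the caller, and ColumnMappings is iterated directly as in the method above.

ColumnMappings mappings = HBaseSerDe.parseColumnsMapping(":key,cfa:a,cfa:b,cfb:c");
Set<String> families = new LinkedHashSet<String>();
for (ColumnMapping colMap : mappings) {
    // the row key and timestamp pseudo-columns map to no HBase column family
    if (!colMap.hbaseRowKey && !colMap.hbaseTimestamp) {
        families.add(colMap.familyName);
    }
}
// families -> [cfa, cfb]; preCreateTable adds one HColumnDescriptor per entry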

Aggregations

ColumnMapping (org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping) 10 usages
ArrayList (java.util.ArrayList) 7 usages
SerDeException (org.apache.hadoop.hive.serde2.SerDeException) 7 usages
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) 4 usages
IOException (java.io.IOException) 3 usages
KeyValue (org.apache.hadoop.hbase.KeyValue) 3 usages
Result (org.apache.hadoop.hbase.client.Result) 3 usages
LazyString (org.apache.hadoop.hive.serde2.lazy.LazyString) 3 usages
LazyMapObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) 3 usages
LazySimpleStructObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) 3 usages
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) 3 usages
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) 3 usages
Text (org.apache.hadoop.io.Text) 3 usages
Scan (org.apache.hadoop.hbase.client.Scan) 2 usages
HashSet (java.util.HashSet) 1 usage
LinkedHashSet (java.util.LinkedHashSet) 1 usage
Path (org.apache.hadoop.fs.Path) 1 usage
HColumnDescriptor (org.apache.hadoop.hbase.HColumnDescriptor) 1 usage
HTableDescriptor (org.apache.hadoop.hbase.HTableDescriptor) 1 usage
TableName (org.apache.hadoop.hbase.TableName) 1 usage