
Example 21 with Entry

use of java.util.Map.Entry in project hive by apache.

the class ColumnStatsUpdateTask method constructColumnStatsFromInput.

private ColumnStatistics constructColumnStatsFromInput() throws SemanticException, MetaException {
    String dbName = SessionState.get().getCurrentDatabase();
    ColumnStatsDesc desc = work.getColStats();
    String tableName = desc.getTableName();
    String partName = work.getPartName();
    List<String> colName = desc.getColName();
    List<String> colType = desc.getColType();
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
    // grammar prohibits more than 1 column, so we are guaranteed to have only 1
    // element in these lists.
    statsObj.setColName(colName.get(0));
    statsObj.setColType(colType.get(0));
    ColumnStatisticsData statsData = new ColumnStatisticsData();
    String columnType = colType.get(0);
    if (columnType.equalsIgnoreCase("long") || columnType.equalsIgnoreCase("tinyint") || columnType.equalsIgnoreCase("smallint") || columnType.equalsIgnoreCase("int") || columnType.equalsIgnoreCase("bigint")) {
        LongColumnStatsData longStats = new LongColumnStatsData();
        longStats.setNumNullsIsSet(false);
        longStats.setNumDVsIsSet(false);
        longStats.setLowValueIsSet(false);
        longStats.setHighValueIsSet(false);
        Map<String, String> mapProp = work.getMapProp();
        for (Entry<String, String> entry : mapProp.entrySet()) {
            String fName = entry.getKey();
            String value = entry.getValue();
            if (fName.equals("numNulls")) {
                longStats.setNumNulls(Long.parseLong(value));
            } else if (fName.equals("numDVs")) {
                longStats.setNumDVs(Long.parseLong(value));
            } else if (fName.equals("lowValue")) {
                longStats.setLowValue(Long.parseLong(value));
            } else if (fName.equals("highValue")) {
                longStats.setHighValue(Long.parseLong(value));
            } else {
                throw new SemanticException("Unknown stat");
            }
        }
        statsData.setLongStats(longStats);
        statsObj.setStatsData(statsData);
    } else if (columnType.equalsIgnoreCase("double") || columnType.equalsIgnoreCase("float")) {
        DoubleColumnStatsData doubleStats = new DoubleColumnStatsData();
        doubleStats.setNumNullsIsSet(false);
        doubleStats.setNumDVsIsSet(false);
        doubleStats.setLowValueIsSet(false);
        doubleStats.setHighValueIsSet(false);
        Map<String, String> mapProp = work.getMapProp();
        for (Entry<String, String> entry : mapProp.entrySet()) {
            String fName = entry.getKey();
            String value = entry.getValue();
            if (fName.equals("numNulls")) {
                doubleStats.setNumNulls(Long.parseLong(value));
            } else if (fName.equals("numDVs")) {
                doubleStats.setNumDVs(Long.parseLong(value));
            } else if (fName.equals("lowValue")) {
                doubleStats.setLowValue(Double.parseDouble(value));
            } else if (fName.equals("highValue")) {
                doubleStats.setHighValue(Double.parseDouble(value));
            } else {
                throw new SemanticException("Unknown stat");
            }
        }
        statsData.setDoubleStats(doubleStats);
        statsObj.setStatsData(statsData);
    } else if (columnType.equalsIgnoreCase("string") || columnType.toLowerCase().startsWith("char") || columnType.toLowerCase().startsWith("varchar")) {
        //char(x),varchar(x) types
        StringColumnStatsData stringStats = new StringColumnStatsData();
        stringStats.setMaxColLenIsSet(false);
        stringStats.setAvgColLenIsSet(false);
        stringStats.setNumNullsIsSet(false);
        stringStats.setNumDVsIsSet(false);
        Map<String, String> mapProp = work.getMapProp();
        for (Entry<String, String> entry : mapProp.entrySet()) {
            String fName = entry.getKey();
            String value = entry.getValue();
            if (fName.equals("numNulls")) {
                stringStats.setNumNulls(Long.parseLong(value));
            } else if (fName.equals("numDVs")) {
                stringStats.setNumDVs(Long.parseLong(value));
            } else if (fName.equals("avgColLen")) {
                stringStats.setAvgColLen(Double.parseDouble(value));
            } else if (fName.equals("maxColLen")) {
                stringStats.setMaxColLen(Long.parseLong(value));
            } else {
                throw new SemanticException("Unknown stat");
            }
        }
        statsData.setStringStats(stringStats);
        statsObj.setStatsData(statsData);
    } else if (columnType.equalsIgnoreCase("boolean")) {
        BooleanColumnStatsData booleanStats = new BooleanColumnStatsData();
        booleanStats.setNumNullsIsSet(false);
        booleanStats.setNumTruesIsSet(false);
        booleanStats.setNumFalsesIsSet(false);
        Map<String, String> mapProp = work.getMapProp();
        for (Entry<String, String> entry : mapProp.entrySet()) {
            String fName = entry.getKey();
            String value = entry.getValue();
            if (fName.equals("numNulls")) {
                booleanStats.setNumNulls(Long.parseLong(value));
            } else if (fName.equals("numTrues")) {
                booleanStats.setNumTrues(Long.parseLong(value));
            } else if (fName.equals("numFalses")) {
                booleanStats.setNumFalses(Long.parseLong(value));
            } else {
                throw new SemanticException("Unknown stat");
            }
        }
        statsData.setBooleanStats(booleanStats);
        statsObj.setStatsData(statsData);
    } else if (columnType.equalsIgnoreCase("binary")) {
        BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
        binaryStats.setNumNullsIsSet(false);
        binaryStats.setAvgColLenIsSet(false);
        binaryStats.setMaxColLenIsSet(false);
        Map<String, String> mapProp = work.getMapProp();
        for (Entry<String, String> entry : mapProp.entrySet()) {
            String fName = entry.getKey();
            String value = entry.getValue();
            if (fName.equals("numNulls")) {
                binaryStats.setNumNulls(Long.parseLong(value));
            } else if (fName.equals("avgColLen")) {
                binaryStats.setAvgColLen(Double.parseDouble(value));
            } else if (fName.equals("maxColLen")) {
                binaryStats.setMaxColLen(Long.parseLong(value));
            } else {
                throw new SemanticException("Unknown stat");
            }
        }
        statsData.setBinaryStats(binaryStats);
        statsObj.setStatsData(statsData);
    } else if (columnType.toLowerCase().startsWith("decimal")) {
        //decimal(a,b) type
        DecimalColumnStatsData decimalStats = new DecimalColumnStatsData();
        decimalStats.setNumNullsIsSet(false);
        decimalStats.setNumDVsIsSet(false);
        decimalStats.setLowValueIsSet(false);
        decimalStats.setHighValueIsSet(false);
        Map<String, String> mapProp = work.getMapProp();
        for (Entry<String, String> entry : mapProp.entrySet()) {
            String fName = entry.getKey();
            String value = entry.getValue();
            if (fName.equals("numNulls")) {
                decimalStats.setNumNulls(Long.parseLong(value));
            } else if (fName.equals("numDVs")) {
                decimalStats.setNumDVs(Long.parseLong(value));
            } else if (fName.equals("lowValue")) {
                BigDecimal d = new BigDecimal(value);
                decimalStats.setLowValue(new Decimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()), (short) d.scale()));
            } else if (fName.equals("highValue")) {
                BigDecimal d = new BigDecimal(value);
                decimalStats.setHighValue(new Decimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()), (short) d.scale()));
            } else {
                throw new SemanticException("Unknown stat");
            }
        }
        statsData.setDecimalStats(decimalStats);
        statsObj.setStatsData(statsData);
    } else if (columnType.equalsIgnoreCase("date") || columnType.equalsIgnoreCase("timestamp")) {
        DateColumnStatsData dateStats = new DateColumnStatsData();
        Map<String, String> mapProp = work.getMapProp();
        for (Entry<String, String> entry : mapProp.entrySet()) {
            String fName = entry.getKey();
            String value = entry.getValue();
            if (fName.equals("numNulls")) {
                dateStats.setNumNulls(Long.parseLong(value));
            } else if (fName.equals("numDVs")) {
                dateStats.setNumDVs(Long.parseLong(value));
            } else if (fName.equals("lowValue")) {
                // Date high/low value is stored as long in stats DB, but allow users to set high/low
                // value using either date format (yyyy-mm-dd) or numeric format (days since epoch)
                dateStats.setLowValue(readDateValue(value));
            } else if (fName.equals("highValue")) {
                dateStats.setHighValue(readDateValue(value));
            } else {
                throw new SemanticException("Unknown stat");
            }
        }
        statsData.setDateStats(dateStats);
        statsObj.setStatsData(statsData);
    } else {
        throw new SemanticException("Unsupported type");
    }
    String[] names = Utilities.getDbTableName(dbName, tableName);
    ColumnStatisticsDesc statsDesc = getColumnStatsDesc(names[0], names[1], partName, partName == null);
    ColumnStatistics colStat = new ColumnStatistics();
    colStat.setStatsDesc(statsDesc);
    colStat.addToStatsObj(statsObj);
    return colStat;
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) ColumnStatsDesc(org.apache.hadoop.hive.ql.plan.ColumnStatsDesc) DateColumnStatsData(org.apache.hadoop.hive.metastore.api.DateColumnStatsData) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) LongColumnStatsData(org.apache.hadoop.hive.metastore.api.LongColumnStatsData) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData) BigDecimal(java.math.BigDecimal) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) DoubleColumnStatsData(org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) Entry(java.util.Map.Entry) BigDecimal(java.math.BigDecimal) Decimal(org.apache.hadoop.hive.metastore.api.Decimal) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) Map(java.util.Map) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
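
The readDateValue helper used in the date/timestamp branch is not shown in this snippet. Its comment notes that the low/high value may arrive either as a yyyy-mm-dd string or as a numeric count of days since the epoch. The following is only a minimal sketch of such a parser, with a hypothetical name, and is not necessarily Hive's actual readDateValue implementation:

import java.time.LocalDate;
import org.apache.hadoop.hive.metastore.api.Date;

// Hypothetical sketch: accept either a plain number of days since the epoch
// or an ISO "yyyy-mm-dd" date string, and return a metastore Date value.
private Date readDateValueSketch(String dateStr) {
    try {
        // Numeric input: interpret directly as days since epoch.
        long daysSinceEpoch = Long.parseLong(dateStr);
        return new Date(daysSinceEpoch);
    } catch (NumberFormatException e) {
        // Otherwise parse as an ISO date and convert to days since epoch.
        LocalDate d = LocalDate.parse(dateStr);
        return new Date(d.toEpochDay());
    }
}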

Example 22 with Entry

use of java.util.Map.Entry in project hive by apache.

the class HiveInputFormat method pushProjectionsAndFilters.

protected void pushProjectionsAndFilters(JobConf jobConf, Class inputFormatClass, Path splitPath, boolean nonNative) {
    Path splitPathWithNoSchema = Path.getPathWithoutSchemeAndAuthority(splitPath);
    if (this.mrwork == null) {
        init(job);
    }
    if (this.mrwork.getPathToAliases() == null) {
        return;
    }
    ArrayList<String> aliases = new ArrayList<String>();
    Iterator<Entry<Path, ArrayList<String>>> iterator = this.mrwork.getPathToAliases().entrySet().iterator();
    Set<Path> splitParentPaths = null;
    int pathsSize = this.mrwork.getPathToAliases().entrySet().size();
    while (iterator.hasNext()) {
        Entry<Path, ArrayList<String>> entry = iterator.next();
        Path key = entry.getKey();
        boolean match;
        if (nonNative) {
            // For non-native tables, we need to do an exact match to avoid
            // HIVE-1903.  (The table location contains no files, and the string
            // representation of its path does not have a trailing slash.)
            match = splitPath.equals(key) || splitPathWithNoSchema.equals(key);
        } else {
            // For native tables, do a prefix match instead, since the split path
            // points to something deeper than the table location.
            if (pathsSize > 1) {
                // In such cases, use pre-computed paths for comparison
                if (splitParentPaths == null) {
                    splitParentPaths = new HashSet<>();
                    FileUtils.populateParentPaths(splitParentPaths, splitPath);
                    FileUtils.populateParentPaths(splitParentPaths, splitPathWithNoSchema);
                }
                match = splitParentPaths.contains(key);
            } else {
                match = FileUtils.isPathWithinSubtree(splitPath, key) || FileUtils.isPathWithinSubtree(splitPathWithNoSchema, key);
            }
        }
        if (match) {
            ArrayList<String> list = entry.getValue();
            for (String val : list) {
                aliases.add(val);
            }
        }
    }
    for (String alias : aliases) {
        Operator<? extends OperatorDesc> op = this.mrwork.getAliasToWork().get(alias);
        if (op instanceof TableScanOperator) {
            TableScanOperator ts = (TableScanOperator) op;
            // push down projections.
            ColumnProjectionUtils.appendReadColumns(jobConf, ts.getNeededColumnIDs(), ts.getNeededColumns(), ts.getNeededNestedColumnPaths());
            // push down filters
            pushFilters(jobConf, ts);
            AcidUtils.setTransactionalTableScan(job, ts.getConf().isAcidTable());
            AcidUtils.setAcidOperationalProperties(job, ts.getConf().getAcidOperationalProperties());
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) ArrayList(java.util.ArrayList) Entry(java.util.Map.Entry)
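
The comments in the else branch explain the design choice: when many table paths must be checked, the split's parent paths are computed once up front and each candidate path is then matched with a set lookup, instead of walking the path hierarchy for every alias. A small standalone sketch of that idea (hypothetical helper, not the FileUtils implementation) looks like this:

import java.util.HashSet;
import java.util.Set;
import org.apache.hadoop.fs.Path;

// Sketch of the pre-computed parent-path idea: collect every ancestor of the
// split path once, then each candidate table location can be matched with an
// O(1) set lookup instead of a per-path subtree walk.
static Set<Path> parentPathsOf(Path splitPath) {
    Set<Path> parents = new HashSet<>();
    for (Path p = splitPath; p != null; p = p.getParent()) {
        parents.add(p);
    }
    return parents;
}

// Usage: precompute once per split, then test each table location.
// Set<Path> splitParents = parentPathsOf(splitPath);
// boolean match = splitParents.contains(tableLocation);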

Example 23 with Entry

use of java.util.Map.Entry in project hive by apache.

the class AbstractBucketJoinProc method convertMapJoinToBucketMapJoin.

/*
   * Convert mapjoin to a bucketed mapjoin.
   * The operator tree is not changed, but the mapjoin descriptor in the big table is
   * enhanced to keep the big table bucket -> small table buckets mapping.
   */
protected void convertMapJoinToBucketMapJoin(MapJoinOperator mapJoinOp, BucketJoinProcCtx context) throws SemanticException {
    MapJoinDesc desc = mapJoinOp.getConf();
    Map<String, Map<String, List<String>>> aliasBucketFileNameMapping = new LinkedHashMap<String, Map<String, List<String>>>();
    Map<String, List<Integer>> tblAliasToNumberOfBucketsInEachPartition = context.getTblAliasToNumberOfBucketsInEachPartition();
    Map<String, List<List<String>>> tblAliasToBucketedFilePathsInEachPartition = context.getTblAliasToBucketedFilePathsInEachPartition();
    Map<Partition, List<String>> bigTblPartsToBucketFileNames = context.getBigTblPartsToBucketFileNames();
    Map<Partition, Integer> bigTblPartsToBucketNumber = context.getBigTblPartsToBucketNumber();
    List<String> joinAliases = context.getJoinAliases();
    String baseBigAlias = context.getBaseBigAlias();
    // sort bucket names for the big table
    for (List<String> partBucketNames : bigTblPartsToBucketFileNames.values()) {
        Collections.sort(partBucketNames);
    }
    // go through all small tables and get the mapping from bucket file names
    // in the big table to bucket file names in small tables.
    for (int j = 0; j < joinAliases.size(); j++) {
        String alias = joinAliases.get(j);
        if (alias.equals(baseBigAlias)) {
            continue;
        }
        for (List<String> names : tblAliasToBucketedFilePathsInEachPartition.get(alias)) {
            Collections.sort(names);
        }
        List<Integer> smallTblBucketNums = tblAliasToNumberOfBucketsInEachPartition.get(alias);
        List<List<String>> smallTblFilesList = tblAliasToBucketedFilePathsInEachPartition.get(alias);
        Map<String, List<String>> mappingBigTableBucketFileNameToSmallTableBucketFileNames = new LinkedHashMap<String, List<String>>();
        aliasBucketFileNameMapping.put(alias, mappingBigTableBucketFileNameToSmallTableBucketFileNames);
        // for each bucket file in big table, get the corresponding bucket file
        // name in the small table.
        // if there is more than 1 partition in the big table, do the mapping for each partition
        Iterator<Entry<Partition, List<String>>> bigTblPartToBucketNames = bigTblPartsToBucketFileNames.entrySet().iterator();
        Iterator<Entry<Partition, Integer>> bigTblPartToBucketNum = bigTblPartsToBucketNumber.entrySet().iterator();
        while (bigTblPartToBucketNames.hasNext()) {
            assert bigTblPartToBucketNum.hasNext();
            int bigTblBucketNum = bigTblPartToBucketNum.next().getValue();
            List<String> bigTblBucketNameList = bigTblPartToBucketNames.next().getValue();
            fillMappingBigTableBucketFileNameToSmallTableBucketFileNames(smallTblBucketNums, smallTblFilesList, mappingBigTableBucketFileNameToSmallTableBucketFileNames, bigTblBucketNum, bigTblBucketNameList, desc.getBigTableBucketNumMapping());
        }
    }
    desc.setAliasBucketFileNameMapping(aliasBucketFileNameMapping);
    desc.setBigTableAlias(baseBigAlias);
    boolean bigTablePartitioned = context.isBigTablePartitioned();
    if (bigTablePartitioned) {
        desc.setBigTablePartSpecToFileMapping(convert(bigTblPartsToBucketFileNames));
    }
    Map<Integer, Set<String>> posToAliasMap = mapJoinOp.getPosToAliasMap();
    Map<String, String> aliasToNewAliasMap = context.getAliasToNewAliasMap();
    if (aliasToNewAliasMap != null && posToAliasMap != null) {
        for (Map.Entry<String, String> entry : aliasToNewAliasMap.entrySet()) {
            for (Set<String> aliases : posToAliasMap.values()) {
                if (aliases.remove(entry.getKey())) {
                    aliases.add(entry.getValue());
                }
            }
        }
    }
    // successfully convert to bucket map join
    desc.setBucketMapJoin(true);
}
Also used : Set(java.util.Set) LinkedHashMap(java.util.LinkedHashMap) Entry(java.util.Map.Entry) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) ArrayList(java.util.ArrayList) List(java.util.List) Partition(org.apache.hadoop.hive.ql.metadata.Partition) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)
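
The mapping built above pairs each big-table bucket file with the small-table bucket files it must read; the exact pairing is done by fillMappingBigTableBucketFileNameToSmallTableBucketFileNames, which is not shown here. As an illustration only, under the usual bucket map join assumption that one bucket count evenly divides the other, big-table bucket i is typically paired with small-table bucket i modulo the small table's bucket count. A hedged sketch of that pairing (not Hive's implementation) follows:

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

// Illustrative sketch: pair sorted big-table bucket files with small-table
// bucket files by bucket index, assuming the bucket counts are multiples.
static Map<String, List<String>> mapBigToSmallBuckets(
        List<String> bigBucketFiles, List<String> smallBucketFiles) {
    int bigCount = bigBucketFiles.size();
    int smallCount = smallBucketFiles.size();
    Map<String, List<String>> mapping = new LinkedHashMap<>();
    for (int i = 0; i < bigCount; i++) {
        List<String> matched = new ArrayList<>();
        if (smallCount <= bigCount) {
            // each big bucket reads exactly one small bucket
            matched.add(smallBucketFiles.get(i % smallCount));
        } else {
            // each big bucket reads every small bucket whose index folds onto it
            for (int j = i; j < smallCount; j += bigCount) {
                matched.add(smallBucketFiles.get(j));
            }
        }
        mapping.put(bigBucketFiles.get(i), matched);
    }
    return mapping;
}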

Example 24 with Entry

use of java.util.Map.Entry in project hive by apache.

the class TestHiveAccumuloTableOutputFormat method testWriteToMockInstance.

@Test
public void testWriteToMockInstance() throws Exception {
    Instance inst = new MockInstance(test.getMethodName());
    Connector conn = inst.getConnector("root", new PasswordToken(""));
    HiveAccumuloTableOutputFormat outputFormat = new HiveAccumuloTableOutputFormat();
    String table = test.getMethodName();
    conn.tableOperations().create(table);
    JobConf conf = new JobConf();
    conf.set(AccumuloConnectionParameters.INSTANCE_NAME, inst.getInstanceName());
    conf.set(AccumuloConnectionParameters.USER_NAME, "root");
    conf.set(AccumuloConnectionParameters.USER_PASS, "");
    conf.setBoolean(AccumuloConnectionParameters.USE_MOCK_INSTANCE, true);
    conf.set(AccumuloConnectionParameters.TABLE_NAME, test.getMethodName());
    FileSystem local = FileSystem.getLocal(conf);
    outputFormat.checkOutputSpecs(local, conf);
    RecordWriter<Text, Mutation> recordWriter = outputFormat.getRecordWriter(local, conf, null, null);
    List<String> names = Arrays.asList("row", "col1", "col2");
    List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
    Properties tableProperties = new Properties();
    tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:cq1,cf:cq2");
    tableProperties.setProperty(serdeConstants.FIELD_DELIM, " ");
    tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(names));
    tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types));
    AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), tableProperties, AccumuloSerDe.class.getSimpleName());
    LazySerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters();
    AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, accumuloSerDeParams.getColumnMappings(), AccumuloSerDeParameters.DEFAULT_VISIBILITY_LABEL, accumuloSerDeParams.getRowIdFactory());
    TypeInfo stringTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME);
    LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) LazyFactory.createLazyStructInspector(Arrays.asList("row", "cq1", "cq2"), Arrays.asList(stringTypeInfo, stringTypeInfo, stringTypeInfo), serDeParams.getSeparators(), serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
    LazyStruct struct = (LazyStruct) LazyFactory.createLazyObject(structOI);
    ByteArrayRef bytes = new ByteArrayRef();
    bytes.setData("row value1 value2".getBytes());
    struct.init(bytes, 0, bytes.getData().length);
    // Serialize the struct into a mutation
    Mutation m = serializer.serialize(struct, structOI);
    // Write the mutation
    recordWriter.write(new Text(table), m);
    // Close the writer
    recordWriter.close(null);
    Iterator<Entry<Key, Value>> iter = conn.createScanner(table, new Authorizations()).iterator();
    Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext());
    Entry<Key, Value> entry = iter.next();
    Key k = entry.getKey();
    Value v = entry.getValue();
    Assert.assertEquals("row", k.getRow().toString());
    Assert.assertEquals("cf", k.getColumnFamily().toString());
    Assert.assertEquals("cq1", k.getColumnQualifier().toString());
    Assert.assertEquals("", k.getColumnVisibility().toString());
    Assert.assertEquals("value1", new String(v.get()));
    Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext());
    entry = iter.next();
    k = entry.getKey();
    v = entry.getValue();
    Assert.assertEquals("row", k.getRow().toString());
    Assert.assertEquals("cf", k.getColumnFamily().toString());
    Assert.assertEquals("cq2", k.getColumnQualifier().toString());
    Assert.assertEquals("", k.getColumnVisibility().toString());
    Assert.assertEquals("value2", new String(v.get()));
    Assert.assertFalse("Iterator unexpectedly had more data", iter.hasNext());
}
Also used : Connector(org.apache.accumulo.core.client.Connector) Configuration(org.apache.hadoop.conf.Configuration) MockInstance(org.apache.accumulo.core.client.mock.MockInstance) Instance(org.apache.accumulo.core.client.Instance) LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters) Properties(java.util.Properties) PasswordToken(org.apache.accumulo.core.client.security.tokens.PasswordToken) Entry(java.util.Map.Entry) MockInstance(org.apache.accumulo.core.client.mock.MockInstance) FileSystem(org.apache.hadoop.fs.FileSystem) JobConf(org.apache.hadoop.mapred.JobConf) AccumuloRowSerializer(org.apache.hadoop.hive.accumulo.serde.AccumuloRowSerializer) LazyStruct(org.apache.hadoop.hive.serde2.lazy.LazyStruct) Authorizations(org.apache.accumulo.core.security.Authorizations) LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) Text(org.apache.hadoop.io.Text) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) AccumuloSerDe(org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe) AccumuloSerDeParameters(org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters) ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef) Value(org.apache.accumulo.core.data.Value) Mutation(org.apache.accumulo.core.data.Mutation) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)
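
Because an Accumulo Scanner is an Iterable over Map.Entry&lt;Key, Value&gt;, the explicit iterator-based verification above can also be written as a for-each loop. The sketch below assumes the conn and table variables from the test and would run inside a method that declares throws Exception, as the test does:

import java.util.Map.Entry;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.security.Authorizations;

// Scanner extends Iterable<Entry<Key, Value>>, so the returned entries can be
// consumed with a for-each loop instead of an explicit Iterator.
Scanner scanner = conn.createScanner(table, new Authorizations());
for (Entry<Key, Value> e : scanner) {
    System.out.println(e.getKey().getRow() + " " + e.getKey().getColumnQualifier()
        + " = " + new String(e.getValue().get()));
}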

Example 25 with Entry

use of java.util.Map.Entry in project hive by apache.

the class TestHiveAccumuloTableOutputFormat method testWriteToMockInstanceWithVisibility.

@Test
public void testWriteToMockInstanceWithVisibility() throws Exception {
    Instance inst = new MockInstance(test.getMethodName());
    Connector conn = inst.getConnector("root", new PasswordToken(""));
    Authorizations auths = new Authorizations("foo");
    conn.securityOperations().changeUserAuthorizations("root", auths);
    HiveAccumuloTableOutputFormat outputFormat = new HiveAccumuloTableOutputFormat();
    String table = test.getMethodName();
    conn.tableOperations().create(table);
    JobConf conf = new JobConf();
    conf.set(AccumuloConnectionParameters.INSTANCE_NAME, inst.getInstanceName());
    conf.set(AccumuloConnectionParameters.USER_NAME, "root");
    conf.set(AccumuloConnectionParameters.USER_PASS, "");
    conf.setBoolean(AccumuloConnectionParameters.USE_MOCK_INSTANCE, true);
    conf.set(AccumuloConnectionParameters.TABLE_NAME, test.getMethodName());
    FileSystem local = FileSystem.getLocal(conf);
    outputFormat.checkOutputSpecs(local, conf);
    RecordWriter<Text, Mutation> recordWriter = outputFormat.getRecordWriter(local, conf, null, null);
    List<String> names = Arrays.asList("row", "col1", "col2");
    List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
    Properties tableProperties = new Properties();
    tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:cq1,cf:cq2");
    tableProperties.setProperty(serdeConstants.FIELD_DELIM, " ");
    tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(names));
    tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types));
    AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), tableProperties, AccumuloSerDe.class.getSimpleName());
    LazySerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters();
    AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, accumuloSerDeParams.getColumnMappings(), new ColumnVisibility("foo"), accumuloSerDeParams.getRowIdFactory());
    LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) LazyFactory.createLazyStructInspector(Arrays.asList("row", "cq1", "cq2"), Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo), serDeParams.getSeparators(), serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
    LazyStruct struct = (LazyStruct) LazyFactory.createLazyObject(structOI);
    ByteArrayRef bytes = new ByteArrayRef();
    bytes.setData("row value1 value2".getBytes());
    struct.init(bytes, 0, bytes.getData().length);
    // Serialize the struct into a mutation
    Mutation m = serializer.serialize(struct, structOI);
    // Write the mutation
    recordWriter.write(new Text(table), m);
    // Close the writer
    recordWriter.close(null);
    Iterator<Entry<Key, Value>> iter = conn.createScanner(table, auths).iterator();
    Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext());
    Entry<Key, Value> entry = iter.next();
    Key k = entry.getKey();
    Value v = entry.getValue();
    Assert.assertEquals("row", k.getRow().toString());
    Assert.assertEquals("cf", k.getColumnFamily().toString());
    Assert.assertEquals("cq1", k.getColumnQualifier().toString());
    Assert.assertEquals("foo", k.getColumnVisibility().toString());
    Assert.assertEquals("value1", new String(v.get()));
    Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext());
    entry = iter.next();
    k = entry.getKey();
    v = entry.getValue();
    Assert.assertEquals("row", k.getRow().toString());
    Assert.assertEquals("cf", k.getColumnFamily().toString());
    Assert.assertEquals("cq2", k.getColumnQualifier().toString());
    Assert.assertEquals("foo", k.getColumnVisibility().toString());
    Assert.assertEquals("value2", new String(v.get()));
    Assert.assertFalse("Iterator unexpectedly had more data", iter.hasNext());
}
Also used : Connector(org.apache.accumulo.core.client.Connector) Configuration(org.apache.hadoop.conf.Configuration) MockInstance(org.apache.accumulo.core.client.mock.MockInstance) Instance(org.apache.accumulo.core.client.Instance) LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters) Properties(java.util.Properties) PasswordToken(org.apache.accumulo.core.client.security.tokens.PasswordToken) Entry(java.util.Map.Entry) MockInstance(org.apache.accumulo.core.client.mock.MockInstance) FileSystem(org.apache.hadoop.fs.FileSystem) ColumnVisibility(org.apache.accumulo.core.security.ColumnVisibility) JobConf(org.apache.hadoop.mapred.JobConf) AccumuloRowSerializer(org.apache.hadoop.hive.accumulo.serde.AccumuloRowSerializer) LazyStruct(org.apache.hadoop.hive.serde2.lazy.LazyStruct) Authorizations(org.apache.accumulo.core.security.Authorizations) LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) Text(org.apache.hadoop.io.Text) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) AccumuloSerDe(org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe) AccumuloSerDeParameters(org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters) ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef) Value(org.apache.accumulo.core.data.Value) Mutation(org.apache.accumulo.core.data.Mutation) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)

Aggregations

Entry (java.util.Map.Entry): 1041
HashMap (java.util.HashMap): 295
Map (java.util.Map): 288
ArrayList (java.util.ArrayList): 258
List (java.util.List): 177
Iterator (java.util.Iterator): 113
IOException (java.io.IOException): 109
Test (org.junit.Test): 77
Set (java.util.Set): 68
LinkedHashMap (java.util.LinkedHashMap): 64
HashSet (java.util.HashSet): 62
File (java.io.File): 56
Collection (java.util.Collection): 42
TreeMap (java.util.TreeMap): 36
Properties (java.util.Properties): 35
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 35
TestSuite (junit.framework.TestSuite): 33
LinkedList (java.util.LinkedList): 31
NamedIcon (jmri.jmrit.catalog.NamedIcon): 28
Collectors (java.util.stream.Collectors): 27