
Example 1 with BiMap

use of com.google.common.collect.BiMap in project hadoop by apache.

the class TestShellBasedIdMapping method testStaticMapUpdate.

// Test staticMap refreshing
@Test
public void testStaticMapUpdate() throws IOException {
    assumeNotWindows();
    File tempStaticMapFile = File.createTempFile("nfs-", ".map");
    tempStaticMapFile.delete();
    Configuration conf = new Configuration();
    conf.setLong(IdMappingConstant.USERGROUPID_UPDATE_MILLIS_KEY, 1000);
    conf.set(IdMappingConstant.STATIC_ID_MAPPING_FILE_KEY, tempStaticMapFile.getPath());
    ShellBasedIdMapping refIdMapping = new ShellBasedIdMapping(conf, true);
    ShellBasedIdMapping incrIdMapping = new ShellBasedIdMapping(conf);
    BiMap<Integer, String> uidNameMap = refIdMapping.getUidNameMap();
    BiMap<Integer, String> gidNameMap = refIdMapping.getGidNameMap();
    // Force an empty map to see the effect of the incremental map update
    // triggered by calling getUid()
    incrIdMapping.clearNameMaps();
    uidNameMap = refIdMapping.getUidNameMap();
    for (Map.Entry<Integer, String> me : uidNameMap.entrySet()) {
        tempStaticMapFile.delete();
        incrIdMapping.clearNameMaps();
        Integer id = me.getKey();
        String name = me.getValue();
        // The static map is empty, so the id found for "name" would be
        // the same as "id"
        Integer nid = incrIdMapping.getUid(name);
        assertEquals(id, nid);
        // Clear map and update staticMap file
        incrIdMapping.clearNameMaps();
        Integer rid = id + 10000;
        String smapStr = "uid " + rid + " " + id;
        createStaticMapFile(tempStaticMapFile, smapStr);
        // Now the id found for "name" should be the id specified by
        // the staticMap
        nid = incrIdMapping.getUid(name);
        assertEquals(rid, nid);
    }
    // Force an empty map to see the effect of the incremental map update
    // triggered by calling getGid()
    incrIdMapping.clearNameMaps();
    gidNameMap = refIdMapping.getGidNameMap();
    for (Map.Entry<Integer, String> me : gidNameMap.entrySet()) {
        tempStaticMapFile.delete();
        incrIdMapping.clearNameMaps();
        Integer id = me.getKey();
        String name = me.getValue();
        // The static map is empty, so the id found for "name" would be
        // the same as "id"
        Integer nid = incrIdMapping.getGid(name);
        assertEquals(id, nid);
        // Clear map and update staticMap file
        incrIdMapping.clearNameMaps();
        Integer rid = id + 10000;
        String smapStr = "gid " + rid + " " + id;
        // Sleep briefly so that two file changes do not end up with the same modification time
        try {
            Thread.sleep(100);
        } catch (InterruptedException e) {
        // Do nothing
        }
        createStaticMapFile(tempStaticMapFile, smapStr);
        // Now the id found for "name" should be the id specified by
        // the staticMap
        nid = incrIdMapping.getGid(name);
        assertEquals(rid, nid);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) BiMap(com.google.common.collect.BiMap) HashBiMap(com.google.common.collect.HashBiMap) File(java.io.File) Test(org.junit.Test)
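
The test above exercises ShellBasedIdMapping, but the BiMap idea underneath is simple: one map answers id-to-name lookups and its inverse() view answers name-to-id lookups, with uniqueness enforced in both directions. Below is a minimal, self-contained sketch of that pattern; the class name and the sample ids and names are illustrative and not taken from the Hadoop code.

import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;

public class UidNameBiMapSketch {
    public static void main(String[] args) {
        // id -> name, with values kept unique as well as keys
        BiMap<Integer, String> uidNameMap = HashBiMap.create();
        uidNameMap.put(0, "root");
        uidNameMap.put(1000, "alice");

        // name -> id lookups come from the inverse view; there is no second map to keep in sync
        Integer uid = uidNameMap.inverse().get("alice");  // 1000
        String name = uidNameMap.get(0);                  // "root"
        System.out.println(uid + " " + name);

        // remap "alice" to a new id, similar in spirit to applying a static "uid" override
        uidNameMap.inverse().put("alice", 11000);
        System.out.println(uidNameMap.inverse().get("alice"));  // prints 11000

        // value uniqueness is enforced: uidNameMap.put(2000, "root") would throw
        // IllegalArgumentException, while forcePut(2000, "root") would silently
        // replace the existing 0 -> "root" entry instead
    }
}

HashBiMap.create() is unsynchronized, which is fine for a single-threaded test flow like this one; Example 4 below shows the same structure paired with explicit locking.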

Example 2 with BiMap

use of com.google.common.collect.BiMap in project pinot by linkedin.

the class TableSizeReader method getTableSubtypeSize.

public TableSubTypeSizeDetails getTableSubtypeSize(String tableNameWithType, int timeoutMsec) {
    // for convenient usage within this function
    final String table = tableNameWithType;
    // get list of servers
    Map<String, List<String>> serverSegmentsMap = helixResourceManager.getInstanceToSegmentsInATableMap(table);
    ServerTableSizeReader serverTableSizeReader = new ServerTableSizeReader(executor, connectionManager);
    BiMap<String, String> endpoints = helixResourceManager.getDataInstanceAdminEndpoints(serverSegmentsMap.keySet());
    Map<String, List<SegmentSizeInfo>> serverSizeInfo = serverTableSizeReader.getSizeDetailsFromServers(endpoints, table, timeoutMsec);
    populateErroredServerSizes(serverSizeInfo, serverSegmentsMap);
    TableSubTypeSizeDetails subTypeSizeDetails = new TableSubTypeSizeDetails();
    Map<String, SegmentSizeDetails> segmentMap = subTypeSizeDetails.segments;
    // convert from server -> SegmentSizes to segment -> (SegmentSizeDetails: server -> segmentSizes)
    for (Map.Entry<String, List<SegmentSizeInfo>> serverSegments : serverSizeInfo.entrySet()) {
        String server = serverSegments.getKey();
        List<SegmentSizeInfo> segments = serverSegments.getValue();
        for (SegmentSizeInfo segment : segments) {
            SegmentSizeDetails sizeDetails = segmentMap.get(segment.segmentName);
            if (sizeDetails == null) {
                sizeDetails = new SegmentSizeDetails();
                segmentMap.put(segment.segmentName, sizeDetails);
            }
            sizeDetails.serverInfo.put(server, segment);
        }
    }
    // iterate through the map of segments and calculate the reported and estimated sizes
    // for each segment. For servers that reported error, we use the max size of the same segment
    // reported by another server. If no server reported size for a segment, we use the size
    // of the largest segment reported by any server for the table.
    // At all times, reportedSize reflects only the sizes actually reported by servers; errored
    // segments are not included in that count. Estimated size is what we estimate in case of
    // errors, as described above.
    // estimatedSize >= reportedSize. If no server reported error, estimatedSize == reportedSize
    long tableLevelMax = -1;
    for (Map.Entry<String, SegmentSizeDetails> segmentEntry : segmentMap.entrySet()) {
        SegmentSizeDetails segmentSizes = segmentEntry.getValue();
        // track segment level max size
        long segmentLevelMax = -1;
        int errors = 0;
        // iterate over all servers that reported size for this segment
        for (Map.Entry<String, SegmentSizeInfo> serverInfo : segmentSizes.serverInfo.entrySet()) {
            SegmentSizeInfo ss = serverInfo.getValue();
            if (ss.diskSizeInBytes != -1) {
                segmentSizes.reportedSizeInBytes += ss.diskSizeInBytes;
                segmentLevelMax = Math.max(segmentLevelMax, ss.diskSizeInBytes);
            } else {
                ++errors;
            }
        }
        // after iterating over all servers update summary reported and estimated size of the segment
        if (errors != segmentSizes.serverInfo.size()) {
            // at least one server reported a size for this segment
            segmentSizes.estimatedSizeInBytes = segmentSizes.reportedSizeInBytes + errors * segmentLevelMax;
            tableLevelMax = Math.max(tableLevelMax, segmentLevelMax);
            subTypeSizeDetails.reportedSizeInBytes += segmentSizes.reportedSizeInBytes;
            subTypeSizeDetails.estimatedSizeInBytes += segmentSizes.estimatedSizeInBytes;
        } else {
            segmentSizes.reportedSizeInBytes = -1;
            segmentSizes.estimatedSizeInBytes = -1;
        }
    }
    if (tableLevelMax == -1) {
        // no server reported size
        subTypeSizeDetails.reportedSizeInBytes = -1;
        subTypeSizeDetails.estimatedSizeInBytes = -1;
    } else {
        // For segments with no reported sizes, use max table-level segment size as an estimate
        for (Map.Entry<String, SegmentSizeDetails> segmentSizeDetailsEntry : segmentMap.entrySet()) {
            SegmentSizeDetails sizeDetails = segmentSizeDetailsEntry.getValue();
            if (sizeDetails.reportedSizeInBytes != -1) {
                continue;
            }
            sizeDetails.estimatedSizeInBytes += sizeDetails.serverInfo.size() * tableLevelMax;
            subTypeSizeDetails.estimatedSizeInBytes += sizeDetails.estimatedSizeInBytes;
        }
    }
    return subTypeSizeDetails;
}
Also used : SegmentSizeInfo(com.linkedin.pinot.common.restlet.resources.SegmentSizeInfo) ArrayList(java.util.ArrayList) List(java.util.List) BiMap(com.google.common.collect.BiMap) HashMap(java.util.HashMap) Map(java.util.Map)
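
Here the BiMap is the server-instance-to-admin-endpoint mapping returned by getDataInstanceAdminEndpoints, and keeping it bidirectional lets the caller resolve a response endpoint back to the server instance that produced it. A small sketch of that lookup pattern follows; the instance names and endpoints are made up for illustration.

import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;

public class EndpointBiMapSketch {
    public static void main(String[] args) {
        // server instance -> admin endpoint; a BiMap lets a response endpoint be
        // mapped back to the instance that owns it without maintaining a second map
        BiMap<String, String> endpoints = HashBiMap.create();
        endpoints.put("Server_host1_8098", "host1:8097");
        endpoints.put("Server_host2_8098", "host2:8097");

        String endpoint = endpoints.get("Server_host1_8098");     // forward lookup
        String instance = endpoints.inverse().get("host2:8097");  // reverse lookup
        System.out.println(endpoint + " / " + instance);
    }
}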

Example 3 with BiMap

use of com.google.common.collect.BiMap in project zm-mailbox by Zimbra.

the class CheckPortConflict method checkConfig.

private void checkConfig(Config config, Map<String, Object> configAttrsToModify) throws ServiceException {
    BiMap<String, String> newDefaults = HashBiMap.create();
    /*
     * First, make sure there is no conflict in the Config entry, even
     * if the value on the config entry might not be effective on a server.
     */
    for (String attrName : sPortAttrs) {
        if (!configAttrsToModify.containsKey(attrName))
            newDefaults.put(config.getAttr(attrName), attrName);
    }
    // check conflict for attrs being changed
    for (Map.Entry<String, Object> attrToModify : configAttrsToModify.entrySet()) {
        String attrName = attrToModify.getKey();
        if (!sPortAttrs.contains(attrName))
            continue;
        SingleValueMod mod = singleValueMod(configAttrsToModify, attrName);
        String newValue = null;
        if (mod.setting())
            newValue = mod.value();
        if (conflict(null, newDefaults, newValue, attrName)) {
            throw ServiceException.INVALID_REQUEST("port " + newValue + " conflict between " + attrName + " and " + newDefaults.get(newValue) + " on global config", null);
        } else
            newDefaults.put(newValue, attrName);
    }
    /*
     * Then, iterate through all servers to see if this port change on the Config
     * entry has an impact on any server.
     */
    List<Server> servers = Provisioning.getInstance().getAllServers();
    for (Server server : servers) {
        checkServerWithNewDefaults(server, newDefaults, configAttrsToModify);
    }
}
Also used : Server(com.zimbra.cs.account.Server) BiMap(com.google.common.collect.BiMap) HashMap(java.util.HashMap) HashBiMap(com.google.common.collect.HashBiMap) Map(java.util.Map)
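
The BiMap in this check maps a port value to the attribute that claims it, so a duplicate port shows up as an existing key and a duplicate attribute as an existing value. The sketch below shows roughly how that conflict test works; the attribute names are illustrative and the real conflict() logic in Zimbra is more involved.

import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;

public class PortConflictSketch {
    public static void main(String[] args) {
        // port value -> attribute that claims it; both ports and attribute names stay unique
        BiMap<String, String> newDefaults = HashBiMap.create();
        newDefaults.put("7071", "adminPort");
        newDefaults.put("80", "mailPort");

        String attrName = "proxyPort";
        String newValue = "80";
        if (newDefaults.containsKey(newValue)) {
            // a port already claimed by another attribute is a conflict
            System.out.println("port " + newValue + " conflict between " + attrName
                    + " and " + newDefaults.get(newValue));
        } else {
            newDefaults.put(newValue, attrName);
        }
    }
}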

Example 4 with BiMap

use of com.google.common.collect.BiMap in project hive by apache.

the class LlapTaskCommunicator method nodePinged.

void nodePinged(String hostname, String uniqueId, int port, TezAttemptArray tasks) {
    // TODO: do we ever need the port? we could just do away with nodeId altogether.
    LlapNodeId nodeId = LlapNodeId.getInstance(hostname, port);
    registerPingingNode(nodeId);
    BiMap<ContainerId, TezTaskAttemptID> biMap = entityTracker.getContainerAttemptMapForNode(nodeId);
    if (biMap != null) {
        HashSet<TezTaskAttemptID> attempts = new HashSet<>();
        for (Writable w : tasks.get()) {
            attempts.add((TezTaskAttemptID) w);
        }
        String error = "";
        synchronized (biMap) {
            for (Map.Entry<ContainerId, TezTaskAttemptID> entry : biMap.entrySet()) {
                // TODO: this is a stopgap fix. We really need to change all mappings by unique node ID,
                //       or at least (in this case) track the latest unique ID for LlapNode and retry all
                //       older-node tasks proactively. For now let the heartbeats fail them.
                TezTaskAttemptID attemptId = entry.getValue();
                String taskNodeId = entityTracker.getUniqueNodeId(attemptId);
                // Also, we prefer a missed heartbeat over a stuck query in case of discrepancy in ET.
                if (taskNodeId != null && taskNodeId.equals(uniqueId)) {
                    if (attempts.contains(attemptId)) {
                        getContext().taskAlive(entry.getValue());
                    } else {
                        error += (attemptId + ", ");
                    }
                    getContext().containerAlive(entry.getKey());
                }
            }
        }
        if (!error.isEmpty()) {
            LOG.info("The tasks we expected to be on the node are not there: " + error);
        }
    } else {
        long currentTs = TimeUnit.MILLISECONDS.convert(System.nanoTime(), TimeUnit.NANOSECONDS);
        if (currentTs > nodeNotFoundLogTime.get() + 5000L) {
            LOG.warn("Received ping from node without any registered tasks or containers: " + hostname + ":" + port + ". Could be caused by pre-emption by the AM," + " or a mismatched hostname. Enable debug logging for mismatched host names");
            nodeNotFoundLogTime.set(currentTs);
        }
    }
}
Also used : LlapNodeId(org.apache.hadoop.hive.llap.LlapNodeId) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) Writable(org.apache.hadoop.io.Writable) ByteString(com.google.protobuf.ByteString) Map(java.util.Map) BiMap(com.google.common.collect.BiMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap) HashBiMap(com.google.common.collect.HashBiMap) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) HashSet(java.util.HashSet)
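
entityTracker hands back a BiMap of container ids to attempt ids that is shared across threads, which is why the code wraps the iteration in synchronized (biMap). Below is a minimal sketch of that pattern using Guava's Maps.synchronizedBiMap wrapper and plain strings in place of ContainerId and TezTaskAttemptID; the wrapper is an assumption for illustration, since the Hive code may simply lock a plain HashBiMap.

import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;
import com.google.common.collect.Maps;
import java.util.Map;

public class SynchronizedBiMapSketch {
    public static void main(String[] args) {
        // container id -> attempt id, shared across threads; the wrapper makes single
        // calls thread-safe, but iteration still needs an explicit lock on the map itself
        BiMap<String, String> containerToAttempt =
                Maps.synchronizedBiMap(HashBiMap.<String, String>create());
        containerToAttempt.put("container_001", "attempt_001");
        containerToAttempt.put("container_002", "attempt_002");

        synchronized (containerToAttempt) {
            for (Map.Entry<String, String> entry : containerToAttempt.entrySet()) {
                System.out.println(entry.getKey() + " -> " + entry.getValue());
            }
        }
    }
}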

Example 5 with BiMap

use of com.google.common.collect.BiMap in project hive by apache.

the class GenTezUtils method removeUnionOperators.

// removes any union operator and clones the plan
public static void removeUnionOperators(GenTezProcContext context, BaseWork work, int indexForTezUnion) throws SemanticException {
    List<Operator<?>> roots = new ArrayList<Operator<?>>();
    roots.addAll(work.getAllRootOperators());
    if (work.getDummyOps() != null) {
        roots.addAll(work.getDummyOps());
    }
    roots.addAll(context.eventOperatorSet);
    // need to clone the plan.
    List<Operator<?>> newRoots = SerializationUtilities.cloneOperatorTree(roots, indexForTezUnion);
    // we're cloning the operator plan but we're retaining the original work. That means
    // that root operators have to be replaced with the cloned ops. The replacement map
    // tells you what that mapping is.
    BiMap<Operator<?>, Operator<?>> replacementMap = HashBiMap.create();
    // there's some special handling for dummyOps required. Mapjoins won't be properly
    // initialized if their dummy parents aren't initialized. Since we cloned the plan
    // we need to replace the dummy operators in the work with the cloned ones.
    List<HashTableDummyOperator> dummyOps = new LinkedList<HashTableDummyOperator>();
    Iterator<Operator<?>> it = newRoots.iterator();
    for (Operator<?> orig : roots) {
        Set<FileSinkOperator> fsOpSet = OperatorUtils.findOperators(orig, FileSinkOperator.class);
        for (FileSinkOperator fsOp : fsOpSet) {
            context.fileSinkSet.remove(fsOp);
        }
        Operator<?> newRoot = it.next();
        replacementMap.put(orig, newRoot);
        if (newRoot instanceof HashTableDummyOperator) {
            // dummy ops need to be updated to the cloned ones.
            dummyOps.add((HashTableDummyOperator) newRoot);
            it.remove();
        } else if (newRoot instanceof AppMasterEventOperator) {
            // need to restore the original scan.
            if (newRoot.getConf() instanceof DynamicPruningEventDesc) {
                TableScanOperator ts = ((DynamicPruningEventDesc) orig.getConf()).getTableScan();
                if (ts == null) {
                    throw new AssertionError("No table scan associated with dynamic event pruning. " + orig);
                }
                ((DynamicPruningEventDesc) newRoot.getConf()).setTableScan(ts);
            }
            it.remove();
        } else {
            if (newRoot instanceof TableScanOperator) {
                if (context.tsToEventMap.containsKey(orig)) {
                    // we need to update event operators with the cloned table scan
                    for (AppMasterEventOperator event : context.tsToEventMap.get(orig)) {
                        ((DynamicPruningEventDesc) event.getConf()).setTableScan((TableScanOperator) newRoot);
                    }
                }
                // This TableScanOperator could be part of semijoin optimization.
                Map<ReduceSinkOperator, TableScanOperator> rsOpToTsOpMap = context.parseContext.getRsOpToTsOpMap();
                for (ReduceSinkOperator rs : rsOpToTsOpMap.keySet()) {
                    if (rsOpToTsOpMap.get(rs) == orig) {
                        rsOpToTsOpMap.put(rs, (TableScanOperator) newRoot);
                    }
                }
            }
            context.rootToWorkMap.remove(orig);
            context.rootToWorkMap.put(newRoot, work);
        }
    }
    // now we remove all the unions. we throw away any branch that's not reachable from
    // the current set of roots. The reason is that those branches will be handled in
    // different tasks.
    Deque<Operator<?>> operators = new LinkedList<Operator<?>>();
    operators.addAll(newRoots);
    Set<Operator<?>> seen = new HashSet<Operator<?>>();
    while (!operators.isEmpty()) {
        Operator<?> current = operators.pop();
        seen.add(current);
        if (current instanceof FileSinkOperator) {
            FileSinkOperator fileSink = (FileSinkOperator) current;
            // remember it for additional processing later
            context.fileSinkSet.add(fileSink);
            FileSinkDesc desc = fileSink.getConf();
            Path path = desc.getDirName();
            List<FileSinkDesc> linked;
            if (!context.linkedFileSinks.containsKey(path)) {
                linked = new ArrayList<FileSinkDesc>();
                context.linkedFileSinks.put(path, linked);
            }
            linked = context.linkedFileSinks.get(path);
            linked.add(desc);
            desc.setDirName(new Path(path, "" + linked.size()));
            desc.setLinkedFileSink(true);
            desc.setParentDir(path);
            desc.setLinkedFileSinkDesc(linked);
        }
        if (current instanceof AppMasterEventOperator) {
            // remember for additional processing later
            context.eventOperatorSet.add((AppMasterEventOperator) current);
            // mark the original as abandoned. Don't need it anymore.
            context.abandonedEventOperatorSet.add((AppMasterEventOperator) replacementMap.inverse().get(current));
        }
        if (current instanceof UnionOperator) {
            Operator<?> parent = null;
            int count = 0;
            for (Operator<?> op : current.getParentOperators()) {
                if (seen.contains(op)) {
                    ++count;
                    parent = op;
                }
            }
            // we should have been able to reach the union from only one side.
            assert count <= 1;
            if (parent == null) {
                // root operator is union (can happen in reducers)
                replacementMap.put(current, current.getChildOperators().get(0));
            } else {
                parent.removeChildAndAdoptItsChildren(current);
            }
        }
        if (current instanceof FileSinkOperator || current instanceof ReduceSinkOperator) {
            current.setChildOperators(null);
        } else {
            operators.addAll(current.getChildOperators());
        }
    }
    LOG.debug("Setting dummy ops for work " + work.getName() + ": " + dummyOps);
    work.setDummyOps(dummyOps);
    work.replaceRoots(replacementMap);
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) HashTableDummyOperator(org.apache.hadoop.hive.ql.exec.HashTableDummyOperator) Path(org.apache.hadoop.fs.Path) BiMap(com.google.common.collect.BiMap) HashBiMap(com.google.common.collect.HashBiMap)
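
The replacementMap is built as original-to-clone, and its inverse() view is what lets the cleanup code map a cloned AppMasterEventOperator back to the original operator that should be abandoned. A stripped-down sketch of that two-way lookup, with plain Objects standing in for the Hive operators:

import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;

public class ReplacementMapSketch {
    public static void main(String[] args) {
        // original -> clone; the inverse view answers "which original produced this clone?"
        BiMap<Object, Object> replacementMap = HashBiMap.create();
        Object original = new Object();
        Object clone = new Object();
        replacementMap.put(original, clone);

        Object forward = replacementMap.get(original);          // original -> clone
        Object backward = replacementMap.inverse().get(clone);  // clone -> original
        System.out.println((forward == clone) + " " + (backward == original));
    }
}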

Aggregations

BiMap (com.google.common.collect.BiMap): 8 usages
HashBiMap (com.google.common.collect.HashBiMap): 5 usages
Map (java.util.Map): 4 usages
ArrayList (java.util.ArrayList): 3 usages
HashMap (java.util.HashMap): 3 usages
LinkedHashMap (java.util.LinkedHashMap): 2 usages
Configuration (org.apache.hadoop.conf.Configuration): 2 usages
Test (org.junit.Test): 2 usages
ByteString (com.google.protobuf.ByteString): 1 usage
SegmentSizeInfo (com.linkedin.pinot.common.restlet.resources.SegmentSizeInfo): 1 usage
Server (com.zimbra.cs.account.Server): 1 usage
File (java.io.File): 1 usage
ResultSetMetaData (java.sql.ResultSetMetaData): 1 usage
HashSet (java.util.HashSet): 1 usage
List (java.util.List): 1 usage
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 1 usage
ConcurrentMap (java.util.concurrent.ConcurrentMap): 1 usage
ObjectMappingException (ninja.leaping.configurate.objectmapping.ObjectMappingException): 1 usage
Path (org.apache.hadoop.fs.Path): 1 usage
LlapNodeId (org.apache.hadoop.hive.llap.LlapNodeId): 1 usage