Search in sources :

Example 1 with ChunkInfo

use of org.apache.drill.exec.store.mongo.common.ChunkInfo in project drill by apache.

the class MongoGroupScan method init.

@SuppressWarnings({ "rawtypes" })
private void init() throws IOException {
    List<String> h = storagePluginConfig.getHosts();
    List<ServerAddress> addresses = Lists.newArrayList();
    for (String host : h) {
        addresses.add(new ServerAddress(host));
    }
    MongoClient client = storagePlugin.getClient();
    chunksMapping = Maps.newHashMap();
    chunksInverseMapping = Maps.newLinkedHashMap();
    if (isShardedCluster(client)) {
        MongoDatabase db = client.getDatabase(CONFIG);
        MongoCollection<Document> chunksCollection = db.getCollection(CHUNKS);
        Document filter = new Document();
        filter.put(NS, this.scanSpec.getDbName() + "." + this.scanSpec.getCollectionName());
        Document projection = new Document();
        projection.put(SHARD, select);
        projection.put(MIN, select);
        projection.put(MAX, select);
        FindIterable<Document> chunkCursor = chunksCollection.find(filter).projection(projection);
        MongoCursor<Document> iterator = chunkCursor.iterator();
        MongoCollection<Document> shardsCollection = db.getCollection(SHARDS);
        projection = new Document();
        projection.put(HOST, select);
        boolean hasChunks = false;
        while (iterator.hasNext()) {
            Document chunkObj = iterator.next();
            String shardName = (String) chunkObj.get(SHARD);
            String chunkId = (String) chunkObj.get(ID);
            filter = new Document(ID, shardName);
            FindIterable<Document> hostCursor = shardsCollection.find(filter).projection(projection);
            MongoCursor<Document> hostIterator = hostCursor.iterator();
            while (hostIterator.hasNext()) {
                Document hostObj = hostIterator.next();
                String hostEntry = (String) hostObj.get(HOST);
                String[] tagAndHost = StringUtils.split(hostEntry, '/');
                String[] hosts = tagAndHost.length > 1 ? StringUtils.split(tagAndHost[1], ',') : StringUtils.split(tagAndHost[0], ',');
                List<String> chunkHosts = Arrays.asList(hosts);
                Set<ServerAddress> addressList = getPreferredHosts(storagePlugin.getClient(addresses), chunkHosts);
                if (addressList == null) {
                    addressList = Sets.newHashSet();
                    for (String host : chunkHosts) {
                        addressList.add(new ServerAddress(host));
                    }
                }
                chunksMapping.put(chunkId, addressList);
                ServerAddress address = addressList.iterator().next();
                List<ChunkInfo> chunkList = chunksInverseMapping.get(address.getHost());
                if (chunkList == null) {
                    chunkList = Lists.newArrayList();
                    chunksInverseMapping.put(address.getHost(), chunkList);
                }
                List<String> chunkHostsList = new ArrayList<String>();
                for (ServerAddress serverAddr : addressList) {
                    chunkHostsList.add(serverAddr.toString());
                }
                ChunkInfo chunkInfo = new ChunkInfo(chunkHostsList, chunkId);
                Document minMap = (Document) chunkObj.get(MIN);
                Map<String, Object> minFilters = Maps.newHashMap();
                Set keySet = minMap.keySet();
                for (Object keyObj : keySet) {
                    Object object = minMap.get(keyObj);
                    if (!(object instanceof MinKey)) {
                        minFilters.put(keyObj.toString(), object);
                    }
                }
                chunkInfo.setMinFilters(minFilters);
                Map<String, Object> maxFilters = Maps.newHashMap();
                Map maxMap = (Document) chunkObj.get(MAX);
                keySet = maxMap.keySet();
                for (Object keyObj : keySet) {
                    Object object = maxMap.get(keyObj);
                    if (!(object instanceof MaxKey)) {
                        maxFilters.put(keyObj.toString(), object);
                    }
                }
                chunkInfo.setMaxFilters(maxFilters);
                chunkList.add(chunkInfo);
            }
            hasChunks = true;
        }
        // unsharded collection and it will be stored in the primary shard of that database.
        if (!hasChunks) {
            handleUnshardedCollection(getPrimaryShardInfo(client));
        }
    } else {
        handleUnshardedCollection(storagePluginConfig.getHosts());
    }
}
Also used : ChunkInfo(org.apache.drill.exec.store.mongo.common.ChunkInfo) Set(java.util.Set) ServerAddress(com.mongodb.ServerAddress) ArrayList(java.util.ArrayList) MaxKey(org.bson.types.MaxKey) Document(org.bson.Document) MongoClient(com.mongodb.MongoClient) MinKey(org.bson.types.MinKey) Map(java.util.Map) BsonTypeClassMap(org.bson.codecs.BsonTypeClassMap) MongoDatabase(com.mongodb.client.MongoDatabase)

Example 2 with ChunkInfo

use of org.apache.drill.exec.store.mongo.common.ChunkInfo in project drill by apache.

the class TestMongoChunkAssignment method setUp.

@Before
public void setUp() throws UnknownHostException {
    chunksMapping = Maps.newHashMap();
    chunksInverseMapping = Maps.newLinkedHashMap();
    // entry1
    Set<ServerAddress> hosts_A = Sets.newHashSet();
    hosts_A.add(new ServerAddress(HOST_A));
    chunksMapping.put(dbName + "." + collectionName + "-01", hosts_A);
    chunksMapping.put(dbName + "." + collectionName + "-05", hosts_A);
    ChunkInfo chunk1Info = new ChunkInfo(Arrays.asList(HOST_A), dbName + "." + collectionName + "-01");
    chunk1Info.setMinFilters(Collections.<String, Object>emptyMap());
    Map<String, Object> chunk1MaxFilters = Maps.newHashMap();
    chunk1MaxFilters.put("name", Integer.valueOf(5));
    chunk1Info.setMaxFilters(chunk1MaxFilters);
    ChunkInfo chunk5Info = new ChunkInfo(Arrays.asList(HOST_A), dbName + "." + collectionName + "-05");
    Map<String, Object> chunk5MinFilters = Maps.newHashMap();
    chunk5MinFilters.put("name", Integer.valueOf(25));
    chunk5Info.setMinFilters(chunk5MinFilters);
    Map<String, Object> chunk5MaxFilters = Maps.newHashMap();
    chunk5MaxFilters.put("name", Integer.valueOf(30));
    chunk5Info.setMaxFilters(chunk5MaxFilters);
    List<ChunkInfo> chunkList = Arrays.asList(chunk1Info, chunk5Info);
    chunksInverseMapping.put(HOST_A, chunkList);
    // entry2
    Set<ServerAddress> hosts_B = Sets.newHashSet();
    hosts_A.add(new ServerAddress(HOST_B));
    chunksMapping.put(dbName + "." + collectionName + "-02", hosts_B);
    ChunkInfo chunk2Info = new ChunkInfo(Arrays.asList(HOST_B), dbName + "." + collectionName + "-02");
    Map<String, Object> chunk2MinFilters = Maps.newHashMap();
    chunk2MinFilters.put("name", Integer.valueOf(5));
    chunk2Info.setMinFilters(chunk2MinFilters);
    Map<String, Object> chunk2MaxFilters = Maps.newHashMap();
    chunk2MaxFilters.put("name", Integer.valueOf(15));
    chunk2Info.setMaxFilters(chunk2MaxFilters);
    chunkList = Arrays.asList(chunk2Info);
    chunksInverseMapping.put(HOST_B, chunkList);
    // enty3
    Set<ServerAddress> hosts_C = Sets.newHashSet();
    hosts_A.add(new ServerAddress(HOST_C));
    chunksMapping.put(dbName + "." + collectionName + "-03", hosts_C);
    chunksMapping.put(dbName + "." + collectionName + "-06", hosts_C);
    ChunkInfo chunk3Info = new ChunkInfo(Arrays.asList(HOST_C), dbName + "." + collectionName + "-03");
    Map<String, Object> chunk3MinFilters = Maps.newHashMap();
    chunk5MinFilters.put("name", Integer.valueOf(15));
    chunk3Info.setMinFilters(chunk3MinFilters);
    Map<String, Object> chunk3MaxFilters = Maps.newHashMap();
    chunk3MaxFilters.put("name", Integer.valueOf(20));
    chunk3Info.setMaxFilters(chunk3MaxFilters);
    ChunkInfo chunk6Info = new ChunkInfo(Arrays.asList(HOST_C), dbName + "." + collectionName + "-06");
    Map<String, Object> chunk6MinFilters = Maps.newHashMap();
    chunk5MinFilters.put("name", Integer.valueOf(25));
    chunk6Info.setMinFilters(chunk6MinFilters);
    Map<String, Object> chunk6MaxFilters = Maps.newHashMap();
    chunk5MaxFilters.put("name", Integer.valueOf(30));
    chunk6Info.setMaxFilters(chunk6MaxFilters);
    chunkList = Arrays.asList(chunk3Info, chunk6Info);
    chunksInverseMapping.put(HOST_C, chunkList);
    // entry4
    Set<ServerAddress> hosts_D = Sets.newHashSet();
    hosts_A.add(new ServerAddress(HOST_D));
    chunksMapping.put(dbName + "." + collectionName + "-04", hosts_D);
    ChunkInfo chunk4Info = new ChunkInfo(Arrays.asList(HOST_D), dbName + "." + collectionName + "-04");
    Map<String, Object> chunk4MinFilters = Maps.newHashMap();
    chunk4MinFilters.put("name", Integer.valueOf(20));
    chunk4Info.setMinFilters(chunk4MinFilters);
    Map<String, Object> chunk4MaxFilters = Maps.newHashMap();
    chunk4MaxFilters.put("name", Integer.valueOf(25));
    chunk4Info.setMaxFilters(chunk4MaxFilters);
    chunkList = Arrays.asList(chunk4Info);
    chunksInverseMapping.put(HOST_D, chunkList);
    mongoGroupScan = new MongoGroupScan();
    mongoGroupScan.setChunksMapping(chunksMapping);
    mongoGroupScan.setInverseChunsMapping(chunksInverseMapping);
    MongoScanSpec scanSpec = new MongoScanSpec(dbName, collectionName);
    mongoGroupScan.setScanSpec(scanSpec);
}
Also used : ChunkInfo(org.apache.drill.exec.store.mongo.common.ChunkInfo) ServerAddress(com.mongodb.ServerAddress) Before(org.junit.Before)

Example 3 with ChunkInfo

use of org.apache.drill.exec.store.mongo.common.ChunkInfo in project drill by apache.

the class MongoGroupScan method handleUnshardedCollection.

private void handleUnshardedCollection(List<String> hosts) {
    String chunkName = Joiner.on('.').join(scanSpec.getDbName(), scanSpec.getCollectionName());
    Set<ServerAddress> addressList = Sets.newHashSet();
    for (String host : hosts) {
        addressList.add(new ServerAddress(host));
    }
    chunksMapping.put(chunkName, addressList);
    String host = hosts.get(0);
    ServerAddress address = new ServerAddress(host);
    ChunkInfo chunkInfo = new ChunkInfo(hosts, chunkName);
    chunkInfo.setMinFilters(Collections.<String, Object>emptyMap());
    chunkInfo.setMaxFilters(Collections.<String, Object>emptyMap());
    List<ChunkInfo> chunksList = Lists.newArrayList();
    chunksList.add(chunkInfo);
    chunksInverseMapping.put(address.getHost(), chunksList);
}
Also used : ChunkInfo(org.apache.drill.exec.store.mongo.common.ChunkInfo) ServerAddress(com.mongodb.ServerAddress)

Example 4 with ChunkInfo

use of org.apache.drill.exec.store.mongo.common.ChunkInfo in project drill by apache.

the class MongoGroupScan method applyAssignments.

@Override
public void applyAssignments(List<DrillbitEndpoint> endpoints) throws PhysicalOperatorSetupException {
    logger.debug("Incoming endpoints :" + endpoints);
    watch.reset();
    watch.start();
    final int numSlots = endpoints.size();
    int totalAssignmentsTobeDone = chunksMapping.size();
    Preconditions.checkArgument(numSlots <= totalAssignmentsTobeDone, String.format("Incoming endpoints %d is greater than number of chunks %d", numSlots, totalAssignmentsTobeDone));
    final int minPerEndpointSlot = (int) Math.floor((double) totalAssignmentsTobeDone / numSlots);
    final int maxPerEndpointSlot = (int) Math.ceil((double) totalAssignmentsTobeDone / numSlots);
    endpointFragmentMapping = Maps.newHashMapWithExpectedSize(numSlots);
    Map<String, Queue<Integer>> endpointHostIndexListMap = Maps.newHashMap();
    for (int i = 0; i < numSlots; ++i) {
        endpointFragmentMapping.put(i, new ArrayList<MongoSubScanSpec>(maxPerEndpointSlot));
        String hostname = endpoints.get(i).getAddress();
        Queue<Integer> hostIndexQueue = endpointHostIndexListMap.get(hostname);
        if (hostIndexQueue == null) {
            hostIndexQueue = Lists.newLinkedList();
            endpointHostIndexListMap.put(hostname, hostIndexQueue);
        }
        hostIndexQueue.add(i);
    }
    Set<Entry<String, List<ChunkInfo>>> chunksToAssignSet = Sets.newHashSet(chunksInverseMapping.entrySet());
    for (Iterator<Entry<String, List<ChunkInfo>>> chunksIterator = chunksToAssignSet.iterator(); chunksIterator.hasNext(); ) {
        Entry<String, List<ChunkInfo>> chunkEntry = chunksIterator.next();
        Queue<Integer> slots = endpointHostIndexListMap.get(chunkEntry.getKey());
        if (slots != null) {
            for (ChunkInfo chunkInfo : chunkEntry.getValue()) {
                Integer slotIndex = slots.poll();
                List<MongoSubScanSpec> subScanSpecList = endpointFragmentMapping.get(slotIndex);
                subScanSpecList.add(buildSubScanSpecAndGet(chunkInfo));
                slots.offer(slotIndex);
            }
            chunksIterator.remove();
        }
    }
    PriorityQueue<List<MongoSubScanSpec>> minHeap = new PriorityQueue<List<MongoSubScanSpec>>(numSlots, LIST_SIZE_COMPARATOR);
    PriorityQueue<List<MongoSubScanSpec>> maxHeap = new PriorityQueue<List<MongoSubScanSpec>>(numSlots, LIST_SIZE_COMPARATOR_REV);
    for (List<MongoSubScanSpec> listOfScan : endpointFragmentMapping.values()) {
        if (listOfScan.size() < minPerEndpointSlot) {
            minHeap.offer(listOfScan);
        } else if (listOfScan.size() > minPerEndpointSlot) {
            maxHeap.offer(listOfScan);
        }
    }
    if (chunksToAssignSet.size() > 0) {
        for (Entry<String, List<ChunkInfo>> chunkEntry : chunksToAssignSet) {
            for (ChunkInfo chunkInfo : chunkEntry.getValue()) {
                List<MongoSubScanSpec> smallestList = minHeap.poll();
                smallestList.add(buildSubScanSpecAndGet(chunkInfo));
                minHeap.offer(smallestList);
            }
        }
    }
    while (minHeap.peek() != null && minHeap.peek().size() < minPerEndpointSlot) {
        List<MongoSubScanSpec> smallestList = minHeap.poll();
        List<MongoSubScanSpec> largestList = maxHeap.poll();
        smallestList.add(largestList.remove(largestList.size() - 1));
        if (largestList.size() > minPerEndpointSlot) {
            maxHeap.offer(largestList);
        }
        if (smallestList.size() < minPerEndpointSlot) {
            minHeap.offer(smallestList);
        }
    }
    logger.debug("Built assignment map in {} µs.\nEndpoints: {}.\nAssignment Map: {}", watch.elapsed(TimeUnit.NANOSECONDS) / 1000, endpoints, endpointFragmentMapping.toString());
}
Also used : ChunkInfo(org.apache.drill.exec.store.mongo.common.ChunkInfo) PriorityQueue(java.util.PriorityQueue) DrillbitEndpoint(org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint) Entry(java.util.Map.Entry) MongoSubScanSpec(org.apache.drill.exec.store.mongo.MongoSubScan.MongoSubScanSpec) List(java.util.List) ArrayList(java.util.ArrayList) PriorityQueue(java.util.PriorityQueue) Queue(java.util.Queue)

Aggregations

ChunkInfo (org.apache.drill.exec.store.mongo.common.ChunkInfo)4 ServerAddress (com.mongodb.ServerAddress)3 ArrayList (java.util.ArrayList)2 MongoClient (com.mongodb.MongoClient)1 MongoDatabase (com.mongodb.client.MongoDatabase)1 List (java.util.List)1 Map (java.util.Map)1 Entry (java.util.Map.Entry)1 PriorityQueue (java.util.PriorityQueue)1 Queue (java.util.Queue)1 Set (java.util.Set)1 DrillbitEndpoint (org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint)1 MongoSubScanSpec (org.apache.drill.exec.store.mongo.MongoSubScan.MongoSubScanSpec)1 Document (org.bson.Document)1 BsonTypeClassMap (org.bson.codecs.BsonTypeClassMap)1 MaxKey (org.bson.types.MaxKey)1 MinKey (org.bson.types.MinKey)1 Before (org.junit.Before)1