Use of org.apache.drill.exec.store.mongo.common.ChunkInfo in project drill by apache.
The class MongoGroupScan, method init().
@SuppressWarnings({ "rawtypes" })
private void init() throws IOException {
  List<String> h = storagePluginConfig.getHosts();
  List<ServerAddress> addresses = Lists.newArrayList();
  for (String host : h) {
    addresses.add(new ServerAddress(host));
  }
  MongoClient client = storagePlugin.getClient();
  chunksMapping = Maps.newHashMap();
  chunksInverseMapping = Maps.newLinkedHashMap();
  if (isShardedCluster(client)) {
    MongoDatabase db = client.getDatabase(CONFIG);
    MongoCollection<Document> chunksCollection = db.getCollection(CHUNKS);
    Document filter = new Document();
    filter.put(NS, this.scanSpec.getDbName() + "." + this.scanSpec.getCollectionName());
    Document projection = new Document();
    projection.put(SHARD, select);
    projection.put(MIN, select);
    projection.put(MAX, select);
    FindIterable<Document> chunkCursor = chunksCollection.find(filter).projection(projection);
    MongoCursor<Document> iterator = chunkCursor.iterator();
    MongoCollection<Document> shardsCollection = db.getCollection(SHARDS);
    projection = new Document();
    projection.put(HOST, select);
    boolean hasChunks = false;
    while (iterator.hasNext()) {
      Document chunkObj = iterator.next();
      String shardName = (String) chunkObj.get(SHARD);
      String chunkId = (String) chunkObj.get(ID);
      filter = new Document(ID, shardName);
      FindIterable<Document> hostCursor = shardsCollection.find(filter).projection(projection);
      MongoCursor<Document> hostIterator = hostCursor.iterator();
      while (hostIterator.hasNext()) {
        Document hostObj = hostIterator.next();
        String hostEntry = (String) hostObj.get(HOST);
        String[] tagAndHost = StringUtils.split(hostEntry, '/');
        String[] hosts = tagAndHost.length > 1
            ? StringUtils.split(tagAndHost[1], ',')
            : StringUtils.split(tagAndHost[0], ',');
        List<String> chunkHosts = Arrays.asList(hosts);
        Set<ServerAddress> addressList = getPreferredHosts(storagePlugin.getClient(addresses), chunkHosts);
        if (addressList == null) {
          addressList = Sets.newHashSet();
          for (String host : chunkHosts) {
            addressList.add(new ServerAddress(host));
          }
        }
        chunksMapping.put(chunkId, addressList);
        ServerAddress address = addressList.iterator().next();
        List<ChunkInfo> chunkList = chunksInverseMapping.get(address.getHost());
        if (chunkList == null) {
          chunkList = Lists.newArrayList();
          chunksInverseMapping.put(address.getHost(), chunkList);
        }
        List<String> chunkHostsList = new ArrayList<String>();
        for (ServerAddress serverAddr : addressList) {
          chunkHostsList.add(serverAddr.toString());
        }
        ChunkInfo chunkInfo = new ChunkInfo(chunkHostsList, chunkId);
        Document minMap = (Document) chunkObj.get(MIN);
        Map<String, Object> minFilters = Maps.newHashMap();
        Set keySet = minMap.keySet();
        for (Object keyObj : keySet) {
          Object object = minMap.get(keyObj);
          if (!(object instanceof MinKey)) {
            minFilters.put(keyObj.toString(), object);
          }
        }
        chunkInfo.setMinFilters(minFilters);
        Map<String, Object> maxFilters = Maps.newHashMap();
        Document maxMap = (Document) chunkObj.get(MAX);
        keySet = maxMap.keySet();
        for (Object keyObj : keySet) {
          Object object = maxMap.get(keyObj);
          if (!(object instanceof MaxKey)) {
            maxFilters.put(keyObj.toString(), object);
          }
        }
        chunkInfo.setMaxFilters(maxFilters);
        chunkList.add(chunkInfo);
      }
      hasChunks = true;
    }
    // A collection with no chunks is unsharded; it lives entirely on the
    // primary shard of its database.
    if (!hasChunks) {
      handleUnshardedCollection(getPrimaryShardInfo(client));
    }
  } else {
    handleUnshardedCollection(storagePluginConfig.getHosts());
  }
}
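The host lookup in the inner loop depends on the format of the host field in config.shards documents: "replicaSetName/host1:port,host2:port" for a replica-set shard, or a plain comma-separated "host:port" list for a standalone shard. A minimal, self-contained sketch of just that parsing step, assuming commons-lang3 on the classpath; the class name and host strings below are hypothetical:

import java.util.Arrays;
import java.util.List;
import org.apache.commons.lang3.StringUtils;

public class ShardHostParseSketch {
  public static void main(String[] args) {
    // One replica-set shard entry and one standalone shard entry (hypothetical values).
    for (String hostEntry : new String[] { "rs0/mongo-a:27017,mongo-b:27017", "mongo-c:27017" }) {
      // Strip the optional "replicaSetName/" prefix, then split the host list on commas.
      String[] tagAndHost = StringUtils.split(hostEntry, '/');
      String[] hosts = tagAndHost.length > 1
          ? StringUtils.split(tagAndHost[1], ',')
          : StringUtils.split(tagAndHost[0], ',');
      List<String> chunkHosts = Arrays.asList(hosts);
      System.out.println(hostEntry + " -> " + chunkHosts);
      // rs0/mongo-a:27017,mongo-b:27017 -> [mongo-a:27017, mongo-b:27017]
      // mongo-c:27017 -> [mongo-c:27017]
    }
  }
}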
Use of org.apache.drill.exec.store.mongo.common.ChunkInfo in project drill by apache.
The class TestMongoChunkAssignment, method setUp().
@Before
public void setUp() throws UnknownHostException {
  chunksMapping = Maps.newHashMap();
  chunksInverseMapping = Maps.newLinkedHashMap();
  // entry1
  Set<ServerAddress> hosts_A = Sets.newHashSet();
  hosts_A.add(new ServerAddress(HOST_A));
  chunksMapping.put(dbName + "." + collectionName + "-01", hosts_A);
  chunksMapping.put(dbName + "." + collectionName + "-05", hosts_A);
  ChunkInfo chunk1Info = new ChunkInfo(Arrays.asList(HOST_A), dbName + "." + collectionName + "-01");
  chunk1Info.setMinFilters(Collections.<String, Object>emptyMap());
  Map<String, Object> chunk1MaxFilters = Maps.newHashMap();
  chunk1MaxFilters.put("name", Integer.valueOf(5));
  chunk1Info.setMaxFilters(chunk1MaxFilters);
  ChunkInfo chunk5Info = new ChunkInfo(Arrays.asList(HOST_A), dbName + "." + collectionName + "-05");
  Map<String, Object> chunk5MinFilters = Maps.newHashMap();
  chunk5MinFilters.put("name", Integer.valueOf(25));
  chunk5Info.setMinFilters(chunk5MinFilters);
  Map<String, Object> chunk5MaxFilters = Maps.newHashMap();
  chunk5MaxFilters.put("name", Integer.valueOf(30));
  chunk5Info.setMaxFilters(chunk5MaxFilters);
  List<ChunkInfo> chunkList = Arrays.asList(chunk1Info, chunk5Info);
  chunksInverseMapping.put(HOST_A, chunkList);
  // entry2
  Set<ServerAddress> hosts_B = Sets.newHashSet();
  hosts_B.add(new ServerAddress(HOST_B));
  chunksMapping.put(dbName + "." + collectionName + "-02", hosts_B);
  ChunkInfo chunk2Info = new ChunkInfo(Arrays.asList(HOST_B), dbName + "." + collectionName + "-02");
  Map<String, Object> chunk2MinFilters = Maps.newHashMap();
  chunk2MinFilters.put("name", Integer.valueOf(5));
  chunk2Info.setMinFilters(chunk2MinFilters);
  Map<String, Object> chunk2MaxFilters = Maps.newHashMap();
  chunk2MaxFilters.put("name", Integer.valueOf(15));
  chunk2Info.setMaxFilters(chunk2MaxFilters);
  chunkList = Arrays.asList(chunk2Info);
  chunksInverseMapping.put(HOST_B, chunkList);
  // entry3
  Set<ServerAddress> hosts_C = Sets.newHashSet();
  hosts_C.add(new ServerAddress(HOST_C));
  chunksMapping.put(dbName + "." + collectionName + "-03", hosts_C);
  chunksMapping.put(dbName + "." + collectionName + "-06", hosts_C);
  ChunkInfo chunk3Info = new ChunkInfo(Arrays.asList(HOST_C), dbName + "." + collectionName + "-03");
  Map<String, Object> chunk3MinFilters = Maps.newHashMap();
  chunk3MinFilters.put("name", Integer.valueOf(15));
  chunk3Info.setMinFilters(chunk3MinFilters);
  Map<String, Object> chunk3MaxFilters = Maps.newHashMap();
  chunk3MaxFilters.put("name", Integer.valueOf(20));
  chunk3Info.setMaxFilters(chunk3MaxFilters);
  ChunkInfo chunk6Info = new ChunkInfo(Arrays.asList(HOST_C), dbName + "." + collectionName + "-06");
  Map<String, Object> chunk6MinFilters = Maps.newHashMap();
  chunk6MinFilters.put("name", Integer.valueOf(25));
  chunk6Info.setMinFilters(chunk6MinFilters);
  Map<String, Object> chunk6MaxFilters = Maps.newHashMap();
  chunk6MaxFilters.put("name", Integer.valueOf(30));
  chunk6Info.setMaxFilters(chunk6MaxFilters);
  chunkList = Arrays.asList(chunk3Info, chunk6Info);
  chunksInverseMapping.put(HOST_C, chunkList);
  // entry4
  Set<ServerAddress> hosts_D = Sets.newHashSet();
  hosts_D.add(new ServerAddress(HOST_D));
  chunksMapping.put(dbName + "." + collectionName + "-04", hosts_D);
  ChunkInfo chunk4Info = new ChunkInfo(Arrays.asList(HOST_D), dbName + "." + collectionName + "-04");
  Map<String, Object> chunk4MinFilters = Maps.newHashMap();
  chunk4MinFilters.put("name", Integer.valueOf(20));
  chunk4Info.setMinFilters(chunk4MinFilters);
  Map<String, Object> chunk4MaxFilters = Maps.newHashMap();
  chunk4MaxFilters.put("name", Integer.valueOf(25));
  chunk4Info.setMaxFilters(chunk4MaxFilters);
  chunkList = Arrays.asList(chunk4Info);
  chunksInverseMapping.put(HOST_D, chunkList);
  mongoGroupScan = new MongoGroupScan();
  mongoGroupScan.setChunksMapping(chunksMapping);
  mongoGroupScan.setInverseChunsMapping(chunksInverseMapping);
  MongoScanSpec scanSpec = new MongoScanSpec(dbName, collectionName);
  mongoGroupScan.setScanSpec(scanSpec);
}
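The fixture yields six chunks across four hosts, so applyAssignments should hand every endpoint at least floor(6/4) = 1 sub-scan. A hedged sketch of a test method that could exercise the fixture; the test name, control port, and the use of getSpecificScan(i).getChunkScanSpecList() follow the pattern of Drill's Mongo assignment tests but are assumptions here, not code from the original class:

@Test
public void testAllChunksAssigned() throws Exception {
  // One Drillbit endpoint per Mongo host; the control port is a placeholder.
  List<DrillbitEndpoint> endpoints = Lists.newArrayList();
  for (String host : new String[] { HOST_A, HOST_B, HOST_C, HOST_D }) {
    endpoints.add(DrillbitEndpoint.newBuilder().setAddress(host).setControlPort(31010).build());
  }
  mongoGroupScan.applyAssignments(endpoints);
  // The per-endpoint sub-scan lists must add back up to the six chunks registered in setUp().
  int assigned = 0;
  for (int i = 0; i < endpoints.size(); i++) {
    assigned += mongoGroupScan.getSpecificScan(i).getChunkScanSpecList().size();
  }
  assertEquals(6, assigned);
}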
Use of org.apache.drill.exec.store.mongo.common.ChunkInfo in project drill by apache.
The class MongoGroupScan, method handleUnshardedCollection().
private void handleUnshardedCollection(List<String> hosts) {
  String chunkName = Joiner.on('.').join(scanSpec.getDbName(), scanSpec.getCollectionName());
  Set<ServerAddress> addressList = Sets.newHashSet();
  for (String host : hosts) {
    addressList.add(new ServerAddress(host));
  }
  chunksMapping.put(chunkName, addressList);
  String host = hosts.get(0);
  ServerAddress address = new ServerAddress(host);
  ChunkInfo chunkInfo = new ChunkInfo(hosts, chunkName);
  chunkInfo.setMinFilters(Collections.<String, Object>emptyMap());
  chunkInfo.setMaxFilters(Collections.<String, Object>emptyMap());
  List<ChunkInfo> chunksList = Lists.newArrayList();
  chunksList.add(chunkInfo);
  chunksInverseMapping.put(address.getHost(), chunksList);
}
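An unsharded collection is modelled as a single pseudo-chunk named after its namespace, with empty min and max filters so the sub-scan reads the whole collection. A worked illustration with hypothetical inputs, shown informally:

// Hypothetical input: scanSpec = ("employee", "empinfo"), hosts = ["mongo-a:27017"].
// After handleUnshardedCollection(hosts):
//   chunksMapping        -> { "employee.empinfo" : { mongo-a:27017 } }
//   chunksInverseMapping -> { "mongo-a" : [ one ChunkInfo for "employee.empinfo"
//                                           with empty min and max filters ] }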
Use of org.apache.drill.exec.store.mongo.common.ChunkInfo in project drill by apache.
The class MongoGroupScan, method applyAssignments().
@Override
public void applyAssignments(List<DrillbitEndpoint> endpoints) throws PhysicalOperatorSetupException {
  logger.debug("Incoming endpoints :" + endpoints);
  watch.reset();
  watch.start();
  final int numSlots = endpoints.size();
  int totalAssignmentsTobeDone = chunksMapping.size();
  Preconditions.checkArgument(numSlots <= totalAssignmentsTobeDone,
      String.format("Incoming endpoints %d is greater than number of chunks %d", numSlots, totalAssignmentsTobeDone));
  final int minPerEndpointSlot = (int) Math.floor((double) totalAssignmentsTobeDone / numSlots);
  final int maxPerEndpointSlot = (int) Math.ceil((double) totalAssignmentsTobeDone / numSlots);
  endpointFragmentMapping = Maps.newHashMapWithExpectedSize(numSlots);
  Map<String, Queue<Integer>> endpointHostIndexListMap = Maps.newHashMap();
  for (int i = 0; i < numSlots; ++i) {
    endpointFragmentMapping.put(i, new ArrayList<MongoSubScanSpec>(maxPerEndpointSlot));
    String hostname = endpoints.get(i).getAddress();
    Queue<Integer> hostIndexQueue = endpointHostIndexListMap.get(hostname);
    if (hostIndexQueue == null) {
      hostIndexQueue = Lists.newLinkedList();
      endpointHostIndexListMap.put(hostname, hostIndexQueue);
    }
    hostIndexQueue.add(i);
  }
  Set<Entry<String, List<ChunkInfo>>> chunksToAssignSet = Sets.newHashSet(chunksInverseMapping.entrySet());
  for (Iterator<Entry<String, List<ChunkInfo>>> chunksIterator = chunksToAssignSet.iterator(); chunksIterator.hasNext(); ) {
    Entry<String, List<ChunkInfo>> chunkEntry = chunksIterator.next();
    Queue<Integer> slots = endpointHostIndexListMap.get(chunkEntry.getKey());
    if (slots != null) {
      for (ChunkInfo chunkInfo : chunkEntry.getValue()) {
        Integer slotIndex = slots.poll();
        List<MongoSubScanSpec> subScanSpecList = endpointFragmentMapping.get(slotIndex);
        subScanSpecList.add(buildSubScanSpecAndGet(chunkInfo));
        slots.offer(slotIndex);
      }
      chunksIterator.remove();
    }
  }
  PriorityQueue<List<MongoSubScanSpec>> minHeap = new PriorityQueue<List<MongoSubScanSpec>>(numSlots, LIST_SIZE_COMPARATOR);
  PriorityQueue<List<MongoSubScanSpec>> maxHeap = new PriorityQueue<List<MongoSubScanSpec>>(numSlots, LIST_SIZE_COMPARATOR_REV);
  for (List<MongoSubScanSpec> listOfScan : endpointFragmentMapping.values()) {
    if (listOfScan.size() < minPerEndpointSlot) {
      minHeap.offer(listOfScan);
    } else if (listOfScan.size() > minPerEndpointSlot) {
      maxHeap.offer(listOfScan);
    }
  }
  if (chunksToAssignSet.size() > 0) {
    for (Entry<String, List<ChunkInfo>> chunkEntry : chunksToAssignSet) {
      for (ChunkInfo chunkInfo : chunkEntry.getValue()) {
        List<MongoSubScanSpec> smallestList = minHeap.poll();
        smallestList.add(buildSubScanSpecAndGet(chunkInfo));
        minHeap.offer(smallestList);
      }
    }
  }
  while (minHeap.peek() != null && minHeap.peek().size() < minPerEndpointSlot) {
    List<MongoSubScanSpec> smallestList = minHeap.poll();
    List<MongoSubScanSpec> largestList = maxHeap.poll();
    smallestList.add(largestList.remove(largestList.size() - 1));
    if (largestList.size() > minPerEndpointSlot) {
      maxHeap.offer(largestList);
    }
    if (smallestList.size() < minPerEndpointSlot) {
      minHeap.offer(smallestList);
    }
  }
  logger.debug("Built assignment map in {} µs.\nEndpoints: {}.\nAssignment Map: {}",
      watch.elapsed(TimeUnit.NANOSECONDS) / 1000, endpoints, endpointFragmentMapping.toString());
}
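Taken together, the method assigns chunks in three passes: chunks local to an endpoint's host go to a slot on that host, remaining chunks go to the currently smallest slot, and a final two-heap pass moves work off overloaded slots until every slot holds at least floor(totalChunks / numSlots) sub-scans. A standalone sketch of that final rebalancing pass, using plain integer lists in place of MongoSubScanSpec lists; all values are hypothetical:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.PriorityQueue;

public class RebalanceSketch {
  public static void main(String[] args) {
    int totalChunks = 6, numSlots = 4;
    int min = (int) Math.floor((double) totalChunks / numSlots); // 1
    Comparator<List<Integer>> bySize = Comparator.comparingInt(List::size);
    PriorityQueue<List<Integer>> minHeap = new PriorityQueue<>(numSlots, bySize);
    PriorityQueue<List<Integer>> maxHeap = new PriorityQueue<>(numSlots, bySize.reversed());
    // A skewed initial assignment: one endpoint received every chunk locally.
    List<List<Integer>> slots = Arrays.asList(
        new ArrayList<>(Arrays.asList(1, 2, 3, 4, 5, 6)),
        new ArrayList<>(), new ArrayList<>(), new ArrayList<>());
    for (List<Integer> slot : slots) {
      if (slot.size() < min) minHeap.offer(slot);
      else if (slot.size() > min) maxHeap.offer(slot);
    }
    // Move one chunk at a time from the fullest slot to the emptiest
    // until every slot holds at least floor(totalChunks / numSlots).
    while (minHeap.peek() != null && minHeap.peek().size() < min) {
      List<Integer> smallest = minHeap.poll();
      List<Integer> largest = maxHeap.poll();
      smallest.add(largest.remove(largest.size() - 1));
      if (largest.size() > min) maxHeap.offer(largest);
      if (smallest.size() < min) minHeap.offer(smallest);
    }
    System.out.println(slots); // [[1, 2, 3], [6], [5], [4]]
  }
}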