Example 56 with DrillbitEndpoint

Use of org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint in project drill by apache.

The class ParquetGroupScan, method init.

private void init(MetadataContext metaContext) throws IOException {
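    // Resolve the Parquet table metadata: prefer the metadata cache file when it exists, otherwise read file footers directly.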
    if (entries.size() == 1 && parquetTableMetadata == null) {
        Path p = Path.getPathWithoutSchemeAndAuthority(new Path(entries.get(0).getPath()));
        Path metaPath = null;
        if (fs.isDirectory(p)) {
            // Using the metadata file makes sense when querying a directory; otherwise
            // if querying a single file we can look up the metadata directly from the file
            metaPath = new Path(p, Metadata.METADATA_FILENAME);
        }
        if (metaPath != null && fs.exists(metaPath)) {
            usedMetadataCache = true;
            parquetTableMetadata = Metadata.readBlockMeta(fs, metaPath.toString(), metaContext, formatConfig);
        } else {
            parquetTableMetadata = Metadata.getParquetTableMetadata(fs, p.toString(), formatConfig);
        }
    } else {
        Path p = Path.getPathWithoutSchemeAndAuthority(new Path(selectionRoot));
        Path metaPath = new Path(p, Metadata.METADATA_FILENAME);
        if (fs.isDirectory(new Path(selectionRoot)) && fs.exists(metaPath)) {
            usedMetadataCache = true;
            if (parquetTableMetadata == null) {
                parquetTableMetadata = Metadata.readBlockMeta(fs, metaPath.toString(), metaContext, formatConfig);
            }
            if (fileSet != null) {
                parquetTableMetadata = removeUnneededRowGroups(parquetTableMetadata);
            }
        } else {
            final List<FileStatus> fileStatuses = Lists.newArrayList();
            for (ReadEntryWithPath entry : entries) {
                getFiles(entry.getPath(), fileStatuses);
            }
            parquetTableMetadata = Metadata.getParquetTableMetadata(fs, fileStatuses, formatConfig);
        }
    }
    if (fileSet == null) {
        fileSet = Sets.newHashSet();
        for (ParquetFileMetadata file : parquetTableMetadata.getFiles()) {
            fileSet.add(file.getPath());
        }
    }
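    // Map each host name to its Drillbit endpoint so row groups can be matched to local Drillbits.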
    Map<String, DrillbitEndpoint> hostEndpointMap = Maps.newHashMap();
    for (DrillbitEndpoint endpoint : formatPlugin.getContext().getBits()) {
        hostEndpointMap.put(endpoint.getAddress(), endpoint);
    }
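    // Build one RowGroupInfo per row group, crediting each candidate endpoint with its share of the row group's bytes.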
    rowGroupInfos = Lists.newArrayList();
    for (ParquetFileMetadata file : parquetTableMetadata.getFiles()) {
        int rgIndex = 0;
        for (RowGroupMetadata rg : file.getRowGroups()) {
            RowGroupInfo rowGroupInfo = new RowGroupInfo(file.getPath(), rg.getStart(), rg.getLength(), rgIndex, rg.getRowCount());
            EndpointByteMap endpointByteMap = new EndpointByteMapImpl();
            for (String host : rg.getHostAffinity().keySet()) {
                if (hostEndpointMap.containsKey(host)) {
                    endpointByteMap.add(hostEndpointMap.get(host), (long) (rg.getHostAffinity().get(host) * rg.getLength()));
                }
            }
            rowGroupInfo.setEndpointByteMap(endpointByteMap);
            rgIndex++;
            rowGroupInfos.add(rowGroupInfo);
        }
    }
    this.endpointAffinities = AffinityCreator.getAffinityMap(rowGroupInfos);
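    // Walk every row group once more to accumulate per-column value counts and to detect potential partition columns.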
    columnValueCounts = Maps.newHashMap();
    this.rowCount = 0;
    boolean first = true;
    for (ParquetFileMetadata file : parquetTableMetadata.getFiles()) {
        for (RowGroupMetadata rowGroup : file.getRowGroups()) {
            long rowCount = rowGroup.getRowCount();
            for (ColumnMetadata column : rowGroup.getColumns()) {
                SchemaPath schemaPath = SchemaPath.getCompoundPath(column.getName());
                Long previousCount = columnValueCounts.get(schemaPath);
                if (previousCount != null) {
                    if (previousCount != GroupScan.NO_COLUMN_STATS) {
                        if (column.getNulls() != null) {
                            Long newCount = rowCount - column.getNulls();
                            columnValueCounts.put(schemaPath, columnValueCounts.get(schemaPath) + newCount);
                        }
                    }
                } else {
                    if (column.getNulls() != null) {
                        Long newCount = rowCount - column.getNulls();
                        columnValueCounts.put(schemaPath, newCount);
                    } else {
                        columnValueCounts.put(schemaPath, GroupScan.NO_COLUMN_STATS);
                    }
                }
                boolean partitionColumn = checkForPartitionColumn(column, first);
                if (partitionColumn) {
                    Map<SchemaPath, Object> map = partitionValueMap.get(file.getPath());
                    if (map == null) {
                        map = Maps.newHashMap();
                        partitionValueMap.put(file.getPath(), map);
                    }
                    Object value = map.get(schemaPath);
                    Object currentValue = column.getMaxValue();
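                    // A column remains a partition candidate only while every row group reports the same value (compared by reference here).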
                    if (value != null) {
                        if (value != currentValue) {
                            partitionColTypeMap.remove(schemaPath);
                        }
                    } else {
                        map.put(schemaPath, currentValue);
                    }
                } else {
                    partitionColTypeMap.remove(schemaPath);
                }
            }
            this.rowCount += rowGroup.getRowCount();
            first = false;
        }
    }
}
Also used: Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) ReadEntryWithPath(org.apache.drill.exec.store.dfs.ReadEntryWithPath) ColumnMetadata(org.apache.drill.exec.store.parquet.Metadata.ColumnMetadata) FileStatus(org.apache.hadoop.fs.FileStatus) ParquetFileMetadata(org.apache.drill.exec.store.parquet.Metadata.ParquetFileMetadata) EndpointByteMap(org.apache.drill.exec.store.schedule.EndpointByteMap) RowGroupMetadata(org.apache.drill.exec.store.parquet.Metadata.RowGroupMetadata) DrillbitEndpoint(org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint) EndpointByteMapImpl(org.apache.drill.exec.store.schedule.EndpointByteMapImpl)
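
The locality bookkeeping above boils down to converting a row group's per-host affinity fraction into an absolute byte count per endpoint. A minimal standalone sketch of that weighting, assuming hypothetical hostAffinity and rowGroupLength values in place of rg.getHostAffinity() and rg.getLength():

import java.util.HashMap;
import java.util.Map;

public class AffinityWeighting {
    public static void main(String[] args) {
        // Hypothetical affinity fractions for a single row group.
        Map<String, Float> hostAffinity = new HashMap<>();
        hostAffinity.put("node1", 0.75f);
        hostAffinity.put("node2", 0.25f);
        // Hypothetical row group size: 128 MB.
        long rowGroupLength = 128L * 1024 * 1024;
        // Same arithmetic as the endpointByteMap loop above: fraction times total bytes.
        for (Map.Entry<String, Float> e : hostAffinity.entrySet()) {
            long bytes = (long) (e.getValue() * rowGroupLength);
            System.out.println(e.getKey() + " -> " + bytes + " bytes");
        }
    }
}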

Example 57 with DrillbitEndpoint

Use of org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint in project drill by apache.

The class DrillClient, method reconnect.

public synchronized boolean reconnect() {
    if (client.isActive()) {
        return true;
    }
    int retry = reconnectTimes;
    while (retry > 0) {
        retry--;
        try {
            Thread.sleep(this.reconnectDelay);
            final ArrayList<DrillbitEndpoint> endpoints = new ArrayList<>(clusterCoordinator.getAvailableEndpoints());
            if (endpoints.isEmpty()) {
                continue;
            }
            client.close();
            Collections.shuffle(endpoints);
            connect(endpoints.iterator().next());
            return true;
        } catch (Exception e) {
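            // The connection attempt failed; fall through and retry while attempts remain.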
        }
    }
    return false;
}
Also used: DrillbitEndpoint(org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint) ArrayList(java.util.ArrayList) UserException(org.apache.drill.common.exceptions.UserException) RpcException(org.apache.drill.exec.rpc.RpcException) ChannelClosedException(org.apache.drill.exec.rpc.ChannelClosedException) OutOfMemoryException(org.apache.drill.exec.exception.OutOfMemoryException) NonTransientRpcException(org.apache.drill.exec.rpc.NonTransientRpcException) IOException(java.io.IOException) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException)
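
The method above is a bounded retry loop with a randomized endpoint pick. A minimal sketch of the same pattern in isolation; tryConnect, retryLimit and delayMillis are hypothetical stand-ins for connect(...), reconnectTimes and reconnectDelay:

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class RetryReconnect {

    // Hypothetical stand-in for DrillClient.connect(endpoint); simulates a flaky link.
    static boolean tryConnect(String endpoint) {
        return Math.random() > 0.5;
    }

    static boolean reconnect(List<String> candidates, int retryLimit, long delayMillis) throws InterruptedException {
        int retry = retryLimit;
        while (retry-- > 0) {
            Thread.sleep(delayMillis);
            if (candidates.isEmpty()) {
                // Nothing registered yet; wait out the delay and try again.
                continue;
            }
            List<String> endpoints = new ArrayList<>(candidates);
            // Shuffling spreads reconnect attempts across the cluster, as the original does.
            Collections.shuffle(endpoints);
            if (tryConnect(endpoints.get(0))) {
                return true;
            }
        }
        return false;
    }

    public static void main(String[] args) throws InterruptedException {
        System.out.println(reconnect(List.of("node1:31010", "node2:31010"), 3, 100));
    }
}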

Example 58 with DrillbitEndpoint

Use of org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint in project drill by apache.

The class TestMongoChunkAssignment, method testMongoGroupScanAssignmentNoAffinity.

@Test
public void testMongoGroupScanAssignmentNoAffinity() throws UnknownHostException, ExecutionSetupException {
    final List<DrillbitEndpoint> endpoints = Lists.newArrayList();
    final DrillbitEndpoint DB_M = DrillbitEndpoint.newBuilder().setAddress(HOST_M).setControlPort(1234).build();
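    // The same endpoint is added twice, giving host M two assignment slots.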
    endpoints.add(DB_M);
    endpoints.add(DB_M);
    final DrillbitEndpoint DB_L = DrillbitEndpoint.newBuilder().setAddress(HOST_L).setControlPort(1234).build();
    endpoints.add(DB_L);
    final DrillbitEndpoint DB_X = DrillbitEndpoint.newBuilder().setAddress(HOST_X).setControlPort(1234).build();
    endpoints.add(DB_X);
    mongoGroupScan.applyAssignments(endpoints);
    // assignments for chunks on host A, assign on drill bit M
    assertEquals(1, mongoGroupScan.getSpecificScan(0).getChunkScanSpecList().size());
    // assignments for chunks on host B, assign on drill bit M
    assertEquals(2, mongoGroupScan.getSpecificScan(1).getChunkScanSpecList().size());
    // assignments for chunks on host C, assign on drill bit L
    assertEquals(2, mongoGroupScan.getSpecificScan(2).getChunkScanSpecList().size());
    // assignments for chunks on host D, assign on drill bit X
    assertEquals(1, mongoGroupScan.getSpecificScan(3).getChunkScanSpecList().size());
}
Also used: DrillbitEndpoint(org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint) Test(org.junit.Test)

Example 59 with DrillbitEndpoint

Use of org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint in project drill by apache.

The class TestMongoChunkAssignment, method testMongoGroupScanAssignmentWhenOnlyOneDrillBit.

@Test
public void testMongoGroupScanAssignmentWhenOnlyOneDrillBit() throws UnknownHostException, ExecutionSetupException {
    final List<DrillbitEndpoint> endpoints = Lists.newArrayList();
    final DrillbitEndpoint DB_A = DrillbitEndpoint.newBuilder().setAddress(HOST_A).setControlPort(1234).build();
    endpoints.add(DB_A);
    mongoGroupScan.applyAssignments(endpoints);
    // All the assignments should be given to drill bit A.
    assertEquals(6, mongoGroupScan.getSpecificScan(0).getChunkScanSpecList().size());
}
Also used: DrillbitEndpoint(org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint) Test(org.junit.Test)
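
Taken together, the two tests above check that chunk assignments spread across however many endpoints are offered, collapsing onto a single Drillbit when it is the only one available. A minimal sketch of a modulo-style distribution illustrating that behavior; the assign helper is hypothetical and does not reproduce Drill's actual applyAssignments logic:

import java.util.ArrayList;
import java.util.List;

public class RoundRobinAssign {

    // Hypothetical helper: deal out chunk indices across endpoint buckets in turn.
    static List<List<Integer>> assign(int chunks, int endpoints) {
        List<List<Integer>> buckets = new ArrayList<>();
        for (int i = 0; i < endpoints; i++) {
            buckets.add(new ArrayList<>());
        }
        for (int chunk = 0; chunk < chunks; chunk++) {
            buckets.get(chunk % endpoints).add(chunk);
        }
        return buckets;
    }

    public static void main(String[] args) {
        // Six chunks over one endpoint: everything lands in the single bucket,
        // matching the single-Drillbit test above.
        System.out.println(assign(6, 1)); // [[0, 1, 2, 3, 4, 5]]
        // Six chunks over four endpoints: a balanced 2/2/1/1 split.
        System.out.println(assign(6, 4)); // [[0, 4], [1, 5], [2], [3]]
    }
}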

Example 60 with DrillbitEndpoint

Use of org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint in project drill by apache.

The class SimpleParallelizer, method generateWorkUnit.

protected QueryWorkUnit generateWorkUnit(OptionList options, DrillbitEndpoint foremanNode, QueryId queryId, PhysicalPlanReader reader, Fragment rootNode, PlanningSet planningSet, UserSession session, QueryContextInformation queryContextInfo) throws ExecutionSetupException {
    List<PlanFragment> fragments = Lists.newArrayList();
    PlanFragment rootFragment = null;
    FragmentRoot rootOperator = null;
    // Now we generate all the individual plan fragments and associated assignments. Note, we need all endpoints
    // assigned before we can materialize, so we start a new loop here rather than utilizing the previous one.
    for (Wrapper wrapper : planningSet) {
        Fragment node = wrapper.getNode();
        final PhysicalOperator physicalOperatorRoot = node.getRoot();
        boolean isRootNode = rootNode == node;
        if (isRootNode && wrapper.getWidth() != 1) {
            throw new ForemanSetupException(String.format("Failure while trying to setup fragment. " + "The root fragment must always have parallelization one. In the current case, the width was set to %d.", wrapper.getWidth()));
        }
        // a fragment is self driven if it doesn't rely on any other exchanges.
        boolean isLeafFragment = node.getReceivingExchangePairs().size() == 0;
        // Create a minorFragment for each major fragment.
        for (int minorFragmentId = 0; minorFragmentId < wrapper.getWidth(); minorFragmentId++) {
            IndexedFragmentNode iNode = new IndexedFragmentNode(minorFragmentId, wrapper);
            wrapper.resetAllocation();
            PhysicalOperator op = physicalOperatorRoot.accept(Materializer.INSTANCE, iNode);
            Preconditions.checkArgument(op instanceof FragmentRoot);
            FragmentRoot root = (FragmentRoot) op;
            // get plan as JSON
            String plan;
            String optionsData;
            try {
                plan = reader.writeJson(root);
                optionsData = reader.writeJson(options);
            } catch (JsonProcessingException e) {
                throw new ForemanSetupException("Failure while trying to convert fragment into json.", e);
            }
            FragmentHandle handle = FragmentHandle.newBuilder()
                .setMajorFragmentId(wrapper.getMajorFragmentId())
                .setMinorFragmentId(minorFragmentId)
                .setQueryId(queryId)
                .build();
            PlanFragment fragment = PlanFragment.newBuilder()
                .setForeman(foremanNode)
                .setFragmentJson(plan)
                .setHandle(handle)
                .setAssignment(wrapper.getAssignedEndpoint(minorFragmentId))
                .setLeafFragment(isLeafFragment)
                .setContext(queryContextInfo)
                .setMemInitial(wrapper.getInitialAllocation())
                .setMemMax(wrapper.getMaxAllocation())
                .setOptionsJson(optionsData)
                .setCredentials(session.getCredentials())
                .addAllCollector(CountRequiredFragments.getCollectors(root))
                .build();
            if (isRootNode) {
                if (logger.isDebugEnabled()) {
                    logger.debug("Root fragment:\n {}", DrillStringUtils.unescapeJava(fragment.toString()));
                }
                rootFragment = fragment;
                rootOperator = root;
            } else {
                if (logger.isDebugEnabled()) {
                    logger.debug("Remote fragment:\n {}", DrillStringUtils.unescapeJava(fragment.toString()));
                }
                fragments.add(fragment);
            }
        }
    }
    return new QueryWorkUnit(rootOperator, rootFragment, fragments);
}
Also used: QueryWorkUnit(org.apache.drill.exec.work.QueryWorkUnit) FragmentRoot(org.apache.drill.exec.physical.base.FragmentRoot) FragmentHandle(org.apache.drill.exec.proto.ExecProtos.FragmentHandle) PlanFragment(org.apache.drill.exec.proto.BitControl.PlanFragment) IndexedFragmentNode(org.apache.drill.exec.planner.fragment.Materializer.IndexedFragmentNode) DrillbitEndpoint(org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint) MinorFragmentEndpoint(org.apache.drill.exec.physical.MinorFragmentEndpoint) PhysicalOperator(org.apache.drill.exec.physical.base.PhysicalOperator) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) ForemanSetupException(org.apache.drill.exec.work.foreman.ForemanSetupException)
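
Each minor fragment produced above is identified by the (queryId, majorFragmentId, minorFragmentId) triple carried in its FragmentHandle. A minimal sketch of how the nested loop enumerates that identity space, using hypothetical parallelization widths:

public class FragmentIds {
    public static void main(String[] args) {
        // Hypothetical widths: one root fragment plus two parallelized majors.
        int[] majorWidths = {1, 4, 2};
        for (int major = 0; major < majorWidths.length; major++) {
            // One minor fragment per unit of width, mirroring the loop in generateWorkUnit.
            for (int minor = 0; minor < majorWidths[major]; minor++) {
                System.out.printf("major=%d minor=%d%n", major, minor);
            }
        }
    }
}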

Aggregations

DrillbitEndpoint (org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint): 77
Test (org.junit.Test): 23
EndpointAffinity (org.apache.drill.exec.physical.EndpointAffinity): 14
IOException (java.io.IOException): 9
Stopwatch (com.google.common.base.Stopwatch): 7
ArrayList (java.util.ArrayList): 7
PlanFragment (org.apache.drill.exec.proto.BitControl.PlanFragment): 7
ServerName (org.apache.hadoop.hbase.ServerName): 7
HRegionInfo (org.apache.hadoop.hbase.HRegionInfo): 6
Entry (java.util.Map.Entry): 5
DrillConfig (org.apache.drill.common.config.DrillConfig): 5
FragmentHandle (org.apache.drill.exec.proto.ExecProtos.FragmentHandle): 5
DrillbitContext (org.apache.drill.exec.server.DrillbitContext): 5
HBaseGroupScan (org.apache.drill.exec.store.hbase.HBaseGroupScan): 5
HBaseScanSpec (org.apache.drill.exec.store.hbase.HBaseScanSpec): 5
QueryWorkUnit (org.apache.drill.exec.work.QueryWorkUnit): 5
JsonProcessingException (com.fasterxml.jackson.core.JsonProcessingException): 4
HashMap (java.util.HashMap): 4
AtomicLong (java.util.concurrent.atomic.AtomicLong): 4
NonStrictExpectations (mockit.NonStrictExpectations): 4