Example 1 with NodeControllerInfo

Use of org.apache.hyracks.api.client.NodeControllerInfo in the asterixdb project by Apache.

From class APIFrameworkTest, method testGetComputationLocations:

@Test
public void testGetComputationLocations() throws AlgebricksException {
    // Constructs mocked cluster nodes.
    Map<String, NodeControllerInfo> map = new HashMap<>();
    NodeControllerInfo nc1Info = mock(NodeControllerInfo.class);
    when(nc1Info.getNumAvailableCores()).thenReturn(4);
    NodeControllerInfo nc2Info = mock(NodeControllerInfo.class);
    when(nc2Info.getNumAvailableCores()).thenReturn(4);
    String nc1 = "nc1";
    String nc2 = "nc2";
    map.put(nc1, nc1Info);
    map.put(nc2, nc2Info);
    // Creates an APIFramework.
    APIFramework apiFramework = new APIFramework(mock(ILangCompilationProvider.class));
    // Tests odd number parallelism.
    AlgebricksAbsolutePartitionConstraint loc = (AlgebricksAbsolutePartitionConstraint) PA.invokeMethod(apiFramework, "getComputationLocations(java.util.Map,int)", map, 5);
    int nc1Count = 0, nc2Count = 0;
    String[] partitions = loc.getLocations();
    for (String partition : partitions) {
        if (partition.equals(nc1)) {
            ++nc1Count;
        }
        if (partition.equals(nc2)) {
            ++nc2Count;
        }
    }
    Assert.assertTrue(nc1Count > 0);
    Assert.assertTrue(nc2Count > 0);
    // Tests load balance.
    Assert.assertTrue(Math.abs(nc1Count - nc2Count) == 1);
    Assert.assertTrue(partitions.length == 5);
    // Tests even number parallelism.
    loc = (AlgebricksAbsolutePartitionConstraint) PA.invokeMethod(apiFramework, "getComputationLocations(java.util.Map,int)", map, 8);
    nc1Count = 0;
    nc2Count = 0;
    partitions = loc.getLocations();
    for (String partition : partitions) {
        if (partition.equals(nc1)) {
            ++nc1Count;
        }
        if (partition.equals(nc2)) {
            ++nc2Count;
        }
    }
    Assert.assertTrue(nc1Count > 0);
    Assert.assertTrue(nc2Count > 0);
    // Tests load balance.
    Assert.assertTrue(Math.abs(nc1Count - nc2Count) == 0);
    // The maximum parallelism cannot exceed n * cores, where n is the number of NCs and cores is the
    // number of cores per NC.
    Assert.assertTrue(partitions.length == 8);
    // Tests the case when parallelism is one.
    loc = (AlgebricksAbsolutePartitionConstraint) PA.invokeMethod(apiFramework, "getComputationLocations(java.util.Map,int)", map, 1);
    Assert.assertTrue(loc.getLocations().length == 1);
    // Tests the case when parallelism is negative.
    // In this case, the compiler has no idea and falls back to the default setting where all possible cores
    // are used.
    loc = (AlgebricksAbsolutePartitionConstraint) PA.invokeMethod(apiFramework, "getComputationLocations(java.util.Map,int)", map, -100);
    Assert.assertTrue(loc.getLocations().length == 8);
    // Tests the case when parallelism is -1.
    // In this case, the compiler has no idea and falls back to the default setting where all possible cores
    // are used.
    loc = (AlgebricksAbsolutePartitionConstraint) PA.invokeMethod(apiFramework, "getComputationLocations(java.util.Map,int)", map, -1);
    Assert.assertTrue(loc.getLocations().length == 8);
    // Tests the case when parallelism is zero.
    // In this case, the compiler has no idea and falls back to the default setting where all possible cores
    // are used.
    loc = (AlgebricksAbsolutePartitionConstraint) PA.invokeMethod(apiFramework, "getComputationLocations(java.util.Map,int)", map, 0);
    Assert.assertTrue(loc.getLocations().length == 8);
}
Also used : HashMap(java.util.HashMap) AlgebricksAbsolutePartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint) NodeControllerInfo(org.apache.hyracks.api.client.NodeControllerInfo) ILangCompilationProvider(org.apache.asterix.compiler.provider.ILangCompilationProvider) Test(org.junit.Test)
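
The PA.invokeMethod calls above reach the private getComputationLocations method reflectively; PA appears to be the PrivilegedAccessor test utility, judging by the "method(signature)" string form. A minimal sketch of the same call using only java.lang.reflect, assuming APIFramework lives in org.apache.asterix.api.common (verify against your source tree), could look like this:

import java.lang.reflect.Method;
import java.util.Map;

import org.apache.asterix.api.common.APIFramework; // assumed location; adjust if the package differs
import org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint;
import org.apache.hyracks.api.client.NodeControllerInfo;

public final class ReflectiveInvokeSketch {
    // Hedged equivalent of PA.invokeMethod(apiFramework, "getComputationLocations(java.util.Map,int)", map, 5).
    static AlgebricksAbsolutePartitionConstraint getComputationLocations(
            Map<String, NodeControllerInfo> ncMap, int parallelismHint) throws Exception {
        Method m = APIFramework.class.getDeclaredMethod("getComputationLocations", Map.class, int.class);
        // The target method is private (see Example 4), so make it accessible first.
        m.setAccessible(true);
        // It is also static, so no receiver instance is needed.
        return (AlgebricksAbsolutePartitionConstraint) m.invoke(null, ncMap, parallelismHint);
    }
}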

Example 2 with NodeControllerInfo

Use of org.apache.hyracks.api.client.NodeControllerInfo in the asterixdb project by Apache.

From class ConnectorApiServletTest, method testGet:

@Test
public void testGet() throws Exception {
    // Starts test asterixdb cluster.
    SqlppExecutionTest.setUp();
    // Configures a test connector api servlet.
    ConnectorApiServlet let = new ConnectorApiServlet(new ConcurrentHashMap<>(), new String[] { "/" }, (ICcApplicationContext) ExecutionTestUtil.integrationUtil.cc.getApplicationContext());
    Map<String, NodeControllerInfo> nodeMap = new HashMap<>();
    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
    PrintWriter outputWriter = new PrintWriter(outputStream);
    // Creates mocks.
    IHyracksClientConnection mockHcc = mock(IHyracksClientConnection.class);
    NodeControllerInfo mockInfo1 = mock(NodeControllerInfo.class);
    NodeControllerInfo mockInfo2 = mock(NodeControllerInfo.class);
    IServletRequest mockRequest = mock(IServletRequest.class);
    IServletResponse mockResponse = mock(IServletResponse.class);
    FullHttpRequest mockHttpRequest = mock(FullHttpRequest.class);
    // Puts the mocked Hyracks connection into the servlet context map.
    let.ctx().put(ServletConstants.HYRACKS_CONNECTION_ATTR, mockHcc);
    // Sets up mock returns.
    when(mockRequest.getHttpRequest()).thenReturn(mockHttpRequest);
    when(mockHttpRequest.method()).thenReturn(HttpMethod.GET);
    when(mockRequest.getParameter("dataverseName")).thenReturn("Metadata");
    when(mockRequest.getParameter("datasetName")).thenReturn("Dataset");
    when(mockResponse.writer()).thenReturn(outputWriter);
    when(mockHcc.getNodeControllerInfos()).thenReturn(nodeMap);
    when(mockInfo1.getNetworkAddress()).thenReturn(new NetworkAddress("127.0.0.1", 3099));
    when(mockInfo2.getNetworkAddress()).thenReturn(new NetworkAddress("127.0.0.2", 3099));
    // Calls ConnectorAPIServlet.formResponseObject.
    nodeMap.put("asterix_nc1", mockInfo1);
    nodeMap.put("asterix_nc2", mockInfo2);
    let.handle(mockRequest, mockResponse);
    // Constructs the actual response.
    ObjectMapper om = new ObjectMapper();
    ObjectNode actualResponse = (ObjectNode) om.readTree(outputStream.toString());
    // Checks the temp flag, the primary key, and the data type of the dataset.
    boolean temp = actualResponse.get("temp").asBoolean();
    Assert.assertFalse(temp);
    String primaryKey = actualResponse.get("keys").asText();
    Assert.assertEquals("DataverseName,DatasetName", primaryKey);
    ARecordType recordType = (ARecordType) JSONDeserializerForTypes.convertFromJSON(actualResponse.get("type"));
    Assert.assertEquals(getMetadataRecordType("Metadata", "Dataset"), recordType);
    // Checks the correctness of results.
    ArrayNode splits = (ArrayNode) actualResponse.get("splits");
    String path = (splits.get(0)).get("path").asText();
    Assert.assertTrue(path.endsWith("Metadata/Dataset_idx_Dataset"));
    // Tears down the asterixdb cluster.
    SqlppExecutionTest.tearDown();
}
Also used : IHyracksClientConnection(org.apache.hyracks.api.client.IHyracksClientConnection) FullHttpRequest(io.netty.handler.codec.http.FullHttpRequest) ObjectNode(com.fasterxml.jackson.databind.node.ObjectNode) HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ByteArrayOutputStream(java.io.ByteArrayOutputStream) IServletRequest(org.apache.hyracks.http.api.IServletRequest) NetworkAddress(org.apache.hyracks.api.comm.NetworkAddress) NodeControllerInfo(org.apache.hyracks.api.client.NodeControllerInfo) ArrayNode(com.fasterxml.jackson.databind.node.ArrayNode) ConnectorApiServlet(org.apache.asterix.api.http.server.ConnectorApiServlet) IServletResponse(org.apache.hyracks.http.api.IServletResponse) ARecordType(org.apache.asterix.om.types.ARecordType) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) PrintWriter(java.io.PrintWriter) Test(org.junit.Test) SqlppExecutionTest(org.apache.asterix.test.runtime.SqlppExecutionTest)
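
The assertions above inspect only splits.get(0). A small hedged helper, assuming the response shape exercised by this test (a "splits" array whose entries carry a "path" field), could verify every split:

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ArrayNode;

final class SplitPathAssertions {
    // Checks that each entry in the servlet's "splits" array has a path ending with the expected suffix.
    static void assertAllPathsEndWith(ArrayNode splits, String expectedSuffix) {
        for (JsonNode split : splits) {
            String path = split.get("path").asText();
            if (!path.endsWith(expectedSuffix)) {
                throw new AssertionError("Unexpected split path: " + path);
            }
        }
    }
}

In the test above, this would generalize the single path assertion to SplitPathAssertions.assertAllPathsEndWith(splits, "Metadata/Dataset_idx_Dataset").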

Example 3 with NodeControllerInfo

Use of org.apache.hyracks.api.client.NodeControllerInfo in the asterixdb project by Apache.

From class SchedulerTest, method testSchedulerSmallerHDFSOdd:

/**
     * Test the case where the HDFS cluster is smaller than the Hyracks cluster, with an odd
     * number of file splits.
     *
     * @throws Exception
     */
public void testSchedulerSmallerHDFSOdd() throws Exception {
    Map<String, NodeControllerInfo> ncNameToNcInfos = TestUtils.generateNodeControllerInfo(6, "nc", "10.0.0.", 5099, 5098, 5097);
    List<InputSplit> fileSplits = new ArrayList<>();
    fileSplits.add(new FileSplit(new Path("part-1"), 0, 0, new String[] { "10.0.0.1", "10.0.0.2", "10.0.0.3" }));
    fileSplits.add(new FileSplit(new Path("part-2"), 0, 0, new String[] { "10.0.0.3", "10.0.0.4", "10.0.0.5" }));
    fileSplits.add(new FileSplit(new Path("part-3"), 0, 0, new String[] { "10.0.0.4", "10.0.0.5", "10.0.0.3" }));
    fileSplits.add(new FileSplit(new Path("part-4"), 0, 0, new String[] { "10.0.0.2", "10.0.0.1", "10.0.0.3" }));
    fileSplits.add(new FileSplit(new Path("part-5"), 0, 0, new String[] { "10.0.0.3", "10.0.0.4", "10.0.0.5" }));
    fileSplits.add(new FileSplit(new Path("part-6"), 0, 0, new String[] { "10.0.0.2", "10.0.0.3", "10.0.0.5" }));
    fileSplits.add(new FileSplit(new Path("part-7"), 0, 0, new String[] { "10.0.0.1", "10.0.0.2", "10.0.0.3" }));
    fileSplits.add(new FileSplit(new Path("part-8"), 0, 0, new String[] { "10.0.0.3", "10.0.0.4", "10.0.0.5" }));
    fileSplits.add(new FileSplit(new Path("part-9"), 0, 0, new String[] { "10.0.0.4", "10.0.0.5", "10.0.0.1" }));
    fileSplits.add(new FileSplit(new Path("part-10"), 0, 0, new String[] { "10.0.0.2", "10.0.0.1", "10.0.0.2" }));
    fileSplits.add(new FileSplit(new Path("part-11"), 0, 0, new String[] { "10.0.0.3", "10.0.0.4", "10.0.0.5" }));
    fileSplits.add(new FileSplit(new Path("part-12"), 0, 0, new String[] { "10.0.0.2", "10.0.0.3", "10.0.0.5" }));
    fileSplits.add(new FileSplit(new Path("part-13"), 0, 0, new String[] { "10.0.0.2", "10.0.0.4", "10.0.0.5" }));
    Scheduler scheduler = new Scheduler(ncNameToNcInfos);
    String[] locationConstraints = scheduler.getLocationConstraints(fileSplits);
    String[] expectedResults = new String[] { "nc1", "nc4", "nc4", "nc1", "nc3", "nc2", "nc2", "nc3", "nc5", "nc1", "nc5", "nc2", "nc4" };
    for (int i = 0; i < locationConstraints.length; i++) {
        Assert.assertEquals(locationConstraints[i], expectedResults[i]);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) NodeControllerInfo(org.apache.hyracks.api.client.NodeControllerInfo) ArrayList(java.util.ArrayList) FileSplit(org.apache.hadoop.mapreduce.lib.input.FileSplit) InputSplit(org.apache.hadoop.mapreduce.InputSplit)
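
TestUtils.generateNodeControllerInfo is a project-specific fixture; its arguments suggest a node count, a node-name prefix, an IP prefix, and three port numbers. A hedged Mockito reconstruction, using only the NodeControllerInfo accessors seen elsewhere on this page and ignoring the two extra ports (presumably data and messaging, which the real helper also wires up), might look like:

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.util.HashMap;
import java.util.Map;

import org.apache.hyracks.api.client.NodeControllerInfo;
import org.apache.hyracks.api.comm.NetworkAddress;

final class SchedulerFixtures {
    // Hypothetical stand-in for TestUtils.generateNodeControllerInfo(6, "nc", "10.0.0.", 5099, 5098, 5097):
    // maps "nc1".."nc6" to mocks whose network addresses are 10.0.0.1 .. 10.0.0.6.
    static Map<String, NodeControllerInfo> generateNodeControllerInfo(
            int numNodes, String namePrefix, String ipPrefix, int netPort) {
        Map<String, NodeControllerInfo> ncNameToNcInfos = new HashMap<>();
        for (int i = 1; i <= numNodes; i++) {
            NodeControllerInfo info = mock(NodeControllerInfo.class);
            when(info.getNetworkAddress()).thenReturn(new NetworkAddress(ipPrefix + i, netPort));
            ncNameToNcInfos.put(namePrefix + i, info);
        }
        return ncNameToNcInfos;
    }
}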

Example 4 with NodeControllerInfo

Use of org.apache.hyracks.api.client.NodeControllerInfo in the asterixdb project by Apache.

From class APIFramework, method getComputationLocations:

// Computes the location constraints based on user-configured parallelism parameter.
// Note that the parallelism parameter is only a hint -- it will not be respected if it is too small or too large.
private static AlgebricksAbsolutePartitionConstraint getComputationLocations(Map<String, NodeControllerInfo> ncMap, int parallelismHint) {
    // Unifies the handling of non-positive parallelism.
    int parallelism = parallelismHint <= 0 ? -2 * ncMap.size() : parallelismHint;
    // Calculates per-node parallelism with load balancing, i.e., randomly selects which nodes
    // receive one extra partition.
    int numNodes = ncMap.size();
    int numNodesWithOneMorePartition = parallelism % numNodes;
    int perNodeParallelismMin = parallelism / numNodes;
    int perNodeParallelismMax = parallelism / numNodes + 1;
    List<String> allNodes = new ArrayList<>();
    Set<String> selectedNodesWithOneMorePartition = new HashSet<>();
    for (Map.Entry<String, NodeControllerInfo> entry : ncMap.entrySet()) {
        allNodes.add(entry.getKey());
    }
    Random random = new Random();
    for (int index = numNodesWithOneMorePartition; index >= 1; --index) {
        int pick = random.nextInt(index);
        selectedNodesWithOneMorePartition.add(allNodes.get(pick));
        Collections.swap(allNodes, pick, index - 1);
    }
    // Generates cluster locations, which contain duplicates for a node if it hosts more than one partition.
    List<String> locations = new ArrayList<>();
    for (Map.Entry<String, NodeControllerInfo> entry : ncMap.entrySet()) {
        String nodeId = entry.getKey();
        int availableCores = entry.getValue().getNumAvailableCores();
        int nodeParallelism = selectedNodesWithOneMorePartition.contains(nodeId) ? perNodeParallelismMax : perNodeParallelismMin;
        int coresToUse = nodeParallelism >= 0 && nodeParallelism < availableCores ? nodeParallelism : availableCores;
        for (int count = 0; count < coresToUse; ++count) {
            locations.add(nodeId);
        }
    }
    return new AlgebricksAbsolutePartitionConstraint(locations.toArray(new String[0]));
}
Also used : Random(java.util.Random) AlgebricksAbsolutePartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint) NodeControllerInfo(org.apache.hyracks.api.client.NodeControllerInfo) ArrayList(java.util.ArrayList) Map(java.util.Map) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) HashSet(java.util.HashSet)
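
To make the arithmetic concrete with Example 1's cluster (2 nodes, 4 cores each): a hint of 5 gives numNodesWithOneMorePartition = 1 and perNodeParallelismMin = 2, so one randomly chosen node contributes 3 locations and the other 2. A non-positive hint becomes parallelism = -2 * numNodes, making perNodeParallelismMin negative; the nodeParallelism >= 0 guard then fails and every node falls back to all of its available cores, which is why the tests in Example 1 expect 8 locations for hints of 0, -1, and -100. A dependency-free trace of that math:

// Standalone trace of the location math above; no Hyracks types needed.
public final class LocationMathTrace {
    public static void main(String[] args) {
        int numNodes = 2, availableCores = 4; // mirrors the mocked cluster in Example 1
        for (int hint : new int[] { 5, 8, 1, 0, -1, -100 }) {
            int parallelism = hint <= 0 ? -2 * numNodes : hint;
            int numNodesWithOneMorePartition = parallelism % numNodes;
            int perNodeParallelismMin = parallelism / numNodes;
            int totalLocations = 0;
            for (int node = 0; node < numNodes; node++) {
                int nodeParallelism =
                        node < numNodesWithOneMorePartition ? perNodeParallelismMin + 1 : perNodeParallelismMin;
                totalLocations += (nodeParallelism >= 0 && nodeParallelism < availableCores)
                        ? nodeParallelism : availableCores;
            }
            System.out.println("hint=" + hint + " -> " + totalLocations + " locations");
        }
        // Prints 5, 8, 1, 8, 8, 8 locations, matching the assertions in Example 1.
    }
}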

Example 5 with NodeControllerInfo

Use of org.apache.hyracks.api.client.NodeControllerInfo in the asterixdb project by Apache.

From class SchedulerTest, method testSchedulercBoundary:

/**
     * Test boundary cases where splits array is empty or null
     *
     * @throws Exception
     */
public void testSchedulercBoundary() throws Exception {
    Map<String, NodeControllerInfo> ncNameToNcInfos = TestUtils.generateNodeControllerInfo(6, "nc", "10.0.0.", 5099, 5098, 5097);
    /** test empty file splits */
    InputSplit[] fileSplits = new InputSplit[0];
    String[] expectedResults = new String[] {};
    Scheduler scheduler = new Scheduler(ncNameToNcInfos);
    String[] locationConstraints = scheduler.getLocationConstraints(fileSplits);
    for (int i = 0; i < locationConstraints.length; i++) {
        Assert.assertEquals(locationConstraints[i], expectedResults[i]);
    }
    ClusterTopology topology = parseTopology();
    scheduler = new Scheduler(ncNameToNcInfos, topology);
    locationConstraints = scheduler.getLocationConstraints(fileSplits);
    for (int i = 0; i < locationConstraints.length; i++) {
        Assert.assertEquals(locationConstraints[i], expectedResults[i]);
    }
    fileSplits = null;
    expectedResults = new String[] {};
    scheduler = new Scheduler(ncNameToNcInfos);
    locationConstraints = scheduler.getLocationConstraints(fileSplits);
    for (int i = 0; i < locationConstraints.length; i++) {
        Assert.assertEquals(locationConstraints[i], expectedResults[i]);
    }
    scheduler = new Scheduler(ncNameToNcInfos, topology);
    locationConstraints = scheduler.getLocationConstraints(fileSplits);
    for (int i = 0; i < locationConstraints.length; i++) {
        Assert.assertEquals(locationConstraints[i], expectedResults[i]);
    }
}
Also used : NodeControllerInfo(org.apache.hyracks.api.client.NodeControllerInfo) ClusterTopology(org.apache.hyracks.api.topology.ClusterTopology) InputSplit(org.apache.hadoop.mapred.InputSplit)
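
One observation on this boundary test: since expectedResults is empty, the comparison loops never execute, and the effective assertion is only that getLocationConstraints neither throws nor returns locations. A hedged tightening, assuming the empty-result contract the test implies, would replace each loop with an explicit emptiness check:

// Replaces each vacuous comparison loop inside testSchedulercBoundary above.
String[] locationConstraints = scheduler.getLocationConstraints(fileSplits);
Assert.assertEquals(0, locationConstraints.length);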

Aggregations

NodeControllerInfo (org.apache.hyracks.api.client.NodeControllerInfo): 21 usages
HashMap (java.util.HashMap): 8 usages
Path (org.apache.hadoop.fs.Path): 8 usages
Map (java.util.Map): 6 usages
ArrayList (java.util.ArrayList): 5 usages
InputSplit (org.apache.hadoop.mapred.InputSplit): 5 usages
NetworkAddress (org.apache.hyracks.api.comm.NetworkAddress): 5 usages
ClusterTopology (org.apache.hyracks.api.topology.ClusterTopology): 5 usages
FileSplit (org.apache.hadoop.mapred.FileSplit): 4 usages
InputSplit (org.apache.hadoop.mapreduce.InputSplit): 4 usages
FileSplit (org.apache.hadoop.mapreduce.lib.input.FileSplit): 4 usages
AlgebricksAbsolutePartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint): 4 usages
Test (org.junit.Test): 4 usages
HyracksException (org.apache.hyracks.api.exceptions.HyracksException): 3 usages
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 2 usages
ArrayNode (com.fasterxml.jackson.databind.node.ArrayNode): 2 usages
ObjectNode (com.fasterxml.jackson.databind.node.ObjectNode): 2 usages
IOException (java.io.IOException): 2 usages
UnknownHostException (java.net.UnknownHostException): 2 usages
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 2 usages