Example 11 with ResultScanner

Use of org.apache.hadoop.hbase.client.ResultScanner in the hadoop project by Apache.

From the class TestHBaseStorageFlowActivity, method checkFlowActivityTableSeveralRuns.

private void checkFlowActivityTableSeveralRuns(String cluster, String user, String flow, Configuration c1, String flowVersion1, long runid1, String flowVersion2, long runid2, String flowVersion3, long runid3, long appCreatedTime) throws IOException {
    Scan s = new Scan();
    s.addFamily(FlowActivityColumnFamily.INFO.getBytes());
    byte[] startRow = new FlowActivityRowKey(cluster, appCreatedTime, user, flow).getRowKey();
    s.setStartRow(startRow);
    String clusterStop = cluster + "1";
    byte[] stopRow = new FlowActivityRowKey(clusterStop, appCreatedTime, user, flow).getRowKey();
    s.setStopRow(stopRow);
    Connection conn = ConnectionFactory.createConnection(c1);
    Table table1 = conn.getTable(TableName.valueOf(FlowActivityTable.DEFAULT_TABLE_NAME));
    ResultScanner scanner = table1.getScanner(s);
    int rowCount = 0;
    for (Result result : scanner) {
        assertNotNull(result);
        assertTrue(!result.isEmpty());
        byte[] row = result.getRow();
        FlowActivityRowKey flowActivityRowKey = FlowActivityRowKey.parseRowKey(row);
        assertNotNull(flowActivityRowKey);
        assertEquals(cluster, flowActivityRowKey.getClusterId());
        assertEquals(user, flowActivityRowKey.getUserId());
        assertEquals(flow, flowActivityRowKey.getFlowName());
        Long dayTs = HBaseTimelineStorageUtils.getTopOfTheDayTimestamp(appCreatedTime);
        assertEquals(dayTs, flowActivityRowKey.getDayTimestamp());
        Map<byte[], byte[]> values = result.getFamilyMap(FlowActivityColumnFamily.INFO.getBytes());
        rowCount++;
        assertEquals(3, values.size());
        checkFlowActivityRunId(runid1, flowVersion1, values);
        checkFlowActivityRunId(runid2, flowVersion2, values);
        checkFlowActivityRunId(runid3, flowVersion3, values);
    }
    // the flow activity table writes into the current day's record;
    // if this test runs across the midnight boundary it may fail,
    // because the writes would land in two records, one for each day
    assertEquals(1, rowCount);
}
Also used : Table(org.apache.hadoop.hbase.client.Table) ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) Connection(org.apache.hadoop.hbase.client.Connection) Scan(org.apache.hadoop.hbase.client.Scan) Result(org.apache.hadoop.hbase.client.Result)
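
The method above never closes the Connection, Table, or ResultScanner it opens, which is tolerable in a short-lived test but not in standalone code. A minimal sketch of the same bounded scan with try-with-resources, reusing the row-key classes and variable names from the test (all three HBase client types implement Closeable):

Scan scan = new Scan();
scan.addFamily(FlowActivityColumnFamily.INFO.getBytes());
scan.setStartRow(new FlowActivityRowKey(cluster, appCreatedTime, user, flow).getRowKey());
// cluster + "1" is an exclusive stop row that sorts just past every row for this cluster
scan.setStopRow(new FlowActivityRowKey(cluster + "1", appCreatedTime, user, flow).getRowKey());
try (Connection conn = ConnectionFactory.createConnection(c1);
     Table table = conn.getTable(TableName.valueOf(FlowActivityTable.DEFAULT_TABLE_NAME));
     ResultScanner scanner = table.getScanner(scan)) {
    for (Result result : scanner) {
        // each Result is one flow activity row inside [startRow, stopRow)
    }
}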

Example 12 with ResultScanner

Use of org.apache.hadoop.hbase.client.ResultScanner in the hadoop project by Apache.

From the class TestHBaseStorageFlowActivity, method checkFlowActivityTable.

private void checkFlowActivityTable(String cluster, String user, String flow, String flowVersion, long runid, Configuration c1, long appCreatedTime) throws IOException {
    Scan s = new Scan();
    s.addFamily(FlowActivityColumnFamily.INFO.getBytes());
    byte[] startRow = new FlowActivityRowKey(cluster, appCreatedTime, user, flow).getRowKey();
    s.setStartRow(startRow);
    String clusterStop = cluster + "1";
    byte[] stopRow = new FlowActivityRowKey(clusterStop, appCreatedTime, user, flow).getRowKey();
    s.setStopRow(stopRow);
    Connection conn = ConnectionFactory.createConnection(c1);
    Table table1 = conn.getTable(TableName.valueOf(FlowActivityTable.DEFAULT_TABLE_NAME));
    ResultScanner scanner = table1.getScanner(s);
    int rowCount = 0;
    for (Result result : scanner) {
        assertNotNull(result);
        assertTrue(!result.isEmpty());
        Map<byte[], byte[]> values = result.getFamilyMap(FlowActivityColumnFamily.INFO.getBytes());
        rowCount++;
        byte[] row = result.getRow();
        FlowActivityRowKey flowActivityRowKey = FlowActivityRowKey.parseRowKey(row);
        assertNotNull(flowActivityRowKey);
        assertEquals(cluster, flowActivityRowKey.getClusterId());
        assertEquals(user, flowActivityRowKey.getUserId());
        assertEquals(flow, flowActivityRowKey.getFlowName());
        Long dayTs = HBaseTimelineStorageUtils.getTopOfTheDayTimestamp(appCreatedTime);
        assertEquals(dayTs, flowActivityRowKey.getDayTimestamp());
        assertEquals(1, values.size());
        checkFlowActivityRunId(runid, flowVersion, values);
    }
    assertEquals(1, rowCount);
}
Also used : Table(org.apache.hadoop.hbase.client.Table) ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) Connection(org.apache.hadoop.hbase.client.Connection) Scan(org.apache.hadoop.hbase.client.Scan) Result(org.apache.hadoop.hbase.client.Result)
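
Both helper methods bound the scan with cluster + "1" as the stop row. This works because the timeline-service row keys join their components with the '!' separator (visible in column names such as m!HDFS_BYTES_READ elsewhere in these tests), and '!' sorts before '1', so every real row for the cluster compares strictly less than the synthetic stop key. A small self-contained sketch of that ordering; the string-only row key is a simplification, since the real keys also encode the day timestamp as raw long bytes:

import org.apache.hadoop.hbase.util.Bytes;

public class StopRowOrdering {
    public static void main(String[] args) {
        // hypothetical, simplified row key: clusterId '!' userId '!' flowName
        byte[] realRow = Bytes.toBytes("cluster1!user1!flow_name");
        byte[] stopRow = Bytes.toBytes("cluster1" + "1");
        // '!' (0x21) sorts before '1' (0x31), so the real row falls inside the scan range
        System.out.println(Bytes.compareTo(realRow, stopRow) < 0); // prints true
    }
}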

Example 13 with ResultScanner

Use of org.apache.hadoop.hbase.client.ResultScanner in the hadoop project by Apache.

From the class TestHBaseStorageFlowRun, method checkFlowRunTableBatchLimit.

/*
   * checks the batch limits on a scan
   */
void checkFlowRunTableBatchLimit(String cluster, String user, String flow, long runid, Configuration c1) throws IOException {
    Scan s = new Scan();
    s.addFamily(FlowRunColumnFamily.INFO.getBytes());
    byte[] startRow = new FlowRunRowKey(cluster, user, flow, runid).getRowKey();
    s.setStartRow(startRow);
    // set a batch limit
    int batchLimit = 2;
    s.setBatch(batchLimit);
    String clusterStop = cluster + "1";
    byte[] stopRow = new FlowRunRowKey(clusterStop, user, flow, runid).getRowKey();
    s.setStopRow(stopRow);
    Connection conn = ConnectionFactory.createConnection(c1);
    Table table1 = conn.getTable(TableName.valueOf(FlowRunTable.DEFAULT_TABLE_NAME));
    ResultScanner scanner = table1.getScanner(s);
    int loopCount = 0;
    for (Result result : scanner) {
        assertNotNull(result);
        assertTrue(!result.isEmpty());
        assertTrue(result.rawCells().length <= batchLimit);
        Map<byte[], byte[]> values = result.getFamilyMap(FlowRunColumnFamily.INFO.getBytes());
        assertNotNull(values);
        assertTrue(values.size() <= batchLimit);
        loopCount++;
    }
    assertTrue(loopCount > 0);
    // test with a diff batch limit
    s = new Scan();
    s.addFamily(FlowRunColumnFamily.INFO.getBytes());
    s.setStartRow(startRow);
    // set a batch limit
    batchLimit = 1;
    s.setBatch(batchLimit);
    s.setMaxResultsPerColumnFamily(2);
    s.setStopRow(stopRow);
    scanner = table1.getScanner(s);
    loopCount = 0;
    for (Result result : scanner) {
        assertNotNull(result);
        assertTrue(!result.isEmpty());
        assertEquals(batchLimit, result.rawCells().length);
        Map<byte[], byte[]> values = result.getFamilyMap(FlowRunColumnFamily.INFO.getBytes());
        assertNotNull(values);
        assertEquals(batchLimit, values.size());
        loopCount++;
    }
    assertTrue(loopCount > 0);
    // test with a diff batch limit
    // set it high enough
    // we expect back 3 since there are
    // column = m!HDFS_BYTES_READ value=57
    // column = m!MAP_SLOT_MILLIS value=141
    // column min_start_time value=1425016501000
    s = new Scan();
    s.addFamily(FlowRunColumnFamily.INFO.getBytes());
    s.setStartRow(startRow);
    // set a batch limit
    batchLimit = 100;
    s.setBatch(batchLimit);
    s.setStopRow(stopRow);
    scanner = table1.getScanner(s);
    loopCount = 0;
    for (Result result : scanner) {
        assertNotNull(result);
        assertTrue(!result.isEmpty());
        assertTrue(result.rawCells().length <= batchLimit);
        Map<byte[], byte[]> values = result.getFamilyMap(FlowRunColumnFamily.INFO.getBytes());
        assertNotNull(values);
        // assert that with every next invocation
        // we get back <= batchLimit values
        assertTrue(values.size() <= batchLimit);
        // see comment above: exactly 3 columns are expected
        assertEquals(3, values.size());
        loopCount++;
    }
    // should loop through only once
    assertTrue(loopCount == 1);
    // set it to a negative number
    // we expect all 3 back since there are
    // column = m!HDFS_BYTES_READ value=57
    // column = m!MAP_SLOT_MILLIS value=141
    // column min_start_time value=1425016501000
    s = new Scan();
    s.addFamily(FlowRunColumnFamily.INFO.getBytes());
    s.setStartRow(startRow);
    // set a batch limit
    batchLimit = -671;
    s.setBatch(batchLimit);
    s.setStopRow(stopRow);
    scanner = table1.getScanner(s);
    loopCount = 0;
    for (Result result : scanner) {
        assertNotNull(result);
        assertTrue(!result.isEmpty());
        assertEquals(3, result.rawCells().length);
        Map<byte[], byte[]> values = result.getFamilyMap(FlowRunColumnFamily.INFO.getBytes());
        assertNotNull(values);
        // a negative batch limit disables the per-Result cap,
        // so each next invocation returns all 3 values at once
        assertEquals(3, values.size());
        loopCount++;
    }
    // should loop through only once
    assertEquals(1, loopCount);
    // set it to 0
    // we expect all 3 back since there are
    // column = m!HDFS_BYTES_READ value=57
    // column = m!MAP_SLOT_MILLIS value=141
    // column min_start_time value=1425016501000
    s = new Scan();
    s.addFamily(FlowRunColumnFamily.INFO.getBytes());
    s.setStartRow(startRow);
    // set a batch limit
    batchLimit = 0;
    s.setBatch(batchLimit);
    s.setStopRow(stopRow);
    scanner = table1.getScanner(s);
    loopCount = 0;
    for (Result result : scanner) {
        assertNotNull(result);
        assertTrue(!result.isEmpty());
        assertEquals(3, result.rawCells().length);
        Map<byte[], byte[]> values = result.getFamilyMap(FlowRunColumnFamily.INFO.getBytes());
        assertNotNull(values);
        // a batch limit of 0 also disables the per-Result cap,
        // so each next invocation returns all 3 values at once
        assertEquals(3, values.size());
        loopCount++;
    }
    // should loop through only once
    assertEquals(1, loopCount);
}
Also used : EntityTable(org.apache.hadoop.yarn.server.timelineservice.storage.entity.EntityTable) Table(org.apache.hadoop.hbase.client.Table) ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) Connection(org.apache.hadoop.hbase.client.Connection) Scan(org.apache.hadoop.hbase.client.Scan) Result(org.apache.hadoop.hbase.client.Result)
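
Scan#setBatch(n) caps the number of cells a single Result may carry, so a wide row is split across consecutive Results when n is smaller than the row width; as the zero and negative cases above show, a non-positive batch limit leaves the row intact. A minimal sketch of the positive case, assuming an already-open Table handle named table and the same flow run column family:

Scan batched = new Scan();
batched.addFamily(FlowRunColumnFamily.INFO.getBytes());
// at most 2 cells per Result; a row holding 3 cells arrives as two chunks
batched.setBatch(2);
try (ResultScanner rs = table.getScanner(batched)) {
    for (Result chunk : rs) {
        // every chunk belongs to a single row and never exceeds the batch size
        assert chunk.rawCells().length <= 2;
    }
}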

Example 14 with ResultScanner

Use of org.apache.hadoop.hbase.client.ResultScanner in the hadoop project by Apache.

From the class TestHBaseTimelineStorageApps, method testWriteNullApplicationToHBase.

@Test
public void testWriteNullApplicationToHBase() throws Exception {
    TimelineEntities te = new TimelineEntities();
    ApplicationEntity entity = new ApplicationEntity();
    String appId = "application_1000178881110_2002";
    entity.setId(appId);
    long cTime = 1425016501000L;
    entity.setCreatedTime(cTime);
    // add the info map in Timeline Entity
    Map<String, Object> infoMap = new HashMap<String, Object>();
    infoMap.put("in fo M apK  ey1", "infoMapValue1");
    infoMap.put("infoMapKey2", 10);
    entity.addInfo(infoMap);
    te.addEntity(entity);
    HBaseTimelineWriterImpl hbi = null;
    try {
        Configuration c1 = util.getConfiguration();
        hbi = new HBaseTimelineWriterImpl();
        hbi.init(c1);
        hbi.start();
        String cluster = "cluster_check_null_application";
        String user = "user1check_null_application";
        //set the flow name to null
        String flow = null;
        String flowVersion = "AB7822C10F1111";
        long runid = 1002345678919L;
        hbi.write(cluster, user, flow, flowVersion, runid, appId, te);
        hbi.stop();
        // retrieve the row
        Scan scan = new Scan();
        scan.setStartRow(Bytes.toBytes(cluster));
        scan.setStopRow(Bytes.toBytes(cluster + "1"));
        Connection conn = ConnectionFactory.createConnection(c1);
        ResultScanner resultScanner = new ApplicationTable().getResultScanner(c1, conn, scan);
        assertNotNull(resultScanner);
        // try to iterate over results
        int count = 0;
        for (Result rr = resultScanner.next(); rr != null; rr = resultScanner.next()) {
            count++;
        }
        // there should be no rows written
        // no exceptions thrown during write
        assertEquals(0, count);
    } finally {
        if (hbi != null) {
            hbi.stop();
            hbi.close();
        }
    }
}
Also used : ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) Configuration(org.apache.hadoop.conf.Configuration) HashMap(java.util.HashMap) Connection(org.apache.hadoop.hbase.client.Connection) Result(org.apache.hadoop.hbase.client.Result) ApplicationTable(org.apache.hadoop.yarn.server.timelineservice.storage.application.ApplicationTable) TimelineEntities(org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntities) ApplicationEntity(org.apache.hadoop.yarn.api.records.timelineservice.ApplicationEntity) Scan(org.apache.hadoop.hbase.client.Scan) Test(org.junit.Test)
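
ApplicationTable#getResultScanner is a thin timeline-service wrapper around the plain HBase client, and the explicit next()-until-null loop above is equivalent to the enhanced for loop used in the other examples. The same zero-row check written directly against a Table, as a hedged sketch; APPLICATION_TABLE_NAME is a placeholder for the configured application table name, not a project constant:

Scan scan = new Scan();
scan.setStartRow(Bytes.toBytes(cluster));
scan.setStopRow(Bytes.toBytes(cluster + "1"));
int count = 0;
try (Connection conn = ConnectionFactory.createConnection(c1);
     Table table = conn.getTable(TableName.valueOf(APPLICATION_TABLE_NAME));
     ResultScanner rs = table.getScanner(scan)) {
    for (Result rr : rs) {
        count++;
    }
}
// nothing is written when the flow name is null, so the count stays 0
assertEquals(0, count);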

Example 15 with ResultScanner

Use of org.apache.hadoop.hbase.client.ResultScanner in the hadoop project by Apache.

From the class TestHBaseTimelineStorageEntities, method testEventsWithEmptyInfo.

@Test
public void testEventsWithEmptyInfo() throws IOException {
    TimelineEvent event = new TimelineEvent();
    String eventId = "foo_ev e  nt_id";
    event.setId(eventId);
    Long expTs = 1436512802000L;
    event.setTimestamp(expTs);
    final TimelineEntity entity = new TimelineEntity();
    entity.setId("attempt_1329348432655_0001_m_000008_18");
    entity.setType("FOO_ATTEMPT");
    entity.addEvent(event);
    TimelineEntities entities = new TimelineEntities();
    entities.addEntity(entity);
    HBaseTimelineWriterImpl hbi = null;
    try {
        Configuration c1 = util.getConfiguration();
        hbi = new HBaseTimelineWriterImpl();
        hbi.init(c1);
        hbi.start();
        String cluster = "cluster_test_empty_eventkey";
        String user = "user_emptyeventkey";
        String flow = "other_flow_name";
        String flowVersion = "1111F01C2287BA";
        long runid = 1009876543218L;
        String appName = ApplicationId.newInstance(System.currentTimeMillis() + 9000000L, 1).toString();
        byte[] startRow = new EntityRowKeyPrefix(cluster, user, flow, runid, appName).getRowKeyPrefix();
        hbi.write(cluster, user, flow, flowVersion, runid, appName, entities);
        hbi.stop();
        // scan the table and see that entity exists
        Scan s = new Scan();
        s.setStartRow(startRow);
        s.addFamily(EntityColumnFamily.INFO.getBytes());
        Connection conn = ConnectionFactory.createConnection(c1);
        ResultScanner scanner = new EntityTable().getResultScanner(c1, conn, s);
        int rowCount = 0;
        for (Result result : scanner) {
            if (result != null && !result.isEmpty()) {
                rowCount++;
                // check the row key
                byte[] row1 = result.getRow();
                assertTrue(isRowKeyCorrect(row1, cluster, user, flow, runid, appName, entity));
                Map<EventColumnName, Object> eventsResult = EntityColumnPrefix.EVENT.readResults(result, new EventColumnNameConverter());
                // there should be only one event
                assertEquals(1, eventsResult.size());
                for (Map.Entry<EventColumnName, Object> e : eventsResult.entrySet()) {
                    EventColumnName eventColumnName = e.getKey();
                    // the qualifier is a compound key
                    // hence match individual values
                    assertEquals(eventId, eventColumnName.getId());
                    assertEquals(expTs, eventColumnName.getTimestamp());
                    // key must be empty
                    assertNull(eventColumnName.getInfoKey());
                    Object value = e.getValue();
                    // value should be empty
                    assertEquals("", value.toString());
                }
            }
        }
        assertEquals(1, rowCount);
        // read the timeline entity using the reader this time
        TimelineEntity e1 = reader.getEntity(new TimelineReaderContext(cluster, user, flow, runid, appName, entity.getType(), entity.getId()), new TimelineDataToRetrieve(null, null, EnumSet.of(Field.ALL), null));
        Set<TimelineEntity> es1 = reader.getEntities(new TimelineReaderContext(cluster, user, flow, runid, appName, entity.getType(), null), new TimelineEntityFilters(), new TimelineDataToRetrieve(null, null, EnumSet.of(Field.ALL), null));
        assertNotNull(e1);
        assertEquals(1, es1.size());
        // check the events
        NavigableSet<TimelineEvent> events = e1.getEvents();
        // there should be only one event
        assertEquals(1, events.size());
        for (TimelineEvent e : events) {
            assertEquals(eventId, e.getId());
            assertEquals(expTs, Long.valueOf(e.getTimestamp()));
            Map<String, Object> info = e.getInfo();
            assertTrue(info == null || info.isEmpty());
        }
    } finally {
        if (hbi != null) {
            hbi.stop();
            hbi.close();
        }
    }
}
Also used : TimelineEvent(org.apache.hadoop.yarn.api.records.timelineservice.TimelineEvent) Configuration(org.apache.hadoop.conf.Configuration) TimelineReaderContext(org.apache.hadoop.yarn.server.timelineservice.reader.TimelineReaderContext) EntityRowKeyPrefix(org.apache.hadoop.yarn.server.timelineservice.storage.entity.EntityRowKeyPrefix) EventColumnName(org.apache.hadoop.yarn.server.timelineservice.storage.common.EventColumnName) Result(org.apache.hadoop.hbase.client.Result) TimelineEntities(org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntities) EntityTable(org.apache.hadoop.yarn.server.timelineservice.storage.entity.EntityTable) ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) TimelineEntityFilters(org.apache.hadoop.yarn.server.timelineservice.reader.TimelineEntityFilters) Connection(org.apache.hadoop.hbase.client.Connection) EventColumnNameConverter(org.apache.hadoop.yarn.server.timelineservice.storage.common.EventColumnNameConverter) TimelineEntity(org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity) TimelineDataToRetrieve(org.apache.hadoop.yarn.server.timelineservice.reader.TimelineDataToRetrieve) Scan(org.apache.hadoop.hbase.client.Scan) Map(java.util.Map) NavigableMap(java.util.NavigableMap) HashMap(java.util.HashMap) Test(org.junit.Test)
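
The scan in this test sets only a start row built from EntityRowKeyPrefix. When the intent is simply "every row under this key prefix", Scan#setRowPrefixFilter (available since HBase 1.0) derives both the start row and the exclusive stop row from the prefix bytes. A minimal sketch under that assumption, reusing startRow from the test and assuming EntityTable exposes a DEFAULT_TABLE_NAME constant analogous to the flow tables in the earlier examples:

Scan prefixScan = new Scan();
// derives the start row and the exclusive stop row from the prefix bytes
prefixScan.setRowPrefixFilter(startRow);
prefixScan.addFamily(EntityColumnFamily.INFO.getBytes());
try (Connection conn = ConnectionFactory.createConnection(c1);
     Table entityTable = conn.getTable(TableName.valueOf(EntityTable.DEFAULT_TABLE_NAME));
     ResultScanner rs = entityTable.getScanner(prefixScan)) {
    for (Result result : rs) {
        // one Result per entity row stored under the application's key prefix
    }
}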

Aggregations

ResultScanner (org.apache.hadoop.hbase.client.ResultScanner): 326 usages
Scan (org.apache.hadoop.hbase.client.Scan): 295 usages
Result (org.apache.hadoop.hbase.client.Result): 286 usages
Table (org.apache.hadoop.hbase.client.Table): 160 usages
Test (org.junit.Test): 143 usages
Cell (org.apache.hadoop.hbase.Cell): 104 usages
IOException (java.io.IOException): 102 usages
TableName (org.apache.hadoop.hbase.TableName): 88 usages
Connection (org.apache.hadoop.hbase.client.Connection): 75 usages
Put (org.apache.hadoop.hbase.client.Put): 75 usages
Delete (org.apache.hadoop.hbase.client.Delete): 70 usages
ArrayList (java.util.ArrayList): 61 usages
PrivilegedExceptionAction (java.security.PrivilegedExceptionAction): 47 usages
InterruptedIOException (java.io.InterruptedIOException): 46 usages
CellScanner (org.apache.hadoop.hbase.CellScanner): 42 usages
HTableDescriptor (org.apache.hadoop.hbase.HTableDescriptor): 31 usages
HColumnDescriptor (org.apache.hadoop.hbase.HColumnDescriptor): 29 usages
HTable (org.apache.hadoop.hbase.client.HTable): 29 usages
Get (org.apache.hadoop.hbase.client.Get): 23 usages
Admin (org.apache.hadoop.hbase.client.Admin): 22 usages