Search in sources :

Example 6 with TDTable

use of com.treasuredata.client.model.TDTable in project td-client-java by treasure-data.

From the class TestTDClient, method testBulkImport.

/**
 * Exercises the bulk import life cycle against a temporary table: creates a session, uploads
 * {@code numParts} msgpack.gz parts (each carrying one deliberately invalid record), deletes the
 * last part, then freezes, performs and commits the session, checking the session counters and
 * the imported table along the way. The session is deleted in the {@code finally} block.
 *
 * @throws Exception if a client call, a file operation or one of the waits fails
 */
@Test
public void testBulkImport() throws Exception {
    final String bulkImportTable = newTemporaryName("sample_bi");
    client.deleteTableIfExists(SAMPLE_DB, bulkImportTable);
    client.createTableIfNotExists(SAMPLE_DB, bulkImportTable);
    final int numRowsInPart = 10;
    final int numParts = 3;
    // The last uploaded part is deleted again before the perform step.
    final int numValidParts = numParts - 1;
    // "HH" is the 24-hour clock; "hh" would produce the same name again 12 hours later.
    String dateStr = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
    final String session = "td-client-java-test-session-" + dateStr;
    try {
        client.createBulkImportSession(session, SAMPLE_DB, bulkImportTable);
        List<TDBulkImportSession> sessionList = client.listBulkImportSessions();
        TDBulkImportSession foundInList = Iterables.find(sessionList, new Predicate<TDBulkImportSession>() {

            @Override
            public boolean apply(TDBulkImportSession input) {
                return test(input);
            }

            @Override
            public boolean test(TDBulkImportSession input) {
                return input.getName().equals(session);
            }
        });
        TDBulkImportSession bs = client.getBulkImportSession(session);
        logger.info("bulk import session: {}, error message: {}", bs.getJobId(), bs.getErrorMessage());
        assertEquals(session, bs.getName());
        assertEquals(SAMPLE_DB, bs.getDatabaseName());
        assertEquals(bulkImportTable, bs.getTableName());
        assertTrue(bs.isUploading());
        assertEquals(foundInList.getStatus(), bs.getStatus());
        // Running record counter across all parts, used to make each row's time/event unique.
        int count = 0;
        final long time = System.currentTimeMillis() / 1000;
        // Upload parts bip0 .. bip(numParts - 1)
        for (int i = 0; i < numParts; ++i) {
            String partName = "bip" + i;
            // Prepare msgpack.gz
            ByteArrayOutputStream buf = new ByteArrayOutputStream();
            OutputStream out = new GZIPOutputStream(buf);
            MessagePacker packer = MessagePack.newDefaultPacker(out);
            for (int n = 0; n < numRowsInPart; ++n) {
                ValueFactory.MapBuilder b = ValueFactory.newMapBuilder();
                b.put(ValueFactory.newString("time"), ValueFactory.newInteger(time + count));
                b.put(ValueFactory.newString("event"), ValueFactory.newString("log" + count));
                b.put(ValueFactory.newString("description"), ValueFactory.newString("sample data"));
                packer.packValue(b.build());
                count += 1;
            }
            // Embed an error record
            packer.packValue(ValueFactory.newMap(new Value[] { ValueFactory.newNil(), ValueFactory.newString("invalid data") }));
            packer.close();
            out.close();
            File tmpFile = File.createTempFile(partName, ".msgpack.gz", new File("target"));
            // Do not let part files pile up in the build directory across test runs.
            tmpFile.deleteOnExit();
            Files.write(tmpFile.toPath(), buf.toByteArray());
            client.uploadBulkImportPart(session, partName, tmpFile);
            // list parts
            List<String> parts = client.listBulkImportParts(session);
            assertTrue(parts.contains(partName));
            // freeze test
            client.freezeBulkImportSession(session);
            // unfreeze test
            client.unfreezeBulkImportSession(session);
        }
        // delete the last
        final String deletedPart = "bip" + (numParts - 1);
        client.deleteBulkImportPart(session, deletedPart);
        List<String> parts = client.listBulkImportParts(session);
        assertFalse(parts.contains(deletedPart));
        // Freeze the session
        client.freezeBulkImportSession(session);
        // Perform the session
        client.performBulkImportSession(session);
        // Wait the perform completion
        ExponentialBackOff backoff = new ExponentialBackOff();
        long deadline = System.currentTimeMillis() + TimeUnit.MINUTES.toMillis(10);
        bs = client.getBulkImportSession(session);
        while (bs.getStatus() == TDBulkImportSession.ImportStatus.PERFORMING) {
            assertFalse(bs.isUploading());
            if (System.currentTimeMillis() > deadline) {
                throw new IllegalStateException("timeout error: bulk import perform");
            }
            logger.debug("Waiting bulk import completion");
            Thread.sleep(backoff.nextWaitTimeMillis());
            bs = client.getBulkImportSession(session);
        }
        // Check session contents
        assertTrue(bs.hasErrorOnPerform());
        logger.debug(bs.getErrorMessage());
        // Error record check
        int errorCount = client.getBulkImportErrorRecords(session, new Function<InputStream, Integer>() {

            int errorRecordCount = 0;

            @Override
            public Integer apply(InputStream input) {
                try {
                    MessageUnpacker unpacker = MessagePack.newDefaultUnpacker(new GZIPInputStream(input));
                    while (unpacker.hasNext()) {
                        Value v = unpacker.unpackValue();
                        logger.info("error record: " + v);
                        errorRecordCount += 1;
                    }
                    return errorRecordCount;
                } catch (IOException e) {
                    throw Throwables.propagate(e);
                }
            }
        });
        // Each remaining part carries exactly one invalid record (embedded in the upload loop).
        final int expectedErrorRecords = numValidParts;
        assertEquals(expectedErrorRecords, errorCount);
        assertEquals(0, bs.getErrorParts());
        assertEquals(numValidParts, bs.getValidParts());
        assertEquals(expectedErrorRecords, bs.getErrorRecords());
        assertEquals(numValidParts * numRowsInPart, bs.getValidRecords());
        // Commit the session
        deadline = System.currentTimeMillis() + TimeUnit.MINUTES.toMillis(5);
        client.commitBulkImportSession(session);
        // Wait the commit completion
        bs = client.getBulkImportSession(session);
        while (bs.getStatus() != TDBulkImportSession.ImportStatus.COMMITTED) {
            if (System.currentTimeMillis() > deadline) {
                throw new IllegalStateException("timeout error: bulk import commit");
            }
            logger.info("Waiting bulk import commit step completion");
            Thread.sleep(TimeUnit.SECONDS.toMillis(5));
            bs = client.getBulkImportSession(session);
        }
        // Check the data
        TDTable imported = Iterables.find(client.listTables(SAMPLE_DB), new Predicate<TDTable>() {

            @Override
            public boolean apply(TDTable input) {
                return test(input);
            }

            @Override
            public boolean test(TDTable input) {
                return input.getName().equals(bulkImportTable);
            }
        });
        assertEquals(numValidParts * numRowsInPart, imported.getRowCount());
        List<TDColumn> columns = imported.getColumns();
        logger.info(Joiner.on(", ").join(columns));
        // event, description, (time)
        assertEquals(2, columns.size());
    } finally {
        client.deleteBulkImportSession(session);
    }
}
Also used : GZIPOutputStream(java.util.zip.GZIPOutputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) OutputStream(java.io.OutputStream) TDColumn(com.treasuredata.client.model.TDColumn) Matchers.isEmptyOrNullString(org.hamcrest.Matchers.isEmptyOrNullString) Matchers.containsString(org.hamcrest.Matchers.containsString) GZIPInputStream(java.util.zip.GZIPInputStream) MessagePacker(org.msgpack.core.MessagePacker) MessageUnpacker(org.msgpack.core.MessageUnpacker) GZIPOutputStream(java.util.zip.GZIPOutputStream) TDTable(com.treasuredata.client.model.TDTable) GZIPInputStream(java.util.zip.GZIPInputStream) InputStream(java.io.InputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ValueFactory(org.msgpack.value.ValueFactory) IOException(java.io.IOException) Date(java.util.Date) ArrayValue(org.msgpack.value.ArrayValue) Value(org.msgpack.value.Value) TDBulkImportSession(com.treasuredata.client.model.TDBulkImportSession) SimpleDateFormat(java.text.SimpleDateFormat) File(java.io.File) Test(org.junit.Test)

Example 7 with TDTable

use of com.treasuredata.client.model.TDTable in project td-client-java by treasure-data.

From the class TestTDClient, method listTables.

/**
 * Lists the tables of the {@code sample_datasets} database, logs their metadata, checks the
 * column counts of the {@code nasdaq} and {@code www_access} tables, and exercises
 * {@code equals}/{@code hashCode} of {@link TDTable} through a {@link HashSet} and pairwise
 * comparison.
 *
 * @throws Exception if listing the tables fails
 */
@Test
public void listTables() throws Exception {
    List<TDTable> tableList = client.listTables("sample_datasets");
    assertTrue(tableList.size() >= 2);
    logger.debug(Joiner.on(", ").join(tableList));
    Set<TDTable> tableSet = new HashSet<>();
    for (final TDTable t : tableList) {
        logger.info("id: {}", t.getId());
        logger.info("type: {}", t.getType());
        logger.info("estimated size:{}", t.getEstimatedStorageSize());
        logger.info("last log timestamp: {}", t.getLastLogTimeStamp());
        logger.info("expire days:{}", t.getExpireDays());
        logger.info("created at: {}", t.getCreatedAt());
        logger.info("updated at: {}", t.getUpdatedAt());
        // assertEquals reports the actual column count on failure, unlike assertTrue(a == b).
        if (t.getName().equals("nasdaq")) {
            assertEquals(6, t.getColumns().size());
        } else if (t.getName().equals("www_access")) {
            assertEquals(8, t.getColumns().size());
        }
        // To use equals and hashCode
        tableSet.add(t);
    }
    // equality tests: every stored table must be found again through hashCode/equals
    for (TDTable t : tableSet) {
        assertTrue(tableSet.contains(t));
    }
    // A table equals itself and differs from every other table in the listing.
    for (int i = 0; i < tableList.size(); ++i) {
        for (int j = 0; j < tableList.size(); ++j) {
            if (i == j) {
                assertEquals(tableList.get(i), tableList.get(j));
            } else {
                assertFalse(tableList.get(i).equals(tableList.get(j)));
            }
        }
    }
}
Also used : TDTable(com.treasuredata.client.model.TDTable) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 8 with TDTable

use of com.treasuredata.client.model.TDTable in project td-client-java by treasure-data.

From the class TestTDClient, method tableOperation.

/**
 * Walks a temporary table through create, delete and re-create, checks the conflict and
 * not-found error paths, applies two schema updates, and finally renames the table. Both the
 * original and the renamed table are removed in the {@code finally} block.
 *
 * @throws Exception if a client call fails unexpectedly
 */
@Test
public void tableOperation() throws Exception {
    final String tableName = newTemporaryName("sample");
    final String renamedTableName = tableName + "_renamed";
    try {
        // Start from a clean slate, then exercise create / delete / create-if-missing.
        client.deleteTableIfExists(SAMPLE_DB, tableName);
        client.createTable(SAMPLE_DB, tableName);
        client.deleteTable(SAMPLE_DB, tableName);
        client.createTableIfNotExists(SAMPLE_DB, tableName);
        final boolean foundInMissingDatabase = client.existsTable(SAMPLE_DB + "_nonexistent", tableName);
        assertFalse(foundInMissingDatabase);

        // Creating the same table a second time must be reported as a conflict.
        try {
            client.createTable(SAMPLE_DB, tableName);
            fail("should not reach here");
        } catch (TDClientHttpConflictException conflict) {
            assertEquals(HttpStatus.CONFLICT_409, conflict.getStatusCode());
        }

        // Listing tables of an unknown database must be reported as not found.
        try {
            client.listTables("__unknown__database");
            fail("should not reach here");
        } catch (TDClientHttpNotFoundException notFound) {
            assertEquals(HttpStatus.NOT_FOUND_404, notFound.getStatusCode());
        }

        final byte[] columnKey = "int_col_key_name".getBytes(StandardCharsets.UTF_8);

        // Schema update: append one INT column to the current schema.
        final TDTable originalTable = findTable(SAMPLE_DB, tableName).get();
        final TDColumn intColumn = new TDColumn("int_col", TDColumnType.INT, columnKey);
        final ImmutableList.Builder<TDColumn> intSchema = ImmutableList.builder();
        intSchema.addAll(originalTable.getSchema());
        intSchema.add(intColumn);
        List<TDColumn> schemaToApply = intSchema.build();
        client.updateTableSchema(SAMPLE_DB, tableName, schemaToApply);
        TDTable refreshedTable = findTable(SAMPLE_DB, tableName).get();
        logger.debug(refreshedTable.toString());
        final boolean hasIntColumn = refreshedTable.getSchema().contains(intColumn);
        assertTrue("should have updated column", hasIntColumn);

        // Schema update with the same STRING column listed twice under one key.
        final TDColumn stringColumn = new TDColumn("str_col", TDColumnType.STRING, columnKey);
        final ImmutableList.Builder<TDColumn> duplicatedSchema = ImmutableList.builder();
        duplicatedSchema.addAll(originalTable.getSchema());
        duplicatedSchema.add(stringColumn);
        duplicatedSchema.add(stringColumn);
        schemaToApply = duplicatedSchema.build();
        client.updateTableSchema(SAMPLE_DB, tableName, schemaToApply, true);
        refreshedTable = findTable(SAMPLE_DB, tableName).get();
        logger.debug(refreshedTable.toString());
        final boolean hasStringColumn = refreshedTable.getSchema().contains(stringColumn);
        assertTrue("should have updated column", hasStringColumn);

        // Rename: afterwards only the new name may exist.
        client.deleteTableIfExists(SAMPLE_DB, renamedTableName);
        client.renameTable(SAMPLE_DB, tableName, renamedTableName);
        assertTrue(client.existsTable(SAMPLE_DB, renamedTableName));
        assertFalse(client.existsTable(SAMPLE_DB, tableName));
    } finally {
        client.deleteTableIfExists(SAMPLE_DB, tableName);
        client.deleteTableIfExists(SAMPLE_DB, renamedTableName);
    }
}
Also used : TDColumn(com.treasuredata.client.model.TDColumn) Matchers.isEmptyOrNullString(org.hamcrest.Matchers.isEmptyOrNullString) Matchers.containsString(org.hamcrest.Matchers.containsString) TDTable(com.treasuredata.client.model.TDTable) Test(org.junit.Test)

Aggregations

TDTable (com.treasuredata.client.model.TDTable): 8; Test (org.junit.Test): 7; Matchers.containsString (org.hamcrest.Matchers.containsString): 3; Matchers.isEmptyOrNullString (org.hamcrest.Matchers.isEmptyOrNullString): 3; TDColumn (com.treasuredata.client.model.TDColumn): 2; TDJobList (com.treasuredata.client.model.TDJobList): 2; InputStream (java.io.InputStream): 2; GZIPInputStream (java.util.zip.GZIPInputStream): 2; MessageUnpacker (org.msgpack.core.MessageUnpacker): 2; ArrayValue (org.msgpack.value.ArrayValue): 2; TDBulkImportSession (com.treasuredata.client.model.TDBulkImportSession): 1; TDDatabase (com.treasuredata.client.model.TDDatabase): 1; TDJob (com.treasuredata.client.model.TDJob): 1; TDJobSummary (com.treasuredata.client.model.TDJobSummary): 1; ByteArrayOutputStream (java.io.ByteArrayOutputStream): 1; File (java.io.File): 1; IOException (java.io.IOException): 1; OutputStream (java.io.OutputStream): 1; SimpleDateFormat (java.text.SimpleDateFormat): 1; Date (java.util.Date): 1