Search in sources :

Example 1 with TDBulkImportSession

Use of com.treasuredata.client.model.TDBulkImportSession in project td-client-java by treasure-data.

The class TestTDClient, method testBulkImport.

/**
 * End-to-end exercise of the bulk import API against a freshly created table.
 *
 * <p>Steps performed, in order:
 * <ol>
 *   <li>create a bulk import session and check it is listed and in the uploading state;</li>
 *   <li>upload {@code numParts} gzip-compressed msgpack parts, each holding
 *       {@code numRowsInPart} valid rows plus one deliberately invalid record,
 *       toggling freeze/unfreeze after every upload;</li>
 *   <li>delete the last uploaded part, freeze and perform the session, and poll
 *       until the status is no longer {@code PERFORMING};</li>
 *   <li>verify the error records and the part/record counters of the session;</li>
 *   <li>commit the session, poll until the status is {@code COMMITTED}, and
 *       verify the row count and columns of the target table.</li>
 * </ol>
 * The session is always deleted in the {@code finally} block.
 *
 * @throws Exception if any client call, I/O operation or wait fails
 */
@Test
public void testBulkImport() throws Exception {
    final String bulkImportTable = newTemporaryName("sample_bi");
    client.deleteTableIfExists(SAMPLE_DB, bulkImportTable);
    client.createTableIfNotExists(SAMPLE_DB, bulkImportTable);
    final int numRowsInPart = 10;
    final int numParts = 3;
    final String partNamePrefix = "bip";
    // "HH" (24-hour clock): with "hh" two runs 12 hours apart would produce the same session name
    String dateStr = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
    final String session = "td-client-java-test-session-" + dateStr;
    try {
        client.createBulkImportSession(session, SAMPLE_DB, bulkImportTable);
        List<TDBulkImportSession> sessionList = client.listBulkImportSessions();
        TDBulkImportSession foundInList = Iterables.find(sessionList, new Predicate<TDBulkImportSession>() {

            @Override
            public boolean apply(TDBulkImportSession input) {
                return test(input);
            }

            @Override
            public boolean test(TDBulkImportSession input) {
                return input.getName().equals(session);
            }
        });
        TDBulkImportSession bs = client.getBulkImportSession(session);
        logger.info("bulk import session: {}, error message: {}", bs.getJobId(), bs.getErrorMessage());
        assertEquals(session, bs.getName());
        assertEquals(SAMPLE_DB, bs.getDatabaseName());
        assertEquals(bulkImportTable, bs.getTableName());
        assertTrue(bs.isUploading());
        assertEquals(foundInList.getStatus(), bs.getStatus());
        // Running row counter across all parts, used to make each row's time/event unique
        int count = 0;
        final long time = System.currentTimeMillis() / 1000;
        // Upload parts 0 .. numParts - 1
        for (int i = 0; i < numParts; ++i) {
            String partName = partNamePrefix + i;
            // Prepare msgpack.gz; try-with-resources closes the packer and the gzip stream
            // even when packing fails, and closing is what flushes the gzip trailer into buf
            ByteArrayOutputStream buf = new ByteArrayOutputStream();
            try (OutputStream out = new GZIPOutputStream(buf);
                    MessagePacker packer = MessagePack.newDefaultPacker(out)) {
                for (int n = 0; n < numRowsInPart; ++n) {
                    ValueFactory.MapBuilder b = ValueFactory.newMapBuilder();
                    b.put(ValueFactory.newString("time"), ValueFactory.newInteger(time + count));
                    b.put(ValueFactory.newString("event"), ValueFactory.newString("log" + count));
                    b.put(ValueFactory.newString("description"), ValueFactory.newString("sample data"));
                    packer.packValue(b.build());
                    count += 1;
                }
                // Embed an error record (nil key), one per part
                packer.packValue(ValueFactory.newMap(new Value[] { ValueFactory.newNil(), ValueFactory.newString("invalid data") }));
            }
            File tmpFile = File.createTempFile(partName, ".msgpack.gz", new File("target"));
            // Do not let repeated test runs accumulate part files under target/
            tmpFile.deleteOnExit();
            Files.write(tmpFile.toPath(), buf.toByteArray());
            client.uploadBulkImportPart(session, partName, tmpFile);
            // list parts
            List<String> parts = client.listBulkImportParts(session);
            assertTrue(parts.contains(partName));
            // freeze test
            client.freezeBulkImportSession(session);
            // unfreeze test
            client.unfreezeBulkImportSession(session);
        }
        // Delete the last part
        final String deletedPart = partNamePrefix + (numParts - 1);
        client.deleteBulkImportPart(session, deletedPart);
        List<String> parts = client.listBulkImportParts(session);
        assertFalse(parts.contains(deletedPart));
        // Freeze the session
        client.freezeBulkImportSession(session);
        // Perform the session
        client.performBulkImportSession(session);
        // Wait for the perform step to complete
        ExponentialBackOff backoff = new ExponentialBackOff();
        long deadline = System.currentTimeMillis() + TimeUnit.MINUTES.toMillis(10);
        bs = client.getBulkImportSession(session);
        while (bs.getStatus() == TDBulkImportSession.ImportStatus.PERFORMING) {
            assertFalse(bs.isUploading());
            if (System.currentTimeMillis() > deadline) {
                throw new IllegalStateException("timeout error: bulk import perform");
            }
            logger.debug("Waiting bulk import completion");
            Thread.sleep(backoff.nextWaitTimeMillis());
            bs = client.getBulkImportSession(session);
        }
        // Check session contents
        assertTrue(bs.hasErrorOnPerform());
        logger.debug(bs.getErrorMessage());
        // Error record check: count the records in the gzip-compressed msgpack error stream
        int errorCount = client.getBulkImportErrorRecords(session, new Function<InputStream, Integer>() {

            @Override
            public Integer apply(InputStream input) {
                try {
                    int errorRecordCount = 0;
                    MessageUnpacker unpacker = MessagePack.newDefaultUnpacker(new GZIPInputStream(input));
                    while (unpacker.hasNext()) {
                        Value v = unpacker.unpackValue();
                        logger.info("error record: {}", v);
                        errorRecordCount += 1;
                    }
                    return errorRecordCount;
                } catch (IOException e) {
                    throw Throwables.propagate(e);
                }
            }
        });
        // One part was deleted above; each remaining part carries exactly one invalid record
        final int numValidParts = numParts - 1;
        assertEquals(numValidParts, errorCount);
        assertEquals(0, bs.getErrorParts());
        assertEquals(numValidParts, bs.getValidParts());
        assertEquals(numValidParts, bs.getErrorRecords());
        assertEquals(numValidParts * numRowsInPart, bs.getValidRecords());
        // Commit the session
        deadline = System.currentTimeMillis() + TimeUnit.MINUTES.toMillis(5);
        client.commitBulkImportSession(session);
        // Wait for the commit to complete
        bs = client.getBulkImportSession(session);
        while (bs.getStatus() != TDBulkImportSession.ImportStatus.COMMITTED) {
            if (System.currentTimeMillis() > deadline) {
                throw new IllegalStateException("timeout error: bulk import commit");
            }
            logger.info("Waiting bulk import commit completion");
            Thread.sleep(TimeUnit.SECONDS.toMillis(5));
            bs = client.getBulkImportSession(session);
        }
        // Check the data
        TDTable imported = Iterables.find(client.listTables(SAMPLE_DB), new Predicate<TDTable>() {

            @Override
            public boolean apply(TDTable input) {
                return test(input);
            }

            @Override
            public boolean test(TDTable input) {
                return input.getName().equals(bulkImportTable);
            }
        });
        assertEquals(numValidParts * numRowsInPart, imported.getRowCount());
        List<TDColumn> columns = imported.getColumns();
        logger.info(Joiner.on(", ").join(columns));
        // event, description, (time)
        assertEquals(2, columns.size());
    } finally {
        client.deleteBulkImportSession(session);
    }
}
Also used : GZIPOutputStream(java.util.zip.GZIPOutputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) OutputStream(java.io.OutputStream) TDColumn(com.treasuredata.client.model.TDColumn) Matchers.isEmptyOrNullString(org.hamcrest.Matchers.isEmptyOrNullString) Matchers.containsString(org.hamcrest.Matchers.containsString) GZIPInputStream(java.util.zip.GZIPInputStream) MessagePacker(org.msgpack.core.MessagePacker) MessageUnpacker(org.msgpack.core.MessageUnpacker) GZIPOutputStream(java.util.zip.GZIPOutputStream) TDTable(com.treasuredata.client.model.TDTable) GZIPInputStream(java.util.zip.GZIPInputStream) InputStream(java.io.InputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ValueFactory(org.msgpack.value.ValueFactory) IOException(java.io.IOException) Date(java.util.Date) ArrayValue(org.msgpack.value.ArrayValue) Value(org.msgpack.value.Value) TDBulkImportSession(com.treasuredata.client.model.TDBulkImportSession) SimpleDateFormat(java.text.SimpleDateFormat) File(java.io.File) Test(org.junit.Test)

Aggregations

TDBulkImportSession (com.treasuredata.client.model.TDBulkImportSession)1 TDColumn (com.treasuredata.client.model.TDColumn)1 TDTable (com.treasuredata.client.model.TDTable)1 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 File (java.io.File)1 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 OutputStream (java.io.OutputStream)1 SimpleDateFormat (java.text.SimpleDateFormat)1 Date (java.util.Date)1 GZIPInputStream (java.util.zip.GZIPInputStream)1 GZIPOutputStream (java.util.zip.GZIPOutputStream)1 Matchers.containsString (org.hamcrest.Matchers.containsString)1 Matchers.isEmptyOrNullString (org.hamcrest.Matchers.isEmptyOrNullString)1 Test (org.junit.Test)1 MessagePacker (org.msgpack.core.MessagePacker)1 MessageUnpacker (org.msgpack.core.MessageUnpacker)1 ArrayValue (org.msgpack.value.ArrayValue)1 Value (org.msgpack.value.Value)1 ValueFactory (org.msgpack.value.ValueFactory)1