Search in sources :

Example 6 with MessagePacker

Use of org.msgpack.core.MessagePacker in the project td-client-java by treasure-data.

From the class TestTDClient, method testBulkImport.

/**
 * Exercises the bulk import workflow end to end against {@code client}:
 * creates a session, uploads {@code numParts} gzip-compressed msgpack parts
 * (each holding {@code numRowsInPart} valid rows plus one deliberately
 * malformed record), deletes the last part, then freezes, performs and
 * commits the session while asserting on the reported session state, the
 * error records and the resulting table.
 *
 * <p>The bulk import session is always deleted in the {@code finally} block.
 *
 * @throws Exception if any client call, I/O operation or wait fails
 */
@Test
public void testBulkImport() throws Exception {
    final String bulkImportTable = newTemporaryName("sample_bi");
    client.deleteTableIfExists(SAMPLE_DB, bulkImportTable);
    client.createTableIfNotExists(SAMPLE_DB, bulkImportTable);
    final int numRowsInPart = 10;
    final int numParts = 3;
    final String partPrefix = "bip";
    // Use the 24-hour field "HH": with "hh" a morning run and an afternoon run
    // could yield the same timestamp and therefore the same session name.
    String dateStr = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
    final String session = "td-client-java-test-session-" + dateStr;
    try {
        client.createBulkImportSession(session, SAMPLE_DB, bulkImportTable);
        List<TDBulkImportSession> sessionList = client.listBulkImportSessions();
        TDBulkImportSession foundInList = Iterables.find(sessionList, new Predicate<TDBulkImportSession>() {

            @Override
            public boolean apply(TDBulkImportSession input) {
                return test(input);
            }

            @Override
            public boolean test(TDBulkImportSession input) {
                return input.getName().equals(session);
            }
        });
        TDBulkImportSession bs = client.getBulkImportSession(session);
        logger.info("bulk import session: {}, error message: {}", bs.getJobId(), bs.getErrorMessage());
        assertEquals(session, bs.getName());
        assertEquals(SAMPLE_DB, bs.getDatabaseName());
        assertEquals(bulkImportTable, bs.getTableName());
        assertTrue(bs.isUploading());
        assertEquals(foundInList.getStatus(), bs.getStatus());
        int count = 0;
        final long time = System.currentTimeMillis() / 1000;
        // Upload every part (bip0 .. bip<numParts - 1>)
        for (int i = 0; i < numParts; ++i) {
            String partName = partPrefix + i;
            // Prepare msgpack.gz; try-with-resources closes the packer and then the
            // gzip stream even when packing fails, so the buffer is complete before
            // it is read below.
            ByteArrayOutputStream buf = new ByteArrayOutputStream();
            try (OutputStream out = new GZIPOutputStream(buf);
                    MessagePacker packer = MessagePack.newDefaultPacker(out)) {
                for (int n = 0; n < numRowsInPart; ++n) {
                    ValueFactory.MapBuilder b = ValueFactory.newMapBuilder();
                    b.put(ValueFactory.newString("time"), ValueFactory.newInteger(time + count));
                    b.put(ValueFactory.newString("event"), ValueFactory.newString("log" + count));
                    b.put(ValueFactory.newString("description"), ValueFactory.newString("sample data"));
                    packer.packValue(b.build());
                    count += 1;
                }
                // Embed an error record (a map whose key is nil)
                packer.packValue(ValueFactory.newMap(new Value[] { ValueFactory.newNil(), ValueFactory.newString("invalid data") }));
            }
            File tmpFile = File.createTempFile(partName, ".msgpack.gz", new File("target"));
            Files.write(tmpFile.toPath(), buf.toByteArray());
            client.uploadBulkImportPart(session, partName, tmpFile);
            // list parts
            List<String> parts = client.listBulkImportParts(session);
            assertTrue(parts.contains(partName));
            // freeze test
            client.freezeBulkImportSession(session);
            // unfreeze test
            client.unfreezeBulkImportSession(session);
        }
        // Delete the last uploaded part
        final String lastPartName = partPrefix + (numParts - 1);
        client.deleteBulkImportPart(session, lastPartName);
        List<String> parts = client.listBulkImportParts(session);
        assertFalse(parts.contains(lastPartName));
        // Freeze the session
        client.freezeBulkImportSession(session);
        // Perform the session
        client.performBulkImportSession(session);
        // Wait for the perform step to complete (give up after 10 minutes)
        ExponentialBackOff backoff = new ExponentialBackOff();
        long deadline = System.currentTimeMillis() + TimeUnit.MINUTES.toMillis(10);
        bs = client.getBulkImportSession(session);
        while (bs.getStatus() == TDBulkImportSession.ImportStatus.PERFORMING) {
            assertFalse(bs.isUploading());
            if (System.currentTimeMillis() > deadline) {
                throw new IllegalStateException("timeout error: bulk import perform");
            }
            logger.debug("Waiting bulk import completion");
            Thread.sleep(backoff.nextWaitTimeMillis());
            bs = client.getBulkImportSession(session);
        }
        // Check session contents
        assertTrue(bs.hasErrorOnPerform());
        logger.debug(bs.getErrorMessage());
        // Error record check: count the records in the gzip-compressed msgpack error stream
        int errorCount = client.getBulkImportErrorRecords(session, new Function<InputStream, Integer>() {

            @Override
            public Integer apply(InputStream input) {
                try {
                    int errorRecordCount = 0;
                    MessageUnpacker unpacker = MessagePack.newDefaultUnpacker(new GZIPInputStream(input));
                    while (unpacker.hasNext()) {
                        Value v = unpacker.unpackValue();
                        logger.info("error record: {}", v);
                        errorRecordCount += 1;
                    }
                    return errorRecordCount;
                } catch (IOException e) {
                    throw Throwables.propagate(e);
                }
            }
        });
        // One part was deleted; each remaining part carries exactly one error record
        final int numValidParts = numParts - 1;
        assertEquals(numValidParts, errorCount);
        assertEquals(0, bs.getErrorParts());
        assertEquals(numValidParts, bs.getValidParts());
        assertEquals(numValidParts, bs.getErrorRecords());
        assertEquals(numValidParts * numRowsInPart, bs.getValidRecords());
        // Commit the session
        deadline = System.currentTimeMillis() + TimeUnit.MINUTES.toMillis(5);
        client.commitBulkImportSession(session);
        // Wait for the commit step to complete (give up after 5 minutes)
        bs = client.getBulkImportSession(session);
        while (bs.getStatus() != TDBulkImportSession.ImportStatus.COMMITTED) {
            if (System.currentTimeMillis() > deadline) {
                throw new IllegalStateException("timeout error: bulk import commit");
            }
            logger.info("Waiting bulk import commit step completion");
            Thread.sleep(TimeUnit.SECONDS.toMillis(5));
            bs = client.getBulkImportSession(session);
        }
        // Check the data
        TDTable imported = Iterables.find(client.listTables(SAMPLE_DB), new Predicate<TDTable>() {

            @Override
            public boolean apply(TDTable input) {
                return test(input);
            }

            @Override
            public boolean test(TDTable input) {
                return input.getName().equals(bulkImportTable);
            }
        });
        assertEquals(numValidParts * numRowsInPart, imported.getRowCount());
        List<TDColumn> columns = imported.getColumns();
        logger.info(Joiner.on(", ").join(columns));
        // event, description, (time)
        assertEquals(2, columns.size());
    } finally {
        client.deleteBulkImportSession(session);
    }
}
Also used : GZIPOutputStream(java.util.zip.GZIPOutputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) OutputStream(java.io.OutputStream) TDColumn(com.treasuredata.client.model.TDColumn) Matchers.isEmptyOrNullString(org.hamcrest.Matchers.isEmptyOrNullString) Matchers.containsString(org.hamcrest.Matchers.containsString) GZIPInputStream(java.util.zip.GZIPInputStream) MessagePacker(org.msgpack.core.MessagePacker) MessageUnpacker(org.msgpack.core.MessageUnpacker) GZIPOutputStream(java.util.zip.GZIPOutputStream) TDTable(com.treasuredata.client.model.TDTable) GZIPInputStream(java.util.zip.GZIPInputStream) InputStream(java.io.InputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ValueFactory(org.msgpack.value.ValueFactory) IOException(java.io.IOException) Date(java.util.Date) ArrayValue(org.msgpack.value.ArrayValue) Value(org.msgpack.value.Value) TDBulkImportSession(com.treasuredata.client.model.TDBulkImportSession) SimpleDateFormat(java.text.SimpleDateFormat) File(java.io.File) Test(org.junit.Test)

Aggregations

MessagePacker (org.msgpack.core.MessagePacker)6 ByteArrayOutputStream (java.io.ByteArrayOutputStream)4 IOException (java.io.IOException)4 HashMap (java.util.HashMap)3 MessageUnpacker (org.msgpack.core.MessageUnpacker)3 Socket (java.net.Socket)2 Map (java.util.Map)2 TimeoutException (java.util.concurrent.TimeoutException)2 Test (org.junit.Test)2 Theory (org.junit.experimental.theories.Theory)2 ImmutableRawValue (org.msgpack.value.ImmutableRawValue)2 Value (org.msgpack.value.Value)2 TDBulkImportSession (com.treasuredata.client.model.TDBulkImportSession)1 TDColumn (com.treasuredata.client.model.TDColumn)1 TDTable (com.treasuredata.client.model.TDTable)1 File (java.io.File)1 InputStream (java.io.InputStream)1 OutputStream (java.io.OutputStream)1 ByteBuffer (java.nio.ByteBuffer)1 MappedByteBuffer (java.nio.MappedByteBuffer)1