Use of com.treasuredata.client.model.TDBulkImportSession in the td-client-java project by Treasure Data: the testBulkImport method of the class TestTDClient.
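The test walks the full bulk import life cycle: create a session against a fresh table, upload msgpack.gz parts (each seeded with one deliberately invalid record), freeze the session, perform the import, inspect the error records, commit, and finally verify the imported rows and schema. The session is deleted in a finally block so a failing run does not leave state behind.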
@Test
public void testBulkImport() throws Exception {
    final String bulkImportTable = newTemporaryName("sample_bi");
    client.deleteTableIfExists(SAMPLE_DB, bulkImportTable);
    client.createTableIfNotExists(SAMPLE_DB, bulkImportTable);
    final int numRowsInPart = 10;
    final int numParts = 3;
    // HH (24-hour clock) keeps the generated session name unique across am/pm
    String dateStr = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
    final String session = "td-client-java-test-session-" + dateStr;
    try {
        client.createBulkImportSession(session, SAMPLE_DB, bulkImportTable);
        List<TDBulkImportSession> sessionList = client.listBulkImportSessions();
        TDBulkImportSession foundInList = Iterables.find(sessionList, new Predicate<TDBulkImportSession>() {
            @Override
            public boolean apply(TDBulkImportSession input) {
                return input.getName().equals(session);
            }
        });
        TDBulkImportSession bs = client.getBulkImportSession(session);
        logger.info("bulk import session: {}, error message: {}", bs.getJobId(), bs.getErrorMessage());
        assertEquals(session, bs.getName());
        assertEquals(SAMPLE_DB, bs.getDatabaseName());
        assertEquals(bulkImportTable, bs.getTableName());
        assertTrue(bs.isUploading());
        assertEquals(foundInList.getStatus(), bs.getStatus());
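        // A freshly created session stays in the uploading state until it is frozen;
        // while uploading, parts can be added, listed, and deleted.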
        int count = 0;
        final long time = System.currentTimeMillis() / 1000;
        // Upload parts 0, 1, 2
        for (int i = 0; i < numParts; ++i) {
            String partName = "bip" + i;
            // Prepare a msgpack.gz part in memory
            ByteArrayOutputStream buf = new ByteArrayOutputStream();
            OutputStream out = new GZIPOutputStream(buf);
            MessagePacker packer = MessagePack.newDefaultPacker(out);
            for (int n = 0; n < numRowsInPart; ++n) {
                ValueFactory.MapBuilder b = ValueFactory.newMapBuilder();
                b.put(ValueFactory.newString("time"), ValueFactory.newInteger(time + count));
                b.put(ValueFactory.newString("event"), ValueFactory.newString("log" + count));
                b.put(ValueFactory.newString("description"), ValueFactory.newString("sample data"));
                packer.packValue(b.build());
                count += 1;
            }
            // Embed one error record per part (a map with a nil key is invalid)
            packer.packValue(ValueFactory.newMap(new Value[] { ValueFactory.newNil(), ValueFactory.newString("invalid data") }));
            packer.close();
            out.close();
            File tmpFile = File.createTempFile(partName, ".msgpack.gz", new File("target"));
            Files.write(tmpFile.toPath(), buf.toByteArray());
            client.uploadBulkImportPart(session, partName, tmpFile);
            // The uploaded part should appear in the part list
            List<String> parts = client.listBulkImportParts(session);
            assertTrue(parts.contains(partName));
            // freeze test
            client.freezeBulkImportSession(session);
            // unfreeze test
            client.unfreezeBulkImportSession(session);
        }
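        // Note: each pass above froze and then unfroze the session, exercising both
        // endpoints; a frozen session rejects uploads, so unfreezing keeps the next
        // part upload working.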
        // Delete the last part and verify it is gone
        client.deleteBulkImportPart(session, "bip2");
        List<String> parts = client.listBulkImportParts(session);
        assertTrue(!parts.contains("bip2"));
        // Freeze the session
        client.freezeBulkImportSession(session);
        // Perform the session
        client.performBulkImportSession(session);
        // Wait for the perform step to complete
        ExponentialBackOff backoff = new ExponentialBackOff();
        long deadline = System.currentTimeMillis() + TimeUnit.MINUTES.toMillis(10);
        bs = client.getBulkImportSession(session);
        while (bs.getStatus() == TDBulkImportSession.ImportStatus.PERFORMING) {
            assertFalse(bs.isUploading());
            if (System.currentTimeMillis() > deadline) {
                throw new IllegalStateException("timeout error: bulk import perform");
            }
            logger.debug("Waiting for bulk import perform completion");
            Thread.sleep(backoff.nextWaitTimeMillis());
            bs = client.getBulkImportSession(session);
        }
        // Check session contents: the invalid records must have been flagged
        assertTrue(bs.hasErrorOnPerform());
        logger.debug(bs.getErrorMessage());
        // Count the error records by streaming them back as msgpack.gz
        int errorCount = client.getBulkImportErrorRecords(session, new Function<InputStream, Integer>() {
            int errorRecordCount = 0;

            @Override
            public Integer apply(InputStream input) {
                try {
                    MessageUnpacker unpacker = MessagePack.newDefaultUnpacker(new GZIPInputStream(input));
                    while (unpacker.hasNext()) {
                        Value v = unpacker.unpackValue();
                        logger.info("error record: " + v);
                        errorRecordCount += 1;
                    }
                    return errorRecordCount;
                } catch (IOException e) {
                    throw Throwables.propagate(e);
                }
            }
        });
        // bip2 was deleted before the perform, so only two parts were imported,
        // each contributing numRowsInPart valid records and one error record
        final int numValidParts = numParts - 1;
        assertEquals(numValidParts, errorCount);
        assertEquals(0, bs.getErrorParts());
        assertEquals(numValidParts, bs.getValidParts());
        assertEquals(numValidParts, bs.getErrorRecords());
        assertEquals(numValidParts * numRowsInPart, bs.getValidRecords());
        // Commit the session
        deadline = System.currentTimeMillis() + TimeUnit.MINUTES.toMillis(5);
        client.commitBulkImportSession(session);
        // Wait for the commit to complete
        bs = client.getBulkImportSession(session);
        while (bs.getStatus() != TDBulkImportSession.ImportStatus.COMMITTED) {
            if (System.currentTimeMillis() > deadline) {
                throw new IllegalStateException("timeout error: bulk import commit");
            }
            logger.info("Waiting for bulk import commit completion");
            Thread.sleep(TimeUnit.SECONDS.toMillis(5));
            bs = client.getBulkImportSession(session);
        }
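        // Only after the session reaches COMMITTED are the imported rows visible
        // in the target table, which is what the checks below depend on.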
        // Check the imported data
        TDTable imported = Iterables.find(client.listTables(SAMPLE_DB), new Predicate<TDTable>() {
            @Override
            public boolean apply(TDTable input) {
                return input.getName().equals(bulkImportTable);
            }
        });
        assertEquals(numValidParts * numRowsInPart, imported.getRowCount());
        List<TDColumn> columns = imported.getColumns();
        logger.info(Joiner.on(", ").join(columns));
        // event and description; the time column is not part of the reported schema
        assertEquals(2, columns.size());
    } finally {
        // Always delete the test session, even when an assertion fails
        client.deleteBulkImportSession(session);
    }
}
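Stripped of the JUnit scaffolding, the life cycle the test exercises is: create a session, upload msgpack.gz parts, freeze, perform, then commit. Below is a minimal sketch of that happy path; it is an illustration rather than part of the project's test suite, and it assumes TDClient.newClient() can pick up an API key from its standard configuration and that the placeholder database my_db and table my_table already exist.

import com.treasuredata.client.TDClient;
import com.treasuredata.client.model.TDBulkImportSession;
import org.msgpack.core.MessagePack;
import org.msgpack.core.MessagePacker;
import org.msgpack.value.ValueFactory;

import java.io.File;
import java.io.OutputStream;
import java.nio.file.Files;
import java.util.concurrent.TimeUnit;
import java.util.zip.GZIPOutputStream;

public class BulkImportSketch {
    public static void main(String[] args) throws Exception {
        // Placeholder names: the database and table must already exist
        final String db = "my_db";
        final String table = "my_table";
        final String session = "my-bulk-import-" + System.currentTimeMillis();

        TDClient client = TDClient.newClient();
        client.createBulkImportSession(session, db, table);
        try {
            // Pack one part; every record is a map and needs a "time" column
            File part = File.createTempFile("part0", ".msgpack.gz");
            part.deleteOnExit();
            try (OutputStream out = new GZIPOutputStream(Files.newOutputStream(part.toPath()));
                    MessagePacker packer = MessagePack.newDefaultPacker(out)) {
                packer.packValue(ValueFactory.newMapBuilder()
                        .put(ValueFactory.newString("time"), ValueFactory.newInteger(System.currentTimeMillis() / 1000))
                        .put(ValueFactory.newString("event"), ValueFactory.newString("log0"))
                        .build());
            }
            client.uploadBulkImportPart(session, "part0", part);

            // Freeze to stop further uploads, then convert the parts server-side
            client.freezeBulkImportSession(session);
            client.performBulkImportSession(session);
            TDBulkImportSession bs = client.getBulkImportSession(session);
            while (bs.getStatus() == TDBulkImportSession.ImportStatus.PERFORMING) {
                TimeUnit.SECONDS.sleep(5);
                bs = client.getBulkImportSession(session);
            }

            // Commit makes the performed data visible in the target table
            client.commitBulkImportSession(session);
            while (client.getBulkImportSession(session).getStatus() != TDBulkImportSession.ImportStatus.COMMITTED) {
                TimeUnit.SECONDS.sleep(5);
            }
        } finally {
            client.deleteBulkImportSession(session);
        }
    }
}

In production code the two polling loops would also want a deadline and a growing wait interval, as the test above demonstrates with its ExponentialBackOff and timeout checks.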