Use of org.apache.hadoop.examples.terasort.TeraGen in project whirr by apache.
The class HadoopServiceTeraSortBenchmark, method runTeraGen:
private void runTeraGen() throws IOException {
  // Exclude the master instance from the tasktracker count.
  int numTaskTrackers = controller.getCluster().getInstances().size() - 1;
  long bytesPerNode = Long.parseLong(System.getProperty("terasortBytesPerNode", "1000000000"));
  // Each TeraGen row is 100 bytes, so scale the row count by the number of tasktrackers.
  long rows = numTaskTrackers * (bytesPerNode / 100);
  StopWatch stopWatch = new StopWatch();
  TeraGen teraGen = new TeraGen();
  teraGen.setConf(controller.getJobConf());
  LOG.info("Starting TeraGen with {} tasktrackers, {} bytes per node, {} rows",
      new Object[] { numTaskTrackers, bytesPerNode, rows });
  stopWatch.start();
  teraGen.run(new String[] { String.valueOf(rows), "input" });
  stopWatch.stop();
  LOG.info("TeraGen took {} ms", stopWatch.getTime());
}
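For context, a TeraSort benchmark like this one would typically follow the generation step with a sort over the generated directory. The following is a minimal sketch, not the project's actual code: it assumes the same controller/JobConf wiring as runTeraGen() above, uses org.apache.hadoop.examples.terasort.TeraSort, and invents "output" as the destination directory name.

// Sketch only: a follow-up sort step, assuming the same controller and JobConf
// wiring as runTeraGen(); "output" is a hypothetical directory name.
private void runTeraSort() throws Exception {
  StopWatch stopWatch = new StopWatch();
  TeraSort teraSort = new TeraSort();
  teraSort.setConf(controller.getJobConf());
  stopWatch.start();
  // TeraSort reads the TeraGen output directory ("input") and writes sorted data to "output".
  teraSort.run(new String[] { "input", "output" });
  stopWatch.stop();
  LOG.info("TeraSort took {} ms", stopWatch.getTime());
}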
Use of org.apache.hadoop.examples.terasort.TeraGen in project ignite by apache.
The class HadoopTeraSortTest, method teraGenerate:
/**
 * Runs the data generation stage.
 *
 * @throws Exception If data generation fails.
 */
private void teraGenerate() throws Exception {
  System.out.println("TeraGenerate ===============================================================");
  getFileSystem().delete(new Path(generateOutDir), true);
  // TeraGen writes 100 bytes per row.
  final long numLines = dataSizeBytes() / 100;
  if (numLines < 1)
    throw new IllegalStateException("Data size is too small: " + dataSizeBytes());
  // Generate input data:
  int res = ToolRunner.run(new Configuration(), new TeraGen(),
      new String[] { "-Dmapreduce.framework.name=local", String.valueOf(numLines), generateOutDir });
  assertEquals(0, res);
  FileStatus[] fileStatuses = getFileSystem().listStatus(new Path(generateOutDir));
  long sumLen = 0;
  for (FileStatus fs : fileStatuses)
    sumLen += fs.getLen();
  // Ensure the correct amount of data was generated.
  assertEquals(dataSizeBytes(), sumLen);
}
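The sort stage of such a test can be driven the same way through ToolRunner, which applies the -D generic options to the Configuration before passing the remaining arguments to the tool. The following is a minimal sketch, not the project's code: it assumes org.apache.hadoop.examples.terasort.TeraSort, the same local-framework setting as above, and a hypothetical sortOutDir path.

// Sketch only: the corresponding sort stage, run via ToolRunner in the local
// framework; "sortOutDir" is a hypothetical output path.
private void teraSort(String sortOutDir) throws Exception {
  getFileSystem().delete(new Path(sortOutDir), true);
  int res = ToolRunner.run(new Configuration(), new TeraSort(),
      new String[] { "-Dmapreduce.framework.name=local", generateOutDir, sortOutDir });
  assertEquals(0, res);
}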