Use of com.thinkaurelius.titan.diskstorage.keycolumnvalue.scan.ScanJob in project titan by thinkaurelius.
From the class HadoopScanRunner, the method runJob:
public static ScanMetrics runJob(org.apache.hadoop.conf.Configuration hadoopConf, Class<? extends InputFormat> inputFormat,
        String jobName, Class<? extends Mapper> mapperClass) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(hadoopConf);
    //job.setJarByClass(HadoopScanMapper.class);
    job.setJarByClass(mapperClass);
    //job.setJobName(HadoopScanMapper.class.getSimpleName() + "[" + scanJob + "]");
    job.setJobName(jobName);
    // The scan emits no map or reduce output; results are reported through counters.
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(0);
    //job.setMapperClass(HadoopScanMapper.class);
    job.setMapperClass(mapperClass);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setInputFormatClass(inputFormat);
    boolean success = job.waitForCompletion(true);
    if (!success) {
        String f;
        try {
            // Just in case one of Job's methods throws an exception
            f = String.format("MapReduce JobID %s terminated abnormally: %s",
                    job.getJobID().toString(), HadoopCompatLoader.DEFAULT_COMPAT.getJobFailureString(job));
        } catch (RuntimeException e) {
            f = "Job failed (unable to read job status programmatically -- see MapReduce logs for information)";
        }
        throw new IOException(f);
    } else {
        // Convert the job's Hadoop counters into Titan's ScanMetrics.
        return DEFAULT_COMPAT.getMetrics(job.getCounters());
    }
}
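Seen from a call site, runJob needs only a Hadoop Configuration, an InputFormat class, a job name, and a Mapper class. Below is a minimal sketch of submitting such a job; NoOpScanMapper and the job name are hypothetical stand-ins invented for illustration (in Titan the mapper passed here is HadoopScanMapper, configured with a ScanJob), and the metrics accessor assumes the ScanMetrics.Metric enum of the Titan version in use:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import com.thinkaurelius.titan.diskstorage.keycolumnvalue.scan.ScanMetrics;
import com.thinkaurelius.titan.hadoop.scan.HadoopScanRunner;

public class ScanJobSubmitExample {

    // Hypothetical mapper, for illustration only: consumes rows and does nothing.
    public static class NoOpScanMapper
            extends Mapper<NullWritable, NullWritable, NullWritable, NullWritable> {
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Job-specific settings would be written into conf before submission.
        ScanMetrics metrics = HadoopScanRunner.runJob(
                conf, SequenceFileInputFormat.class, "scan-job-example", NoOpScanMapper.class);
        System.out.println("successful rows: " + metrics.get(ScanMetrics.Metric.SUCCESS));
    }
}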
Use of com.thinkaurelius.titan.diskstorage.keycolumnvalue.scan.ScanJob in project titan by thinkaurelius.
From the class CassandraScanJobIT, the method testSimpleScan:
@Test
public void testSimpleScan() throws InterruptedException, ExecutionException, IOException, BackendException {
    int keys = 1000;
    int cols = 40;
    String[][] values = KeyValueStoreUtil.generateData(keys, cols);
    //Make it only half the number of columns for every 2nd key
    for (int i = 0; i < values.length; i++) {
        if (i % 2 == 0)
            values[i] = Arrays.copyOf(values[i], cols / 2);
    }
    log.debug("Loading values: " + keys + "x" + cols);
    KeyColumnValueStoreManager mgr = new CassandraThriftStoreManager(GraphDatabaseConfiguration.buildGraphConfiguration());
    KeyColumnValueStore store = mgr.openDatabase("edgestore");
    StoreTransaction tx = mgr.beginTransaction(StandardBaseTransactionConfig.of(TimestampProviders.MICRO));
    KeyColumnValueStoreUtil.loadValues(store, tx, values);
    // noop on Cassandra, but harmless
    tx.commit();
    SimpleScanJobRunner runner = (ScanJob job, Configuration jobConf, String rootNSName) -> {
        try {
            return new CassandraHadoopScanRunner(job)
                    .scanJobConf(jobConf)
                    .scanJobConfRoot(rootNSName)
                    .partitionerOverride("org.apache.cassandra.dht.Murmur3Partitioner")
                    .run();
        } catch (ClassNotFoundException e) {
            throw new RuntimeException(e);
        }
    };
    SimpleScanJob.runBasicTests(keys, cols, runner);
}
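The lambda assigned to runner works because SimpleScanJobRunner is a functional interface. Its shape, as implied by the call sites in this test and the next one, is roughly the following; the exact throws clause in Titan's source may differ:

public interface SimpleScanJobRunner {
    // Run the given ScanJob with the supplied job configuration; rootNSName is the
    // root of the configuration namespace under which the job's options live.
    ScanMetrics run(ScanJob job, Configuration jobConf, String rootNSName) throws Exception;
}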
Use of com.thinkaurelius.titan.diskstorage.keycolumnvalue.scan.ScanJob in project titan by thinkaurelius.
From the class KeyColumnValueStoreTest, the method scanTestWithSimpleJob:
@Test
public void scanTestWithSimpleJob() throws Exception {
    int keys = 1000, columns = 40;
    String[][] values = KeyValueStoreUtil.generateData(keys, columns);
    //Make it only half the number of columns for every 2nd key
    for (int i = 0; i < values.length; i++) {
        if (i % 2 == 0)
            values[i] = Arrays.copyOf(values[i], columns / 2);
    }
    log.debug("Loading values: " + keys + "x" + columns);
    loadValues(values);
    clopen();
    StandardScanner scanner = new StandardScanner(manager);
    SimpleScanJobRunner runner = (ScanJob job, Configuration jobConf, String rootNSName) -> runSimpleJob(scanner, job, jobConf);
    SimpleScanJob.runBasicTests(keys, columns, runner);
}
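runSimpleJob is a helper on the test class rather than part of this snippet. A plausible sketch of it, assuming StandardScanner exposes a fluent builder with setStoreName, setJobConfiguration, setNumProcessingThreads, setWorkBlockSize, setJob, and an execute() returning a Future of ScanMetrics (these names, and the test's store field, are assumptions about the Titan version in use):

private ScanMetrics runSimpleJob(StandardScanner scanner, ScanJob job, Configuration jobConf) throws Exception {
    return scanner.build()
            .setStoreName(store.getName())  // scan the store the test loaded (assumed field)
            .setJobConfiguration(jobConf)
            .setNumProcessingThreads(2)     // arbitrary small degree of parallelism
            .setWorkBlockSize(100)          // arbitrary block size for this sketch
            .setJob(job)
            .execute()                      // assumed to return a Future<ScanMetrics>
            .get();                         // block until the scan finishes
}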