use of com.thinkaurelius.titan.hadoop.config.ModifiableHadoopConfiguration in project titan by thinkaurelius.
the class MapReduceIndexManagement method updateIndex.
/**
 * Updates the provided index according to the given {@link SchemaAction}.
 * Only {@link SchemaAction#REINDEX} and {@link SchemaAction#REMOVE_INDEX} are supported.
 * <p>
 * The method configures a Hadoop MapReduce job (job class, mapper class, input
 * column family, input format, index/relation-type names and a copy of the graph's
 * local configuration) and then runs it via {@code HadoopScanRunner.runJob}.
 * Any exception raised while running the job is not propagated; it is wrapped
 * in a failed future instead.
 *
 * @param index the index to process; must be a {@code RelationTypeIndex} or a
 *              {@code TitanGraphIndex} (or subtype)
 * @param updateAction either {@code REINDEX} or {@code REMOVE_INDEX}
 * @return a future that returns immediately;
 *         this method blocks until the Hadoop MapReduce job completes
 * @throws NullPointerException if {@code index} or {@code updateAction} is null
 * @throws IllegalArgumentException if the action or the index type is unsupported, or if
 *         the open graph's store manager class is neither a Cassandra nor an HBase one
 * @throws UnsupportedOperationException if asked to remove a mixed index
 * @throws BackendException declared by the signature; presumably raised by backend
 *         access -- verify against callees
 */
// TODO make this future actually async and update javadoc @return accordingly
public TitanManagement.IndexJobFuture updateIndex(TitanIndex index, SchemaAction updateAction) throws BackendException {
    Preconditions.checkNotNull(index, "Index parameter must not be null");
    Preconditions.checkNotNull(updateAction, "%s parameter must not be null", SchemaAction.class.getSimpleName());
    Preconditions.checkArgument(SUPPORTED_ACTIONS.contains(updateAction),
            "Only these %s parameters are supported: %s (was given %s)",
            SchemaAction.class.getSimpleName(), SUPPORTED_ACTIONS_STRING, updateAction);

    final boolean isRelationTypeIndex = index instanceof RelationTypeIndex;
    // The template has four placeholders: index name, actual class, and the two accepted types
    Preconditions.checkArgument(isRelationTypeIndex || index instanceof TitanGraphIndex,
            "Index %s has class %s: must be a %s or %s (or subtype)",
            index.name(), index.getClass(), RelationTypeIndex.class.getSimpleName(), TitanGraphIndex.class.getSimpleName());

    org.apache.hadoop.conf.Configuration hadoopConf = new org.apache.hadoop.conf.Configuration();
    ModifiableHadoopConfiguration titanmrConf = ModifiableHadoopConfiguration.of(TitanHadoopConfiguration.MAPRED_NS, hadoopConf);

    // The job we'll execute to either REINDEX or REMOVE_INDEX
    final Class<? extends IndexUpdateJob> indexJobClass;
    final Class<? extends Mapper> mapperClass;
    // The class of the IndexUpdateJob and the Mapper that will be used to run it (VertexScanJob vs ScanJob)
    if (updateAction.equals(SchemaAction.REINDEX)) {
        indexJobClass = IndexRepairJob.class;
        mapperClass = HadoopVertexScanMapper.class;
    } else if (updateAction.equals(SchemaAction.REMOVE_INDEX)) {
        indexJobClass = IndexRemoveJob.class;
        mapperClass = HadoopScanMapper.class;
    } else {
        // Shouldn't get here -- if this exception is ever thrown, update SUPPORTED_ACTIONS
        throw new IllegalStateException("Unrecognized " + SchemaAction.class.getSimpleName() + ": " + updateAction);
    }

    // The column family that serves as input to the IndexUpdateJob
    final String readCF;
    if (isRelationTypeIndex) {
        readCF = Backend.EDGESTORE_NAME;
    } else {
        // Sanity check before the cast; the argument check above already guarantees this
        Preconditions.checkState(index instanceof TitanGraphIndex);
        TitanGraphIndex gindex = (TitanGraphIndex) index;
        if (gindex.isMixedIndex() && !updateAction.equals(SchemaAction.REINDEX)) {
            throw new UnsupportedOperationException("External mixed indexes must be removed in the indexing system directly.");
        }
        if (updateAction.equals(SchemaAction.REMOVE_INDEX)) {
            readCF = Backend.INDEXSTORE_NAME;
        } else {
            readCF = Backend.EDGESTORE_NAME;
        }
    }
    titanmrConf.set(TitanHadoopConfiguration.COLUMN_FAMILY_NAME, readCF);

    // The MapReduce InputFormat class based on the open graph's store manager
    final Class<? extends InputFormat> inputFormat;
    final Class<? extends KeyColumnValueStoreManager> storeManagerClass = graph.getBackend().getStoreManagerClass();
    if (CASSANDRA_STORE_MANAGER_CLASSES.contains(storeManagerClass)) {
        inputFormat = CassandraBinaryInputFormat.class;
        // Set the partitioner
        IPartitioner part = ((AbstractCassandraStoreManager) graph.getBackend().getStoreManager()).getCassandraPartitioner();
        hadoopConf.set("cassandra.input.partitioner.class", part.getClass().getName());
    } else if (HBASE_STORE_MANAGER_CLASSES.contains(storeManagerClass)) {
        inputFormat = HBaseBinaryInputFormat.class;
    } else {
        throw new IllegalArgumentException("Store manager class " + storeManagerClass + " is not supported");
    }

    // The index name and relation type name (if the latter is applicable)
    final String indexName = index.name();
    final String relationTypeName = isRelationTypeIndex ? ((RelationTypeIndex) index).getType().name() : "";
    Preconditions.checkNotNull(indexName);

    // Set the class of the IndexUpdateJob
    titanmrConf.set(TitanHadoopConfiguration.SCAN_JOB_CLASS, indexJobClass.getName());
    // Set the configuration of the IndexUpdateJob
    copyIndexJobKeys(hadoopConf, indexName, relationTypeName);
    titanmrConf.set(TitanHadoopConfiguration.SCAN_JOB_CONFIG_ROOT, GraphDatabaseConfiguration.class.getName() + "#JOB_NS");

    // Copy the StandardTitanGraph configuration under TitanHadoopConfiguration.GRAPH_CONFIG_KEYS
    org.apache.commons.configuration.Configuration localbc = graph.getConfiguration().getLocalConfiguration();
    localbc.clearProperty(Graph.GRAPH);
    copyInputKeys(hadoopConf, localbc);

    String jobName = HadoopScanMapper.class.getSimpleName() + "[" + indexJobClass.getSimpleName() + "]";
    try {
        return new CompletedJobFuture(HadoopScanRunner.runJob(hadoopConf, inputFormat, jobName, mapperClass));
    } catch (Exception e) {
        if (e instanceof InterruptedException) {
            // The failure is reported through the future rather than rethrown, so restore
            // the interrupt flag for callers that inspect the thread's interrupt status.
            Thread.currentThread().interrupt();
        }
        return new FailedJobFuture(e);
    }
}
use of com.thinkaurelius.titan.hadoop.config.ModifiableHadoopConfiguration in project titan by thinkaurelius.
the class HadoopScanRunner method runJob.
/**
 * Run a ScanJob on Hadoop MapReduce.
 * <p>
 * The {@code confRootField} parameter must be a string in the format
 * {@code package.package...class#fieldname}, where {@code fieldname} is the
 * name of a public static field on the class specified by the portion of the
 * string before the {@code #}. The {@code #} itself is just a separator and
 * is discarded.
 * <p>
 * When a MapReduce task process prepares to execute the {@code ScanJob}, it will
 * read the public static field named by {@code confRootField} and cast it to a
 * {@link ConfigNamespace}. This namespace object becomes the root of a
 * {@link Configuration} instantiated, populated with the key-value pairs
 * from the {@code conf} parameter, and then passed into the {@code ScanJob}.
 * <p>
 * If {@code confRootField} is given without a {@code conf}, only the
 * configuration root is recorded and no job settings are copied.
 * <p>
 * This method blocks until the ScanJob completes, then returns the metrics
 * generated by the job during its execution. It does not timeout.
 *
 * @param conf configuration settings for the ScanJob; may be null, but if it is
 *        non-null then {@code confRootField} must be non-null as well
 * @param confRootField the root of the ScanJob's configuration; may be null
 * @param hadoopConf the Configuration passed to the MapReduce Job; must not be null
 * @param inputFormat the InputFormat&lt;StaticBuffer, Iterable&lt;Entry&gt;&gt;
 *        that reads (row, columns) pairs out of a Titan edgestore; must not be null
 * @param jobName the name forwarded to the underlying MapReduce job runner
 * @param mapperClass the Mapper class forwarded to the underlying MapReduce job runner
 * @return metrics generated by the ScanJob
 * @throws IllegalArgumentException if {@code hadoopConf} or {@code inputFormat} is null,
 *         or if {@code conf} is provided without {@code confRootField}
 * @throws IOException if the job fails for any reason
 * @throws ClassNotFoundException if a class needed for the job cannot be loaded,
 *         including failures in Hadoop MapReduce's internal job-submission-related
 *         reflection
 * @throws InterruptedException if interrupted while waiting for the Hadoop
 *         MapReduce job to complete
 */
public static ScanMetrics runJob(Configuration conf, String confRootField, org.apache.hadoop.conf.Configuration hadoopConf, Class<? extends InputFormat> inputFormat, String jobName, Class<? extends Mapper> mapperClass) throws IOException, InterruptedException, ClassNotFoundException {
    Preconditions.checkArgument(null != hadoopConf);
    Preconditions.checkArgument(null != inputFormat);
    if (null != conf) {
        Preconditions.checkArgument(null != confRootField, "Configuration root field must be provided when configuration instance is provided");
    }
    ModifiableHadoopConfiguration scanConf = ModifiableHadoopConfiguration.of(TitanHadoopConfiguration.MAPRED_NS, hadoopConf);
    if (null != confRootField) {
        // Set the scanjob configuration root
        scanConf.set(TitanHadoopConfiguration.SCAN_JOB_CONFIG_ROOT, confRootField);
        // Instantiate scanjob configuration root (also done when conf is null, as in the original flow)
        ConfigNamespace confRoot = HadoopScanMapper.getJobRoot(confRootField);
        // A root field without a configuration instance is allowed by the precondition above;
        // in that case there is nothing to copy, so skip instead of dereferencing null.
        if (null != conf) {
            // Create writable view of scanjob configuration atop the Hadoop Configuration instance, where all keys are prefixed with SCAN_JOB_CONFIG_KEYS
            ModifiableConfiguration hadoopJobConf = ModifiableHadoopConfiguration.prefixView(confRoot, TitanHadoopConfiguration.SCAN_JOB_CONFIG_KEYS, scanConf);
            // Copy scanjob settings from the Titan Configuration instance to the Hadoop Configuration instance
            Map<String, Object> jobConfMap = conf.getSubset(confRoot);
            for (Map.Entry<String, Object> jobConfEntry : jobConfMap.entrySet()) {
                hadoopJobConf.set((ConfigOption) ConfigElement.parse(confRoot, jobConfEntry.getKey()).element, jobConfEntry.getValue());
            }
        }
    }
    return runJob(scanConf.getHadoopConfiguration(), inputFormat, jobName, mapperClass);
}
use of com.thinkaurelius.titan.hadoop.config.ModifiableHadoopConfiguration in project titan by thinkaurelius.
the class HadoopScanRunner method runScanJob.
/**
 * Runs the given {@code ScanJob} on Hadoop MapReduce using {@code HadoopScanMapper}.
 * <p>
 * Records the job's class name under {@code SCAN_JOB_CLASS} through a
 * {@code MAPRED_NS} view of {@code hadoopConf}, then delegates to
 * {@code runJob} with a job name derived from the mapper's simple name and
 * the job's string form.
 *
 * @param scanJob the job to run
 * @param conf configuration settings forwarded to {@code runJob}
 * @param confRootField the configuration root field forwarded to {@code runJob}
 * @param hadoopConf the Hadoop configuration that receives the job class setting
 * @param inputFormat the input format forwarded to {@code runJob}
 * @return the metrics returned by {@code runJob}
 * @throws IOException propagated from {@code runJob}
 * @throws InterruptedException propagated from {@code runJob}
 * @throws ClassNotFoundException propagated from {@code runJob}
 */
public static ScanMetrics runScanJob(ScanJob scanJob, Configuration conf, String confRootField, org.apache.hadoop.conf.Configuration hadoopConf, Class<? extends InputFormat> inputFormat) throws IOException, InterruptedException, ClassNotFoundException {
    // NOTE(review): the setting is written via this view while runJob is handed hadoopConf;
    // this assumes the view writes through to hadoopConf -- confirm.
    final ModifiableHadoopConfiguration mapredView = ModifiableHadoopConfiguration.of(TitanHadoopConfiguration.MAPRED_NS, hadoopConf);

    // Presumably checks that the job's class can be loaded by name -- see tryToLoadClassByName
    tryToLoadClassByName(scanJob);

    // Tell the mapper which ScanJob implementation to use
    final String scanJobClassName = scanJob.getClass().getName();
    mapredView.set(TitanHadoopConfiguration.SCAN_JOB_CLASS, scanJobClassName);

    return runJob(
            conf,
            confRootField,
            hadoopConf,
            inputFormat,
            HadoopScanMapper.class.getSimpleName() + "[" + scanJob + "]",
            HadoopScanMapper.class);
}
use of com.thinkaurelius.titan.hadoop.config.ModifiableHadoopConfiguration in project titan by thinkaurelius.
the class HadoopScanRunner method runVertexScanJob.
/**
 * Runs the given {@code VertexScanJob} on Hadoop MapReduce using
 * {@code HadoopVertexScanMapper}.
 * <p>
 * Records the job's class name under {@code SCAN_JOB_CLASS} through a
 * {@code MAPRED_NS} view of {@code hadoopConf}, then delegates to
 * {@code runJob}. The job name is built from {@code HadoopScanMapper}'s
 * simple name and the job's string form.
 *
 * @param vertexScanJob the job to run
 * @param conf configuration settings forwarded to {@code runJob}
 * @param confRootField the configuration root field forwarded to {@code runJob}
 * @param hadoopConf the Hadoop configuration that receives the job class setting
 * @param inputFormat the input format forwarded to {@code runJob}
 * @return the metrics returned by {@code runJob}
 * @throws IOException propagated from {@code runJob}
 * @throws InterruptedException propagated from {@code runJob}
 * @throws ClassNotFoundException propagated from {@code runJob}
 */
public static ScanMetrics runVertexScanJob(VertexScanJob vertexScanJob, Configuration conf, String confRootField, org.apache.hadoop.conf.Configuration hadoopConf, Class<? extends InputFormat> inputFormat) throws IOException, InterruptedException, ClassNotFoundException {
    // NOTE(review): the setting is written via this view while runJob is handed hadoopConf;
    // this assumes the view writes through to hadoopConf -- confirm.
    final ModifiableHadoopConfiguration mapredView = ModifiableHadoopConfiguration.of(TitanHadoopConfiguration.MAPRED_NS, hadoopConf);

    // Presumably checks that the job's class can be loaded by name -- see tryToLoadClassByName
    tryToLoadClassByName(vertexScanJob);

    // Tell the mapper which VertexScanJob implementation to use
    final String vertexScanJobClassName = vertexScanJob.getClass().getName();
    mapredView.set(TitanHadoopConfiguration.SCAN_JOB_CLASS, vertexScanJobClassName);

    return runJob(
            conf,
            confRootField,
            hadoopConf,
            inputFormat,
            HadoopScanMapper.class.getSimpleName() + "[" + vertexScanJob + "]",
            HadoopVertexScanMapper.class);
}
use of com.thinkaurelius.titan.hadoop.config.ModifiableHadoopConfiguration in project titan by thinkaurelius.
the class HadoopScanMapper method setup.
/**
 * Prepares this mapper for a task: resolves the job from the task's Hadoop
 * configuration, creates context-backed scan metrics, and completes setup
 * with the scan configuration and the Titan graph configuration.
 *
 * @param context the MapReduce task context
 * @throws IOException propagated from the superclass setup or the helpers called here
 * @throws InterruptedException propagated from the superclass setup or the helpers called here
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);

    // View the task's Hadoop configuration through the MAPRED_NS namespace
    final org.apache.hadoop.conf.Configuration taskConf = DEFAULT_COMPAT.getContextConfiguration(context);
    final ModifiableHadoopConfiguration mapredView = ModifiableHadoopConfiguration.of(TitanHadoopConfiguration.MAPRED_NS, taskConf);

    job = getJob(mapredView);
    metrics = new HadoopContextScanMetrics(context);

    finishSetup(mapredView, getTitanConfiguration(context));
}
Aggregations