Use of org.apache.rya.accumulo.AccumuloRdfConfiguration in project incubator-rya by apache.
From class SomeValuesFromVisitorTest, the method testSomeValuesFromDisabled:
@Test
public void testSomeValuesFromDisabled() throws Exception {
// Disable someValuesFrom inference
final AccumuloRdfConfiguration disabledConf = conf.clone();
disabledConf.setInferSomeValuesFrom(false);
// Configure a mock inference engine with an ontology:
final InferenceEngine inferenceEngine = mock(InferenceEngine.class);
Map<Resource, Set<URI>> personSVF = new HashMap<>();
personSVF.put(gradCourse, Sets.newHashSet(takesCourse));
personSVF.put(course, Sets.newHashSet(takesCourse));
personSVF.put(department, Sets.newHashSet(headOf));
personSVF.put(organization, Sets.newHashSet(worksFor, headOf));
when(inferenceEngine.getSomeValuesFromByRestrictionType(person)).thenReturn(personSVF);
// Query for a specific type and visit it -- the query should not change
StatementPattern originalSP = new StatementPattern(new Var("s"), new Var("p", RDF.TYPE), new Var("o", person));
final Projection originalQuery = new Projection(originalSP, new ProjectionElemList(new ProjectionElem("s", "subject")));
final Projection modifiedQuery = originalQuery.clone();
modifiedQuery.visit(new SomeValuesFromVisitor(disabledConf, inferenceEngine));
Assert.assertEquals(originalQuery, modifiedQuery);
}
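For contrast, a minimal sketch of the enabled path, assuming the same fixtures as the test above. The exact rewritten algebra depends on the visitor, so this hypothetical assertion only checks that the query changes:

// Hedged sketch (assumption): with someValuesFrom inference left enabled, the visitor
// should rewrite the type pattern, so the visited query no longer equals the original.
final Projection enabledQuery = originalQuery.clone();
enabledQuery.visit(new SomeValuesFromVisitor(conf, inferenceEngine));
Assert.assertNotEquals(originalQuery, enabledQuery);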
Use of org.apache.rya.accumulo.AccumuloRdfConfiguration in project incubator-rya by apache.
From class PcjIntegrationTestingUtil, the method getAccumuloNonPcjRepo:
public static SailRepository getAccumuloNonPcjRepo(final String tablePrefix, final String instance) throws AccumuloException, AccumuloSecurityException, RyaDAOException, RepositoryException, InferenceEngineException, NumberFormatException, UnknownHostException, SailException {
final AccumuloRdfConfiguration nonPcjConf = new AccumuloRdfConfiguration();
populateAccumuloConfig(instance, tablePrefix, nonPcjConf);
final Sail nonPcjSail = RyaSailFactory.getInstance(nonPcjConf);
final SailRepository nonPcjRepo = new SailRepository(nonPcjSail);
return nonPcjRepo;
}
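A hedged usage sketch for this helper, using the OpenRDF repository API; the table prefix and instance name are placeholder values:

// Illustrative only -- "rya_" and "ryaInstance" are placeholders.
final SailRepository repo = PcjIntegrationTestingUtil.getAccumuloNonPcjRepo("rya_", "ryaInstance");
final SailRepositoryConnection conn = repo.getConnection();
try {
    // Run queries or updates against the non-PCJ Rya repository here.
} finally {
    conn.close();
    repo.shutDown();
}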
Use of org.apache.rya.accumulo.AccumuloRdfConfiguration in project incubator-rya by apache.
From class CopyTool, the method runQueryCopy:
private int runQueryCopy() throws Exception {
log.info("Setting up Copy Tool with a query-based ruleset...");
setup();
if (!useCopyFileOutput) {
createChildInstance(conf);
}
// Set up the configuration
final AccumuloRdfConfiguration aconf = new AccumuloRdfConfiguration(conf);
aconf.setBoolean(ConfigUtils.USE_MOCK_INSTANCE, mock);
aconf.setTablePrefix(tablePrefix);
aconf.setFlush(false);
ConfigUtils.setIndexers(aconf);
// Since we're copying at the statement level, ignore any given list of tables and determine
// which tables we might need to create based on which indexers are desired.
final TablePrefixLayoutStrategy prefixStrategy = new TablePrefixLayoutStrategy(tablePrefix);
tables.clear();
// Always include core tables
tables.add(prefixStrategy.getSpo());
tables.add(prefixStrategy.getOsp());
tables.add(prefixStrategy.getPo());
// Copy namespaces if they exist
tables.add(prefixStrategy.getNs());
// Add tables associated with any configured indexers
/* TODO: SEE RYA-160
if (aconf.getBoolean(ConfigUtils.USE_FREETEXT, false)) {
tables.add(ConfigUtils.getFreeTextDocTablename(conf));
tables.add(ConfigUtils.getFreeTextTermTablename(conf));
}
if (aconf.getBoolean(ConfigUtils.USE_GEO, false)) {
tables.add(ConfigUtils.getGeoTablename(conf));
}
if (aconf.getBoolean(ConfigUtils.USE_TEMPORAL, false)) {
tables.add(ConfigUtils.getTemporalTableName(conf));
}
if (aconf.getBoolean(ConfigUtils.USE_ENTITY, false)) {
tables.add(ConfigUtils.getEntityTableName(conf));
}
*/
// Ignore anything else, e.g. statistics -- must be recalculated for the child if desired
// Extract the ruleset, and copy the namespace table directly
final AccumuloQueryRuleset ruleset = new AccumuloQueryRuleset(aconf);
ruleset.addTable(prefixStrategy.getNs());
for (final String line : ruleset.toString().split("\n")) {
log.info(line);
}
// Create a Job and configure its input and output
final Job job = Job.getInstance(aconf);
job.setJarByClass(this.getClass());
setupMultiTableInputFormat(job, ruleset);
setupAccumuloOutput(job, "");
if (useCopyFileOutput) {
// Configure job for file output
job.setJobName("Ruleset-based export to file: " + tablePrefix + " -> " + localBaseOutputDir);
// Map (row) to (table+key, key+value)
job.setMapperClass(RowRuleMapper.class);
job.setMapOutputKeyClass(GroupedRow.class);
job.setMapOutputValueClass(GroupedRow.class);
// Group according to table and sort according to key
job.setGroupingComparatorClass(GroupedRow.GroupComparator.class);
job.setSortComparatorClass(GroupedRow.SortComparator.class);
// Reduce ([table+row], rows): output each row to the file for that table, in sorted order
job.setReducerClass(MultipleFileReducer.class);
job.setOutputKeyClass(Key.class);
job.setOutputValueClass(Value.class);
} else {
// Configure job for table output
job.setJobName("Ruleset-based copy: " + tablePrefix + " -> " + childTablePrefix);
// Map (row): convert to statement, insert to child (for namespace table, output row directly)
job.setMapperClass(AccumuloRyaRuleMapper.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Mutation.class);
job.setNumReduceTasks(0);
// Create the child tables, so mappers don't try to do this in parallel
for (final String parentTable : tables) {
final String childTable = parentTable.replaceFirst(tablePrefix, childTablePrefix);
createTableIfNeeded(childTable);
}
}
// Run the job and copy files to local filesystem if needed
final Date beginTime = new Date();
log.info("Job started: " + beginTime);
final boolean success = job.waitForCompletion(true);
if (success) {
if (useCopyFileOutput) {
log.info("Moving data from HDFS to the local file system");
final Path baseOutputPath = new Path(baseOutputDir);
for (final FileStatus status : FileSystem.get(conf).listStatus(baseOutputPath)) {
if (status.isDirectory()) {
final String tableName = status.getPath().getName();
final Path hdfsPath = getPath(baseOutputDir, tableName);
final Path localPath = getPath(localBaseOutputDir, tableName);
log.info("HDFS directory: " + hdfsPath.toString());
log.info("Local directory: " + localPath.toString());
copyHdfsToLocal(hdfsPath, localPath);
}
}
}
final Date endTime = new Date();
log.info("Job finished: " + endTime);
log.info("The job took " + (endTime.getTime() - beginTime.getTime()) / 1000 + " seconds.");
return 0;
} else {
log.error("Job failed!!!");
return 1;
}
}
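The table names above come from TablePrefixLayoutStrategy, and the child tables are derived by swapping prefixes. A minimal sketch, assuming Rya's standard layout suffixes ("spo", "po", "osp", "ns") and illustrative prefixes:

// "rya_" and "child_" are illustrative; the exact suffix values are an assumption.
final TablePrefixLayoutStrategy strategy = new TablePrefixLayoutStrategy("rya_");
final String spo = strategy.getSpo();                       // expected: "rya_spo"
final String childSpo = spo.replaceFirst("rya_", "child_"); // "child_spo", mirroring the copy above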
Use of org.apache.rya.accumulo.AccumuloRdfConfiguration in project incubator-rya by apache.
From class BaseRuleMapper, the method setup:
@Override
protected void setup(final Context context) throws IOException, InterruptedException {
final Configuration conf = context.getConfiguration();
split = (RangeInputSplit) context.getInputSplit();
final Range range = split.getRange();
// Determine the table and table layout we're scanning
parentTableName = split.getTableName();
parentTablePrefix = conf.get(MRUtils.TABLE_PREFIX_PROPERTY);
for (final TABLE_LAYOUT layout : TABLE_LAYOUT.values()) {
final String tableName = RdfCloudTripleStoreUtils.layoutPrefixToTable(layout, parentTablePrefix);
if (tableName.equals(parentTableName)) {
parentLayout = layout;
}
}
conf.set(MergeTool.TABLE_NAME_PROP, parentTableName);
// Set up connections and parent/child table information, if necessary
super.setup(context);
// If we're working at the statement level, get the relevant rules and conditions:
if (parentLayout != null) {
AccumuloQueryRuleset ruleset;
try {
ruleset = new AccumuloQueryRuleset(new AccumuloRdfConfiguration(conf));
} catch (final QueryRulesetException e) {
throw new IOException("Error parsing the input query", e);
}
final List<CopyRule> rules = ruleset.getRules(parentLayout, range);
for (final CopyRule rule : rules) {
log.info("Mapper applies to rule:");
for (final String line : rule.toString().split("\n")) {
log.info("\t" + line);
}
}
// Combine the conditions of all the rules that apply to the rows this input split
// will receive, so if any condition is true we'll want to copy the statement.
for (final CopyRule rule : rules) {
// If a rule has no condition, it applies unconditionally, so copy every statement
// (even if there are redundant rules with conditions)
if (rule.getCondition() == null) {
condition = null;
break;
}
// If this is the first set of conditions, matching it means we should accept the statement.
else if (condition == null) {
condition = rule.getCondition();
}
// If more than one rule matches, satisfying any rule's conditions means we should accept.
else {
condition = new Or(condition, rule.getCondition());
}
}
// Set up the strategy to evaluate those conditions
strategy = new ParallelEvaluationStrategyImpl(null, null, null, childAccumuloRdfConfiguration);
// Log info about the split and combined condition
log.info("Table: " + parentTableName);
log.info("Range:");
log.info("\tfrom " + keyToString(range.getStartKey(), Integer.MAX_VALUE));
log.info("\tto " + keyToString(range.getEndKey(), Integer.MAX_VALUE));
if (condition == null) {
log.info("Condition: none");
} else {
log.info("Condition:");
for (final String line : condition.toString().split("\n")) {
log.info("\t" + line);
}
}
} else {
log.info("(Copying all rows from " + parentTableName + " directly.)");
}
}
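Once combined, the disjunction can be evaluated per statement via the strategy's isTrue method from the OpenRDF EvaluationStrategy interface. A hedged sketch, where statementToBindings is a hypothetical helper exposing the statement's subject/predicate/object as bindings (exception handling omitted):

// statementToBindings is hypothetical; the real mapper derives bindings from the parsed row.
final BindingSet bindings = statementToBindings(statement);
// A null condition means some rule matched unconditionally, so every statement is copied.
final boolean copy = condition == null || strategy.isTrue(condition, bindings);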
Use of org.apache.rya.accumulo.AccumuloRdfConfiguration in project incubator-rya by apache.
From class MergeToolMapper, the method setup:
@Override
protected void setup(final Context context) throws IOException, InterruptedException {
super.setup(context);
log.info("Setting up mapper");
parentConfig = context.getConfiguration();
childConfig = getChildConfig(parentConfig);
startTimeString = parentConfig.get(MergeTool.START_TIME_PROP, null);
if (startTimeString != null) {
startTime = MergeTool.convertStartTimeStringToDate(startTimeString);
}
usesStartTime = startTime != null;
useTimeSync = parentConfig.getBoolean(CopyTool.USE_NTP_SERVER_PROP, false);
useMergeFileInput = parentConfig.getBoolean(MergeTool.USE_MERGE_FILE_INPUT, false);
parentTableName = parentConfig.get(MergeTool.TABLE_NAME_PROP, null);
parentTablePrefix = parentConfig.get(MRUtils.TABLE_PREFIX_PROPERTY, null);
childTablePrefix = childConfig.get(MRUtils.TABLE_PREFIX_PROPERTY, null);
if (useMergeFileInput) {
childTableName = parentTableName.replaceFirst(parentTablePrefix, childTablePrefix) + MergeTool.TEMP_SUFFIX;
} else {
childTableName = parentTableName.replaceFirst(parentTablePrefix, childTablePrefix);
}
spoTable = new Text(parentTablePrefix + RdfCloudTripleStoreConstants.TBL_SPO_SUFFIX);
poTable = new Text(parentTablePrefix + RdfCloudTripleStoreConstants.TBL_PO_SUFFIX);
ospTable = new Text(parentTablePrefix + RdfCloudTripleStoreConstants.TBL_OSP_SUFFIX);
childScanner = setupChildScanner(context);
childIterator = childScanner.iterator();
parentAccumuloRdfConfiguration = new AccumuloRdfConfiguration(parentConfig);
parentAccumuloRdfConfiguration.setTablePrefix(parentTablePrefix);
parentRyaContext = RyaTripleContext.getInstance(parentAccumuloRdfConfiguration);
ryaTableMutationFactory = new RyaTableMutationsFactory(parentRyaContext);
childAccumuloRdfConfiguration = new AccumuloRdfConfiguration(childConfig);
childAccumuloRdfConfiguration.setTablePrefix(childTablePrefix);
childRyaContext = RyaTripleContext.getInstance(childAccumuloRdfConfiguration);
childConnector = AccumuloRyaUtils.setupConnector(childAccumuloRdfConfiguration);
childDao = AccumuloRyaUtils.setupDao(childConnector, childAccumuloRdfConfiguration);
if (startTime != null && useTimeSync) {
try {
copyToolInputTime = AccumuloRyaUtils.getCopyToolSplitDate(childDao);
copyToolRunTime = AccumuloRyaUtils.getCopyToolRunDate(childDao);
// Find the parent's time offset that was stored when the child was copied.
parentTimeOffset = AccumuloRyaUtils.getTimeOffset(childDao);
final String durationBreakdown = TimeUtils.getDurationBreakdown(parentTimeOffset);
log.info("The table " + parentTableName + " has a time offset of: " + durationBreakdown);
childTimeOffset = Long.valueOf(childConfig.get(CopyTool.CHILD_TIME_OFFSET_PROP, null));
final Date adjustedParentStartTime = new Date(startTime.getTime() - parentTimeOffset);
final Date adjustedChildStartTime = new Date(startTime.getTime() - childTimeOffset);
log.info("Adjusted parent start time: " + adjustedParentStartTime);
log.info("Adjusted child start time: " + adjustedChildStartTime);
} catch (final RyaDAOException e) {
log.error("Error getting time offset", e);
}
}
log.info("Finished setting up mapper");
}
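The adjusted start times above subtract each instance's recorded clock offset from the shared start time, putting parent and child rows on a common timeline. A small worked sketch with illustrative values:

// Illustrative values: the parent clock was 5 seconds ahead when the copy ran.
final long parentTimeOffset = 5_000L;
final Date startTime = new Date(1_500_000_000_000L);
// 5 seconds earlier by the parent's clock corresponds to the shared start instant.
final Date adjustedParentStartTime = new Date(startTime.getTime() - parentTimeOffset);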