Example usage of org.apache.rya.accumulo.AccumuloRdfConfiguration in the Apache incubator-rya project:
the getConf method of the AccumuloPcjIT class.
/**
 * Builds the Accumulo configuration used by the PCJ integration tests.
 *
 * @return a {@link Configuration} pointed at an in-memory mock Accumulo
 *     instance with root credentials, empty authorizations, the "rya_"
 *     table prefix, and Accumulo-backed PCJ storage.
 */
private static Configuration getConf() {
    final AccumuloRdfConfiguration testConf = new AccumuloRdfConfiguration();
    // Run against an in-memory mock instance rather than a live cluster.
    testConf.setBoolean(ConfigUtils.USE_MOCK_INSTANCE, true);
    // Table naming used by the Rya core tables.
    testConf.set(RdfCloudTripleStoreConfiguration.CONF_TBL_PREFIX, "rya_");
    // Connection credentials for the mock instance.
    testConf.set(ConfigUtils.CLOUDBASE_USER, "root");
    testConf.set(ConfigUtils.CLOUDBASE_PASSWORD, "");
    testConf.set(ConfigUtils.CLOUDBASE_INSTANCE, "instance");
    testConf.set(ConfigUtils.CLOUDBASE_AUTHS, "");
    // Store precomputed joins in Accumulo.
    testConf.set(PrecomputedJoinIndexerConfig.PCJ_STORAGE_TYPE, PrecomputedJoinStorageType.ACCUMULO.name());
    return testConf;
}
Example usage of org.apache.rya.accumulo.AccumuloRdfConfiguration in the Apache incubator-rya project:
the getAccumuloPcjRepo method of the PcjIntegrationTestingUtil class.
/**
 * Creates a {@link SailRepository} backed by a Rya sail that is configured
 * to use Accumulo-based precomputed joins (PCJs).
 *
 * @param tablePrefix the Rya table prefix to configure.
 * @param instance the Accumulo instance name to connect to.
 * @return an initialized {@link SailRepository} with PCJ support enabled.
 */
public static SailRepository getAccumuloPcjRepo(final String tablePrefix, final String instance) throws AccumuloException, AccumuloSecurityException, RyaDAOException, RepositoryException, InferenceEngineException, NumberFormatException, UnknownHostException, SailException {
    final AccumuloRdfConfiguration config = new AccumuloRdfConfiguration();
    // Turn on PCJ usage and back the PCJ storage with Accumulo.
    config.set(ConfigUtils.USE_PCJ, "true");
    config.set(PrecomputedJoinIndexerConfig.PCJ_STORAGE_TYPE, PrecomputedJoinStorageType.ACCUMULO.name());
    // Fill in the connection details for the given instance and prefix.
    populateAccumuloConfig(instance, tablePrefix, config);
    // Wrap the configured sail in a repository for the caller.
    return new SailRepository(RyaSailFactory.getInstance(config));
}
Example usage of org.apache.rya.accumulo.AccumuloRdfConfiguration in the Apache incubator-rya project:
the createTableIfNeeded method of the CopyTool class.
/**
 * Creates the child table if it doesn't already exist, and grants the child
 * user WRITE permission on any newly created table.
 * <p>
 * A concurrent creation of the same table by another process is treated as
 * success: the table exists afterwards, which is all this method requires.
 * @param childTableName the name of the child table.
 * @throws IOException if connecting to the child instance, creating the
 *     table, or granting the permission fails.
 */
public void createTableIfNeeded(final String childTableName) throws IOException {
    try {
        // Build a connector against the child instance using the child-side config.
        final Configuration childConfig = MergeToolMapper.getChildConfig(conf);
        final AccumuloRdfConfiguration childAccumuloRdfConfiguration = new AccumuloRdfConfiguration(childConfig);
        childAccumuloRdfConfiguration.setTablePrefix(childTablePrefix);
        final Connector childConnector = AccumuloRyaUtils.setupConnector(childAccumuloRdfConfiguration);
        if (!childConnector.tableOperations().exists(childTableName)) {
            log.info("Creating table: " + childTableName);
            childConnector.tableOperations().create(childTableName);
            log.info("Created table: " + childTableName);
            log.info("Granting authorizations to table: " + childTableName);
            childConnector.securityOperations().grantTablePermission(childUserName, childTableName, TablePermission.WRITE);
            log.info("Granted authorizations to table: " + childTableName);
        }
    } catch (final TableExistsException e) {
        // Benign race: another process created the table between our existence
        // check and the create call. The desired end state (table exists) holds,
        // so don't fail the tool; note that the WRITE grant is skipped here.
        log.warn("Table already exists: " + childTableName, e);
    } catch (final AccumuloException | AccumuloSecurityException e) {
        throw new IOException(e);
    }
}
Example usage of org.apache.rya.accumulo.AccumuloRdfConfiguration in the Apache incubator-rya project:
the importFilesToChildTable method of the CopyTool class.
/**
 * Imports the files that hold the table data into the child instance.
 * <p>
 * Copies the locally staged RFiles into HDFS, opens the permissions so the
 * Accumulo user can read/move them, then bulk-imports the directory into the
 * child table.
 * @param childTableName the name of the child table to import.
 * @throws Exception if the table name is invalid, any filesystem operation
 *     fails, or the bulk import fails.
 */
public void importFilesToChildTable(final String childTableName) throws Exception {
    final String normalizedChildTableName = FilenameUtils.normalize(childTableName);
    if (normalizedChildTableName == null) {
        throw new Exception("Invalid child table name: " + childTableName);
    }
    // Connect to the child instance with the child-side configuration.
    final Configuration childConfig = MergeToolMapper.getChildConfig(conf);
    final AccumuloRdfConfiguration childAccumuloRdfConfiguration = new AccumuloRdfConfiguration(childConfig);
    childAccumuloRdfConfiguration.setTablePrefix(childTablePrefix);
    final Connector childConnector = AccumuloRyaUtils.setupConnector(childAccumuloRdfConfiguration);
    final TableOperations childTableOperations = childConnector.tableOperations();
    final Path localWorkDir = getPath(localCopyFileImportDir, normalizedChildTableName);
    final Path hdfsBaseWorkDir = getPath(baseImportDir, normalizedChildTableName);
    final FileSystem fs = FileSystem.get(conf);
    // Start from a clean HDFS staging directory for this table.
    if (fs.exists(hdfsBaseWorkDir)) {
        fs.delete(hdfsBaseWorkDir, true);
    }
    log.info("Importing from the local directory: " + localWorkDir);
    log.info("Importing to the HDFS directory: " + hdfsBaseWorkDir);
    copyLocalToHdfs(localWorkDir, hdfsBaseWorkDir);
    final Path files = getPath(hdfsBaseWorkDir.toString(), "files");
    final Path failures = getPath(hdfsBaseWorkDir.toString(), "failures");
    // With HDFS permissions on, we need to make sure the Accumulo user can read/move the files
    final FsShell hdfs = new FsShell(conf);
    if (!fs.isDirectory(hdfsBaseWorkDir)) {
        throw new IllegalArgumentException("Configured working directory is not a valid directory: " + hdfsBaseWorkDir.toString());
    }
    // FsShell.run returns a non-zero exit code on failure instead of throwing;
    // surface it rather than importing files Accumulo may not be able to move.
    final int chmodExitCode = hdfs.run(new String[] { "-chmod", "777", hdfsBaseWorkDir.toString() });
    if (chmodExitCode != 0) {
        log.warn("chmod of HDFS working directory returned exit code " + chmodExitCode + ": " + hdfsBaseWorkDir);
    }
    // The bulk import requires an empty failures directory.
    if (fs.exists(failures)) {
        fs.delete(failures, true);
    }
    fs.mkdirs(failures);
    childTableOperations.importDirectory(normalizedChildTableName, files.toString(), failures.toString(), false);
}
Example usage of org.apache.rya.accumulo.AccumuloRdfConfiguration in the Apache incubator-rya project:
the runCopy method of the CopyTool class.
/**
 * Runs one MapReduce copy job per configured parent table, copying each
 * parent table into the corresponding child table (or, in file-output mode,
 * into RFiles that are then pulled down to the local filesystem).
 *
 * @return 0 if every job succeeded; the first failing job's non-zero exit
 *     code otherwise.
 * @throws Exception if setup, job submission, or the HDFS-to-local copy fails.
 */
private int runCopy() throws Exception {
    log.info("Setting up Copy Tool...");
    setup();
    // In file-output mode the data goes to RFiles, so no child instance is needed.
    if (!useCopyFileOutput) {
        createChildInstance(conf);
    }
    final AccumuloRdfConfiguration parentAccumuloRdfConfiguration = new AccumuloRdfConfiguration(conf);
    parentAccumuloRdfConfiguration.setTablePrefix(tablePrefix);
    final Connector parentConnector = AccumuloRyaUtils.setupConnector(parentAccumuloRdfConfiguration);
    final TableOperations parentTableOperations = parentConnector.tableOperations();
    for (final String table : tables) {
        // Check if the parent table exists before creating a job on it
        if (parentTableOperations.exists(table)) {
            // Derive the child table name by swapping the table-name prefix.
            final String childTable = table.replaceFirst(tablePrefix, childTablePrefix);
            final String jobName = "Copy Tool, copying Parent Table: " + table + ", into Child Table: " + childTable + ", " + System.currentTimeMillis();
            log.info("Initializing job: " + jobName);
            // NOTE: the shared conf is mutated per iteration; Job.getInstance
            // copies it, so each job snapshots the values set here.
            conf.set(MRUtils.JOB_NAME_PROP, jobName);
            conf.set(MergeTool.TABLE_NAME_PROP, table);
            final Job job = Job.getInstance(conf);
            job.setJarByClass(CopyTool.class);
            setupAccumuloInput(job);
            InputFormatBase.setInputTableName(job, table);
            // Set input output of the particular job
            if (useCopyFileOutput) {
                // File-output mode emits raw Accumulo Key/Value pairs (RFile format).
                job.setMapOutputKeyClass(Key.class);
                job.setMapOutputValueClass(Value.class);
                job.setOutputKeyClass(Key.class);
                job.setOutputValueClass(Value.class);
            } else {
                // Direct mode writes Mutations keyed by table name into the child instance.
                job.setMapOutputKeyClass(Text.class);
                job.setMapOutputValueClass(Mutation.class);
                job.setOutputKeyClass(Text.class);
                job.setOutputValueClass(Mutation.class);
            }
            setupAccumuloOutput(job, childTable);
            // Set mapper and reducer classes
            if (useCopyFileOutput) {
                setupSplitsFile(job, parentTableOperations, table, childTable);
                job.setMapperClass(FileCopyToolMapper.class);
            } else {
                job.setMapperClass(AccumuloCopyToolMapper.class);
            }
            // Identity reducer: mapper output is passed through unchanged.
            job.setReducerClass(Reducer.class);
            // Submit the job
            final Date beginTime = new Date();
            log.info("Job for table \"" + table + "\" started: " + beginTime);
            final int exitCode = job.waitForCompletion(true) ? 0 : 1;
            if (exitCode == 0) {
                if (useCopyFileOutput) {
                    // Pull the generated RFiles out of HDFS for local staging.
                    log.info("Moving data from HDFS to the local file system for the table: " + childTable);
                    final Path hdfsPath = getPath(baseOutputDir, childTable);
                    final Path localPath = getPath(localBaseOutputDir, childTable);
                    log.info("HDFS directory: " + hdfsPath.toString());
                    log.info("Local directory: " + localPath.toString());
                    copyHdfsToLocal(hdfsPath, localPath);
                }
                final Date endTime = new Date();
                log.info("Job for table \"" + table + "\" finished: " + endTime);
                log.info("The job took " + (endTime.getTime() - beginTime.getTime()) / 1000 + " seconds.");
            } else {
                // Abort on the first failed job; remaining tables are not copied.
                log.error("Job for table \"" + table + "\" Failed!!!");
                return exitCode;
            }
        } else {
            log.warn("The table \"" + table + "\" was NOT found in the parent instance and cannot be copied.");
        }
    }
    return 0;
}
Aggregations