use of org.openrdf.rio.RDFFormat in project blueprints by tinkerpop.
the class SparqlRepositorySailGraph method ignoreDatatypesInAllParsers.
/**
 * Wraps every registered RDF parser factory so that the parsers it produces
 * ignore invalid values in data-typed literals (e.g. a value of "fish" for an
 * xsd:integer literal, or a value of 1995-01-01T00:00:00+02:00 for an
 * xsd:gYear literal).
 * The default behavior is to throw an exception when bad literals are
 * encountered, resulting in failure.
 *
 * <p>Replacement factories are collected first and swapped into the registry
 * only after the loop, so the registry is never modified while its key set is
 * being iterated.
 */
private static void ignoreDatatypesInAllParsers() {
    final RDFParserRegistry registry = RDFParserRegistry.getInstance();
    final Collection<RDFParserFactory> oldFactories = new LinkedList<RDFParserFactory>();
    final Collection<RDFParserFactory> newFactories = new LinkedList<RDFParserFactory>();

    for (final RDFFormat format : registry.getKeys()) {
        final RDFParserFactory original = registry.get(format);

        // No parser is created here: configuring a throw-away parser would have
        // no lasting effect. Datatype handling is applied to each parser at the
        // moment the wrapper hands it out.
        final RDFParserFactory lenient = new RDFParserFactory() {
            @Override
            public RDFFormat getRDFFormat() {
                return format;
            }

            @Override
            public RDFParser getParser() {
                final RDFParser parser = original.getParser();
                parser.setDatatypeHandling(RDFParser.DatatypeHandling.IGNORE);
                return parser;
            }
        };

        oldFactories.add(original);
        newFactories.add(lenient);
    }

    // Remove all originals before adding the wrappers, as the original code did.
    for (RDFParserFactory original : oldFactories) {
        registry.remove(original);
    }
    for (RDFParserFactory lenient : newFactories) {
        registry.add(lenient);
    }
}
use of org.openrdf.rio.RDFFormat in project incubator-rya by apache.
the class AbstractAccumuloMRTool method setupFileInput.
/**
 * Configures a job to read its input from RDF file(s): the job receives
 * ({@link org.apache.hadoop.io.LongWritable}, {@link RyaStatementWritable})
 * pairs from the RDF file(s) found at the given path.
 *
 * <p>The serialization format is taken from
 * {@link MRUtils#getRDFFormat} applied to this tool's configuration; when
 * that yields {@code null}, {@code defaultFormat} is used instead.
 *
 * @param job Job to configure
 * @param inputPath File or directory name
 * @param defaultFormat RDF serialization format to fall back on when the
 *      configuration does not specify one (see {@link MRUtils#FORMAT_PROP})
 * @throws IOException if there's an error interacting with the
 *      {@link org.apache.hadoop.fs.FileSystem}.
 */
protected void setupFileInput(Job job, String inputPath, RDFFormat defaultFormat) throws IOException {
    final RDFFormat configured = MRUtils.getRDFFormat(conf);
    final RDFFormat effective = (configured != null) ? configured : defaultFormat;

    final Path source = new Path(inputPath);
    RdfFileInputFormat.addInputPath(job, source);
    RdfFileInputFormat.setRDFFormat(job, effective);
    job.setInputFormatClass(RdfFileInputFormat.class);
}
use of org.openrdf.rio.RDFFormat in project incubator-rya by apache.
the class RyaCommands method loadData.
/**
 * Loads RDF statements from a local file into the connected Rya instance.
 *
 * <p>A leading {@code ~} in {@code file} is expanded to the user's home
 * directory. When {@code format} is supplied it is resolved with
 * {@link RDFFormat#valueOf(String)}; otherwise the format is detected from
 * the file name and the detected format is printed to the console.
 *
 * @param file path of the local file containing the RDF statements
 * @param format name of the RDF format of the file, or {@code null} to
 *      detect the format from the file name
 * @return a message naming the loaded file and the elapsed time in seconds
 * @throws RuntimeException if the format name is not supported, if the format
 *      cannot be detected from the file name, or if loading fails with a
 *      {@link RyaClientException} or {@link IOException} (kept as the cause)
 */
@CliCommand(value = LOAD_DATA_CMD, help = "Loads RDF Statement data from a local file to the connected Rya instance.")
public String loadData(
        @CliOption(key = { "file" }, mandatory = true, help = "A local file containing RDF Statements that is to be loaded.") final String file,
        @CliOption(key = { "format" }, mandatory = false, help = "The format of the supplied RDF Statements file. [RDF/XML, N-Triples, Turtle, N3, TriX, TriG, BinaryRDF, N-Quads, JSON-LD, RDF/JSON, RDFa]") final String format) {
    // Fetch the command that is connected to the store.
    final ShellState shellState = state.getShellState();
    final RyaClient commands = shellState.getConnectedCommands().get();
    final Optional<String> ryaInstanceName = shellState.getRyaInstanceName();
    try {
        final long start = System.currentTimeMillis();

        // Expand a leading '~' to the user's home directory. This is done with
        // plain string operations rather than a regex replacement, because a
        // home directory containing '$' or '\' would be misinterpreted (or
        // rejected) when used as a regex replacement string.
        final String userHome = FilenameUtils.separatorsToUnix(System.getProperty("user.home"));
        final String expandedFile = file.startsWith("~") ? userHome + file.substring(1) : file;
        final Path rootedFile = Paths.get(expandedFile);

        final RDFFormat rdfFormat;
        if (format != null) {
            // If a format was provided, then go with that.
            rdfFormat = RDFFormat.valueOf(format);
            if (rdfFormat == null) {
                throw new RuntimeException("Unsupported RDF Statement data input format: " + format);
            }
        } else {
            // Otherwise try to figure it out using the filename. A path without
            // a file name component (e.g. a root directory) cannot be detected.
            final Path fileName = rootedFile.getFileName();
            rdfFormat = (fileName == null) ? null : RDFFormat.forFileName(fileName.toString());
            if (rdfFormat == null) {
                throw new RuntimeException("Unable to detect RDF Statement data input format for file: " + rootedFile);
            }
            consolePrinter.println("Detected RDF Format: " + rdfFormat);
            consolePrinter.flush();
        }

        commands.getLoadStatementsFile().loadStatements(ryaInstanceName.get(), rootedFile, rdfFormat);

        final String seconds = new DecimalFormat("0.0##").format((System.currentTimeMillis() - start) / 1000.0);
        return "Loaded the file: '" + file + "' successfully in " + seconds + " seconds.";
    } catch (final RyaClientException | IOException e) {
        log.error("Error", e);
        throw new RuntimeException("Can not load the RDF Statement data. Reason: " + e.getMessage(), e);
    }
}
use of org.openrdf.rio.RDFFormat in project incubator-rya by apache.
the class RdfFileInputFormat method createRecordReader.
/**
 * Builds the RecordReader for a given task attempt.
 *
 * <p>The RDF format comes from {@code getRDFFormat(taskAttemptContext)},
 * falling back to {@code DEFAULT_RDF_FORMAT} when that returns {@code null}.
 * The character buffer size, statement buffer size and timeout are read from
 * the task's configuration, each with its own default.
 *
 * @param inputSplit Input split to handle, may refer to part or all of
 *      an RDF file; it is not consulted by this method
 * @param taskAttemptContext Contains configuration options.
 * @return A RecordReader that reads and parses RDF text.
 */
@Override
public RecordReader<LongWritable, RyaStatementWritable> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) {
    final Configuration settings = taskAttemptContext.getConfiguration();

    final RDFFormat requested = getRDFFormat(taskAttemptContext);
    final RDFFormat effective = (requested == null) ? DEFAULT_RDF_FORMAT : requested;

    // Arguments are evaluated left to right, i.e. in the same order the
    // settings were originally read.
    return new RdfFileRecordReader(
            effective,
            settings.getInt(CHAR_BUFFER_SIZE_PROP, DEFAULT_CHAR_BUFFER_SIZE),
            settings.getInt(STATEMENT_BUFFER_SIZE_PROP, DEFAULT_STATEMENT_BUFFER_SIZE),
            settings.getInt(TIMEOUT_PROP, DEFAULT_TIMEOUT));
}
use of org.openrdf.rio.RDFFormat in project incubator-rya by apache.
the class MongoSpinIT method insertDataFile.
/**
 * Adds the contents of an RDF data file to the repository.
 *
 * <p>The RDF format is looked up from the file part of the URL before a
 * connection is opened; the connection is handed to {@code closeQuietly}
 * whether or not the insert succeeds.
 *
 * @param dataFile location of the RDF data to insert
 * @param defaultNamespace passed to the connection as the second argument of
 *      {@code add} (presumably the base URI for relative names; confirm
 *      against the repository API)
 * @throws Exception if opening the connection or adding the data fails
 */
private void insertDataFile(URL dataFile, String defaultNamespace) throws Exception {
    final String fileName = dataFile.getFile();
    final RDFFormat detectedFormat = Rio.getParserFormatForFileName(fileName);

    final SailRepositoryConnection connection = repository.getConnection();
    try {
        connection.add(dataFile, defaultNamespace, detectedFormat);
    } finally {
        closeQuietly(connection);
    }
}
Aggregations