Use of org.apache.hadoop.mapred.FileSplit in project voldemort by voldemort.
The class BdbBuildPerformanceTest, method main.
public static void main(String[] args) throws FileNotFoundException, IOException {
    if (args.length != 3)
        Utils.croak("USAGE: java " + BdbBuildPerformanceTest.class.getName()
                    + " serverPropsFile storeName jsonSequenceDataFile");
    String serverPropsFile = args[0];
    String storeName = args[1];
    String jsonDataFile = args[2];
    final Store<ByteArray, byte[], byte[]> store =
        new BdbStorageConfiguration(new VoldemortConfig(new Props(new File(serverPropsFile))))
            .getStore(TestUtils.makeStoreDefinition(storeName),
                      TestUtils.makeSingleNodeRoutingStrategy());
    final AtomicInteger obsoletes = new AtomicInteger(0);
    Path jsonFilePath = new Path(jsonDataFile);
    FileStatus jsonFileStatus = jsonFilePath.getFileSystem(new Configuration())
                                            .listStatus(jsonFilePath)[0];
    // One FileSplit covering the whole sequence file: offset 0, full length, no host hints.
    final SequenceFileRecordReader<BytesWritable, BytesWritable> reader =
        new SequenceFileRecordReader<BytesWritable, BytesWritable>(new Configuration(),
                                                                   new FileSplit(jsonFilePath,
                                                                                 0,
                                                                                 jsonFileStatus.getLen(),
                                                                                 (String[]) null));
    PerformanceTest readWriteTest = new PerformanceTest() {

        @Override
        public void doOperation(int index) throws Exception {
            try {
                BytesWritable key = new BytesWritable();
                BytesWritable value = new BytesWritable();
                reader.next(key, value);
                store.put(new ByteArray(ByteUtils.copy(key.get(), 0, key.getSize())),
                          Versioned.value(ByteUtils.copy(value.get(), 0, value.getSize())),
                          null);
            } catch (ObsoleteVersionException e) {
                // Obsolete versions are expected; just count them.
                obsoletes.incrementAndGet();
            }
        }
    };
    readWriteTest.run(30 * 1000 * 1000, 1);
    System.out.println("Bdb write throughput with one thread:");
    readWriteTest.printStats();
}
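This benchmark (and the MySQL variant that follows) reads the whole sequence file through a single FileSplit: offset 0, length taken from the FileStatus, and null host hints since data locality does not matter for a local test. A minimal sketch of that construction pattern, isolated into a helper, could look like the following; the class and method names here are illustrative and not part of voldemort.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.SequenceFileRecordReader;

public class WholeFileSplitSketch {

    // Build a FileSplit spanning the entire file: start = 0, length = file size,
    // hosts = null because no data-locality hint is needed here.
    public static FileSplit wholeFileSplit(Path path, Configuration conf) throws IOException {
        FileStatus status = path.getFileSystem(conf).getFileStatus(path);
        return new FileSplit(path, 0, status.getLen(), (String[]) null);
    }

    // Open a sequential record reader over that split, as the benchmark above does.
    public static SequenceFileRecordReader<BytesWritable, BytesWritable> openReader(Path path, Configuration conf) throws IOException {
        return new SequenceFileRecordReader<BytesWritable, BytesWritable>(conf, wholeFileSplit(path, conf));
    }
}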
Use of org.apache.hadoop.mapred.FileSplit in project voldemort by voldemort.
The class MysqlBuildPerformanceTest, method main.
public static void main(String[] args) throws FileNotFoundException, IOException {
    if (args.length != 3)
        Utils.croak("USAGE: java " + MysqlBuildPerformanceTest.class.getName()
                    + " serverPropsFile storeName jsonSequenceDataFile");
    String serverPropsFile = args[0];
    String storeName = args[1];
    String jsonDataFile = args[2];
    final Store<ByteArray, byte[], byte[]> store =
        new MysqlStorageConfiguration(new VoldemortConfig(new Props(new File(serverPropsFile))))
            .getStore(TestUtils.makeStoreDefinition(storeName),
                      TestUtils.makeSingleNodeRoutingStrategy());
    final AtomicInteger obsoletes = new AtomicInteger(0);
    Path jsonFilePath = new Path(jsonDataFile);
    FileStatus jsonFileStatus = jsonFilePath.getFileSystem(new Configuration())
                                            .listStatus(jsonFilePath)[0];
    // Same whole-file FileSplit as in the BDB test above.
    final SequenceFileRecordReader<BytesWritable, BytesWritable> reader =
        new SequenceFileRecordReader<BytesWritable, BytesWritable>(new Configuration(),
                                                                   new FileSplit(jsonFilePath,
                                                                                 0,
                                                                                 jsonFileStatus.getLen(),
                                                                                 (String[]) null));
    PerformanceTest readWriteTest = new PerformanceTest() {

        @Override
        public void doOperation(int index) throws Exception {
            try {
                BytesWritable key = new BytesWritable();
                BytesWritable value = new BytesWritable();
                reader.next(key, value);
                store.put(new ByteArray(ByteUtils.copy(key.get(), 0, key.getSize())),
                          Versioned.value(ByteUtils.copy(value.get(), 0, value.getSize())),
                          null);
            } catch (ObsoleteVersionException e) {
                // Obsolete versions are expected; just count them.
                obsoletes.incrementAndGet();
            }
        }
    };
    readWriteTest.run(1000, 1);
    System.out.println("MySQL write throughput with one thread:");
    readWriteTest.printStats();
}
Use of org.apache.hadoop.mapred.FileSplit in project voldemort by voldemort.
The class JsonSequenceFileInputFormat, method getRecordReader.
@Override
public RecordReader<BytesWritable, BytesWritable> getRecordReader(InputSplit split,
                                                                  JobConf conf,
                                                                  Reporter reporter) throws IOException {
    String inputPathString = ((FileSplit) split).getPath().toUri().getPath();
    log.info("Input file path: " + inputPathString);
    Path inputPath = new Path(inputPathString);
    SequenceFile.Reader reader = new SequenceFile.Reader(inputPath.getFileSystem(conf), inputPath, conf);
    SequenceFile.Metadata meta = reader.getMetadata();
    try {
        Text keySchema = meta.get(new Text("key.schema"));
        Text valueSchema = meta.get(new Text("value.schema"));
        if (0 == keySchema.getLength() || 0 == valueSchema.getLength()) {
            throw new Exception();
        }
        // update the JobConf with the schemas pulled from the file's metadata
        conf.set("mapper.input.key.schema", keySchema.toString());
        conf.set("mapper.input.value.schema", valueSchema.toString());
    } catch (Exception e) {
        throw new IOException("Failed to load schema from file: " + inputPathString + "\n");
    }
    return super.getRecordReader(split, conf, reporter);
}
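getRecordReader only reads the "key.schema" and "value.schema" entries; something upstream has to have written them into the SequenceFile's metadata when the file was produced. The sketch below shows one way such a file could be written using Hadoop's (deprecated) FileSystem-based createWriter overload that accepts a Metadata object; the output path and schema strings are illustrative assumptions, not taken from voldemort's build pipeline.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.DefaultCodec;

public class SchemaMetadataWriterSketch {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path out = new Path("/tmp/example.seq");   // illustrative output path

        // Attach the schema strings that the input format above looks up by name.
        SequenceFile.Metadata meta = new SequenceFile.Metadata();
        meta.set(new Text("key.schema"), new Text("'string'"));   // assumed JSON schema
        meta.set(new Text("value.schema"), new Text("'int32'"));  // assumed JSON schema

        SequenceFile.Writer writer = SequenceFile.createWriter(out.getFileSystem(conf),
                                                               conf,
                                                               out,
                                                               BytesWritable.class,
                                                               BytesWritable.class,
                                                               SequenceFile.CompressionType.NONE,
                                                               new DefaultCodec(),
                                                               null,   // no Progressable
                                                               meta);
        try {
            // A single illustrative record; real data would come from the build job.
            writer.append(new BytesWritable(new byte[] { 1 }), new BytesWritable(new byte[] { 2 }));
        } finally {
            writer.close();
        }
    }
}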
Use of org.apache.hadoop.mapred.FileSplit in project mongo-hadoop by mongodb.
The class HiveMongoInputFormat, method getSplits.
@Override
public FileSplit[] getSplits(final JobConf conf, final int numSplits) throws IOException {
    try {
        MongoSplitter splitterImpl = MongoSplitterFactory.getSplitter(conf);
        final List<org.apache.hadoop.mapreduce.InputSplit> splits = splitterImpl.calculateSplits();
        InputSplit[] splitIns = splits.toArray(new InputSplit[splits.size()]);
        // Wrap the InputSplits in FileSplits so that 'getPath'
        // doesn't produce an error (Hive bug).
        FileSplit[] wrappers = new FileSplit[splitIns.length];
        Path path = new Path(conf.get(MongoStorageHandler.TABLE_LOCATION));
        for (int i = 0; i < wrappers.length; i++) {
            wrappers[i] = new MongoHiveInputSplit(splitIns[i], path);
        }
        return wrappers;
    } catch (SplitFailedException spfe) {
        // The split failed because no namespace was found
        // (so the corresponding collection doesn't exist).
        LOG.error(spfe.getMessage(), spfe);
        throw new IOException(spfe.getMessage(), spfe);
    } catch (Exception e) {
        throw new IOException(e);
    }
}
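Hive assumes every split it handles is a FileSplit and calls getPath() on it, so the Mongo-specific splits are wrapped in a FileSplit subclass that reports the table location as its path. The class below is only a sketch of that wrapping idea with names of my own choosing; mongo-hadoop's real MongoHiveInputSplit also handles Writable serialization of the wrapped split, which is omitted here.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileSplit;

public class PathReportingSplitSketch extends FileSplit {

    // The real, non-file-backed split produced by the Mongo splitter.
    private final org.apache.hadoop.mapreduce.InputSplit delegate;

    public PathReportingSplitSketch(org.apache.hadoop.mapreduce.InputSplit delegate, Path tableLocation) {
        // Report the table location as the "file" so Hive's getPath() calls succeed;
        // start and length are placeholders.
        super(tableLocation, 0L, 0L, (String[]) null);
        this.delegate = delegate;
    }

    @Override
    public long getLength() {
        try {
            return delegate.getLength();
        } catch (IOException | InterruptedException e) {
            // FileSplit.getLength() cannot declare these checked exceptions.
            throw new RuntimeException(e);
        }
    }

    public org.apache.hadoop.mapreduce.InputSplit getDelegate() {
        return delegate;
    }
}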
Use of org.apache.hadoop.mapred.FileSplit in project mongo-hadoop by mongodb.
The class BSONFileInputFormat, method getSplits.
@Override
public FileSplit[] getSplits(final JobConf job, final int numSplits) throws IOException {
    BSONSplitter splitter = new BSONSplitter();
    splitter.setConf(job);
    FileStatus[] inputFiles = listStatus(job);
    List<FileSplit> results = new ArrayList<FileSplit>();
    for (FileStatus file : inputFiles) {
        FileSystem fs = FileSystem.get(file.getPath().toUri(), job);
        if (!isSplitable(fs, file.getPath())) {
            // Compressed files get a single split covering the whole file.
            LOG.info("File " + file.getPath() + " is compressed so cannot be split.");
            org.apache.hadoop.mapreduce.lib.input.FileSplit delegate =
                splitter.createFileSplit(file, fs, 0L, file.getLen());
            results.add(new BSONFileSplit(delegate.getPath(), delegate.getStart(),
                                          delegate.getLength(), delegate.getLocations()));
            continue;
        }
        splitter.setInputPath(file.getPath());
        Path splitFilePath = getSplitsFilePath(file.getPath(), job);
        try {
            splitter.loadSplitsFromSplitFile(file, splitFilePath);
        } catch (BSONSplitter.NoSplitFileException nsfe) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(format("No split file for %s; building split file", file.getPath()));
            }
            splitter.readSplitsForFile(file);
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug(format("BSONSplitter found %d splits.", splitter.getAllSplits().size()));
        }
        // Convert each new-API FileSplit into an old-API BSONFileSplit.
        for (org.apache.hadoop.mapreduce.lib.input.FileSplit split : splitter.getAllSplits()) {
            BSONFileSplit fsplit = new BSONFileSplit(split.getPath(), split.getStart(),
                                                     split.getLength(), split.getLocations());
            fsplit.setKeyField(MongoConfigUtil.getInputKey(job));
            results.add(fsplit);
        }
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug(format("Total of %d splits found.", results.size()));
    }
    return results.toArray(new BSONFileSplit[results.size()]);
}
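Both places above that build a BSONFileSplit perform the same field-by-field copy from a new-API (org.apache.hadoop.mapreduce.lib.input) FileSplit into an old-API (org.apache.hadoop.mapred) split. Pulled out as a helper, the conversion looks like the sketch below; the class and method names are my own, and a plain mapred FileSplit stands in for BSONFileSplit.

import java.io.IOException;

import org.apache.hadoop.mapred.FileSplit;

public class SplitConversionSketch {

    // Copy path, start offset, length and host hints from a new-API split into an
    // old-API FileSplit; getLocations() is the only accessor that can throw here.
    public static FileSplit toOldApiSplit(org.apache.hadoop.mapreduce.lib.input.FileSplit split) throws IOException {
        return new FileSplit(split.getPath(), split.getStart(), split.getLength(), split.getLocations());
    }
}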