use of java.nio.channels.SeekableByteChannel in project lucene-solr by apache.
the class LineFileDocs method open.
/**
 * Opens the line file at {@code path} and positions {@code reader} at the start of a line
 * located at a randomly chosen offset.
 *
 * <p>The file is looked up on the classpath first; if it is not found there, {@code path} is
 * treated as a filesystem path. Uncompressed filesystem files are positioned directly via a
 * {@link SeekableByteChannel}; classpath resources and {@code .gz} files are positioned by
 * skipping over the stream.
 *
 * <p>If anything fails after a stream has been opened, the stream is closed before the
 * exception propagates, and {@code reader} is left unchanged.
 *
 * @param random source of randomness passed to {@code randomSeekPos}
 * @throws IOException if the file cannot be opened, positioned, or read
 */
private synchronized void open(Random random) throws IOException {
    InputStream is = getClass().getResourceAsStream(path);
    boolean needSkip = true;
    long size = 0L, seekTo = 0L;
    try {
        if (is == null) {
            // if it's not in classpath, we load it as absolute filesystem path (e.g. Hudson's home dir)
            Path file = Paths.get(path);
            size = Files.size(file);
            if (path.endsWith(".gz")) {
                // if it is a gzip file, we need to use InputStream and slowly skipTo:
                is = Files.newInputStream(file);
            } else {
                // optimized seek using SeekableByteChannel
                seekTo = randomSeekPos(random, size);
                final SeekableByteChannel channel = Files.newByteChannel(file);
                // Wrap right away: the channel stream is unbuffered, and closing it closes the
                // channel, so the failure handler below also covers a failing position() call.
                is = Channels.newInputStream(channel);
                if (LuceneTestCase.VERBOSE) {
                    System.out.println("TEST: LineFileDocs: file seek to fp=" + seekTo + " on open");
                }
                channel.position(seekTo);
                needSkip = false;
            }
        } else {
            // if the file comes from Classpath:
            size = is.available();
        }
        if (path.endsWith(".gz")) {
            is = new GZIPInputStream(is);
            // guestimate:
            size *= 2.8;
        }
        // but this seek is a scan, so very inefficient!!!
        if (needSkip) {
            seekTo = randomSeekPos(random, size);
            if (LuceneTestCase.VERBOSE) {
                System.out.println("TEST: LineFileDocs: stream skip to fp=" + seekTo + " on open");
            }
            // InputStream.skip may skip fewer bytes than requested, so keep going until the
            // requested amount is consumed or the stream ends.
            long remaining = seekTo;
            while (remaining > 0L) {
                final long skipped = is.skip(remaining);
                if (skipped > 0L) {
                    remaining -= skipped;
                } else if (is.read() < 0) {
                    // end of stream reached before the target offset
                    break;
                } else {
                    // skip() made no progress but one byte could be read
                    remaining--;
                }
            }
        }
        // if we seeked somewhere, read until newline char
        if (seekTo > 0L) {
            int b;
            do {
                b = is.read();
            } while (b >= 0 && b != 13 && b != 10);
        }
        CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
            .onMalformedInput(CodingErrorAction.REPORT)
            .onUnmappableCharacter(CodingErrorAction.REPORT);
        final BufferedReader opened = new BufferedReader(new InputStreamReader(is, decoder), BUFFER_SIZE);
        if (seekTo > 0L) {
            // read one more line, to make sure we are not inside a Windows linebreak (\r\n):
            opened.readLine();
        }
        // publish the reader only once it is fully positioned
        reader = opened;
    } catch (Throwable t) {
        // do not leak the file handle when opening or positioning fails
        if (is != null) {
            try {
                is.close();
            } catch (Throwable closeFailure) {
                t.addSuppressed(closeFailure);
            }
        }
        throw t;
    }
}
use of java.nio.channels.SeekableByteChannel in project lucene-solr by apache.
the class HandleTrackingFS method newByteChannel.
/**
 * Opens a byte channel through the parent implementation and returns it wrapped in a filter
 * that reports open and close events.
 *
 * <p>{@code callOpenHook} is invoked once the wrapper exists. On {@code close()} the wrapper
 * calls {@code onClose} the first time only, and always closes the wrapped channel afterwards.
 * The wrapper compares and hashes by identity.
 */
@Override
public SeekableByteChannel newByteChannel(Path path, Set<? extends OpenOption> options, FileAttribute<?>... attrs) throws IOException {
    final SeekableByteChannel underlying = super.newByteChannel(path, options, attrs);
    final SeekableByteChannel tracked = new FilterSeekableByteChannel(underlying) {
        // set on the first close() so the close hook is not called again
        boolean closed;

        @Override
        public void close() throws IOException {
            final boolean firstClose = !closed;
            closed = true;
            try {
                if (firstClose) {
                    onClose(path, this);
                }
            } finally {
                // the wrapped channel is closed even if the hook throws
                super.close();
            }
        }

        @Override
        public String toString() {
            return "SeekableByteChannel(" + path.toString() + ")";
        }

        @Override
        public int hashCode() {
            return System.identityHashCode(this);
        }

        @Override
        public boolean equals(Object obj) {
            return obj == this;
        }
    };
    callOpenHook(path, tracked);
    return tracked;
}
use of java.nio.channels.SeekableByteChannel in project gatk by broadinstitute.
the class ReadsDataSourceUnitTest method testCloudBamWithCustomReaderFactoryAndWrappers.
/**
 * Reads XOR-obfuscated BAMs and indices through channel wrappers and checks that a query over
 * interval 1:1-300 returns exactly two reads.
 */
@Test(dataProvider = "cloudXorTestData", groups = { "bucket" })
public void testCloudBamWithCustomReaderFactoryAndWrappers(final List<Path> bams, final List<Path> indices) {
    final SamReaderFactory strictFactory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.STRICT);

    // The input files are XOR'd with a constant; each wrapper XORs its input back.
    // The data and the index use different keys, so a swapped or missing wrapper fails the test.
    final Function<SeekableByteChannel, SeekableByteChannel> dataUnwrapper = XorWrapper.forKey((byte) 74);
    final Function<SeekableByteChannel, SeekableByteChannel> indexUnwrapper = XorWrapper.forKey((byte) 80);

    try (final ReadsDataSource source = new ReadsDataSource(bams, indices, strictFactory, dataUnwrapper, indexUnwrapper)) {
        Assert.assertTrue(source.indicesAvailable(), "Explicitly-provided indices not detected for bams: " + bams);

        // count the reads overlapping the query interval
        int readsSeen = 0;
        for (final Iterator<GATKRead> it = source.query(new SimpleInterval("1", 1, 300)); it.hasNext(); it.next()) {
            readsSeen++;
        }
        Assert.assertEquals(readsSeen, 2, "Wrong number of reads returned in query");
    }
}
use of java.nio.channels.SeekableByteChannel in project gatk by broadinstitute.
the class ParallelCopyGCSDirectoryIntoHDFSSpark method readChunkToHdfs.
/**
 * Copies one chunk of the input file to its own output file.
 *
 * <p>The chunk starts at byte offset {@code chunkSize * chunkNum} of the input and holds at most
 * {@code chunkSize} bytes; it is shorter if the input ends first. The output file is named
 * {@code <outputDirectory>/<input basename>.chunk.<chunkNum>}.
 *
 * @param inputGCSPathFinal path of the input file, resolved with {@code IOUtils.getPath}
 * @param chunkSize maximum number of bytes to copy
 * @param chunkNum zero-based index of the chunk within the input file
 * @param outputDirectory directory in which the chunk file is created
 * @return the chunk number paired with the path of the chunk file that was written
 * @throws GATKException if reading the input or writing the chunk fails with an IOException
 */
private static final Tuple2<Integer, String> readChunkToHdfs(final String inputGCSPathFinal, final long chunkSize, final Integer chunkNum, final String outputDirectory) {
    final Path gcsPath = IOUtils.getPath(inputGCSPathFinal);
    final String basename = gcsPath.getName(gcsPath.getNameCount() - 1).toString();
    org.apache.hadoop.fs.Path outputPath = new org.apache.hadoop.fs.Path(outputDirectory);
    final String chunkPath = outputPath + "/" + basename + ".chunk." + chunkNum;
    try (SeekableByteChannel channel = Files.newByteChannel(gcsPath);
         final OutputStream outputStream = new BufferedOutputStream(BucketUtils.createFile(chunkPath))) {
        final long start = chunkSize * (long) chunkNum;
        channel.position(start);
        final ByteBuffer byteBuffer = ByteBuffer.allocateDirect((int) Math.min(SIXTY_FOUR_MIB, chunkSize));
        // a direct buffer has no backing array, so bytes are moved to the stream through this scratch array
        final byte[] scratch = new byte[Math.min(byteBuffer.capacity(), 64 * 1024)];
        long remaining = chunkSize;
        while (remaining > 0) {
            byteBuffer.clear();
            if (remaining < byteBuffer.capacity()) {
                // never request bytes beyond the end of this chunk
                byteBuffer.limit((int) remaining);
            }
            if (channel.read(byteBuffer) <= 0) {
                // end of input: the last chunk may be shorter than chunkSize
                break;
            }
            byteBuffer.flip();
            remaining -= byteBuffer.remaining();
            while (byteBuffer.hasRemaining()) {
                final int count = Math.min(byteBuffer.remaining(), scratch.length);
                byteBuffer.get(scratch, 0, count);
                outputStream.write(scratch, 0, count);
            }
        }
    } catch (IOException e) {
        throw new GATKException(e.getMessage() + "; inputGCSPathFinal = " + inputGCSPathFinal, e);
    }
    return new Tuple2<>(chunkNum, chunkPath);
}
use of java.nio.channels.SeekableByteChannel in project gatk by broadinstitute.
the class GcsNioIntegrationTest method testCloseWhilePrefetching.
/**
 * Closes a prefetching channel immediately after a one-byte read; the test passes if
 * {@code close()} does not throw.
 */
@Test(groups = { "cloud" })
public void testCloseWhilePrefetching() throws Exception {
    final URI largeFileUri = URI.create(getGCPTestInputPath() + largeFilePath);
    final int prefetchBufferBytes = 10 * 1024 * 1024;
    final SeekableByteChannel prefetcher =
        new SeekableByteChannelPrefetcher(Files.newByteChannel(Paths.get(largeFileUri)), prefetchBufferBytes);

    // a single-byte read is enough to get the prefetching going
    prefetcher.read(ByteBuffer.allocate(1));

    // closing must not throw an exception, even if the prefetching thread is active
    prefetcher.close();
}
Aggregations