Use of htsjdk.samtools.util.RuntimeIOException in the jvarkit project by lindenb.
From the class VcfCalledWithAnotherMethod, method doVcfToVcf.
/**
 * Re-emits the input VCF, annotating each variant with evidence gathered from a set of
 * external ("other") VCF files: which external VCFs contain the variant, how many, and
 * per-genotype concordance/discordance counts.
 *
 * NOTE(review): the 'out' parameter is never used; the method writes through its own
 * writer opened from 'outputFile' below — confirm this is intentional.
 *
 * @param inputName name of the input source (unused here)
 * @param in        iterator over the input VCF records
 * @param out       caller-supplied writer (ignored — see note above)
 * @return RETURN_OK on success, -1 on error
 */
protected int doVcfToVcf(final String inputName, final VcfIterator in, final VariantContextWriter out) {
// External VCF readers; closed on success below and again, harmlessly, in 'finally'.
final List<ExternalVcf> externalVcfs = new ArrayList<>();
try {
final VCFHeader header = in.getHeader();
this.dictionary = header.getSequenceDictionary();
/**
* open primitive input
*/
// A sequence dictionary is required (used by the progress logger below).
if (this.dictionary == null) {
LOG.error("no dictionary in input");
return -1;
}
final Set<File> samtoolsFiles = new HashSet<>();
// Arguments ending in ".list" are files-of-filenames: one VCF path per line,
// with '#'-prefixed and blank lines skipped.
this.externalVcfStrs.stream().filter(S -> S.endsWith(".list")).map(S -> Paths.get(S)).forEach(P -> {
try {
samtoolsFiles.addAll(Files.readAllLines(P).stream().filter(L -> !(L.startsWith("#") || L.trim().isEmpty())).map(S -> new File(S)).collect(Collectors.toSet()));
} catch (final Exception err) {
// Rethrown unchecked so it can escape the lambda; handled by the outer catch.
throw new RuntimeIOException(err);
}
});
// All other arguments are taken as VCF paths directly.
samtoolsFiles.addAll(this.externalVcfStrs.stream().filter(S -> !S.endsWith(".list")).map(S -> new File(S)).collect(Collectors.toSet()));
externalVcfs.addAll(samtoolsFiles.stream().map(F -> new ExternalVcf(F)).collect(Collectors.toList()));
/**
* check for uniq keys
*/
// Make each external VCF key unique by appending "_1", "_2", ... on collision.
final Set<String> uniqKeys = new HashSet<>();
for (final ExternalVcf extvcf : externalVcfs) {
int n = 0;
for (; ; ) {
final String newkey = extvcf.key + (n == 0 ? "" : "_" + n);
if (!uniqKeys.contains(newkey)) {
extvcf.key = newkey;
uniqKeys.add(newkey);
break;
}
++n;
}
}
final VCFHeader h2 = new VCFHeader(header);
// Record each external VCF path in the output header, keyed by its unique key.
for (final ExternalVcf extvcf : externalVcfs) {
h2.addMetaDataLine(new VCFHeaderLine("otherVcf:" + extvcf.key, extvcf.vcfFile.getPath()));
}
// Optional FILTER applied to variants found in none of the external VCFs
// (disabled when the configured filter name is empty).
final VCFFilterHeaderLine variantNotFoundElsewhereFILTER = (filterNameVariantNotFoundElseWhere.isEmpty() ? null : new VCFFilterHeaderLine(filterNameVariantNotFoundElseWhere, "Variant Was not found in other VCFs: " + externalVcfs.stream().map(S -> S.vcfFile.getPath()).collect(Collectors.joining(", "))));
if (variantNotFoundElsewhereFILTER != null) {
h2.addMetaDataLine(variantNotFoundElsewhereFILTER);
}
final VCFInfoHeaderLine variantFoundElseWhereKeys = new VCFInfoHeaderLine(this.infoFoundKey, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Variant was found in the VCFs designed by those keys");
h2.addMetaDataLine(variantFoundElseWhereKeys);
// NOTE(review): this "count" INFO line reuses this.infoFoundKey — the same ID as the
// "keys" line just above, so the two header lines collide. Presumably this should be
// a distinct count key; confirm against the class's fields.
final VCFInfoHeaderLine variantFoundElseWhereCount = new VCFInfoHeaderLine(this.infoFoundKey, 1, VCFHeaderLineType.Integer, "Number of times the Variant was found in the VCFs");
h2.addMetaDataLine(variantFoundElseWhereCount);
final VCFFormatHeaderLine genotypeCountSame = new VCFFormatHeaderLine(this.formatCountSame, 1, VCFHeaderLineType.Integer, "Number of times the Genotype was found the same in other VCFs");
h2.addMetaDataLine(genotypeCountSame);
final VCFFormatHeaderLine genotypeCountDiscordant = new VCFFormatHeaderLine(this.formatCountDiscordant, 1, VCFHeaderLineType.Integer, "Number of times the Genotype was found dicordant in other VCFs");
h2.addMetaDataLine(genotypeCountDiscordant);
super.addMetaData(h2);
// NOTE(review): output goes to a writer opened here rather than the 'out' parameter,
// and 'w' is never explicitly closed — confirm.
final VariantContextWriter w = super.openVariantContextWriter(outputFile);
w.writeHeader(h2);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(this.dictionary);
while (in.hasNext()) {
final VariantContext ctx = progress.watch(in.next());
// One mutable same/discordant counter per genotype of the current variant.
final List<GenotypeCount> genotypeCounts = ctx.getGenotypes().stream().map(G -> new GenotypeCount(G)).collect(Collectors.toList());
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
final Set<String> variantFoundInOtherVcfs = new HashSet<>();
for (final ExternalVcf extvcf : externalVcfs) {
final List<VariantContext> otherVariants = extvcf.get(ctx);
// The variant counts as "found" in this external VCF when any returned record
// shares at least one ALT allele (or unconditionally when the input has no ALT).
if (otherVariants.stream().filter(CTX2 -> {
if (ctx.getAlternateAlleles().isEmpty())
return true;
for (final Allele a : ctx.getAlternateAlleles()) {
if (CTX2.hasAllele(a))
return true;
}
return false;
}).findAny().isPresent()) {
variantFoundInOtherVcfs.add(extvcf.key);
}
// Per-sample concordance: compare each input genotype against the same sample's
// genotype in every overlapping external record.
for (final GenotypeCount gt : genotypeCounts) {
for (final VariantContext otherVariant : otherVariants) {
final Genotype otherGt = otherVariant.getGenotype(gt.gt.getSampleName());
if (otherGt == null)
continue;
// Optionally treat NO_CALL and HOM_REF as equivalent genotypes.
if (gt.gt.sameGenotype(otherGt) || (this.noCallSameAsHomRef && ((gt.gt.isNoCall() && otherGt.isHomRef()) || (gt.gt.isHomRef() && otherGt.isNoCall())))) {
gt.countSame++;
} else {
gt.countDiscordant++;
}
}
}
}
// Attach the per-genotype FORMAT counters to the rebuilt record.
vcb.genotypes(genotypeCounts.stream().map(G -> {
final GenotypeBuilder gb = new GenotypeBuilder(G.gt);
gb.attribute(genotypeCountSame.getID(), G.countSame);
gb.attribute(genotypeCountDiscordant.getID(), G.countDiscordant);
return gb.make();
}).collect(Collectors.toList()));
vcb.attribute(variantFoundElseWhereCount.getID(), variantFoundInOtherVcfs.size());
if (variantFoundInOtherVcfs.isEmpty()) {
if (variantNotFoundElsewhereFILTER != null) {
vcb.filter(variantNotFoundElsewhereFILTER.getID());
}
} else {
// Found somewhere: mark PASS only when a filter is configured and the input
// record was not already filtered.
if (variantNotFoundElsewhereFILTER != null && !ctx.isFiltered()) {
vcb.passFilters();
}
vcb.attribute(variantFoundElseWhereKeys.getID(), new ArrayList<>(variantFoundInOtherVcfs));
}
w.add(vcb.make());
}
progress.finish();
// Close external readers now; the finally clause then sees an empty list.
while (!externalVcfs.isEmpty()) externalVcfs.remove(0).close();
return RETURN_OK;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
while (!externalVcfs.isEmpty()) externalVcfs.remove(0).close();
}
}
Use of htsjdk.samtools.util.RuntimeIOException in the jvarkit project by lindenb.
From the class PubmedOrcidGraph, method dumpGexf.
/**
 * Dumps the author and link BerkeleyDB databases as a GEXF (graph) XML document:
 * one node per author, one edge per pair of authors sharing publications.
 *
 * NOTE(review): the cursor 'c' is closed only on the success path; the 'finally'
 * block closes the writers but not the cursor, so an exception raised while
 * iterating leaks an open cursor — confirm.
 */
private void dumpGexf() {
final XMLOutputFactory xof = XMLOutputFactory.newFactory();
PrintWriter pw = null;
XMLStreamWriter w = null;
// Reusable key/value holders for BerkeleyDB cursor iteration.
DatabaseEntry key = new DatabaseEntry();
DatabaseEntry data = new DatabaseEntry();
Cursor c = null;
try {
pw = openFileOrStdoutAsPrintWriter(this.outputFile);
w = xof.createXMLStreamWriter(pw);
w.writeStartDocument("UTF-8", "1.0");
w.writeStartElement("gexf");
w.writeAttribute("xmlns", GexfConstants.XMLNS);
w.writeAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
w.writeAttribute("xsi:schemaLocation", GexfConstants.XSI_SCHEMA_LOCATION);
w.writeAttribute("version", GexfConstants.VERSION);
/* meta */
w.writeStartElement("meta");
w.writeAttribute("lastmodifieddate", "2017-04-03");
w.writeStartElement("creator");
w.writeCharacters("PumedOrcidGraph");
w.writeEndElement();
w.writeStartElement("description");
w.writeCharacters("PumedOrcidGraph");
w.writeEndElement();
w.writeEndElement();
/* graph */
w.writeStartElement("graph");
w.writeAttribute("mode", "static");
w.writeAttribute("defaultedgetype", "undirected");
/* attributes */
// Declaration of the per-node attribute schema.
w.writeStartElement("attributes");
w.writeAttribute("class", "node");
w.writeAttribute("mode", "static");
gexfAttDecl(w, "orcid", "string");
gexfAttDecl(w, "foreName", "string");
gexfAttDecl(w, "lastName", "string");
gexfAttDecl(w, "initials", "string");
gexfAttDecl(w, "affiliation", "string");
// attributes
w.writeEndElement();
// Per-edge attribute schema is optional.
if (!this.hide_edge_attributes) {
w.writeStartElement("attributes");
w.writeAttribute("class", "edge");
w.writeAttribute("mode", "static");
gexfAttDecl(w, "pmid", "string");
gexfAttDecl(w, "title", "string");
gexfAttDecl(w, "doi", "string");
gexfAttDecl(w, "year", "string");
gexfAttDecl(w, "journal", "string");
// attributes
w.writeEndElement();
}
/* nodes */
// One <node> per author in the author database.
w.writeStartElement("nodes");
c = this.authorDatabase.openCursor(txn, null);
while (c.getNext(key, data, LockMode.DEFAULT) == OperationStatus.SUCCESS) {
final Author au = this.authorBinding.entryToObject(data);
// Authors whose pubmed record was never resolved are emitted as a comment only.
if (NAME_NOT_FOUND.equals(au.foreName)) {
w.writeComment("Orcid " + au.orcid + " not found in pubmed");
continue;
}
au.gexf(w);
}
c.close();
// nodes
w.writeEndElement();
w.writeStartElement("edges");
// Fresh key for the second cursor; the 'data' holder is reused.
key = new DatabaseEntry();
c = this.linkDatabase.openCursor(txn, null);
while (c.getNext(key, data, LockMode.DEFAULT) == OperationStatus.SUCCESS) {
final Link L = this.linkBinding.entryToObject(data);
w.writeStartElement("edge");
// Sequential unique edge id.
w.writeAttribute("id", "E" + (++ID_GENERATOR));
w.writeAttribute("type", "undirected");
w.writeAttribute("weight", String.valueOf(L.pmids.size()));
w.writeAttribute("source", L.orcid1);
w.writeAttribute("target", L.orcid2);
// Look up the article record behind each shared pmid.
for (String pmid : L.pmids) {
DatabaseEntry key2 = new DatabaseEntry();
DatabaseEntry data2 = new DatabaseEntry();
StringBinding.stringToEntry(pmid, key2);
if (this.articleDatabase.get(txn, key2, data2, LockMode.DEFAULT) != OperationStatus.SUCCESS) {
throw new JvarkitException.BerkeleyDbError("cannot get article");
}
final Article article = this.articleBinding.entryToObject(data2);
if (this.all_links_between_authors) {
w.writeAttribute("label", String.valueOf(L.pmids.size()));
} else {
w.writeAttribute("label", "" + article.ArticleTitle + ". " + article.ISOAbbreviation + ". (" + article.Year + ")");
}
if (!this.hide_edge_attributes) {
// Each writeAttribute below attaches to the preceding empty <attribute> element.
w.writeStartElement("attributes");
w.writeEmptyElement("attribute");
w.writeAttribute("for", "pmid");
w.writeAttribute("value", article.pmid);
w.writeEmptyElement("attribute");
w.writeAttribute("for", "title");
w.writeAttribute("value", article.ArticleTitle);
w.writeEmptyElement("attribute");
w.writeAttribute("for", "doi");
w.writeAttribute("value", article.doi);
w.writeEmptyElement("attribute");
w.writeAttribute("for", "year");
w.writeAttribute("value", article.Year);
w.writeEmptyElement("attribute");
w.writeAttribute("for", "journal");
w.writeAttribute("value", article.ISOAbbreviation);
// attributes
w.writeEndElement();
}
// NOTE(review): this break exits the pmid loop after the first article, so only the
// first pmid's label/attributes are ever written for an edge — confirm intentional.
break;
}
//
// edge
w.writeEndElement();
}
c.close();
// edges
w.writeEndElement();
w.writeEndElement();
w.writeEndDocument();
w.flush();
pw.flush();
pw.close();
// Null out so the finally-block close is a no-op on the success path.
pw = null;
} catch (Exception err) {
throw new RuntimeIOException(err);
} finally {
CloserUtil.close(w);
CloserUtil.close(pw);
}
}
Use of htsjdk.samtools.util.RuntimeIOException in the gatk project by broadinstitute.
From the class NormalizeFasta, method doWork.
/**
 * Rewrites the INPUT reference FASTA to OUTPUT, wrapping each sequence's bases
 * at LINE_LENGTH characters per line.
 *
 * @return always null (tool convention for "no result object")
 */
@Override
protected Object doWork() {
    // Preconditions: readable input, writable output, and two distinct files.
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    Utils.validateArg(!INPUT.getAbsoluteFile().equals(OUTPUT.getAbsoluteFile()), "Input and output cannot be the same file.");
    try (final ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(INPUT, TRUNCATE_SEQUENCE_NAMES_AT_WHITESPACE);
         final BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT)) {
        // Re-emit every sequence in the reference, one FASTA record at a time.
        for (ReferenceSequence rec = ref.nextSequence(); rec != null; rec = ref.nextSequence()) {
            final String sequenceName = rec.getName();
            final byte[] sequenceBases = rec.getBases();
            try {
                // FASTA header line: ">name".
                out.write(">");
                out.write(sequenceName);
                out.newLine();
                if (sequenceBases.length == 0) {
                    logger.warn("Sequence " + sequenceName + " contains 0 bases.");
                } else {
                    int column = 0;
                    for (final byte base : sequenceBases) {
                        // Break the line before every LINE_LENGTH-th base (never before the first).
                        if (column > 0 && column % LINE_LENGTH == 0) {
                            out.write("\n");
                        }
                        out.write(base);
                        ++column;
                    }
                    out.write("\n");
                }
            } catch (IOException ioe) {
                throw new RuntimeIOException("Error writing to file " + OUTPUT.getAbsolutePath(), ioe);
            }
        }
    } catch (IOException e) {
        // Failure while opening or closing either file.
        throw new RuntimeIOException(e);
    }
    return null;
}
Use of htsjdk.samtools.util.RuntimeIOException in the gatk project by broadinstitute.
From the class BucketUtils, method dirSize.
/**
* Returns the total file size of all files in a directory, or the file size if the path specifies a file.
* Note that sub-directories are ignored - they are not recursed into.
* Only supports HDFS and local paths.
*
* @param path The URL to the file or directory whose size to return
* @return the total size of all files in bytes
*/
/**
 * Returns the total file size of all files in a directory, or the file size if the path
 * specifies a file. Note that sub-directories are ignored - they are not recursed into.
 * Only supports GCS, HDFS and local paths.
 *
 * @param path The URL to the file or directory whose size to return
 * @return the total size of all files in bytes
 * @throws UserException if the path cannot be read or does not exist
 */
public static long dirSize(String path) {
    try {
        // GCS case (would work with local too)
        if (isCloudStorageUrl(path)) {
            final java.nio.file.Path p = getPathOnGcs(path);
            // Files.list returns a Stream backed by an open directory handle; it must be
            // closed (try-with-resources) or the handle leaks.
            try (final java.util.stream.Stream<java.nio.file.Path> entries = Files.list(p)) {
                return entries.mapToLong(q -> {
                    try {
                        return (Files.isRegularFile(q) ? Files.size(q) : 0);
                    } catch (IOException e) {
                        // Rethrown unchecked so it can escape the lambda; handled below.
                        throw new RuntimeIOException(e);
                    }
                }).sum();
            }
        }
        // local file or HDFS case
        final Path hadoopPath = new Path(path);
        final FileSystem fs = hadoopPath.getFileSystem(new Configuration());
        final FileStatus status = fs.getFileStatus(hadoopPath);
        if (status == null) {
            throw new UserException.CouldNotReadInputFile(path, "File not found.");
        }
        long size = 0;
        if (status.isDirectory()) {
            // Sum only the immediate regular-file children; sub-directories are skipped.
            for (final FileStatus st : fs.listStatus(status.getPath())) {
                if (st.isFile()) {
                    size += st.getLen();
                }
            }
        } else {
            size += status.getLen();
        }
        return size;
    } catch (RuntimeIOException | IOException e) {
        throw new UserException("Failed to determine total input size of " + path + "\n Caused by:" + e.getMessage(), e);
    }
}
Use of htsjdk.samtools.util.RuntimeIOException in the gatk project by broadinstitute.
From the class AbstractInputParser, method parseLine.
/**
* This method represents the most efficient way (so far) to parse a line of whitespace-delimited text
*
* @param line the line to parse
* @return an array of all the "words"
*/
/**
 * This method represents the most efficient way (so far) to parse a line of
 * whitespace-delimited text.
 *
 * @param line the line to parse
 * @return an array of getWordCount() entries holding each parsed "word"
 */
private String[] parseLine(final byte[] line) {
    // Lazily determine how many words a line is expected to contain.
    if (getWordCount() == 0) {
        calculateWordCount(line);
    }
    final String[] words = new String[getWordCount()];
    boolean inDelimiter = true;
    int wordIndex = 0;
    int wordStart = 0;
    try {
        for (int pos = 0; pos < line.length; pos++) {
            if (isDelimiter(line[pos])) {
                if (!inDelimiter) {
                    // Close out the word that just ended.
                    words[wordIndex++] = new String(line, wordStart, pos - wordStart);
                } else if (!isTreatGroupedDelimitersAsOne()) {
                    // Consecutive delimiters produce an empty (null) field.
                    words[wordIndex++] = null;
                }
                inDelimiter = true;
            } else {
                // First non-delimiter byte after a delimiter starts a new word.
                if (inDelimiter) {
                    wordStart = pos;
                }
                inDelimiter = false;
            }
        }
        // Flush a trailing word that runs to end-of-line.
        if (!inDelimiter) {
            words[wordIndex] = new String(line, wordStart, line.length - wordStart);
        }
    } catch (ArrayIndexOutOfBoundsException e) {
        // More words than expected: report as an I/O-style failure with full context.
        throw new RuntimeIOException("Unexpected number of elements found when parsing file " + this.getFileName() + ": " + wordIndex + ". Expected a maximum of " + this.getWordCount() + " elements per line:" + new String(line, 0, line.length), e);
    }
    return words;
}
Aggregations