use of com.github.lindenb.jvarkit.lang.CharSplitter in project jvarkit by lindenb.
the class KnownGene method loadUriAsIntervalTreeMap.
/**
 * Loads a knownGene file/URI as an IntervalTreeMap.
 * Intervals in the IntervalTreeMap are *1-based* (interval.start = kg.txStart + 1).
 *
 * @param uri location of the tab-delimited knownGene data, opened with
 *            {@code IOUtils.openURIForBufferedReading}
 * @param filterOrNull optional filter; when non-null, genes rejected by it are skipped
 * @return the map of transcript intervals to the list of genes stored under each interval
 * @throws IOException if the source cannot be opened, read or closed
 */
public static IntervalTreeMap<List<KnownGene>> loadUriAsIntervalTreeMap(final String uri, final Predicate<KnownGene> filterOrNull) throws IOException {
    final IntervalTreeMap<List<KnownGene>> treeMap = new IntervalTreeMap<>();
    final CharSplitter tab = CharSplitter.TAB;
    // try-with-resources closes the reader on every exit path, replacing the
    // manual close()/null/finally sequence.
    try (BufferedReader in = IOUtils.openURIForBufferedReading(uri)) {
        String line;
        while ((line = in.readLine()) != null) {
            if (line.isEmpty()) {
                continue;
            }
            final KnownGene g = new KnownGene(tab.split(line));
            if (filterOrNull != null && !filterOrNull.test(g)) {
                continue;
            }
            // txStart is shifted by one to build the 1-based interval
            final Interval interval = new Interval(g.getContig(), g.getTxStart() + 1, g.getTxEnd(), g.isNegativeStrand(), g.getName());
            List<KnownGene> genes = treeMap.get(interval);
            if (genes == null) {
                genes = new ArrayList<>(2);
                treeMap.put(interval, genes);
            }
            genes.add(g);
        }
    }
    return treeMap;
}
use of com.github.lindenb.jvarkit.lang.CharSplitter in project jvarkit by lindenb.
the class PedigreeParser method parse.
/**
 * Reads a tab-delimited pedigree from the given reader.
 * <p>
 * Blank lines and lines starting with '#' are ignored. Every other line must hold at
 * least four columns (family, individual, father, mother); the optional fifth and
 * sixth columns (sex, status) default to the empty string when absent. The resulting
 * pedigree is validated before it is returned.
 *
 * @param br source of the pedigree lines
 * @return the validated pedigree
 * @throws IOException if reading from {@code br} fails
 * @throws IllegalArgumentException if a data line has fewer than four columns
 */
public Pedigree parse(final BufferedReader br) throws IOException {
    final PedigreeImpl pedigree = new PedigreeImpl();
    final CharSplitter splitter = CharSplitter.TAB;
    for (String row = br.readLine(); row != null; row = br.readLine()) {
        // skip empty lines and comments
        if (StringUtils.isBlank(row) || row.startsWith("#")) {
            continue;
        }
        final String[] columns = splitter.split(row);
        if (columns.length < 4) {
            throw new IllegalArgumentException("not enough tokens for pedigree in " + row);
        }
        // optional trailing columns
        String sex = "";
        String status = "";
        if (columns.length > 4) {
            sex = columns[4];
        }
        if (columns.length > 5) {
            status = columns[5];
        }
        build(pedigree, columns[0], columns[1], columns[2], columns[3], sex, status);
    }
    pedigree.validate();
    return pedigree;
}
use of com.github.lindenb.jvarkit.lang.CharSplitter in project jvarkit by lindenb.
the class VcfBiomart method doVcfToVcf.
/**
 * Annotates each variant with the rows returned by a Biomart query and writes it to {@code out}.
 * <p>
 * For every variant the contig/start/end filter values are updated, {@code this.domQuery}
 * is serialized and sent as the {@code query} parameter of a GET request to
 * {@code this.serviceUrl}. Each distinct non-blank response line other than
 * {@code [success]} becomes one '|'-separated value of the {@code this.TAG} INFO attribute.
 *
 * @param inputName name of the input (not used by this method)
 * @param iter source of the header and variants
 * @param out destination writer
 * @return 0 on success
 * @throws RuntimeIOException wrapping any failure, including a non-200 HTTP status
 */
@Override
protected int doVcfToVcf(final String inputName, final VCFIterator iter, final VariantContextWriter out) {
    final CharSplitter tab = CharSplitter.TAB;
    try {
        final TransformerFactory factory = TransformerFactory.newInstance();
        final Transformer transformer = factory.newTransformer();
        final VCFHeader header = iter.getHeader();
        final StringBuilder desc = new StringBuilder("Biomart query. Format: ");
        desc.append(this.attributes.stream().map(S -> this.printLabels ? S + "|" + S : S).collect(Collectors.joining("|")));
        header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
        header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
        header.addMetaDataLine(new VCFInfoHeaderLine(this.TAG, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, desc.toString()));
        out.writeHeader(header);
        while (iter.hasNext()) {
            final VariantContext ctx = iter.next();
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            // drop any previous annotation carried by the input
            vcb.rmAttribute(this.TAG);
            // NOTE(review): presumably these filter holders are part of this.domQuery, so the
            // query serialized below reflects the current variant -- confirm.
            this.filterColumnContig.set(ctx.getContig());
            this.filterColumnStart.set(String.valueOf(ctx.getStart()));
            this.filterColumnEnd.set(String.valueOf(ctx.getEnd()));
            final StringWriter domToStr = new StringWriter();
            transformer.transform(new DOMSource(this.domQuery), new StreamResult(domToStr));
            final URIBuilder builder = new URIBuilder(this.serviceUrl);
            builder.addParameter("query", domToStr.toString());
            final HttpGet httpGet = new HttpGet(builder.build());

            CloseableHttpResponse httpResponse = null;
            InputStream response = null;
            BufferedReader br = null;
            final Set<String> infoAtts;
            try {
                httpResponse = httpClient.execute(httpGet);
                final int responseCode = httpResponse.getStatusLine().getStatusCode();
                if (responseCode != 200) {
                    throw new RuntimeIOException("Response code was not 200. Detected response was " + responseCode);
                }
                response = httpResponse.getEntity().getContent();
                if (this.teeResponse) {
                    // echo the raw response to stderr while it is being read
                    response = new TeeInputStream(response, stderr(), false);
                }
                br = new BufferedReader(new InputStreamReader(response));
                // LinkedHashSet: keep the response order and drop duplicated rows
                infoAtts = br.lines().filter(line -> !StringUtil.isBlank(line)).filter(line -> !line.equals("[success]")).map(line -> tab.split(line)).map(tokens -> {
                    final StringBuilder sb = new StringBuilder();
                    for (int i = 0; i < this.attributes.size(); i++) {
                        if (i > 0) {
                            sb.append("|");
                        }
                        if (this.printLabels) {
                            sb.append(escapeInfo(this.attributes.get(i))).append("|");
                        }
                        // a row shorter than the attribute list yields empty trailing fields
                        sb.append(i < tokens.length ? escapeInfo(tokens[i]) : "");
                    }
                    return sb.toString();
                }).collect(Collectors.toCollection(LinkedHashSet::new));
            } finally {
                // release the connection even when the status check or the read fails;
                // closing stays quiet, as in the previous success-only cleanup
                CloserUtil.close(br);
                CloserUtil.close(response);
                CloserUtil.close(httpResponse);
            }
            if (!infoAtts.isEmpty()) {
                vcb.attribute(this.TAG, new ArrayList<>(infoAtts));
            }
            out.add(vcb.make());
        }
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        throw new RuntimeIOException(err);
    }
}
use of com.github.lindenb.jvarkit.lang.CharSplitter in project jvarkit by lindenb.
the class VcfGnomadOld method doWork.
/**
 * Fills {@code this.manifestEntries} and then runs {@code doVcfToVcf(args, this.outputFile)}.
 * <p>
 * When no manifest file is set, a default manifest is generated for contigs 1-22, X and Y
 * of each {@code OmeType} (only {@code OmeType.genome} when {@code useGenomeOnly} is set);
 * this is available for gnomAD version {@code v2_1} only. Otherwise the manifest is read
 * from the tab-delimited {@code this.manifestFile} (columns: ome type, contig, uri), skipping
 * blank lines and lines starting with '#'.
 *
 * @param args command line arguments forwarded to {@code doVcfToVcf}
 * @return the value returned by {@code doVcfToVcf}, or -1 on any error
 */
@Override
public int doWork(final List<String> args) {
    try {
        if (this.gnomadBufferSize < 10) {
            LOG.error("buffer size is too small " + this.gnomadBufferSize);
            return -1;
        }
        if (this.manifestFile == null) {
            LOG.info("Building default manifest file...");
            for (final OmeType ot : OmeType.values()) {
                if (this.useGenomeOnly && !ot.equals(OmeType.genome)) {
                    continue;
                }
                for (int i = 1; i <= 24; ++i) {
                    final ManifestEntry entry = new ManifestEntry();
                    entry.omeType = ot;
                    switch (i) {
                        case 23:
                            entry.contig = "X";
                            break;
                        case 24:
                            entry.contig = "Y";
                            break;
                        default:
                            entry.contig = String.valueOf(i);
                            break;
                    }
                    switch (gnomadVersion) {
                        case v2_1:
                            if (ot == OmeType.genome) {
                                // no "chrY" for this version for genome
                                if (i == 24) {
                                    // discarded entries are closed, like on the other skip paths
                                    entry.close();
                                    continue;
                                }
                                // genome entries must point at the genomes files
                                // (the two URIs were previously swapped)
                                entry.uri = "https://storage.googleapis.com/gnomad-public/release/2.1/vcf/genomes/gnomad.genomes.r2.1.sites.chr" + entry.contig + ".vcf.bgz";
                            } else {
                                entry.uri = "https://storage.googleapis.com/gnomad-public/release/2.1/vcf/exomes/gnomad.exomes.r2.1.sites.chr" + entry.contig + ".vcf.bgz";
                            }
                            break;
                        default: {
                            entry.close();
                            LOG.error("Building default manifest is not available for version: " + this.gnomadVersion);
                            return -1;
                        }
                    }
                    this.manifestEntries.add(entry);
                }
            }
            LOG.info("Building default manifest file... Done");
        } else {
            final CharSplitter tab = CharSplitter.TAB;
            // Files.lines keeps the file open until the stream is closed
            try (java.util.stream.Stream<String> lines = Files.lines(this.manifestFile.toPath())) {
                lines.forEach(line -> {
                    if (line.startsWith("#") || StringUtil.isBlank(line)) {
                        return;
                    }
                    final String[] tokens = tab.split(line);
                    if (tokens.length < 3) {
                        throw new JvarkitException.TokenErrors("Expected 3 words", tokens);
                    }
                    final ManifestEntry entry = new ManifestEntry();
                    entry.omeType = OmeType.valueOf(tokens[0]);
                    if (this.useGenomeOnly && !entry.omeType.equals(OmeType.genome)) {
                        entry.close();
                        return;
                    }
                    entry.contig = tokens[1].trim();
                    entry.uri = tokens[2].trim();
                    this.manifestEntries.add(entry);
                });
            } catch (final IOException err) {
                LOG.error(err);
                return -1;
            }
        }
        return doVcfToVcf(args, this.outputFile);
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    }
}
use of com.github.lindenb.jvarkit.lang.CharSplitter in project jvarkit by lindenb.
the class VcfGnomadPext method findOverlapping.
/**
 * Finds the buffered entries matching a variant; the tabix file is only queried when the
 * variant falls outside the last fetched interval, to avoid multiple random accesses.
 * <p>
 * The variant contig is first converted with {@code ensemblCtgConvert}; a blank result
 * yields an empty list. On a reload the buffer is refilled with the tabix rows of
 * [max(0, start - 10), end + gnomadBufferSize] on that contig.
 *
 * @param tabix the indexed source to query
 * @param ctx the variant to look up
 * @return the buffered entries having the same start, end, contig and reference as
 *         {@code ctx} and whose alt is one of the alleles of {@code ctx}
 * @throws IOException if reading from the tabix file fails
 */
final List<PextEntry> findOverlapping(final TabixReader tabix, final VariantContext ctx) throws IOException {
    final String contig = this.ensemblCtgConvert.apply(ctx.getContig());
    if (StringUtil.isBlank(contig)) {
        return Collections.emptyList();
    }

    // buffered data from another contig is useless
    final boolean bufferOnOtherContig = !buffer.isEmpty() && !buffer.get(0).contig.equals(contig);
    if (bufferOnOtherContig) {
        this.buffer.clear();
    }

    final boolean mustReload = this.lastInterval == null
            || !this.lastInterval.getContig().equals(contig)
            || !CoordMath.encloses(lastInterval.getStart(), lastInterval.getEnd(), ctx.getStart(), ctx.getEnd());
    if (mustReload) {
        this.buffer.clear();
        this.lastInterval = new Interval(contig, Math.max(0, ctx.getStart() - 10), ctx.getEnd() + VcfGnomadPext.this.gnomadBufferSize);
        final TabixReader.Iterator rows = tabix.query(this.lastInterval.getContig(), this.lastInterval.getStart(), this.lastInterval.getEnd());
        final CharSplitter splitter = CharSplitter.TAB;
        String row;
        // the iterator signals its end with null
        while ((row = rows.next()) != null) {
            this.buffer.add(new PextEntry(splitter.split(row)));
        }
    }

    return this.buffer.stream()
            .filter(entry -> entry.getStart() == ctx.getStart() && entry.getEnd() == ctx.getEnd())
            .filter(entry -> entry.getContig().equals(contig))
            .filter(entry -> entry.ref.equals(ctx.getReference()))
            .filter(entry -> ctx.getAlleles().contains(entry.alt))
            .collect(Collectors.toList());
}
Aggregations