use of com.github.lindenb.jvarkit.lang.CharSplitter in project jvarkit by lindenb.
the class VcfGnomadPext method doVcfToVcf.
@Override
protected int doVcfToVcf(final String inputName, final VCFIterator iter, final VariantContextWriter out) {
final JsonParser jsonParser = new JsonParser();
final String[] standard_pext_header = new String[] { "chrom", "pos", "ref", "alt", "tx_annotation" };
final VCFHeader h0 = iter.getHeader();
final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(h0);
if (!SequenceDictionaryUtils.isGRCh37(dict)) {
LOG.error("Input is NOT GRCh37 ");
return -1;
}
final OrderChecker<VariantContext> orderChecked = new OrderChecker<>(dict, false);
final CharSplitter tab = CharSplitter.TAB;
try (TabixReader gextFileReader = new TabixReader(this.pextDatabasePath)) {
final String line1 = gextFileReader.readLine();
if (StringUtils.isBlank(line1)) {
LOG.error("Cannot read first line of " + this.pextDatabasePath);
return -1;
}
if (!Arrays.equals(tab.split(line1), standard_pext_header)) {
LOG.error("Bad header in " + this.pextDatabasePath + " expected " + String.join("(tab)", standard_pext_header) + " but got " + line1.replace("\t", "(tab)"));
return -1;
}
final VCFHeader h2 = new VCFHeader(h0);
final VCFInfoHeaderLine pexInfo = new VCFInfoHeaderLine("GNOMAD_PEXT", VCFHeaderLineCount.A, VCFHeaderLineType.String, "Gnomad pext Data from " + this.pextDatabasePath);
h2.addMetaDataLine(pexInfo);
JVarkitVersion.getInstance().addMetaData(this, h2);
out.writeHeader(h2);
while (iter.hasNext()) {
final VariantContext ctx = orderChecked.apply(iter.next());
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
vcb.rmAttribute(pexInfo.getID());
if (!ctx.isVariant() || (this.skipFiltered && ctx.isFiltered())) {
out.add(vcb.make());
continue;
}
final List<PextEntry> entries = findOverlapping(gextFileReader, ctx);
if (entries.isEmpty()) {
out.add(vcb.make());
continue;
}
final List<String> altInfo = new ArrayList<>(ctx.getAlleles().size());
for (int allele_idx = 1; /* 0 is ref */
allele_idx < ctx.getAlleles().size(); allele_idx++) {
final Allele alt = ctx.getAlleles().get(allele_idx);
final PextEntry entry = entries.stream().filter(E -> E.alt.equals(alt)).findFirst().orElse(null);
if (entry == null) {
altInfo.add(".");
} else {
final JsonElement e = jsonParser.parse(entry.jsonStr);
if (!e.isJsonArray())
throw new IllegalStateException("not an array: " + entry.jsonStr);
final JsonArray array = e.getAsJsonArray();
final StringBuilder sb = new StringBuilder();
for (int x = 0; x < array.size(); ++x) {
if (x > 0)
sb.append("&");
if (!array.get(x).isJsonObject())
throw new IllegalStateException("not an array: " + entry.jsonStr);
final StringBuilder sb2 = new StringBuilder();
final JsonObject obj = array.get(x).getAsJsonObject();
for (final Map.Entry<String, JsonElement> kv : obj.entrySet()) {
String key = kv.getKey();
// "Brain_FrontalCortex_BA9_": 1.0,
if (key.endsWith("_"))
key = key.substring(0, key.length() - 1);
// as far as I can see, tissues start with a uppercase
if (Character.isUpperCase(key.charAt(0)) && !this.restrictTissues.isEmpty() && !this.restrictTissues.contains(key))
continue;
final JsonElement v = kv.getValue();
if (v.isJsonNull())
continue;
if (v.getAsJsonPrimitive().isString()) {
final String strv = v.getAsString();
if (strv.equals("NaN"))
continue;
}
if (sb2.length() > 0)
sb2.append("|");
sb2.append(key);
sb2.append(":");
sb2.append(kv.getValue().getAsString());
}
sb.append(sb2.toString());
}
if (sb.length() == 0)
sb.append(".");
altInfo.add(sb.toString());
}
}
// at least none is not '.'
if (altInfo.stream().anyMatch(S -> !S.equals("."))) {
vcb.attribute(pexInfo.getID(), altInfo);
}
out.add(vcb.make());
}
out.close();
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
} finally {
}
}
use of com.github.lindenb.jvarkit.lang.CharSplitter in project jvarkit by lindenb.
the class ScanRetroCopy method doWork.
@Override
public int doWork(final List<String> args) {
if (this.min_depth < 1) {
LOG.error("Bad min depth");
return -1;
}
SamReader sr = null;
VariantContextWriter vcw0 = null;
CloseableIterator<SAMRecord> iter = null;
SAMFileWriter sfw = null;
try {
/* load the reference genome */
this.indexedFastaSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(this.faidx);
final SAMSequenceDictionary refDict = SequenceDictionaryUtils.extractRequired(this.indexedFastaSequenceFile);
/* create a contig name converter from the REF */
this.refCtgNameConverter = ContigNameConverter.fromOneDictionary(refDict);
/* READ KNOWGENES FILES */
LOG.info("Loading " + this.knownGeneUri);
try (BufferedReader br = IOUtils.openURIForBufferedReading(this.knownGeneUri)) {
String line;
final CharSplitter tab = CharSplitter.TAB;
while ((line = br.readLine()) != null) {
if (StringUtils.isBlank(line))
continue;
final String[] tokens = tab.split(line);
final KnownGene kg = new KnownGene(tokens);
if (kg.getExonCount() < 2)
continue;
if (this.onlyCodingTranscript && kg.isNonCoding())
continue;
final String ctg = this.refCtgNameConverter.apply(kg.getContig());
if (StringUtils.isBlank(ctg))
continue;
kg.setChrom(ctg);
final Interval interval = new Interval(ctg, kg.getTxStart() + 1, kg.getTxEnd(), kg.isNegativeStrand(), kg.getName());
List<KnownGene> L = this.knownGenesMap.get(interval);
if (L == null) {
L = new ArrayList<KnownGene>();
this.knownGenesMap.put(interval, L);
}
L.add(kg);
}
}
if (this.knownGenesMap.isEmpty()) {
LOG.error("no gene found in " + this.knownGeneUri);
return -1;
}
LOG.info("Number of transcripts: " + this.knownGenesMap.values().stream().flatMap(L -> L.stream()).count());
// open the sam file
final SamReaderFactory samReaderFactory = super.createSamReaderFactory();
samReaderFactory.referenceSequence(this.faidx);
sr = samReaderFactory.open(SamInputResource.of(oneFileOrNull(args)));
final SAMFileHeader samFileHeader = sr.getFileHeader();
// check it's sorted on coordinate
if (!samFileHeader.getSortOrder().equals(SAMFileHeader.SortOrder.coordinate)) {
LOG.error("input is not sorted on coordinate but \"" + samFileHeader.getSortOrder() + "\"");
return -1;
}
if (this.saveBamTo != null) {
sfw = new SAMFileWriterFactory().makeSAMOrBAMWriter(samFileHeader, true, this.saveBamTo);
}
if (this.use_bai && !sr.hasIndex()) {
LOG.warning("Cannot used bai because input is not indexed");
}
// if there is a bai, only query the bam in the regions of splicing
if (this.use_bai && sr.hasIndex()) {
LOG.info("building intervals...");
final SAMSequenceDictionary samdict = SequenceDictionaryUtils.extractRequired(samFileHeader);
final ContigNameConverter samConvert = ContigNameConverter.fromOneDictionary(samdict);
final List<QueryInterval> intervalsL = this.knownGenesMap.values().stream().flatMap(K -> K.stream()).filter(KG -> samConvert.apply(KG.getContig()) != null).flatMap(KG -> KG.getExons().stream()).flatMap(exon -> {
// we need the reads overlapping the exon bounds
final int tid = samdict.getSequenceIndex(samConvert.apply(exon.getGene().getContig()));
final QueryInterval q1 = new QueryInterval(tid, exon.getStart() + 1, exon.getStart() + 1);
final QueryInterval q2 = new QueryInterval(tid, exon.getEnd(), exon.getEnd());
return Arrays.stream(new QueryInterval[] { q1, q2 });
}).sorted().collect(Collectors.toList());
final QueryInterval[] intervals = QueryInterval.optimizeIntervals(intervalsL.toArray(new QueryInterval[intervalsL.size()]));
// GC
intervalsL.clear();
LOG.debug("Query bam using " + intervals.length + " random access intervals. Please wait...");
iter = sr.queryOverlapping(intervals);
} else {
iter = sr.iterator();
}
final Set<String> samples = this.partiton.getPartitions(samFileHeader);
if (samples.isEmpty()) {
LOG.error("No sample was defined in the read group of the input bam.");
return -1;
}
/**
* build vcf header
*/
final Set<VCFHeaderLine> metaData = new HashSet<>();
metaData.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY, true));
metaData.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_QUALITY_KEY, true));
metaData.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.DEPTH_KEY, true));
metaData.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_ALLELE_DEPTHS, true));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY, true));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_NUMBER_KEY, true));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY, true));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY, true));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_FREQUENCY_KEY, true));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY, true));
metaData.add(new VCFInfoHeaderLine(VCFConstants.SVTYPE, 1, VCFHeaderLineType.String, "Variation type"));
metaData.add(new VCFInfoHeaderLine("SVLEN", 1, VCFHeaderLineType.Integer, "Variation Length"));
metaData.add(new VCFInfoHeaderLine(ATT_BEST_MATCHING_LENGTH, 1, VCFHeaderLineType.Integer, "Best Matching length"));
metaData.add(new VCFFormatHeaderLine(ATT_BEST_MATCHING_LENGTH, 1, VCFHeaderLineType.Integer, "Best Matching length"));
metaData.add(new VCFFormatHeaderLine(ATT_GT_INTRON, 1, VCFHeaderLineType.String, "Introns info: (intron-0-idx,valid,dp-5,dp-3,max-len-5,max-len-3,avg-5,avg-3)*"));
// metaData.add(new VCFFormatHeaderLine(ATT_COUNT_SUPPORTING_READS, 2,VCFHeaderLineType.Integer,"Count supporting reads [intron-left/intron-right]"));
// metaData.add(new VCFInfoHeaderLine(ATT_RETRO_DESC, VCFHeaderLineCount.UNBOUNDED,VCFHeaderLineType.String,
// "Retrocopy attributes: transcript-id|strand|exon-left|exon-left-bases|exon-right-bases|exon-right"));
metaData.add(new VCFInfoHeaderLine(ATT_INSERTION, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Possible place of insertion:" + "chr:start-end|count-evidence|mate-genes|non-coding|distance"));
metaData.add(new VCFInfoHeaderLine(ATT_KG_STRAND, 1, VCFHeaderLineType.String, "KnownGene strand."));
metaData.add(new VCFInfoHeaderLine(ATT_INTRONS_COUNT, 1, VCFHeaderLineType.Integer, "Number of introns for the Transcript"));
metaData.add(new VCFInfoHeaderLine(ATT_INTRONS_CANDIDATE_COUNT, 1, VCFHeaderLineType.Integer, "Number of introns found retrocopied for the transcript"));
metaData.add(new VCFInfoHeaderLine(ATT_INTRONS_CANDIDATE_FRACTION, 1, VCFHeaderLineType.Float, "Fraction of introns found retrocopied for the transcript"));
metaData.add(new VCFInfoHeaderLine(ATT_INTRONS_INFO, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Introns found: chr|start|end|seq-left|seq-right"));
metaData.add(new VCFFilterHeaderLine(ATT_FILTER_NONDOCODING, "Only non-coding transcripts"));
metaData.add(new VCFInfoHeaderLine(ATT_SAMPLES, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Samples found. partition:" + this.partiton.name()));
metaData.add(new VCFFilterHeaderLine(ATT_LOW_DEPTH_FILTER + this.low_depth_threshold, "Number of read is lower than :" + this.min_depth));
metaData.add(new VCFFilterHeaderLine(ATT_NOT_ALL_INTRONS, "Not all introns were found retrocopied"));
final VCFHeader header = new VCFHeader(metaData, samples);
JVarkitVersion.getInstance().addMetaData(this, header);
header.setSequenceDictionary(refDict);
/* open vcf for writing*/
vcw0 = super.openVariantContextWriter(this.outputFile);
final VariantContextWriter vcw = vcw0;
vcw.writeHeader(header);
/* save gene writer */
if (this.saveBedPeTo != null) {
this.saveInsertionsPw = super.openPathOrStdoutAsPrintWriter(this.saveBedPeTo);
} else {
this.saveInsertionsPw = new PrintWriter(new NullOuputStream());
}
final ProgressFactory.Watcher<SAMRecord> progress = ProgressFactory.newInstance().dictionary(samFileHeader).logger(LOG).build();
while (iter.hasNext()) {
final SAMRecord rec = progress.apply(iter.next());
if (rec.getReadUnmappedFlag())
continue;
if (rec.getMappingQuality() < this.min_read_mapq)
continue;
if (rec.isSecondaryOrSupplementary())
continue;
if (rec.getDuplicateReadFlag())
continue;
final byte[] bases = rec.getReadBases();
if (bases == null || bases == SAMRecord.NULL_SEQUENCE)
continue;
final Cigar cigar = rec.getCigar();
if (cigar == null || cigar.numCigarElements() < 2)
continue;
final String refContig = this.refCtgNameConverter.apply(rec.getContig());
if (StringUtils.isBlank(refContig))
continue;
boolean save_read_to_bam = false;
/* get sample */
final String sampleName = this.partiton.getPartion(rec, null);
if (StringUtils.isBlank(sampleName))
continue;
/* this is a new reference sequence */
if (this.genomicSequence == null || !this.genomicSequence.getChrom().equals(refContig)) {
if (this.genomicSequence != null) {
/* DUMP things BEFORE changing the reference sequence!!! */
/* dump buffer */
dump(vcw, null);
}
/* map transcript-name to their transcript */
/*this.kgId2knownGenes.clear();
this.knownGenesMap.values().
stream().
flatMap(L->L.stream()).
filter(G->refContig.equals(G.getContig())).
forEach(K->this.kgId2knownGenes.put(K.getName(), K));*/
/* now, we can change genomicSequence */
this.genomicSequence = new GenomicSequence(this.indexedFastaSequenceFile, refContig);
}
final CigarElement leftCigar = cigar.getCigarElement(0);
final CigarElement rightCigar = cigar.getCigarElement(cigar.numCigarElements() - 1);
/* both ends are not candidate */
if (!isCandidateCigarElement(leftCigar) && !isCandidateCigarElement(rightCigar)) {
continue;
}
final List<KnownGene> genes = this.knownGenesMap.getOverlapping(new Interval(refContig, rec.getUnclippedStart(), rec.getUnclippedEnd())).stream().flatMap(L -> L.stream()).collect(Collectors.toList());
/* time to time dump the buffer for the transcripts before the current ones*/
if (!genes.isEmpty()) {
// get all genes overlapping the current set of genes
int minTxStart = genes.stream().mapToInt(K -> K.getTxStart()).min().getAsInt();
final int maxTxStart = genes.stream().mapToInt(K -> K.getTxEnd()).max().getAsInt();
// update minTxtStart to get the lowest gene overlapping the set of genes
minTxStart = this.knownGenesMap.getOverlapping(new Interval(refContig, minTxStart, maxTxStart)).stream().flatMap(L -> L.stream()).mapToInt(K -> K.getStart()).min().getAsInt();
// not max, because we only need the 5' side
dump(vcw, new Interval(refContig, minTxStart, minTxStart));
}
/* test each side of the clipped read */
for (int side = 0; side < 2 && !genes.isEmpty(); ++side) {
final CigarElement ce_side = (side == 0 ? leftCigar : rightCigar);
if (!isCandidateCigarElement(ce_side))
continue;
for (final KnownGene knownGene : genes) {
for (int exonIndex = 0; exonIndex < knownGene.getExonCount(); exonIndex++) {
if (side == 0) /* looking at cigar string in 5' */
{
if (exonIndex == 0)
continue;
// first 'M' element
final CigarLocatable cigarM = new CigarLocatable(refContig, rec, 1);
// first cigar element
final CigarLocatable cigarS = new CigarLocatable(refContig, rec, 0);
// current exon
final ExonOne exonRight = new ExonOne(knownGene.getExon(exonIndex));
if (!cigarM.overlaps(exonRight))
continue;
if (!(exonRight.getStart() >= cigarM.getStart()))
continue;
// get exon on the left
final ExonOne exonLeft = new ExonOne(knownGene.getExon(exonIndex - 1));
if (exonLeft.getLengthOnReference() < this.minCigarSize)
continue;
/* end of cigar 'M' can have same bases than the prev exon. */
final int malus = exonRight.getStart() - cigarM.getStart();
int genomic1 = exonLeft.getEnd() - malus;
if (genomic1 < exonLeft.getStart() || genomic1 > exonLeft.getEnd()) {
continue;
}
int matchLength = (this.use_malus ? malus : 0);
int readIdx0 = cigarS.size() - 1;
// loop over sequence
while (readIdx0 >= 0 && genomic1 >= exonLeft.getStart()) {
final char read_base = cigarS.readBaseAt0(readIdx0);
final char genome_base = exonLeft.charAt1(genomic1);
if (read_base != genome_base) {
break;
}
readIdx0--;
matchLength++;
genomic1--;
}
if (matchLength < this._priv_ignoreCigarSize)
continue;
final KnownGene.Intron intron = knownGene.getIntron(exonIndex - 1);
// find match or create new
final Match match = new Match(intron, sampleName, rec, SIDE_3_PRIME, matchLength);
this.intronBuffer.add(match);
save_read_to_bam = true;
} else /* test last cigar */
{
if (exonIndex + 1 >= knownGene.getExonCount())
continue;
// last 'M' element
final CigarLocatable cigarM = new CigarLocatable(refContig, rec, cigar.numCigarElements() - 2);
// last cigar element
final CigarLocatable cigarS = new CigarLocatable(refContig, rec, cigar.numCigarElements() - 1);
// current exon
final ExonOne exonLeft = new ExonOne(knownGene.getExon(exonIndex));
if (!cigarM.overlaps(exonLeft))
continue;
if (!(exonLeft.getEnd() <= cigarM.getEnd()))
continue;
// get next exon
final ExonOne exonRight = new ExonOne(knownGene.getExon(exonIndex + 1));
if (exonRight.getLengthOnReference() < this.minCigarSize)
continue;
/* end of cigar 'M' can have same bases than the next exon. */
final int malus = cigarM.getEnd() - exonLeft.getEnd();
int genomic1 = exonRight.getStart() + malus;
if (genomic1 < exonRight.getStart() || genomic1 > exonRight.getEnd()) {
continue;
}
int matchLength = (this.use_malus ? malus : 0);
int readIdx0 = 0;
// loop over sequence
while (readIdx0 < cigarS.size() && genomic1 <= exonRight.getEnd()) {
final char read_base = cigarS.readBaseAt0(readIdx0);
final char genome_base = exonRight.charAt1(genomic1);
if (read_base != genome_base) {
break;
}
readIdx0++;
matchLength++;
genomic1++;
}
if (matchLength < this._priv_ignoreCigarSize)
continue;
// find match or create new
final KnownGene.Intron intron = knownGene.getIntron(exonIndex);
final Match match = new Match(intron, sampleName, rec, SIDE_5_PRIME, matchLength);
this.intronBuffer.add(match);
save_read_to_bam = true;
}
}
}
}
// end for side
if (save_read_to_bam && sfw != null)
sfw.addAlignment(rec);
}
/* dump buffer */
dump(vcw, null);
progress.close();
vcw.close();
iter.close();
iter = null;
sr.close();
sr = null;
this.saveInsertionsPw.flush();
this.saveInsertionsPw.close();
this.saveInsertionsPw = null;
if (sfw != null) {
sfw.close();
sfw = null;
}
return 0;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(iter);
CloserUtil.close(sr);
CloserUtil.close(vcw0);
CloserUtil.close(sfw);
CloserUtil.close(this.indexedFastaSequenceFile);
CloserUtil.close(this.saveInsertionsPw);
}
}
use of com.github.lindenb.jvarkit.lang.CharSplitter in project jvarkit by lindenb.
the class PlateOptimizer method doWork.
@Override
public int doWork(final List<String> args) {
try {
if (this.seed != -1) {
this.random = new Random(this.seed);
} else {
this.random = new Random(System.currentTimeMillis());
}
final CharSplitter tab = CharSplitter.TAB;
final String input = oneFileOrNull(args);
try (BufferedReader br = (input == null ? IOUtils.openStreamForBufferedReader(stdin()) : IOUtils.openURIForBufferedReading(input))) {
String line = br.readLine();
if (line == null) {
LOG.error("cannot read first line");
}
this.headers.addAll(Arrays.asList(tab.split(line)));
for (int i = 0; i < this.headers.size(); ++i) this.column2index.put(this.headers.get(i), i);
LOG.info("header: " + String.join(";", this.headers));
while ((line = br.readLine()) != null) {
if (StringUtils.isBlank(line))
continue;
line = line.replaceAll("<e9>", "e");
final String[] tokens = tab.split(line);
if (tokens.length != this.headers.size())
throw new JvarkitException.TokenErrors(this.headers.size(), tokens);
final Content content = new Content(this.all_contents.size(), tokens);
this.all_contents.add(content);
}
}
if (this.all_contents.isEmpty()) {
LOG.error("no data defined");
return -1;
}
Runtime.getRuntime().addShutdownHook(new Thread() {
public void run() {
ctrlc_catched_count--;
if (ctrlc_catched_count == 0) {
LOG.warn("Shutting down ...");
Runtime.getRuntime().removeShutdownHook(this);
} else {
LOG.info("press again " + ctrlc_catched_count + " times.");
}
}
});
long n_iteration = 0L;
while (this.ctrlc_catched_count > 0) {
iteration(++n_iteration);
}
ctrlc_catched_count = 0;
LOG.info("done");
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
} finally {
}
}
use of com.github.lindenb.jvarkit.lang.CharSplitter in project jvarkit by lindenb.
the class VcfGeneOntology method readGOA.
protected void readGOA() throws IOException {
if (this.name2go != null)
return;
this.name2go = new HashMap<String, Set<GOOntology.Term>>();
LOG.info("read GOA " + GOA);
final CharSplitter tab = CharSplitter.TAB;
BufferedReader in = IOUtils.openURIForBufferedReading(GOA);
String line;
while ((line = in.readLine()) != null) {
if (line.isEmpty() || line.startsWith("!"))
continue;
String[] tokens = tab.split(line, 6);
if (tokens.length < 6)
continue;
GOOntology.Term term = this.goTree.getTermByAccession(tokens[4]);
if (term == null) {
continue;
}
Set<GOOntology.Term> set = this.name2go.get(tokens[2]);
if (set == null) {
set = new HashSet<GOOntology.Term>();
this.name2go.put(tokens[2], set);
}
set.add(term);
}
in.close();
LOG.info("GOA size:" + this.name2go.size());
}
use of com.github.lindenb.jvarkit.lang.CharSplitter in project jvarkit by lindenb.
the class TestSupport method assertTsvTableIsConsitent.
public void assertTsvTableIsConsitent(final Path f, Predicate<String> ignoreLine) {
final CharSplitter tab = CharSplitter.TAB;
BufferedReader r = null;
try {
Assert.assertTrue(Files.exists(f), "file " + f + " should exist");
r = IOUtils.openPathForBufferedReading(f);
int nCols = -1;
int nRow = 0;
String line;
while ((line = r.readLine()) != null) {
if (ignoreLine != null && ignoreLine.test(line))
continue;
nRow++;
final String[] tokens = tab.split(line);
final int c = tokens.length;
if (nRow == 1) {
nCols = c;
} else {
if (nCols != c) {
for (int i = 0; i < tokens.length; i++) {
System.err.println("$" + (i + 1) + ":" + tokens[i]);
}
}
Assert.assertEquals(nCols, c, "Line " + line + " expected " + nCols + " tokens but got " + c);
}
}
} catch (final Exception e) {
e.printStackTrace();
Assert.fail();
} finally {
CloserUtil.close(r);
}
}
Aggregations