use of org.molgenis.data.MolgenisDataException in project molgenis by molgenis.
the class ExcelUtils method toValue.
/**
 * Gets a cell value as String and processes the value with the given cell processors.
 *
 * <p>Conversion rules per cell type:
 *
 * <ul>
 *   <li>BLANK: {@code null}
 *   <li>STRING: the string cell value
 *   <li>NUMERIC: a formatted date when the cell is date formatted, otherwise the number (written
 *       without fraction when it is an integral value that fits in a {@code long})
 *   <li>BOOLEAN: {@code "true"} or {@code "false"}
 *   <li>FORMULA: the formula is evaluated and the result is converted using the rules above
 * </ul>
 *
 * @param cell the cell to read
 * @param cellProcessors processors passed on to {@code AbstractCellProcessor.processCell}
 * @return the outcome of {@code AbstractCellProcessor.processCell} for the converted value
 * @throws MolgenisDataException if the cell type, or the type of an evaluated formula result, is
 *     not one of the supported types
 */
static String toValue(Cell cell, List<CellProcessor> cellProcessors) {
  String value;
  switch (cell.getCellTypeEnum()) {
    case BLANK:
      value = null;
      break;
    case STRING:
      value = cell.getStringCellValue();
      break;
    case NUMERIC:
      if (DateUtil.isCellDateFormatted(cell)) {
        try {
          // Excel dates are LocalDateTime, stored without timezone.
          // Interpret them as UTC to prevent ambiguous DST overlaps which happen in other
          // timezones.
          LocaleUtil.setUserTimeZone(LocaleUtil.TIMEZONE_UTC);
          Date dateCellValue = cell.getDateCellValue();
          value = formatUTCDateAsLocalDateTime(dateCellValue);
        } finally {
          // always restore the user time zone, also when reading the date fails
          LocaleUtil.resetUserTimeZone();
        }
      } else {
        value = numericToString(cell.getNumericCellValue());
      }
      break;
    case BOOLEAN:
      value = String.valueOf(cell.getBooleanCellValue());
      break;
    case FORMULA:
      // evaluate formula
      FormulaEvaluator evaluator =
          cell.getSheet().getWorkbook().getCreationHelper().createFormulaEvaluator();
      CellValue cellValue = evaluator.evaluate(cell);
      switch (cellValue.getCellTypeEnum()) {
        case BOOLEAN:
          value = String.valueOf(cellValue.getBooleanValue());
          break;
        case NUMERIC:
          if (DateUtil.isCellDateFormatted(cell)) {
            try {
              // Excel dates are LocalDateTime, stored without timezone.
              // Interpret them as UTC to prevent ambiguous DST overlaps which happen in other
              // timezones.
              LocaleUtil.setUserTimeZone(LocaleUtil.TIMEZONE_UTC);
              Date javaDate = DateUtil.getJavaDate(cellValue.getNumberValue(), false);
              value = formatUTCDateAsLocalDateTime(javaDate);
            } finally {
              // always restore the user time zone, also when the conversion fails
              LocaleUtil.resetUserTimeZone();
            }
          } else {
            value = numericToString(cellValue.getNumberValue());
          }
          break;
        case STRING:
          value = cellValue.getStringValue();
          break;
        case BLANK:
          value = null;
          break;
        default:
          throw new MolgenisDataException(
              "unsupported cell type: " + cellValue.getCellTypeEnum());
      }
      break;
    default:
      throw new MolgenisDataException("unsupported cell type: " + cell.getCellTypeEnum());
  }
  return AbstractCellProcessor.processCell(value, false, cellProcessors);
}

/**
 * Converts a numeric cell value to a string. Excel stores integer values as doubles, so an
 * integral value is written as an integer, e.g. {@code 3.0} becomes {@code "3"}.
 *
 * <p>Integral values outside the {@code long} range keep their double representation: casting
 * them to {@code long} would silently clamp them to {@code Long.MIN_VALUE}/{@code
 * Long.MAX_VALUE}.
 */
private static String numericToString(double x) {
  // NaN fails both comparisons, infinities fail the range check
  boolean integral = x == Math.rint(x);
  boolean fitsInLong = x >= -0x1p63 && x < 0x1p63;
  if (integral && fitsInLong) {
    return String.valueOf((long) x);
  }
  return String.valueOf(x);
}
use of org.molgenis.data.MolgenisDataException in project molgenis by molgenis.
the class VcfToEntity method createSampleEntities.
/**
 * Creates one sample entity for every sample of the given VCF record.
 *
 * <p>Each value of a sample is stored under the matching key of the record's format column. The
 * entity ID is {@code entityId} followed by the index of the sample, the name is {@code
 * entityPosAlt + "_" + <sample name>} and the unmodified sample name is stored as well.
 *
 * @param vcfRecord record to read the samples and the format column from
 * @param entityPosAlt prefix for the name of every sample entity
 * @param entityId prefix for the ID of every sample entity
 * @return the created sample entities, empty when the record reports no samples
 * @throws MolgenisDataException if a format key has no attribute in the sample entity type and the
 *     format column is not equal to {@code EMPTY_FORMAT}
 */
private List<Entity> createSampleEntities(VcfRecord vcfRecord, String entityPosAlt, String entityId) {
  List<Entity> result = new ArrayList<>();
  Iterator<VcfSample> sampleCursor = vcfRecord.getSamples().iterator();
  if (vcfRecord.getNrSamples() <= 0) {
    return result;
  }

  Iterator<String> nameCursor = vcfMeta.getSampleNames().iterator();
  int sampleIndex = 0;
  while (sampleCursor.hasNext()) {
    String[] formatKeys = vcfRecord.getFormat();
    VcfSample vcfSample = sampleCursor.next();
    Entity entity = new DynamicEntity(sampleEntityType);

    for (int k = 0; k < formatKeys.length; k++) {
      String key = formatKeys[k];
      String rawValue = vcfSample.getData(k);
      Attribute attribute = entity.getEntityType().getAttribute(key);

      Object typedValue = null;
      if (attribute == null) {
        // an unknown key is only tolerated when the whole format column is the empty format
        if (!Arrays.equals(EMPTY_FORMAT, formatKeys)) {
          throw new MolgenisDataException("Sample entity contains an attribute [" + key + "] which is not specified in vcf headers");
        }
        LOG.debug("Found a dot as format, assuming no samples present");
      } else if (rawValue != null) {
        typedValue = getTypedValue(rawValue, attribute);
      }
      entity.set(key, typedValue);
    }

    entity.set(ID, entityId + sampleIndex);
    // FIXME remove entity ID from Sample label after #1400 is fixed, see also:
    // jquery.molgenis.table.js line 152
    String originalName = nameCursor.next();
    entity.set(NAME, entityPosAlt + "_" + originalName);
    entity.set(ORIGINAL_NAME, originalName);
    result.add(entity);
    sampleIndex++;
  }
  return result;
}
use of org.molgenis.data.MolgenisDataException in project molgenis by molgenis.
the class VcfWriterUtils method writeFormatString.
/**
 * Writes the FORMAT column for the given sample entity, followed by a tab.
 *
 * <p>The column consists of attribute names joined by {@code ':'}. {@code FORMAT_GT} is written
 * first when the entity type has that attribute. The ID attribute, the label attribute and the
 * {@code VcfRepository.ORIGINAL_NAME} attribute are left out.
 *
 * @param writer destination of the format column
 * @param sample sample entity whose entity type determines the format keys
 * @throws IOException if writing to {@code writer} fails
 * @throws MolgenisDataException if no attribute is left to write
 */
private static void writeFormatString(BufferedWriter writer, Entity sample) throws IOException {
  EntityType sampleType = sample.getEntityType();
  StringBuilder formatKeys = new StringBuilder();

  // write GT first if available
  if (sampleType.getAttribute(FORMAT_GT) != null) {
    formatKeys.append(FORMAT_GT);
  }

  for (Attribute candidate : sampleType.getAttributes()) {
    String candidateName = candidate.getName();
    if (candidateName.equals(FORMAT_GT) || candidateName.equals(VcfRepository.ORIGINAL_NAME)) {
      continue;
    }
    // skip the fields that were generated for the use of the entity within molgenis
    if (candidate.equals(sampleType.getIdAttribute())
        || candidate.equals(sampleType.getLabelAttribute())) {
      continue;
    }
    if (formatKeys.length() > 0) {
      formatKeys.append(':');
    }
    formatKeys.append(candidateName);
  }

  if (formatKeys.length() == 0) {
    throw new MolgenisDataException("Missing FORMAT information while trying to print first sample");
  }
  formatKeys.append('\t');
  writer.write(formatKeys.toString());
}
use of org.molgenis.data.MolgenisDataException in project molgenis by molgenis.
the class VcfUtils method getPedigree.
/**
 * Reads the pedigree header lines of a VCF file. Only child, mother and father are supported.
 *
 * <p>Lines are consumed from the scanner until the first line that does not start with {@code
 * VcfRepository.PREFIX}; that line is consumed as well. Every line starting with {@code
 * ##PEDIGREE} is expected to look like {@code
 * ##PEDIGREE=<Child=100400,Mother=100402,Father=100401>}.
 *
 * @param inputVcfFileScanner scanner positioned at the start of the VCF header
 * @return map from child ID to the {@code Trio} of child, mother and father samples
 * @throws MolgenisDataException if a pedigree line contains an element that is not a {@code
 *     Child=}, {@code Mother=} or {@code Father=} assignment, or if one of the three is missing
 */
public static HashMap<String, Trio> getPedigree(Scanner inputVcfFileScanner) {
  HashMap<String, Trio> result = new HashMap<>();
  while (inputVcfFileScanner.hasNextLine()) {
    String line = inputVcfFileScanner.nextLine();
    // quit when we don't see header lines anymore
    if (!line.startsWith(VcfRepository.PREFIX)) {
      break;
    }
    // expecting e.g. ##PEDIGREE=<Child=100400,Mother=100402,Father=100401>
    if (line.startsWith("##PEDIGREE")) {
      System.out.println("Pedigree data line: " + line);
      String childID = null;
      String motherID = null;
      String fatherID = null;
      String lineStripped = line.replace("##PEDIGREE=<", "").replace(">", "");
      String[] lineSplit = lineStripped.split(",", -1);
      for (String element : lineSplit) {
        // Match on the complete "Key=" prefix and take everything after it. Matching on the bare
        // key would accept elements such as "Childx=1" with the whole element as ID, and
        // replacing "Key=" would also strip later occurrences inside the ID itself.
        if (element.startsWith("Child=")) {
          childID = element.substring("Child=".length());
        } else if (element.startsWith("Mother=")) {
          motherID = element.substring("Mother=".length());
        } else if (element.startsWith("Father=")) {
          fatherID = element.substring("Father=".length());
        } else {
          throw new MolgenisDataException(
              "Expected Child, Mother or Father, but found: " + element + " in line " + line);
        }
      }
      if (childID == null || motherID == null || fatherID == null) {
        throw new MolgenisDataException("Missing Child, Mother or Father ID in line " + line);
      }
      result.put(
          childID, new Trio(new Sample(childID), new Sample(motherID), new Sample(fatherID)));
    }
  }
  return result;
}
use of org.molgenis.data.MolgenisDataException in project molgenis by molgenis.
the class VcfReaderFactoryImpl method get.
/**
 * Creates a VCF reader for the file of this factory and registers it in the reader registry.
 *
 * <p>Files ending with {@code .gz} are read through a {@code GZIPInputStream}. For files ending
 * with {@code .zip} the first entry of the archive is read. All other files are read as is. The
 * content is decoded as UTF-8.
 *
 * @return a new reader for the file
 * @throws MolgenisDataException if the file cannot be opened or, for a zip file, the archive
 *     contains no entries
 */
@Override
public VcfReader get() {
  InputStream inputStream = null;
  try {
    String fileName = file.getName();
    if (fileName.endsWith(".zip")) {
      // no FileInputStream here: the zip file is opened on its own, a plain stream would leak
      inputStream = openFirstZipEntry();
    } else {
      inputStream = new FileInputStream(file);
      if (fileName.endsWith(".gz")) {
        // when this constructor fails, inputStream still refers to the file stream that is
        // closed in the catch block below
        inputStream = new GZIPInputStream(inputStream);
      }
    }
    VcfReader reader = new VcfReader(new InputStreamReader(inputStream, UTF_8));
    // bootstrap reader so close() can close all readers
    vcfReaderRegistry.add(reader);
    return reader;
  } catch (IOException e) {
    if (inputStream != null) {
      try {
        inputStream.close();
      } catch (IOException closeFailure) {
        e.addSuppressed(closeFailure);
      }
    }
    throw new MolgenisDataException(
        "Failed to create VCF Reader for file " + file.getAbsolutePath(), e);
  }
}

/**
 * Opens the first entry of the zip file of this factory. The zip file itself is closed when the
 * returned stream is closed, or right away when the entry cannot be opened.
 */
private InputStream openFirstZipEntry() throws IOException {
  final ZipFile zipFile = new ZipFile(file.getPath());
  try {
    Enumeration<? extends ZipEntry> entries = zipFile.entries();
    if (!entries.hasMoreElements()) {
      throw new IOException("Zip file contains no entries");
    }
    // your only file
    InputStream entryStream = zipFile.getInputStream(entries.nextElement());
    return new java.io.FilterInputStream(entryStream) {
      @Override
      public void close() throws IOException {
        try {
          super.close();
        } finally {
          zipFile.close();
        }
      }
    };
  } catch (IOException | RuntimeException e) {
    try {
      zipFile.close();
    } catch (IOException closeFailure) {
      e.addSuppressed(closeFailure);
    }
    throw e;
  }
}
Aggregations