Usage example of edu.harvard.iq.dataverse.DataTable in the IQSS "dataverse" project, taken from the class CSVFileReaderTest, method testVariableUNFs:
/*
 * UNF test;
 * I'd like to use a file with more interesting values - "special" numbers, freaky dates, accents, etc.
 * for this. But checking it in with this simple file, for now.
 * (thinking about it, the "csv file from hell" may be a better test case for the UNF test)
 */
@Test
public void testVariableUNFs() {
    // Ingests a known CSV file and checks that the UNF signature computed for
    // each column (variable) of the resulting tab-delimited file matches a
    // pre-computed expected value.
    String testFile = "src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/election_precincts.csv";
    Long expectedNumberOfVariables = 13L;
    // aka the number of lines in the TAB file produced by the ingest plugin
    Long expectedNumberOfCases = 24L;
    String[] expectedUNFs = { "UNF:6:wb7OATtNC/leh1sOP5IGDQ==", "UNF:6:0V3xQ3ea56rzKwvGt9KBCA==", "UNF:6:0V3xQ3ea56rzKwvGt9KBCA==", "UNF:6:H9inAvq5eiIHW6lpqjjKhQ==", "UNF:6:Bh0M6QvunZwW1VoTyioRCQ==", "UNF:6:o5VTaEYz+0Kudf6hQEEupQ==", "UNF:6:eJRvbDJkIeDPrfN2dYpRfA==", "UNF:6:JD1wrtM12E7evrJJ3bRFGA==", "UNF:6:xUKbK9hb5o0nL5/mYiy7Bw==", "UNF:6:Mvq3BrdzoNhjndMiVr92Ww==", "UNF:6:KkHM6Qlyv3QlUd+BKqqB3Q==", "UNF:6:EWUVuyXKSpyllsrjHnheig==", "UNF:6:ri9JsRJxM2xpWSIq17oWNw==" };
    TabularDataIngest ingestResult = null;
    File generatedTabFile = null;
    DataTable generatedDataTable = null;
    try (BufferedInputStream stream = new BufferedInputStream(new FileInputStream(testFile))) {
        CSVFileReader instance = new CSVFileReader(new CSVFileReaderSpi());
        ingestResult = instance.read(stream, null);
        generatedTabFile = ingestResult.getTabDelimitedFile();
        generatedDataTable = ingestResult.getDataTable();
    } catch (IOException ex) {
        fail("" + ex);
    }
    assertNotNull(generatedDataTable);
    assertNotNull(generatedDataTable.getDataVariables());
    // JUnit convention is assertEquals(expected, actual); the original had the
    // arguments reversed, which produces misleading failure messages.
    // Long.valueOf replaces the deprecated new Long(...) boxing constructor.
    assertEquals(Long.valueOf(generatedDataTable.getDataVariables().size()), generatedDataTable.getVarQuantity());
    assertEquals(expectedNumberOfVariables, generatedDataTable.getVarQuantity());
    assertEquals(expectedNumberOfCases, generatedDataTable.getCaseQuantity());
    int caseCount = generatedDataTable.getCaseQuantity().intValue();
    for (int i = 0; i < expectedNumberOfVariables; i++) {
        String unf = null;
        // Continuous (floating point) variables:
        if (generatedDataTable.getDataVariables().get(i).isIntervalContinuous()) {
            // try-with-resources: the original opened a FileInputStream per
            // column and never closed it (resource leak).
            try (FileInputStream generatedTabInputStream = new FileInputStream(generatedTabFile)) {
                Double[] columnVector = TabularSubsetGenerator.subsetDoubleVector(generatedTabInputStream, i, caseCount);
                unf = UNFUtil.calculateUNF(columnVector);
            } catch (FileNotFoundException ioex) {
                fail("Failed to open generated tab-delimited file for reading" + ioex);
            } catch (IOException | UnfException ioex) {
                fail("Failed to generate the UNF for variable number " + i + ", (" + generatedDataTable.getDataVariables().get(i).getName() + ", floating point)");
            }
        }
        // Discrete numeric (integer) variables:
        if (generatedDataTable.getDataVariables().get(i).isIntervalDiscrete() && generatedDataTable.getDataVariables().get(i).isTypeNumeric()) {
            try (FileInputStream generatedTabInputStream = new FileInputStream(generatedTabFile)) {
                Long[] columnVector = TabularSubsetGenerator.subsetLongVector(generatedTabInputStream, i, caseCount);
                unf = UNFUtil.calculateUNF(columnVector);
            } catch (FileNotFoundException ioex) {
                fail("Failed to open generated tab-delimited file for reading" + ioex);
            } catch (IOException | UnfException ioex) {
                fail("Failed to generate the UNF for variable number " + i + ", (" + generatedDataTable.getDataVariables().get(i).getName() + ", integer)");
            }
        }
        // Character variables (with special handling for date/time formats):
        if (generatedDataTable.getDataVariables().get(i).isTypeCharacter()) {
            String[] dateFormats = null;
            try (FileInputStream generatedTabInputStream = new FileInputStream(generatedTabFile)) {
                String[] columnVector = TabularSubsetGenerator.subsetStringVector(generatedTabInputStream, i, caseCount);
                // Special handling for Character strings that encode dates and times:
                if ("time".equals(generatedDataTable.getDataVariables().get(i).getFormatCategory()) || "date".equals(generatedDataTable.getDataVariables().get(i).getFormatCategory())) {
                    dateFormats = new String[expectedNumberOfCases.intValue()];
                    for (int j = 0; j < expectedNumberOfCases; j++) {
                        dateFormats[j] = generatedDataTable.getDataVariables().get(i).getFormat();
                    }
                }
                if (dateFormats == null) {
                    unf = UNFUtil.calculateUNF(columnVector);
                } else {
                    unf = UNFUtil.calculateUNF(columnVector, dateFormats);
                }
            } catch (FileNotFoundException ioex) {
                fail("Failed to open generated tab-delimited file for reading" + ioex);
            } catch (IOException | UnfException iex) {
                fail("Failed to generate the UNF for variable number " + i + ", (" + generatedDataTable.getDataVariables().get(i).getName() + ", " + (dateFormats == null ? "String" : "Date/Time value") + ")");
            }
        }
        assertEquals("Variable number " + i + ":", expectedUNFs[i], unf);
    }
}
Usage example of edu.harvard.iq.dataverse.DataTable in the IQSS "dataverse" project, taken from the class ExternalToolServiceBeanTest, method testfindAll:
@Test
public void testfindAll() {
    // Verifies that findExternalToolsByFile() returns the EXPLORE tool for a
    // file that carries tabular data (i.e., has at least one DataTable).
    DataFile dataFile = new DataFile();
    dataFile.setId(42L); // uppercase L: lowercase 'l' reads like the digit 1
    List<DataTable> dataTables = new ArrayList<>();
    dataTables.add(new DataTable());
    dataFile.setDataTables(dataTables);
    ApiToken apiToken = new ApiToken();
    apiToken.setTokenString("7196b5ce-f200-4286-8809-03ffdbc255d7");
    ExternalTool.Type type = ExternalTool.Type.EXPLORE;
    ExternalTool externalTool = new ExternalTool("displayName", "description", type, "http://foo.com", "{}");
    // The handler instance was never used (former local 'externalToolHandler4');
    // the construction is kept, presumably as a smoke check that it does not
    // throw for this tool/file/token combination - TODO confirm intent.
    new ExternalToolHandler(externalTool, dataFile, apiToken);
    List<ExternalTool> externalTools = new ArrayList<>();
    externalTools.add(externalTool);
    List<ExternalTool> availableExternalTools = ExternalToolServiceBean.findExternalToolsByFile(externalTools, dataFile);
    // JUnit convention: expected value first, actual second.
    assertEquals(1, availableExternalTools.size());
}
Usage example of edu.harvard.iq.dataverse.DataTable in the IQSS "dataverse" project, taken from the class DataTableImportDDI, method processLocation:
/**
 * Processes a DDI location element for a variable: copies the fixed-width
 * position attributes (StartPos/EndPos/RecSegNo) onto the variable, and
 * attaches the variable to the DataTable identified by the "fileid"
 * attribute - creating the table on first sight and tracking the per-file
 * variable order in {@code varsPerFileMap}.
 *
 * @throws XMLStreamException if the variable already has a DataTable (see note below)
 */
private void processLocation(XMLStreamReader xmlr, DataVariable dv, Map<String, DataTable> dataTablesMap, Map<String, Integer> varsPerFileMap) throws XMLStreamException {
    // If these fields don't convert to Long, just leave them blank.
    // Long.valueOf replaces the deprecated new Long(String) constructor; it
    // likewise throws NumberFormatException on a null/non-numeric attribute.
    try {
        dv.setFileStartPosition(Long.valueOf(xmlr.getAttributeValue(null, "StartPos")));
    } catch (NumberFormatException ex) {
        // ignored by design - attribute is optional
    }
    try {
        dv.setFileEndPosition(Long.valueOf(xmlr.getAttributeValue(null, "EndPos")));
    } catch (NumberFormatException ex) {
        // ignored by design - attribute is optional
    }
    try {
        dv.setRecordSegmentNumber(Long.valueOf(xmlr.getAttributeValue(null, "RecSegNo")));
    } catch (NumberFormatException ex) {
        // ignored by design - attribute is optional
    }
    if (dv.getDataTable() == null) {
        String fileId = xmlr.getAttributeValue(null, "fileid");
        if (fileId != null && !fileId.isEmpty()) {
            // Single map lookup (the original called get() twice).
            DataTable datatable = dataTablesMap.get(fileId);
            if (datatable == null) {
                datatable = new DataTable();
                dataTablesMap.put(fileId, datatable);
                varsPerFileMap.put(fileId, 0);
            }
            dv.setDataTable(datatable);
            if (datatable.getDataVariables() == null) {
                datatable.setDataVariables(new ArrayList<>());
            }
            datatable.getDataVariables().add(dv);
            // Assign the variable's position within its file, then advance
            // the per-file counter.
            int filePosition = varsPerFileMap.get(fileId);
            dv.setFileOrder(filePosition++);
            varsPerFileMap.put(fileId, filePosition);
        }
    } else {
        // NOTE(review): this branch fires when the variable ALREADY has a
        // DataTable, yet the message describes a missing location attribute.
        // The condition and the message look inconsistent - confirm intent
        // before changing; behavior preserved as-is.
        throw new XMLStreamException("Empty or NULL location attribute in a variable section.");
    }
}
Usage example of edu.harvard.iq.dataverse.DataTable in the IQSS "dataverse" project, taken from the class DDIExportServiceBean, method createDataFileDDI:
/*
 * Produces the <fileDscr> and <dataDscr><var> portions of the DDI that
 * describe the tabular data in this file - the file-, datatable- and
 * variable-level metadata - or a subset thereof, as selected by the
 * "include"/"exclude" field sets.
 *
 * Only reached for /api/meta/file requests; for a study export,
 * createFileDscr and createData/createVar are called separately. That is
 * why the top-level <codeBook> header is written here as well.
 */
private void createDataFileDDI(XMLStreamWriter xmlw, Set<String> excludedFieldSet, Set<String> includedFieldSet, DataFile df) throws XMLStreamException {
    xmlw.writeStartElement("codeBook");
    xmlw.writeDefaultNamespace("http://www.icpsr.umich.edu/DDI");
    writeAttribute(xmlw, "version", "2.0");
    createStdyDscr(xmlw, excludedFieldSet, includedFieldSet, df.getOwner().getLatestVersion());
    DataTable dataTable = fileService.findDataTableByFileId(df.getId());
    if (checkField("fileDscr", excludedFieldSet, includedFieldSet)) {
        createFileDscr(xmlw, excludedFieldSet, null, df, dataTable);
    }
    // Variable-level metadata:
    xmlw.writeStartElement("dataDscr");
    if (checkField("var", excludedFieldSet, includedFieldSet)) {
        for (DataVariable dataVariable : variableService.findByDataTableId(dataTable.getId())) {
            createVarDDI(xmlw, excludedFieldSet, null, dataVariable);
        }
    }
    xmlw.writeEndElement(); // </dataDscr>
    xmlw.writeEndElement(); // </codeBook>
}
Usage example of edu.harvard.iq.dataverse.DataTable in the IQSS "dataverse" project, taken from the class DDIExportServiceBean, method createDatasetDDI:
/*
 * Writes a complete <codeBook> DDI document for a dataset version: the
 * study description, then per-file sections - <fileDscr>/<dataDscr> for
 * tabular files and <othrMat> for everything else - filtered by the
 * "include"/"exclude" field sets.
 */
private void createDatasetDDI(XMLStreamWriter xmlw, Set<String> excludedFieldSet, Set<String> includedFieldSet, DatasetVersion version) throws XMLStreamException {
    xmlw.writeStartElement("codeBook");
    xmlw.writeDefaultNamespace("http://www.icpsr.umich.edu/DDI");
    writeAttribute(xmlw, "version", "2.0");
    createStdyDscr(xmlw, excludedFieldSet, includedFieldSet, version);
    List<FileMetadata> allFiles = version.getFileMetadatas();
    if (allFiles == null || allFiles.isEmpty()) {
        // Nothing to describe; close the document and bail out.
        xmlw.writeEndElement(); // </codeBook>
        return;
    }
    // Partition the files: tabular ones get file/data descriptions below,
    // the rest are listed as "other material".
    List<FileMetadata> tabularFiles = new ArrayList<>();
    List<FileMetadata> nonTabularFiles = new ArrayList<>();
    for (FileMetadata metadata : allFiles) {
        if (metadata.getDataFile().isTabularData()) {
            tabularFiles.add(metadata);
        } else {
            nonTabularFiles.add(metadata);
        }
    }
    if (checkField("fileDscr", excludedFieldSet, includedFieldSet)) {
        for (FileMetadata metadata : tabularFiles) {
            DataTable dataTable = fileService.findDataTableByFileId(metadata.getDataFile().getId());
            createFileDscr(xmlw, excludedFieldSet, includedFieldSet, metadata.getDataFile(), dataTable);
        }
        // Second pass over the same files: the data (variable) description sections.
        xmlw.writeStartElement("dataDscr");
        for (FileMetadata metadata : tabularFiles) {
            DataTable dataTable = fileService.findDataTableByFileId(metadata.getDataFile().getId());
            for (DataVariable dataVariable : variableService.findByDataTableId(dataTable.getId())) {
                createVarDDI(xmlw, excludedFieldSet, null, dataVariable);
            }
        }
        xmlw.writeEndElement(); // </dataDscr>
    }
    if (checkField("othrMat", excludedFieldSet, includedFieldSet)) {
        for (FileMetadata metadata : nonTabularFiles) {
            createOtherMat(xmlw, excludedFieldSet, includedFieldSet, metadata);
        }
    }
    xmlw.writeEndElement(); // </codeBook>
}
End of aggregated usage examples.