use of edu.harvard.iq.dataverse.DataTable in project dataverse by IQSS.
the class DdiExportUtil method createFileDscr.
/**
 * Writes one {@code fileDscr} element for every tabular data file found in
 * the given dataset version; files that are missing or not tabular are
 * skipped.
 *
 * <p>Each {@code fileDscr} carries the file name, the optional
 * {@code dimensns} counts, the content type, an optional UNF note and one
 * note per file tag.
 *
 * @param xmlw           stream writer the elements are emitted to
 * @param datasetVersion version whose file metadata entries are exported
 * @throws XMLStreamException if the underlying writer fails
 */
private static void createFileDscr(XMLStreamWriter xmlw, DatasetVersion datasetVersion) throws XMLStreamException {
    final String siteUrl = getDataverseSiteUrl();

    for (FileMetadata metadata : datasetVersion.getFileMetadatas()) {
        DataFile dataFile = metadata.getDataFile();
        if (dataFile == null || !dataFile.isTabularData()) {
            continue;
        }
        DataTable table = dataFile.getDataTable();

        xmlw.writeStartElement("fileDscr");
        writeAttribute(xmlw, "ID", "f" + dataFile.getId());
        writeAttribute(xmlw, "URI", siteUrl + "/api/access/datafile/" + dataFile.getId());

        xmlw.writeStartElement("fileTxt");

        xmlw.writeStartElement("fileName");
        xmlw.writeCharacters(metadata.getLabel());
        xmlw.writeEndElement(); // fileName

        // The dimensns wrapper is only emitted when at least one count is known.
        Long caseQuantity = table.getCaseQuantity();
        Long varQuantity = table.getVarQuantity();
        if (caseQuantity != null || varQuantity != null || table.getRecordsPerCase() != null) {
            xmlw.writeStartElement("dimensns");
            if (caseQuantity != null) {
                xmlw.writeStartElement("caseQnty");
                xmlw.writeCharacters(caseQuantity.toString());
                xmlw.writeEndElement(); // caseQnty
            }
            if (varQuantity != null) {
                xmlw.writeStartElement("varQnty");
                xmlw.writeCharacters(varQuantity.toString());
                xmlw.writeEndElement(); // varQnty
            }
            if (table.getRecordsPerCase() != null) {
                xmlw.writeStartElement("recPrCas");
                xmlw.writeCharacters(table.getRecordsPerCase().toString());
                xmlw.writeEndElement(); // recPrCas
            }
            xmlw.writeEndElement(); // dimensns
        }

        xmlw.writeStartElement("fileType");
        xmlw.writeCharacters(dataFile.getContentType());
        xmlw.writeEndElement(); // fileType

        xmlw.writeEndElement(); // fileTxt

        // (Universal Numeric Fingerprint) signature, written only when non-empty.
        String unf = table.getUnf();
        if (unf != null && !unf.isEmpty()) {
            xmlw.writeStartElement("notes");
            writeAttribute(xmlw, "level", LEVEL_FILE);
            writeAttribute(xmlw, "type", NOTE_TYPE_UNF);
            writeAttribute(xmlw, "subject", NOTE_SUBJECT_UNF);
            xmlw.writeCharacters(unf);
            xmlw.writeEndElement(); // notes
        }

        // One notes element per tag attached to the file.
        if (dataFile.getTags() != null) {
            for (int tagIndex = 0; tagIndex < dataFile.getTags().size(); tagIndex++) {
                xmlw.writeStartElement("notes");
                writeAttribute(xmlw, "level", LEVEL_FILE);
                writeAttribute(xmlw, "type", NOTE_TYPE_TAG);
                writeAttribute(xmlw, "subject", NOTE_SUBJECT_TAG);
                xmlw.writeCharacters(dataFile.getTags().get(tagIndex).getTypeLabel());
                xmlw.writeEndElement(); // notes
            }
        }

        // TODO: add the remaining fileDscr elements!
        xmlw.writeEndElement(); // fileDscr
    }
}
use of edu.harvard.iq.dataverse.DataTable in project dataverse by IQSS.
the class IngestServiceBean method main.
/**
 * Command-line entry point that runs a tabular ingest plugin on a single
 * file and prints a summary of the result.
 *
 * <p>On success the tab-delimited output is copied next to the input file
 * (same name, {@code tab} extension) and the variable count, observation
 * count, UNF and a per-variable line are printed to standard output. Every
 * failure prints a message to standard error and exits with status 1.
 *
 * @param args {@code args[0]} is the path of the file to ingest,
 *             {@code args[1]} is the type used to look up the ingest plugin
 */
public static void main(String[] args) {
    // Check the argument count before indexing: the original read args[0]
    // and args[1] first, so a short command line crashed with
    // ArrayIndexOutOfBoundsException instead of printing the usage text.
    if (args == null || args.length < 2
            || args[0] == null || args[1] == null
            || "".equals(args[0]) || "".equals(args[1])) {
        System.err.println("Usage: java edu.harvard.iq.dataverse.ingest.IngestServiceBean <file> <type>.");
        System.exit(1);
        return; // not reached at runtime; keeps the compiler's flow analysis simple
    }
    String file = args[0];
    String type = args[1];

    BufferedInputStream fileInputStream = null;
    try {
        fileInputStream = new BufferedInputStream(new FileInputStream(new File(file)));
    } catch (FileNotFoundException notfoundEx) {
        System.err.println("Could not open file " + file + ".");
        System.exit(1);
        return;
    }

    TabularDataFileReader ingestPlugin = getTabDataReaderByMimeType(type);
    if (ingestPlugin == null) {
        System.err.println("Could not locate an ingest plugin for type " + type + ".");
        System.exit(1);
        return;
    }

    TabularDataIngest tabDataIngest = null;
    // try-with-resources so the input stream is closed once the plugin is done with it.
    try (BufferedInputStream ingestStream = fileInputStream) {
        tabDataIngest = ingestPlugin.read(ingestStream, null);
    } catch (IOException ingestEx) {
        System.err.println("Caught an exception trying to ingest file " + file + ".");
        System.exit(1);
        return;
    }

    if (tabDataIngest == null) {
        System.err.println("Ingest resulted in a null tabDataIngest object for file " + file + ".");
        System.exit(1);
        return;
    }

    File tabFile = tabDataIngest.getTabDelimitedFile();
    DataTable dataTable = tabDataIngest.getDataTable();
    if (dataTable == null || tabFile == null || !tabFile.exists()) {
        System.err.println("Ingest failed to produce tab file or data table for file " + file + ".");
        System.exit(1);
        return;
    }

    try {
        String tabFilename = FileUtil.replaceExtension(file, "tab");
        Files.copy(Paths.get(tabFile.getAbsolutePath()), Paths.get(tabFilename), StandardCopyOption.REPLACE_EXISTING);
    } catch (IOException ex) {
        System.err.println("Caught an exception trying to save ingested data for file " + file + ".");
        System.exit(1);
        return;
    }

    Long varQuantity = dataTable.getVarQuantity();
    System.out.println("NVARS: " + varQuantity);
    System.out.println("NOBS: " + dataTable.getCaseQuantity());
    System.out.println("UNF: " + dataTable.getUnf());

    // getVarQuantity() is a boxed Long; unboxing a null in the loop condition
    // would throw, so a missing count is treated as zero variables.
    long variableCount = (varQuantity == null) ? 0L : varQuantity.longValue();
    for (int i = 0; i < variableCount; i++) {
        String vartype;
        if (dataTable.getDataVariables().get(i).isIntervalContinuous()) {
            vartype = "numeric-continuous";
        } else if (dataTable.getDataVariables().get(i).isTypeNumeric()) {
            vartype = "numeric-discrete";
        } else {
            vartype = "character";
        }
        System.out.println("VAR" + i + " "
                + dataTable.getDataVariables().get(i).getName() + " "
                + vartype + " "
                + dataTable.getDataVariables().get(i).getUnf());
    }
}
use of edu.harvard.iq.dataverse.DataTable in project dataverse by IQSS.
the class TestIngest method datafile.
// @EJB
/**
 * Test endpoint that ingests a file from the server's filesystem and
 * returns a plain-text summary.
 *
 * <p>On success the tab-delimited output is copied next to the input file
 * (same name, {@code tab} extension), summary statistics are requested from
 * the ingest service, and the response lists the variable count, the
 * observation count, the UNF and one line per variable. Every failure is
 * reported as a one-line message in the response body rather than as an
 * error status.
 *
 * <p>NOTE(review): {@code fileName} comes straight from the query string
 * and is opened (and a sibling file written) on the server; confirm this
 * endpoint cannot be reached by untrusted clients.
 *
 * @param fileName path of the file to ingest
 * @param fileType type used to look up the ingest plugin
 * @param uriInfo  injected request context (not used by this method)
 * @param headers  injected request headers (not used by this method)
 * @param response injected servlet response (not used by this method)
 * @return the plain-text summary, or a usage/error message
 */
@Path("test/file")
@GET
@Produces({ "text/plain" })
public String datafile(@QueryParam("fileName") String fileName, @QueryParam("fileType") String fileType, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) {
    if (StringUtil.isEmpty(fileName) || StringUtil.isEmpty(fileType)) {
        return "Usage: /api/ingest/test/file?fileName=PATH&fileType=TYPE";
    }

    BufferedInputStream fileInputStream;
    try {
        fileInputStream = new BufferedInputStream(new FileInputStream(new File(fileName)));
    } catch (FileNotFoundException notfoundEx) {
        return "Could not open file " + fileName + ".";
    }

    TabularDataFileReader ingestPlugin = ingestService.getTabDataReaderByMimeType(fileType);
    TabularDataIngest tabDataIngest = null;
    // try-with-resources: the stream was previously never closed, leaking a
    // file handle on every request (including all the early returns).
    try (BufferedInputStream ingestStream = fileInputStream) {
        if (ingestPlugin == null) {
            return "Could not locate an ingest plugin for type " + fileType + ".";
        }
        tabDataIngest = ingestPlugin.read(ingestStream, null);
    } catch (IOException ingestEx) {
        return "Caught an exception trying to ingest file " + fileName + ".";
    }

    if (tabDataIngest == null) {
        return "Ingest resulted in a null tabDataIngest object for file " + fileName + ".";
    }

    File tabFile = tabDataIngest.getTabDelimitedFile();
    DataTable dataTable = tabDataIngest.getDataTable();
    if (dataTable == null || tabFile == null || !tabFile.exists()) {
        return "Ingest failed to produce tab file or data table for file " + fileName + ".";
    }

    String tabFilename = FileUtil.replaceExtension(fileName, "tab");
    try {
        java.nio.file.Files.copy(Paths.get(tabFile.getAbsolutePath()), Paths.get(tabFilename), StandardCopyOption.REPLACE_EXISTING);
    } catch (IOException ex) {
        return "Caught an exception trying to save ingested data for file " + fileName + ".";
    }

    // Wire up a transient DataFile/FileMetadata pair around the ingested table
    // so it can be handed to the ingest service below.
    DataFile dataFile = new DataFile();
    dataFile.setStorageIdentifier(tabFilename);
    FileMetadata fileMetadata = new FileMetadata();
    fileMetadata.setLabel(fileName);
    dataFile.setDataTable(dataTable);
    dataTable.setDataFile(dataFile);
    fileMetadata.setDataFile(dataFile);
    dataFile.getFileMetadatas().add(fileMetadata);

    // StringBuilder instead of repeated String.concat: avoids re-copying the
    // whole output per variable and, unlike concat, tolerates null values.
    StringBuilder output = new StringBuilder();
    Long varQuantity = dataTable.getVarQuantity();
    output.append("NVARS: ").append(varQuantity).append("\n");
    output.append("NOBS: ").append(dataTable.getCaseQuantity()).append("\n");
    try {
        ingestService.produceSummaryStatistics(dataFile, tabFile);
        output.append("UNF: ").append(dataTable.getUnf()).append("\n");
    } catch (IOException ioex) {
        output.append("UNF: failed to calculate\n" + "\n");
    }

    // getVarQuantity() is a boxed Long; a null would throw when unboxed in
    // the loop condition, so a missing count is treated as zero variables.
    long variableCount = (varQuantity == null) ? 0L : varQuantity.longValue();
    for (int i = 0; i < variableCount; i++) {
        String vartype;
        if (dataTable.getDataVariables().get(i).isIntervalContinuous()) {
            vartype = "numeric-continuous";
        } else if (dataTable.getDataVariables().get(i).isTypeNumeric()) {
            vartype = "numeric-discrete";
        } else {
            String formatCategory = dataTable.getDataVariables().get(i).getFormatCategory();
            if ("time".equals(formatCategory)) {
                vartype = "character-time";
            } else if ("date".equals(formatCategory)) {
                vartype = "character-date";
            } else {
                vartype = "character";
            }
        }
        output.append("VAR").append(i).append(" ");
        output.append(dataTable.getDataVariables().get(i).getName()).append(" ");
        output.append(vartype).append(" ");
        // A variable's UNF can be null (e.g. when the statistics step above
        // failed); String.concat(null) threw here, append prints "null".
        output.append(dataTable.getDataVariables().get(i).getUnf());
        output.append("\n");
    }
    return output.toString();
}
use of edu.harvard.iq.dataverse.DataTable in project dataverse by IQSS.
the class SearchIncludeFragment method tabularDataDisplayInfo.
/**
 * Builds the one-line summary shown for a tabular data file, e.g.
 * {@code "5 Variables, 10 Observations - UNF: ..."}.
 *
 * <p>The variables part (followed by {@code " - "}) is included only when
 * the variable count is present and non-zero; the observations part only
 * when its count is present and non-zero as well; the UNF part only when
 * the UNF is non-empty.
 *
 * @param datafile the file to describe; may be {@code null}
 * @return {@code null} when {@code datafile} is {@code null}, an empty
 *         string when the file is not tabular or has no data table,
 *         otherwise the summary text
 */
public String tabularDataDisplayInfo(DataFile datafile) {
    if (datafile == null) {
        return null;
    }
    if (!datafile.isTabularData() || datafile.getDataTable() == null) {
        return "";
    }

    DataTable table = datafile.getDataTable();
    String fingerprint = table.getUnf();
    Long variableCount = table.getVarQuantity();
    Long observationCount = table.getCaseQuantity();

    StringBuilder summary = new StringBuilder();

    boolean hasVariables = variableCount != null && variableCount.intValue() != 0;
    if (hasVariables) {
        summary.append(variableCount).append(" Variables");
        boolean hasObservations = observationCount != null && observationCount.intValue() != 0;
        if (hasObservations) {
            summary.append(", ").append(observationCount).append(" Observations");
        }
        summary.append(" - ");
    }

    if (fingerprint != null && !fingerprint.isEmpty()) {
        summary.append("UNF: ").append(fingerprint);
    }
    return summary.toString();
}
use of edu.harvard.iq.dataverse.DataTable in project dataverse by IQSS.
the class IngestUtilTest method testRecalculateDatasetVersionUNF.
/**
 * Exercises {@code IngestUtil.recalculateDatasetVersionUNF} with a null
 * argument, with a version that has no files, and with a draft version
 * holding one file whose data table carries a UNF.
 */
@Test
public void testRecalculateDatasetVersionUNF() {
    // A null version is passed straight through; the call just has to return.
    IngestUtil.recalculateDatasetVersionUNF(null);

    // A version without files ends up with no UNF.
    DatasetVersion emptyVersion = new DatasetVersion();
    IngestUtil.recalculateDatasetVersionUNF(emptyVersion);
    assertEquals(null, emptyVersion.getUNF());

    // Dataset with a single draft version.
    List<Dataset> datasets = new ArrayList<>();
    Dataset dataset = new Dataset();
    dataset.setProtocol("doi");
    dataset.setAuthority("fakeAuthority");
    dataset.setIdentifier("12345");

    DatasetVersion draftVersion = new DatasetVersion();
    draftVersion.setDataset(dataset);
    draftVersion.setId(42L);
    draftVersion.setVersionState(DatasetVersion.VersionState.DRAFT);
    List<DatasetVersion> versions = new ArrayList<>();
    versions.add(draftVersion);

    // One file that becomes tabular once a data table is attached to it.
    DataFile tabularFile = new DataFile("application/octet-stream");
    DataTable table = new DataTable();
    table.setUnf("unfOnDataTable");
    tabularFile.setDataTable(table);
    assertEquals(true, tabularFile.isTabularData());

    // Link file, file metadata and version to each other.
    FileMetadata metadata = new FileMetadata();
    metadata.setId(1L);
    metadata.setLabel("datafile1.txt");
    metadata.setDataFile(tabularFile);
    tabularFile.getFileMetadatas().add(metadata);
    draftVersion.getFileMetadatas().add(metadata);
    metadata.setDatasetVersion(draftVersion);

    dataset.setVersions(versions);
    datasets.add(dataset);

    // The version UNF is absent until it is recalculated.
    assertEquals(null, draftVersion.getUNF());
    IngestUtil.recalculateDatasetVersionUNF(draftVersion);
    assertEquals("UNF:6:rDlgOhoEkEQQdwtLRHjmtw==", draftVersion.getUNF());
}
Aggregations