Search in sources:

Example 1 with SeparatorBasedImporter

use of com.google.refine.importers.SeparatorBasedImporter in project OpenRefine by OpenRefine.

the class ImportingUtilitiesTests method importArchive.

/**
 * Exercises the archive import path as one ordered scenario: a zip archive is exploded into its constituent files,
 * both files are imported, and the recorded archive and file names are checked on the file records, on the
 * project's columns and rows, and on the import option metadata.
 *
 * The steps build on each other, which is why they all live in a single test.
 *
 * @throws IOException
 *             if preparing the test archive fails (for example while creating or copying the temporary file)
 */
@SuppressWarnings("unchecked")
@Test
public void importArchive() throws IOException {
    final String archiveName = "movies.zip";
    final String bundledArchivePath = ClassLoader.getSystemResource(archiveName).getPath();

    // Stage a copy of the archive in the job's raw data directory, which is where it is expected to be
    File stagedArchive = File.createTempFile("openrefine-test-movies", ".zip", job.getRawDataDir());
    stagedArchive.deleteOnExit();
    FileUtils.copyFile(new File(bundledArchivePath), stagedArchive);

    // Progress callback that reports nothing and never cancels
    Progress silentProgress = new Progress() {

        @Override
        public void setProgress(String message, int percent) {
        }

        @Override
        public boolean isCanceled() {
            return false;
        }
    };

    // Describe the staged archive as an uploaded file
    ObjectNode uploadRecord = ParsingUtilities.mapper.createObjectNode();
    JSONUtilities.safePut(uploadRecord, "origin", "upload");
    JSONUtilities.safePut(uploadRecord, "declaredEncoding", "UTF-8");
    JSONUtilities.safePut(uploadRecord, "declaredMimeType", "application/x-zip-compressed");
    JSONUtilities.safePut(uploadRecord, "fileName", archiveName);
    JSONUtilities.safePut(uploadRecord, "location", stagedArchive.getName());
    ArrayNode extractedRecords = ParsingUtilities.mapper.createArrayNode();

    // Step 1: explode the archive and check the records produced for its two entries
    boolean postProcessed = ImportingUtilities.postProcessRetrievedFile(job.getRawDataDir(), stagedArchive,
            uploadRecord, extractedRecords, silentProgress);
    assertTrue(postProcessed);
    assertEquals(extractedRecords.size(), 2);
    assertEquals(extractedRecords.get(0).get("fileName").asText(), "movies-condensed.tsv");
    assertEquals(extractedRecords.get(0).get("archiveFileName").asText(), "movies.zip");
    assertEquals(extractedRecords.get(1).get("fileName").asText(), "movies.tsv");

    // Step 2: import every extracted file, asking for archive and file source columns
    ObjectNode parseOptions = ParsingUtilities.mapper.createObjectNode();
    JSONUtilities.safePut(parseOptions, "includeArchiveFileName", true);
    JSONUtilities.safePut(parseOptions, "includeFileSources", true);
    List<Exception> parseErrors = new ArrayList<>();
    ImportingParserBase tsvImporter = new SeparatorBasedImporter();
    tsvImporter.parse(project, metadata, job, IteratorUtils.toList(extractedRecords.iterator()), "tsv", -1,
            parseOptions, parseErrors);
    assertEquals(parseErrors.size(), 0);
    project.update();

    // Step 3: the first three columns carry the archive name, the file name and the first data column
    assertEquals(project.columnModel.columns.get(0).getName(), "Archive");
    assertEquals(project.rows.get(0).getCell(0).getValue(), "movies.zip");
    assertEquals(project.columnModel.columns.get(1).getName(), "File");
    assertEquals(project.rows.get(0).getCell(1).getValue(), "movies-condensed.tsv");
    assertEquals(project.columnModel.columns.get(2).getName(), "name");
    assertEquals(project.rows.get(0).getCell(2).getValue(), "Wayne's World");
    // Make sure we imported both files contained in the zip file
    assertEquals(project.rows.size(), 252);

    // Step 4: one import option entry is recorded per imported file
    ArrayNode recordedOptions = metadata.getImportOptionMetadata();
    assertEquals(recordedOptions.size(), 2);
    ObjectNode firstFileOptions = (ObjectNode) recordedOptions.get(0);
    assertEquals(firstFileOptions.get("archiveFileName").asText(), "movies.zip");
    assertEquals(firstFileOptions.get("fileSource").asText(), "movies-condensed.tsv");
    assertTrue(firstFileOptions.get("includeFileSources").asBoolean());
    assertTrue(firstFileOptions.get("includeArchiveFileName").asBoolean());
    ObjectNode secondFileOptions = (ObjectNode) recordedOptions.get(1);
    assertEquals(secondFileOptions.get("fileSource").asText(), "movies.tsv");
    assertEquals(secondFileOptions.get("archiveFileName").asText(), "movies.zip");
}
Also used : Progress(com.google.refine.importing.ImportingUtilities.Progress) ObjectNode(com.fasterxml.jackson.databind.node.ObjectNode) ArrayList(java.util.ArrayList) SeparatorBasedImporter(com.google.refine.importers.SeparatorBasedImporter) ArrayNode(com.fasterxml.jackson.databind.node.ArrayNode) File(java.io.File) IOException(java.io.IOException) ImportingParserBase(com.google.refine.importers.ImportingParserBase) ImporterTest(com.google.refine.importers.ImporterTest) Test(org.testng.annotations.Test)

Example 2 with SeparatorBasedImporter

use of com.google.refine.importers.SeparatorBasedImporter in project OpenRefine by OpenRefine.

the class KeyValueColumnizeTests method SetUp.

/**
 * Per-test fixture: initializes the file-based project manager on a fresh temporary directory, registers an empty
 * named project, registers the key-value-columnize operation, and creates an importing job and a
 * {@link SeparatorBasedImporter}.
 */
@BeforeMethod
public void SetUp() throws IOException, ModelException {
    servlet = new RefineServletStub();

    // Initialize the file-based project manager on a newly created temporary directory
    FileProjectManager.initialize(TestUtils.createTempDirectory("openrefine-test-workspace-dir"));

    // Empty project with named metadata, registered with the project manager
    project = new Project();
    pm = new ProjectMetadata();
    pm.setName("KeyValueColumnize test");
    ProjectManager.singleton.registerProject(project, pm);

    options = mock(ObjectNode.class);
    OperationRegistry.registerOperation(getCoreModule(), "key-value-columnize", KeyValueColumnizeOperation.class);

    // Importing infrastructure: a job obtained from the manager and the importer itself
    ImportingManager.initialize(servlet);
    job = ImportingManager.createJob();
    importer = new SeparatorBasedImporter();
}
Also used : Project(com.google.refine.model.Project) RefineServletStub(com.google.refine.RefineServletStub) ObjectNode(com.fasterxml.jackson.databind.node.ObjectNode) ProjectMetadata(com.google.refine.ProjectMetadata) SeparatorBasedImporter(com.google.refine.importers.SeparatorBasedImporter) File(java.io.File) BeforeMethod(org.testng.annotations.BeforeMethod)

Example 3 with SeparatorBasedImporter

use of com.google.refine.importers.SeparatorBasedImporter in project OpenRefine by OpenRefine.

the class RefineTest method createCSVProject.

/**
 * Quick way for a test to obtain a project from CSV content supplied as a string. The import options are fixed
 * (the separator is ","), so tests needing finer control over the import should call the importer directly.
 *
 * Each project created here, together with its importing job, is recorded in {@code projects} and
 * {@code importingJobs}; they are meant to be disposed of at the end of each test.
 *
 * @param projectName
 *            the name of the project to create
 * @param input
 *            the content of the file, encoded as a CSV (with "," as a separator)
 * @return the newly created project, already updated and registered with the project manager
 */
protected Project createCSVProject(String projectName, String input) {
    Project csvProject = new Project();
    ProjectMetadata csvMetadata = new ProjectMetadata();
    csvMetadata.setName(projectName);

    ObjectNode importOptions = mock(ObjectNode.class);
    prepareImportOptions(importOptions, ",", -1, 0, 0, 1, false, false);

    ImportingJob importJob = ImportingManager.createJob();
    // NOTE(review): exceptions reported by the importer are collected here but never inspected
    List<Exception> importErrors = new ArrayList<>();
    new SeparatorBasedImporter().parseOneFile(csvProject, csvMetadata, importJob, "filesource",
            new StringReader(input), -1, importOptions, importErrors);
    csvProject.update();

    ProjectManager.singleton.registerProject(csvProject, csvMetadata);
    // Remember what was created so it can be cleaned up later
    projects.add(csvProject);
    importingJobs.add(importJob);
    return csvProject;
}
Also used : Project(com.google.refine.model.Project) ObjectNode(com.fasterxml.jackson.databind.node.ObjectNode) ArrayList(java.util.ArrayList) StringReader(java.io.StringReader) ImportingJob(com.google.refine.importing.ImportingJob) SeparatorBasedImporter(com.google.refine.importers.SeparatorBasedImporter) IOException(java.io.IOException) ModelException(com.google.refine.model.ModelException) ParsingException(com.google.refine.expr.ParsingException)

Example 4 with SeparatorBasedImporter

use of com.google.refine.importers.SeparatorBasedImporter in project OpenRefine by OpenRefine.

the class TransposeTests method SetUp.

/**
 * Per-test fixture: installs a stub project manager, initializes the importing manager with a stub servlet, and
 * creates a fresh project, metadata, importing job, importer and mocked options object.
 */
@BeforeMethod
public void SetUp() {
    // Stubbed infrastructure
    servlet = new RefineServletStub();
    ProjectManager.singleton = new ProjectManagerStub();
    ImportingManager.initialize(servlet);

    // Fresh project state and importing job for this test
    project = new Project();
    metadata = new ProjectMetadata();
    job = ImportingManager.createJob();

    // Importer and its mocked options
    importer = new SeparatorBasedImporter();
    options = mock(JSONObject.class);
}
Also used : Project(com.google.refine.model.Project) ProjectManagerStub(com.google.refine.tests.ProjectManagerStub) RefineServletStub(com.google.refine.tests.RefineServletStub) JSONObject(org.json.JSONObject) ProjectMetadata(com.google.refine.ProjectMetadata) SeparatorBasedImporter(com.google.refine.importers.SeparatorBasedImporter) BeforeMethod(org.testng.annotations.BeforeMethod)

Example 5 with SeparatorBasedImporter

use of com.google.refine.importers.SeparatorBasedImporter in project OpenRefine by OpenRefine.

the class TsvCsvImporterTests method setUp.

/**
 * Per-test fixture: runs the inherited set-up and then assigns a new {@link SeparatorBasedImporter} to
 * {@code SUT}.
 */
@Override
@BeforeMethod
public void setUp() {
    // Run the parent fixture first
    super.setUp();
    // A fresh importer instance for every test method
    SUT = new SeparatorBasedImporter();
}
Also used : SeparatorBasedImporter(com.google.refine.importers.SeparatorBasedImporter) BeforeMethod(org.testng.annotations.BeforeMethod)

Aggregations

SeparatorBasedImporter (com.google.refine.importers.SeparatorBasedImporter)6 ObjectNode (com.fasterxml.jackson.databind.node.ObjectNode)4 Project (com.google.refine.model.Project)4 ProjectMetadata (com.google.refine.ProjectMetadata)3 ArrayList (java.util.ArrayList)3 BeforeMethod (org.testng.annotations.BeforeMethod)3 ImportingJob (com.google.refine.importing.ImportingJob)2 File (java.io.File)2 IOException (java.io.IOException)2 StringReader (java.io.StringReader)2 ArrayNode (com.fasterxml.jackson.databind.node.ArrayNode)1 RefineServletStub (com.google.refine.RefineServletStub)1 ParsingException (com.google.refine.expr.ParsingException)1 ImporterTest (com.google.refine.importers.ImporterTest)1 ImportingParserBase (com.google.refine.importers.ImportingParserBase)1 Progress (com.google.refine.importing.ImportingUtilities.Progress)1 ModelException (com.google.refine.model.ModelException)1 ProjectManagerStub (com.google.refine.tests.ProjectManagerStub)1 RefineServletStub (com.google.refine.tests.RefineServletStub)1 JSONObject (org.json.JSONObject)1