Search in sources:

Example 6 with CsvBulkLoadTool

use of org.apache.phoenix.mapreduce.CsvBulkLoadTool in project phoenix by apache.

In class CsvBulkLoadToolIT, method testInvalidArguments:

/**
 * Runs the bulk load tool against a table name that this test never creates and expects the
 * run to be rejected with an {@link IllegalArgumentException} whose message reports the table
 * as not found.
 */
@Test
public void testInvalidArguments() {
    final String missingTable = "TABLE8";
    final CsvBulkLoadTool tool = new CsvBulkLoadTool();
    tool.setConf(getUtility().getConfiguration());
    final String[] toolArgs = {
        "--input", "/tmp/input4.csv",
        "--table", missingTable,
        "--zookeeper", zkQuorum
    };
    try {
        tool.run(toolArgs);
        // Reaching this line means the tool accepted the missing table; fail() raises an
        // AssertionError, which the catch (Exception) below does not intercept.
        fail(String.format("Table %s not created, hence should fail", missingTable));
    } catch (Exception ex) {
        final String expectedFragment = String.format("Table %s not found", missingTable);
        assertTrue(ex instanceof IllegalArgumentException);
        assertTrue(ex.getMessage().contains(expectedFragment));
    }
}
Also used : CsvBulkLoadTool(org.apache.phoenix.mapreduce.CsvBulkLoadTool) FileAlreadyExistsException(org.apache.hadoop.mapred.FileAlreadyExistsException) Test(org.junit.Test)

Example 7 with CsvBulkLoadTool

use of org.apache.phoenix.mapreduce.CsvBulkLoadTool in project phoenix by apache.

In class CsvBulkLoadToolIT, method testImportWithLocalIndex:

/**
 * Bulk-loads a two-row CSV file into TABLE6, which carries two local indexes (one on
 * FIRST_NAME, one on LAST_NAME), and checks that the rows can be read back when filtering on
 * each of those columns.
 *
 * <p>The statement and every result set are released in {@code finally} blocks so that a
 * failing assertion or a failing bulk load does not leave JDBC resources open.
 *
 * @throws Exception if table setup, writing the CSV file, the bulk load, or a query fails
 */
@Test
public void testImportWithLocalIndex() throws Exception {
    Statement stmt = conn.createStatement();
    try {
        stmt.execute("CREATE TABLE TABLE6 (ID INTEGER NOT NULL PRIMARY KEY, "
                + "FIRST_NAME VARCHAR, LAST_NAME VARCHAR) SPLIt ON (1,2)");
        stmt.execute("CREATE LOCAL INDEX TABLE6_IDX ON TABLE6 " + " (FIRST_NAME ASC)");
        stmt.execute("CREATE LOCAL INDEX TABLE6_IDX2 ON TABLE6 " + " (LAST_NAME ASC)");

        // Write the input file through the test utility's file system.
        FileSystem fs = FileSystem.get(getUtility().getConfiguration());
        FSDataOutputStream outputStream = fs.create(new Path("/tmp/input3.csv"));
        PrintWriter printWriter = new PrintWriter(outputStream);
        try {
            printWriter.println("1,FirstName 1,LastName 1");
            printWriter.println("2,FirstName 2,LastName 2");
        } finally {
            printWriter.close();
        }

        CsvBulkLoadTool csvBulkLoadTool = new CsvBulkLoadTool();
        csvBulkLoadTool.setConf(getUtility().getConfiguration());
        int exitCode = csvBulkLoadTool.run(new String[] {
            "--input", "/tmp/input3.csv", "--table", "table6", "--zookeeper", zkQuorum });
        assertEquals(0, exitCode);

        // Lookup by the column covered by TABLE6_IDX.
        ResultSet rs = stmt.executeQuery("SELECT id, FIRST_NAME FROM TABLE6 where first_name='FirstName 2'");
        try {
            assertTrue(rs.next());
            assertEquals(2, rs.getInt(1));
            assertEquals("FirstName 2", rs.getString(2));
        } finally {
            rs.close();
        }

        // Lookup by the column covered by TABLE6_IDX2.
        rs = stmt.executeQuery("SELECT LAST_NAME FROM TABLE6  where last_name='LastName 1'");
        try {
            assertTrue(rs.next());
            assertEquals("LastName 1", rs.getString(1));
        } finally {
            rs.close();
        }
    } finally {
        stmt.close();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) CsvBulkLoadTool(org.apache.phoenix.mapreduce.CsvBulkLoadTool) Statement(java.sql.Statement) FileSystem(org.apache.hadoop.fs.FileSystem) ResultSet(java.sql.ResultSet) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) PrintWriter(java.io.PrintWriter) Test(org.junit.Test)

Example 8 with CsvBulkLoadTool

use of org.apache.phoenix.mapreduce.CsvBulkLoadTool in project phoenix by apache.

In class CsvBulkLoadToolIT, method testImportWithIndex:

/**
 * Bulk-loads a two-row CSV file into TABLE3, which has an index on FIRST_NAME that includes
 * LAST_NAME, and checks that a row can be read back when filtering on FIRST_NAME.
 *
 * <p>The statement and result set are released in {@code finally} blocks so that a failing
 * assertion or a failing bulk load does not leave JDBC resources open.
 *
 * @throws Exception if table setup, writing the CSV file, the bulk load, or the query fails
 */
@Test
public void testImportWithIndex() throws Exception {
    Statement stmt = conn.createStatement();
    try {
        stmt.execute("CREATE TABLE TABLE3 (ID INTEGER NOT NULL PRIMARY KEY, "
                + "FIRST_NAME VARCHAR, LAST_NAME VARCHAR)");
        stmt.execute("CREATE INDEX TABLE3_IDX ON TABLE3 " + " (FIRST_NAME ASC)" + " INCLUDE (LAST_NAME)");

        // Write the input file through the test utility's file system.
        FileSystem fs = FileSystem.get(getUtility().getConfiguration());
        FSDataOutputStream outputStream = fs.create(new Path("/tmp/input3.csv"));
        PrintWriter printWriter = new PrintWriter(outputStream);
        try {
            printWriter.println("1,FirstName 1,LastName 1");
            printWriter.println("2,FirstName 2,LastName 2");
        } finally {
            printWriter.close();
        }

        CsvBulkLoadTool csvBulkLoadTool = new CsvBulkLoadTool();
        csvBulkLoadTool.setConf(getUtility().getConfiguration());
        int exitCode = csvBulkLoadTool.run(new String[] {
            "--input", "/tmp/input3.csv", "--table", "table3", "--zookeeper", zkQuorum });
        assertEquals(0, exitCode);

        ResultSet rs = stmt.executeQuery("SELECT id, FIRST_NAME FROM TABLE3 where first_name='FirstName 2'");
        try {
            assertTrue(rs.next());
            assertEquals(2, rs.getInt(1));
            assertEquals("FirstName 2", rs.getString(2));
        } finally {
            rs.close();
        }
    } finally {
        stmt.close();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) CsvBulkLoadTool(org.apache.phoenix.mapreduce.CsvBulkLoadTool) Statement(java.sql.Statement) FileSystem(org.apache.hadoop.fs.FileSystem) ResultSet(java.sql.ResultSet) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) PrintWriter(java.io.PrintWriter) Test(org.junit.Test)

Example 9 with CsvBulkLoadTool

use of org.apache.phoenix.mapreduce.CsvBulkLoadTool in project phoenix by apache.

In class CsvBulkLoadToolIT, method testMultipleInputFiles:

/**
 * Bulk-loads two single-row CSV files in one tool run (passed as a comma-separated
 * {@code --input} value) into TABLE7 and checks that both rows, including their DATE column,
 * are read back in ID order.
 *
 * <p>The statement and result set are released in {@code finally} blocks so that a failing
 * assertion or a failing bulk load does not leave JDBC resources open.
 *
 * @throws Exception if table setup, writing the CSV files, the bulk load, or the query fails
 */
@Test
public void testMultipleInputFiles() throws Exception {
    Statement stmt = conn.createStatement();
    try {
        stmt.execute("CREATE TABLE TABLE7 (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, T DATE) SPLIT ON (1,2)");

        FileSystem fs = FileSystem.get(getUtility().getConfiguration());
        PrintWriter printWriter = new PrintWriter(fs.create(new Path("/tmp/input1.csv")));
        try {
            printWriter.println("1,Name 1,1970/01/01");
        } finally {
            printWriter.close();
        }
        printWriter = new PrintWriter(fs.create(new Path("/tmp/input2.csv")));
        try {
            printWriter.println("2,Name 2,1970/01/02");
        } finally {
            printWriter.close();
        }

        CsvBulkLoadTool csvBulkLoadTool = new CsvBulkLoadTool();
        // The date-format override is set on a separate Configuration built from the test
        // utility's one, not on the utility's own instance.
        csvBulkLoadTool.setConf(new Configuration(getUtility().getConfiguration()));
        csvBulkLoadTool.getConf().set(DATE_FORMAT_ATTRIB, "yyyy/MM/dd");
        int exitCode = csvBulkLoadTool.run(new String[] {
            "--input", "/tmp/input1.csv,/tmp/input2.csv", "--table", "table7", "--zookeeper", zkQuorum });
        assertEquals(0, exitCode);

        ResultSet rs = stmt.executeQuery("SELECT id, name, t FROM table7 ORDER BY id");
        try {
            assertTrue(rs.next());
            assertEquals(1, rs.getInt(1));
            assertEquals("Name 1", rs.getString(2));
            assertEquals(DateUtil.parseDate("1970-01-01"), rs.getDate(3));
            assertTrue(rs.next());
            assertEquals(2, rs.getInt(1));
            assertEquals("Name 2", rs.getString(2));
            assertEquals(DateUtil.parseDate("1970-01-02"), rs.getDate(3));
            // Exactly two rows are expected: one per input file.
            assertFalse(rs.next());
        } finally {
            rs.close();
        }
    } finally {
        stmt.close();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) CsvBulkLoadTool(org.apache.phoenix.mapreduce.CsvBulkLoadTool) Configuration(org.apache.hadoop.conf.Configuration) Statement(java.sql.Statement) FileSystem(org.apache.hadoop.fs.FileSystem) ResultSet(java.sql.ResultSet) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) PrintWriter(java.io.PrintWriter) Test(org.junit.Test)

Example 10 with CsvBulkLoadTool

use of org.apache.phoenix.mapreduce.CsvBulkLoadTool in project phoenix by apache.

In class CsvBulkLoadToolIT, method testFullOptionImport:

/**
 * Bulk-loads a CSV file into TABLE2 using a custom field delimiter ({@code |}), a custom array
 * delimiter ({@code ;}) and an explicit {@code --import-columns} list of ID and NAMES, then
 * checks that both rows come back with the expected array contents.
 *
 * <p>The statement and result set are released in {@code finally} blocks so that a failing
 * assertion or a failing bulk load does not leave JDBC resources open.
 *
 * @throws Exception if table setup, writing the CSV file, the bulk load, or the query fails
 */
@Test
public void testFullOptionImport() throws Exception {
    Statement stmt = conn.createStatement();
    try {
        stmt.execute("CREATE TABLE TABLE2 (ID INTEGER NOT NULL PRIMARY KEY, "
                + "NAME VARCHAR, NAMES VARCHAR ARRAY)");

        FileSystem fs = FileSystem.get(getUtility().getConfiguration());
        FSDataOutputStream outputStream = fs.create(new Path("/tmp/input2.csv"));
        PrintWriter printWriter = new PrintWriter(outputStream);
        try {
            // Each line holds two fields, matching the two columns named in --import-columns.
            printWriter.println("1|Name 1a;Name 1b");
            printWriter.println("2|Name 2a;Name 2b");
        } finally {
            printWriter.close();
        }

        CsvBulkLoadTool csvBulkLoadTool = new CsvBulkLoadTool();
        csvBulkLoadTool.setConf(getUtility().getConfiguration());
        int exitCode = csvBulkLoadTool.run(new String[] {
            "--input", "/tmp/input2.csv",
            "--table", "table2",
            "--zookeeper", zkQuorum,
            "--delimiter", "|",
            "--array-delimiter", ";",
            "--import-columns", "ID,NAMES" });
        assertEquals(0, exitCode);

        ResultSet rs = stmt.executeQuery("SELECT id, names FROM table2 ORDER BY id");
        try {
            assertTrue(rs.next());
            assertEquals(1, rs.getInt(1));
            assertArrayEquals(new Object[] { "Name 1a", "Name 1b" }, (Object[]) rs.getArray(2).getArray());
            assertTrue(rs.next());
            assertEquals(2, rs.getInt(1));
            assertArrayEquals(new Object[] { "Name 2a", "Name 2b" }, (Object[]) rs.getArray(2).getArray());
            assertFalse(rs.next());
        } finally {
            rs.close();
        }
    } finally {
        stmt.close();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) CsvBulkLoadTool(org.apache.phoenix.mapreduce.CsvBulkLoadTool) Statement(java.sql.Statement) FileSystem(org.apache.hadoop.fs.FileSystem) ResultSet(java.sql.ResultSet) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) PrintWriter(java.io.PrintWriter) Test(org.junit.Test)

Aggregations

CsvBulkLoadTool (org.apache.phoenix.mapreduce.CsvBulkLoadTool): 10, PrintWriter (java.io.PrintWriter): 9, Statement (java.sql.Statement): 9, FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 9, FileSystem (org.apache.hadoop.fs.FileSystem): 9, Path (org.apache.hadoop.fs.Path): 9, Test (org.junit.Test): 9, ResultSet (java.sql.ResultSet): 8, Configuration (org.apache.hadoop.conf.Configuration): 3, FileAlreadyExistsException (org.apache.hadoop.mapred.FileAlreadyExistsException): 2