Search in sources :

Example 1 with CsvBulkLoadTool

use of org.apache.phoenix.mapreduce.CsvBulkLoadTool in project phoenix by apache.

From the class CsvBulkLoadToolIT, method testBasicImport.

@Test
public void testBasicImport() throws Exception {
    // Creates a pre-split table, writes a two-row CSV to HDFS, bulk-loads it with a
    // custom date format, and verifies that both rows round-trip with correct values.
    try (Statement stmt = conn.createStatement()) {
        stmt.execute("CREATE TABLE S.TABLE1 (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, T DATE) SPLIT ON (1,2)");
        FileSystem fs = FileSystem.get(getUtility().getConfiguration());
        // try-with-resources closes the underlying HDFS stream even if a write fails.
        try (PrintWriter printWriter = new PrintWriter(fs.create(new Path("/tmp/input1.csv")))) {
            printWriter.println("1,Name 1,1970/01/01");
            printWriter.println("2,Name 2,1970/01/02");
        }
        CsvBulkLoadTool csvBulkLoadTool = new CsvBulkLoadTool();
        // Copy the configuration so the custom date format does not leak into other tests.
        csvBulkLoadTool.setConf(new Configuration(getUtility().getConfiguration()));
        csvBulkLoadTool.getConf().set(DATE_FORMAT_ATTRIB, "yyyy/MM/dd");
        int exitCode = csvBulkLoadTool.run(new String[] { "--input", "/tmp/input1.csv", "--table", "table1", "--schema", "s", "--zookeeper", zkQuorum });
        assertEquals(0, exitCode);
        // ResultSet is closed even when an assertion below fails.
        try (ResultSet rs = stmt.executeQuery("SELECT id, name, t FROM s.table1 ORDER BY id")) {
            assertTrue(rs.next());
            assertEquals(1, rs.getInt(1));
            assertEquals("Name 1", rs.getString(2));
            assertEquals(DateUtil.parseDate("1970-01-01"), rs.getDate(3));
            assertTrue(rs.next());
            assertEquals(2, rs.getInt(1));
            assertEquals("Name 2", rs.getString(2));
            assertEquals(DateUtil.parseDate("1970-01-02"), rs.getDate(3));
            assertFalse(rs.next());
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) CsvBulkLoadTool(org.apache.phoenix.mapreduce.CsvBulkLoadTool) Configuration(org.apache.hadoop.conf.Configuration) Statement(java.sql.Statement) FileSystem(org.apache.hadoop.fs.FileSystem) ResultSet(java.sql.ResultSet) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) PrintWriter(java.io.PrintWriter) Test(org.junit.Test)

Example 2 with CsvBulkLoadTool

use of org.apache.phoenix.mapreduce.CsvBulkLoadTool in project phoenix by apache.

From the class CsvBulkLoadToolIT, method testImportInImmutableTable.

@Test
public void testImportInImmutableTable() throws Exception {
    // Creates an IMMUTABLE table with two extra column families, bulk-loads a two-row
    // CSV using a custom date format, and verifies every column in both rows.
    try (Statement stmt = conn.createStatement()) {
        stmt.execute("CREATE IMMUTABLE TABLE S.TABLE10 (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, T DATE, CF1.T2 DATE, CF2.T3 DATE) ");
        FileSystem fs = FileSystem.get(getUtility().getConfiguration());
        // try-with-resources closes the underlying HDFS stream even if a write fails.
        try (PrintWriter printWriter = new PrintWriter(fs.create(new Path("/tmp/input10.csv")))) {
            printWriter.println("1,Name 1,1970/01/01,1970/02/01,1970/03/01");
            printWriter.println("2,Name 2,1970/01/02,1970/02/02,1970/03/02");
        }
        CsvBulkLoadTool csvBulkLoadTool = new CsvBulkLoadTool();
        // Copy the configuration so the custom date format does not leak into other tests.
        csvBulkLoadTool.setConf(new Configuration(getUtility().getConfiguration()));
        csvBulkLoadTool.getConf().set(DATE_FORMAT_ATTRIB, "yyyy/MM/dd");
        int exitCode = csvBulkLoadTool.run(new String[] { "--input", "/tmp/input10.csv", "--table", "table10", "--schema", "s", "--zookeeper", zkQuorum });
        assertEquals(0, exitCode);
        // ResultSet is closed even when an assertion below fails.
        try (ResultSet rs = stmt.executeQuery("SELECT id, name, t, CF1.T2, CF2.T3 FROM s.table10 ORDER BY id")) {
            assertTrue(rs.next());
            assertEquals(1, rs.getInt(1));
            assertEquals("Name 1", rs.getString(2));
            assertEquals(DateUtil.parseDate("1970-01-01"), rs.getDate(3));
            assertEquals(DateUtil.parseDate("1970-02-01"), rs.getDate(4));
            assertEquals(DateUtil.parseDate("1970-03-01"), rs.getDate(5));
            assertTrue(rs.next());
            assertEquals(2, rs.getInt(1));
            assertEquals("Name 2", rs.getString(2));
            assertEquals(DateUtil.parseDate("1970-01-02"), rs.getDate(3));
            assertEquals(DateUtil.parseDate("1970-02-02"), rs.getDate(4));
            assertEquals(DateUtil.parseDate("1970-03-02"), rs.getDate(5));
            assertFalse(rs.next());
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) CsvBulkLoadTool(org.apache.phoenix.mapreduce.CsvBulkLoadTool) Configuration(org.apache.hadoop.conf.Configuration) Statement(java.sql.Statement) FileSystem(org.apache.hadoop.fs.FileSystem) ResultSet(java.sql.ResultSet) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) PrintWriter(java.io.PrintWriter) Test(org.junit.Test)

Example 3 with CsvBulkLoadTool

use of org.apache.phoenix.mapreduce.CsvBulkLoadTool in project phoenix by apache.

From the class CsvBulkLoadToolIT, method testImportOneIndexTable.

// Helper (no @Test) exercised with both local and global index variants by callers.
// Bulk-loads ONLY the named index table via --index-table, then verifies the data
// table stays empty while an index-served query returns the loaded row.
public void testImportOneIndexTable(String tableName, boolean localIndex) throws Exception {
    String indexTableName = String.format("%s_IDX", tableName);
    try (Statement stmt = conn.createStatement()) {
        stmt.execute("CREATE TABLE " + tableName + "(ID INTEGER NOT NULL PRIMARY KEY, " + "FIRST_NAME VARCHAR, LAST_NAME VARCHAR)");
        String ddl = "CREATE " + (localIndex ? "LOCAL" : "") + " INDEX " + indexTableName + " ON " + tableName + "(FIRST_NAME ASC)";
        stmt.execute(ddl);
        FileSystem fs = FileSystem.get(getUtility().getConfiguration());
        // try-with-resources closes the underlying HDFS stream even if a write fails.
        try (PrintWriter printWriter = new PrintWriter(fs.create(new Path("/tmp/input4.csv")))) {
            printWriter.println("1,FirstName 1,LastName 1");
            printWriter.println("2,FirstName 2,LastName 2");
        }
        CsvBulkLoadTool csvBulkLoadTool = new CsvBulkLoadTool();
        csvBulkLoadTool.setConf(getUtility().getConfiguration());
        int exitCode = csvBulkLoadTool.run(new String[] { "--input", "/tmp/input4.csv", "--table", tableName, "--index-table", indexTableName, "--zookeeper", zkQuorum });
        assertEquals(0, exitCode);
        // Only the index table was loaded, so the data table must be empty.
        try (ResultSet rs = stmt.executeQuery("SELECT * FROM " + tableName)) {
            assertFalse(rs.next());
        }
        // This query is answerable from the index alone and should find the row.
        try (ResultSet rs = stmt.executeQuery("SELECT FIRST_NAME FROM " + tableName + " where FIRST_NAME='FirstName 1'")) {
            assertTrue(rs.next());
            assertEquals("FirstName 1", rs.getString(1));
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) CsvBulkLoadTool(org.apache.phoenix.mapreduce.CsvBulkLoadTool) Statement(java.sql.Statement) FileSystem(org.apache.hadoop.fs.FileSystem) ResultSet(java.sql.ResultSet) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) PrintWriter(java.io.PrintWriter)

Example 4 with CsvBulkLoadTool

use of org.apache.phoenix.mapreduce.CsvBulkLoadTool in project phoenix by apache.

From the class CsvBulkLoadToolIT, method testAlreadyExistsOutputPath.

@Test
public void testAlreadyExistsOutputPath() {
    // Pre-creates the MapReduce output path so the bulk load is expected to fail
    // with FileAlreadyExistsException rather than overwrite existing output.
    String tableName = "TABLE9";
    String outputPath = "/tmp/output/tabl9";
    try {
        try (Statement stmt = conn.createStatement()) {
            stmt.execute("CREATE TABLE " + tableName + "(ID INTEGER NOT NULL PRIMARY KEY, " + "FIRST_NAME VARCHAR, LAST_NAME VARCHAR)");
        }
        FileSystem fs = FileSystem.get(getUtility().getConfiguration());
        // Close the stream immediately; we only need the path to exist.
        fs.create(new Path(outputPath)).close();
        // try-with-resources closes the underlying HDFS stream even if a write fails.
        try (PrintWriter printWriter = new PrintWriter(fs.create(new Path("/tmp/input9.csv")))) {
            printWriter.println("1,FirstName 1,LastName 1");
            printWriter.println("2,FirstName 2,LastName 2");
        }
        CsvBulkLoadTool csvBulkLoadTool = new CsvBulkLoadTool();
        csvBulkLoadTool.setConf(getUtility().getConfiguration());
        csvBulkLoadTool.run(new String[] { "--input", "/tmp/input9.csv", "--output", outputPath, "--table", tableName, "--zookeeper", zkQuorum });
        fail(String.format("Output path %s already exists. hence, should fail", outputPath));
    } catch (Exception ex) {
        // Include the actual exception in the failure message so an unexpected
        // exception type is diagnosable instead of a bare assertion failure.
        assertTrue("Expected FileAlreadyExistsException but got: " + ex,
                ex instanceof FileAlreadyExistsException);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) CsvBulkLoadTool(org.apache.phoenix.mapreduce.CsvBulkLoadTool) FileAlreadyExistsException(org.apache.hadoop.mapred.FileAlreadyExistsException) Statement(java.sql.Statement) FileSystem(org.apache.hadoop.fs.FileSystem) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) FileAlreadyExistsException(org.apache.hadoop.mapred.FileAlreadyExistsException) PrintWriter(java.io.PrintWriter) Test(org.junit.Test)

Example 5 with CsvBulkLoadTool

use of org.apache.phoenix.mapreduce.CsvBulkLoadTool in project phoenix by apache.

From the class CsvBulkLoadToolIT, method testImportWithTabs.

@Test
public void testImportWithTabs() throws Exception {
    // Loads a tab-delimited file via --delimiter "\t" and verifies both rows land.
    try (Statement stmt = conn.createStatement()) {
        stmt.execute("CREATE TABLE TABLE8 (ID INTEGER NOT NULL PRIMARY KEY, " + "NAME1 VARCHAR, NAME2 VARCHAR)");
        FileSystem fs = FileSystem.get(getUtility().getConfiguration());
        // try-with-resources closes the underlying HDFS stream even if a write fails.
        try (PrintWriter printWriter = new PrintWriter(fs.create(new Path("/tmp/input8.csv")))) {
            printWriter.println("1\tName 1a\tName 2a");
            printWriter.println("2\tName 2a\tName 2b");
        }
        CsvBulkLoadTool csvBulkLoadTool = new CsvBulkLoadTool();
        csvBulkLoadTool.setConf(getUtility().getConfiguration());
        int exitCode = csvBulkLoadTool.run(new String[] { "--input", "/tmp/input8.csv", "--table", "table8", "--zookeeper", zkQuorum, "--delimiter", "\\t" });
        assertEquals(0, exitCode);
        try (ResultSet rs = stmt.executeQuery("SELECT id, name1, name2 FROM table8 ORDER BY id")) {
            assertTrue(rs.next());
            assertEquals(1, rs.getInt(1));
            assertEquals("Name 1a", rs.getString(2));
            assertEquals("Name 2a", rs.getString(3));
            // Previously the second loaded row was never verified.
            assertTrue(rs.next());
            assertEquals(2, rs.getInt(1));
            assertEquals("Name 2a", rs.getString(2));
            assertEquals("Name 2b", rs.getString(3));
            assertFalse(rs.next());
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) CsvBulkLoadTool(org.apache.phoenix.mapreduce.CsvBulkLoadTool) Statement(java.sql.Statement) FileSystem(org.apache.hadoop.fs.FileSystem) ResultSet(java.sql.ResultSet) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) PrintWriter(java.io.PrintWriter) Test(org.junit.Test)

Aggregations

CsvBulkLoadTool (org.apache.phoenix.mapreduce.CsvBulkLoadTool)10 PrintWriter (java.io.PrintWriter)9 Statement (java.sql.Statement)9 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)9 FileSystem (org.apache.hadoop.fs.FileSystem)9 Path (org.apache.hadoop.fs.Path)9 Test (org.junit.Test)9 ResultSet (java.sql.ResultSet)8 Configuration (org.apache.hadoop.conf.Configuration)3 FileAlreadyExistsException (org.apache.hadoop.mapred.FileAlreadyExistsException)2