Search in sources :

Example 6 with RegexBulkLoadTool

use of org.apache.phoenix.mapreduce.RegexBulkLoadTool in project phoenix by apache.

the class RegexBulkLoadToolIT method testBasicImport.

/**
 * Loads a two-row comma-separated file into {@code S.TABLE1} with {@link RegexBulkLoadTool}
 * and verifies that both rows, including the DATE column, are returned in ID order.
 *
 * <p>The input dates are written as {@code yyyy/MM/dd}, so the tool's
 * {@code DATE_FORMAT_ATTRIB} setting is overridden to match before the job runs.
 *
 * @throws Exception if table creation, input staging, the load job, or the query fails
 */
@Test
public void testBasicImport() throws Exception {
    // try-with-resources: the statement is released even if an assertion below fails.
    try (Statement stmt = conn.createStatement()) {
        stmt.execute("CREATE TABLE S.TABLE1 (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, T DATE) SPLIT ON (1,2)");

        // Stage the input file; closing the writer also closes the wrapped output stream.
        FileSystem fs = FileSystem.get(getUtility().getConfiguration());
        try (PrintWriter printWriter = new PrintWriter(fs.create(new Path("/tmp/input1.csv")))) {
            printWriter.println("1,Name 1,1970/01/01");
            printWriter.println("2,Name 2,1970/01/02");
        }

        RegexBulkLoadTool regexBulkLoadTool = new RegexBulkLoadTool();
        regexBulkLoadTool.setConf(getUtility().getConfiguration());
        regexBulkLoadTool.getConf().set(DATE_FORMAT_ATTRIB, "yyyy/MM/dd");
        // One capture group per target column: ID, NAME, T.
        int exitCode = regexBulkLoadTool.run(new String[] {
            "--input", "/tmp/input1.csv",
            "--table", "table1",
            "--schema", "s",
            "--regex", "([^,]*),([^,]*),([^,]*)",
            "--zookeeper", zkQuorum });
        assertEquals(0, exitCode);

        try (ResultSet rs = stmt.executeQuery("SELECT id, name, t FROM s.table1 ORDER BY id")) {
            assertTrue(rs.next());
            assertEquals(1, rs.getInt(1));
            assertEquals("Name 1", rs.getString(2));
            assertEquals(DateUtil.parseDate("1970-01-01"), rs.getDate(3));
            assertTrue(rs.next());
            assertEquals(2, rs.getInt(1));
            assertEquals("Name 2", rs.getString(2));
            assertEquals(DateUtil.parseDate("1970-01-02"), rs.getDate(3));
            // Exactly two rows were loaded.
            assertFalse(rs.next());
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Statement(java.sql.Statement) FileSystem(org.apache.hadoop.fs.FileSystem) ResultSet(java.sql.ResultSet) RegexBulkLoadTool(org.apache.phoenix.mapreduce.RegexBulkLoadTool) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) PrintWriter(java.io.PrintWriter) Test(org.junit.Test)

Example 7 with RegexBulkLoadTool

use of org.apache.phoenix.mapreduce.RegexBulkLoadTool in project phoenix by apache.

the class RegexBulkLoadToolIT method testMultipleInputFiles.

/**
 * Loads two single-row input files into {@code TABLE7} in one {@link RegexBulkLoadTool} run,
 * passing both paths as a comma-separated {@code --input} value, and verifies that the rows
 * from both files are returned in ID order.
 *
 * @throws Exception if table creation, input staging, the load job, or the query fails
 */
@Test
public void testMultipleInputFiles() throws Exception {
    // try-with-resources: the statement is released even if an assertion below fails.
    try (Statement stmt = conn.createStatement()) {
        stmt.execute("CREATE TABLE TABLE7 (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, T DATE) SPLIT ON (1,2)");

        // Stage one row per file; closing each writer also closes its wrapped output stream.
        FileSystem fs = FileSystem.get(getUtility().getConfiguration());
        try (PrintWriter firstWriter = new PrintWriter(fs.create(new Path("/tmp/input1.csv")))) {
            firstWriter.println("1,Name 1,1970/01/01");
        }
        try (PrintWriter secondWriter = new PrintWriter(fs.create(new Path("/tmp/input2.csv")))) {
            secondWriter.println("2,Name 2,1970/01/02");
        }

        RegexBulkLoadTool regexBulkLoadTool = new RegexBulkLoadTool();
        regexBulkLoadTool.setConf(getUtility().getConfiguration());
        // Input dates use yyyy/MM/dd, so the configured date format must match.
        regexBulkLoadTool.getConf().set(DATE_FORMAT_ATTRIB, "yyyy/MM/dd");
        int exitCode = regexBulkLoadTool.run(new String[] {
            "--input", "/tmp/input1.csv,/tmp/input2.csv",
            "--table", "table7",
            "--regex", "([^,]*),([^,]*),([^,]*)",
            "--zookeeper", zkQuorum });
        assertEquals(0, exitCode);

        try (ResultSet rs = stmt.executeQuery("SELECT id, name, t FROM table7 ORDER BY id")) {
            assertTrue(rs.next());
            assertEquals(1, rs.getInt(1));
            assertEquals("Name 1", rs.getString(2));
            assertEquals(DateUtil.parseDate("1970-01-01"), rs.getDate(3));
            assertTrue(rs.next());
            assertEquals(2, rs.getInt(1));
            assertEquals("Name 2", rs.getString(2));
            assertEquals(DateUtil.parseDate("1970-01-02"), rs.getDate(3));
            // Exactly one row per input file.
            assertFalse(rs.next());
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Statement(java.sql.Statement) FileSystem(org.apache.hadoop.fs.FileSystem) ResultSet(java.sql.ResultSet) RegexBulkLoadTool(org.apache.phoenix.mapreduce.RegexBulkLoadTool) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) PrintWriter(java.io.PrintWriter) Test(org.junit.Test)

Example 8 with RegexBulkLoadTool

use of org.apache.phoenix.mapreduce.RegexBulkLoadTool in project phoenix by apache.

the class RegexBulkLoadToolIT method testImportOneIndexTable.

/**
 * Creates {@code tableName} with a single index named {@code <tableName>_IDX} on FIRST_NAME,
 * runs {@link RegexBulkLoadTool} with {@code --index-table} pointing at that index, and checks
 * the outcome: a {@code SELECT *} on the data table returns no rows, while a lookup filtered
 * on FIRST_NAME returns the loaded value.
 *
 * @param tableName  name of the data table to create and load
 * @param localIndex {@code true} to create a LOCAL index, {@code false} for a regular index
 * @throws Exception if DDL, input staging, the load job, or a query fails
 */
public void testImportOneIndexTable(String tableName, boolean localIndex) throws Exception {
    String indexTableName = String.format("%s_IDX", tableName);
    // try-with-resources: the statement is released even if an assertion below fails.
    try (Statement stmt = conn.createStatement()) {
        stmt.execute("CREATE TABLE " + tableName + "(ID INTEGER NOT NULL PRIMARY KEY, " + "FIRST_NAME VARCHAR, LAST_NAME VARCHAR)");
        String ddl = "CREATE " + (localIndex ? "LOCAL" : "") + " INDEX " + indexTableName + " ON " + tableName + "(FIRST_NAME ASC)";
        stmt.execute(ddl);

        // Stage the input file; closing the writer also closes the wrapped output stream.
        FileSystem fs = FileSystem.get(getUtility().getConfiguration());
        try (PrintWriter printWriter = new PrintWriter(fs.create(new Path("/tmp/input4.csv")))) {
            printWriter.println("1,FirstName 1,LastName 1");
            printWriter.println("2,FirstName 2,LastName 2");
        }

        RegexBulkLoadTool regexBulkLoadTool = new RegexBulkLoadTool();
        regexBulkLoadTool.setConf(getUtility().getConfiguration());
        int exitCode = regexBulkLoadTool.run(new String[] {
            "--input", "/tmp/input4.csv",
            "--table", tableName,
            "--regex", "([^,]*),([^,]*),([^,]*)",
            "--index-table", indexTableName,
            "--zookeeper", zkQuorum });
        assertEquals(0, exitCode);

        // Each result set gets its own scope so neither is left open when the next query runs.
        // NOTE(review): the empty result presumably reflects that only the index is populated
        // when --index-table is given - confirm against the tool's documentation.
        try (ResultSet allRows = stmt.executeQuery("SELECT * FROM " + tableName)) {
            assertFalse(allRows.next());
        }
        try (ResultSet byFirstName = stmt.executeQuery("SELECT FIRST_NAME FROM " + tableName + " where FIRST_NAME='FirstName 1'")) {
            assertTrue(byFirstName.next());
            assertEquals("FirstName 1", byFirstName.getString(1));
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Statement(java.sql.Statement) FileSystem(org.apache.hadoop.fs.FileSystem) ResultSet(java.sql.ResultSet) RegexBulkLoadTool(org.apache.phoenix.mapreduce.RegexBulkLoadTool) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) PrintWriter(java.io.PrintWriter)

Example 9 with RegexBulkLoadTool

use of org.apache.phoenix.mapreduce.RegexBulkLoadTool in project phoenix by apache.

the class RegexBulkLoadToolIT method testImportWithLocalIndex.

/**
 * Loads two rows into {@code TABLE6}, which has two LOCAL indexes (on FIRST_NAME and on
 * LAST_NAME), using {@link RegexBulkLoadTool}, then verifies that a query filtered on
 * FIRST_NAME returns the expected row.
 *
 * <p>The input separates ID from the names with a comma and the two names with a colon; the
 * {@code --regex} argument is written to match that mixed-delimiter layout.
 *
 * @throws Exception if DDL, input staging, the load job, or the query fails
 */
@Test
public void testImportWithLocalIndex() throws Exception {
    // try-with-resources: the statement is released even if an assertion below fails.
    try (Statement stmt = conn.createStatement()) {
        stmt.execute("CREATE TABLE TABLE6 (ID INTEGER NOT NULL PRIMARY KEY, " + "FIRST_NAME VARCHAR, LAST_NAME VARCHAR) SPLIt ON (1,2)");
        String ddl = "CREATE LOCAL INDEX TABLE6_IDX ON TABLE6 " + " (FIRST_NAME ASC)";
        stmt.execute(ddl);
        ddl = "CREATE LOCAL INDEX TABLE6_IDX2 ON TABLE6 " + " (LAST_NAME ASC)";
        stmt.execute(ddl);

        // Stage the input file; closing the writer also closes the wrapped output stream.
        FileSystem fs = FileSystem.get(getUtility().getConfiguration());
        try (PrintWriter printWriter = new PrintWriter(fs.create(new Path("/tmp/input3.csv")))) {
            printWriter.println("1,FirstName 1:LastName 1");
            printWriter.println("2,FirstName 2:LastName 2");
        }

        RegexBulkLoadTool regexBulkLoadTool = new RegexBulkLoadTool();
        regexBulkLoadTool.setConf(getUtility().getConfiguration());
        int exitCode = regexBulkLoadTool.run(new String[] {
            "--input", "/tmp/input3.csv",
            "--table", "table6",
            "--regex", "([^,]*),([^:]*):([^,]*)",
            "--zookeeper", zkQuorum });
        assertEquals(0, exitCode);

        try (ResultSet rs = stmt.executeQuery("SELECT id, FIRST_NAME FROM TABLE6 where first_name='FirstName 2'")) {
            assertTrue(rs.next());
            assertEquals(2, rs.getInt(1));
            assertEquals("FirstName 2", rs.getString(2));
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Statement(java.sql.Statement) FileSystem(org.apache.hadoop.fs.FileSystem) ResultSet(java.sql.ResultSet) RegexBulkLoadTool(org.apache.phoenix.mapreduce.RegexBulkLoadTool) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) PrintWriter(java.io.PrintWriter) Test(org.junit.Test)

Aggregations

RegexBulkLoadTool (org.apache.phoenix.mapreduce.RegexBulkLoadTool): 9, PrintWriter (java.io.PrintWriter): 8, Statement (java.sql.Statement): 8, FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 8, FileSystem (org.apache.hadoop.fs.FileSystem): 8, Path (org.apache.hadoop.fs.Path): 8, Test (org.junit.Test): 8, ResultSet (java.sql.ResultSet): 6, FileAlreadyExistsException (org.apache.hadoop.mapred.FileAlreadyExistsException): 2