Example 96 with BatchWriter

Use of org.apache.accumulo.core.client.BatchWriter in project accumulo by apache.

The class SparseColumnFamilyIT, method sparceColumnFamily.

@Test
public void sparceColumnFamily() throws Exception {
    String scftt = getUniqueNames(1)[0];
    Connector c = getConnector();
    c.tableOperations().create(scftt);
    BatchWriter bw = c.createBatchWriter(scftt, new BatchWriterConfig());
    // create file in the tablet that has mostly column family 0, with a few entries for column family 1
    bw.addMutation(nm(0, 1, 0));
    for (int i = 1; i < 99999; i++) {
        bw.addMutation(nm(i * 2, 0, i));
    }
    bw.addMutation(nm(99999 * 2, 1, 99999));
    bw.flush();
    c.tableOperations().flush(scftt, null, null, true);
    // create a file that has column family 1 and 0 interleaved
    for (int i = 0; i < 100000; i++) {
        bw.addMutation(nm(i * 2 + 1, i % 2 == 0 ? 0 : 1, i));
    }
    bw.close();
    c.tableOperations().flush(scftt, null, null, true);
    try (Scanner scanner = c.createScanner(scftt, Authorizations.EMPTY)) {
        for (int i = 0; i < 200; i++) {
            // every time we search for column family 1, it will scan the entire file
            // that has mostly column family 0 until the bug is fixed
            scanner.setRange(new Range(String.format("%06d", i), null));
            scanner.clearColumns();
            scanner.setBatchSize(3);
            scanner.fetchColumnFamily(new Text(String.format("%03d", 1)));
            Iterator<Entry<Key, Value>> iter = scanner.iterator();
            if (iter.hasNext()) {
                Entry<Key, Value> entry = iter.next();
                if (!"001".equals(entry.getKey().getColumnFamilyData().toString())) {
                    throw new Exception();
                }
            }
        }
    }
}
Also used: Connector(org.apache.accumulo.core.client.Connector) Scanner(org.apache.accumulo.core.client.Scanner) Text(org.apache.hadoop.io.Text) Range(org.apache.accumulo.core.data.Range) Entry(java.util.Map.Entry) Value(org.apache.accumulo.core.data.Value) BatchWriterConfig(org.apache.accumulo.core.client.BatchWriterConfig) BatchWriter(org.apache.accumulo.core.client.BatchWriter) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)
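
The test relies on an nm(row, colFam, value) helper that this excerpt omits. A plausible sketch, assuming the helper zero-pads the row with %06d and the column family with %03d to match the formats the scan loop searches for (the qualifier and value encoding are illustrative guesses):

private static Mutation nm(int row, int cf, int val) {
    // build a mutation whose row and column family use the same
    // zero-padded formats the scanner later fetches
    Mutation m = new Mutation(new Text(String.format("%06d", row)));
    m.put(new Text(String.format("%03d", cf)), new Text("cq"),
            new Value(Integer.toString(val).getBytes(StandardCharsets.UTF_8)));
    return m;
}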

Example 97 with BatchWriter

Use of org.apache.accumulo.core.client.BatchWriter in project accumulo by apache.

The class MergeStateIT, method update.

private static void update(Connector c, Mutation m) throws TableNotFoundException, MutationsRejectedException {
    BatchWriter bw = c.createBatchWriter(MetadataTable.NAME, new BatchWriterConfig());
    bw.addMutation(m);
    bw.close();
}
Also used: BatchWriterConfig(org.apache.accumulo.core.client.BatchWriterConfig) BatchWriter(org.apache.accumulo.core.client.BatchWriter)
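
Note that this helper constructs and tears down a BatchWriter for every mutation, which is acceptable for a one-off metadata update but costly in a loop. A minimal sketch of the same helper with explicit cleanup, so buffered mutations are flushed even if addMutation throws:

private static void update(Connector c, Mutation m) throws TableNotFoundException, MutationsRejectedException {
    BatchWriter bw = c.createBatchWriter(MetadataTable.NAME, new BatchWriterConfig());
    try {
        bw.addMutation(m);
    } finally {
        // close() flushes any buffered mutations and releases the writer's resources
        bw.close();
    }
}

For repeated updates, create the writer once, add many mutations, and close it at the end, as the other examples on this page do.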

Example 98 with BatchWriter

Use of org.apache.accumulo.core.client.BatchWriter in project accumulo by apache.

The class ContinuousIngest, method main.

public static void main(String[] args) throws Exception {
    ContinuousOpts opts = new ContinuousOpts();
    BatchWriterOpts bwOpts = new BatchWriterOpts();
    ClientOnDefaultTable clientOpts = new ClientOnDefaultTable("ci");
    clientOpts.parseArgs(ContinuousIngest.class.getName(), args, bwOpts, opts);
    initVisibilities(opts);
    if (opts.min < 0 || opts.max < 0 || opts.max <= opts.min) {
        throw new IllegalArgumentException("min and max must be non-negative, with min less than max");
    }
    Connector conn = clientOpts.getConnector();
    if (!conn.tableOperations().exists(clientOpts.getTableName())) {
        throw new TableNotFoundException(null, clientOpts.getTableName(), "Consult the README and create the table before starting ingest.");
    }
    BatchWriter bw = conn.createBatchWriter(clientOpts.getTableName(), bwOpts.getBatchWriterConfig());
    // wrap the writer so that roughly one in every 1024 operations is traced
    bw = Trace.wrapAll(bw, new CountSampler(1024));
    Random r = new Random();
    byte[] ingestInstanceId = UUID.randomUUID().toString().getBytes(UTF_8);
    System.out.printf("UUID %d %s%n", System.currentTimeMillis(), new String(ingestInstanceId, UTF_8));
    long count = 0;
    final int flushInterval = 1000000;
    final int maxDepth = 25;
    // We always want new entries to point back to flushed data, so the previous
    // item already exists in Accumulo when the data is verified. To do this,
    // insert N points back to the row from insert (N - flushInterval). The
    // arrays below keep track of those rows.
    long[] prevRows = new long[flushInterval];
    long[] firstRows = new long[flushInterval];
    int[] firstColFams = new int[flushInterval];
    int[] firstColQuals = new int[flushInterval];
    long lastFlushTime = System.currentTimeMillis();
    out: while (true) {
        // generate first set of nodes
        ColumnVisibility cv = getVisibility(r);
        for (int index = 0; index < flushInterval; index++) {
            long rowLong = genLong(opts.min, opts.max, r);
            prevRows[index] = rowLong;
            firstRows[index] = rowLong;
            int cf = r.nextInt(opts.maxColF);
            int cq = r.nextInt(opts.maxColQ);
            firstColFams[index] = cf;
            firstColQuals[index] = cq;
            Mutation m = genMutation(rowLong, cf, cq, cv, ingestInstanceId, count, null, r, opts.checksum);
            count++;
            bw.addMutation(m);
        }
        lastFlushTime = flush(bw, count, flushInterval, lastFlushTime);
        if (count >= opts.num)
            break out;
        // generate subsequent sets of nodes that link to previous set of nodes
        for (int depth = 1; depth < maxDepth; depth++) {
            for (int index = 0; index < flushInterval; index++) {
                long rowLong = genLong(opts.min, opts.max, r);
                byte[] prevRow = genRow(prevRows[index]);
                prevRows[index] = rowLong;
                Mutation m = genMutation(rowLong, r.nextInt(opts.maxColF), r.nextInt(opts.maxColQ), cv, ingestInstanceId, count, prevRow, r, opts.checksum);
                count++;
                bw.addMutation(m);
            }
            lastFlushTime = flush(bw, count, flushInterval, lastFlushTime);
            if (count >= opts.num)
                break out;
        }
        // stitch the lists together: point each first insert at the previous
        // row of the next list, so every first insert points to something
        for (int index = 0; index < flushInterval - 1; index++) {
            Mutation m = genMutation(firstRows[index], firstColFams[index], firstColQuals[index], cv, ingestInstanceId, count, genRow(prevRows[index + 1]), r, opts.checksum);
            count++;
            bw.addMutation(m);
        }
        lastFlushTime = flush(bw, count, flushInterval, lastFlushTime);
        if (count >= opts.num)
            break out;
    }
    bw.close();
    clientOpts.stopTracing();
}
Also used: Connector(org.apache.accumulo.core.client.Connector) ClientOnDefaultTable(org.apache.accumulo.core.cli.ClientOnDefaultTable) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) CountSampler(org.apache.accumulo.core.trace.CountSampler) Random(java.util.Random) BatchWriterOpts(org.apache.accumulo.core.cli.BatchWriterOpts) BatchWriter(org.apache.accumulo.core.client.BatchWriter) ColumnVisibility(org.apache.accumulo.core.security.ColumnVisibility) Mutation(org.apache.accumulo.core.data.Mutation)
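
The flush(bw, count, flushInterval, lastFlushTime) and genLong(min, max, r) helpers are not shown in this excerpt. Plausible sketches, assuming flush simply forces the writer's buffer out and reports timing, and genLong draws a uniform row id from [min, max) (names and output format are illustrative):

private static long flush(BatchWriter bw, long count, int flushInterval, long lastFlushTime) throws MutationsRejectedException {
    long t1 = System.currentTimeMillis();
    bw.flush(); // block until every buffered mutation has been written
    long t2 = System.currentTimeMillis();
    System.out.printf("FLUSH %d %d %d %d %d%n", t2, t2 - lastFlushTime, t2 - t1, count, flushInterval);
    return t2; // becomes the caller's new lastFlushTime
}

private static long genLong(long min, long max, Random r) {
    // mask the sign bit rather than using Math.abs, which breaks on Long.MIN_VALUE
    return (r.nextLong() & 0x7fffffffffffffffL) % (max - min) + min;
}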

Example 99 with BatchWriter

Use of org.apache.accumulo.core.client.BatchWriter in project incubator-rya by apache.

The class AccumuloDocIndexerTest, method testContextCommonVarBs1.

@Test
public void testContextCommonVarBs1() throws Exception {
    RyaTableMutationsFactory rtm = new RyaTableMutationsFactory(RyaTripleContext.getInstance(conf));
    BatchWriter bw = accCon.createBatchWriter(tableName, 500L * 1024L * 1024L, Long.MAX_VALUE, 30);
    for (int i = 0; i < 30; i++) {
        RyaStatement rs1 = new RyaStatement(new RyaURI("uri:" + i), new RyaURI("uri:cf1"), new RyaURI("uri:cq1"), new RyaURI("uri:joe"));
        RyaStatement rs2 = new RyaStatement(new RyaURI("uri:" + i), new RyaURI("uri:cf2"), new RyaType(XMLSchema.STRING, "cq2"), new RyaURI("uri:joe"));
        RyaStatement rs3 = null;
        RyaStatement rs4 = new RyaStatement(new RyaURI("uri:" + i), new RyaURI("uri:cf1"), new RyaURI("uri:cq1"), new RyaURI("uri:hank"));
        RyaStatement rs5 = new RyaStatement(new RyaURI("uri:" + i), new RyaURI("uri:cf2"), new RyaType(XMLSchema.STRING, "cq2"), new RyaURI("uri:hank"));
        RyaStatement rs6 = null;
        if (i == 5 || i == 10 || i == 15 || i == 20 || i == 25) {
            rs3 = new RyaStatement(new RyaURI("uri:" + i), new RyaURI("uri:cf3"), new RyaType(XMLSchema.INTEGER, Integer.toString(i)), new RyaURI("uri:joe"));
            rs6 = new RyaStatement(new RyaURI("uri:" + i), new RyaURI("uri:cf3"), new RyaType(XMLSchema.INTEGER, Integer.toString(i)), new RyaURI("uri:hank"));
        }
        Map<RdfCloudTripleStoreConstants.TABLE_LAYOUT, Collection<Mutation>> serialize1 = rtm.serialize(rs1);
        Map<RdfCloudTripleStoreConstants.TABLE_LAYOUT, Collection<Mutation>> serialize2 = rtm.serialize(rs2);
        Map<RdfCloudTripleStoreConstants.TABLE_LAYOUT, Collection<Mutation>> serialize3 = null;
        Map<RdfCloudTripleStoreConstants.TABLE_LAYOUT, Collection<Mutation>> serialize4 = rtm.serialize(rs4);
        Map<RdfCloudTripleStoreConstants.TABLE_LAYOUT, Collection<Mutation>> serialize5 = rtm.serialize(rs5);
        Map<RdfCloudTripleStoreConstants.TABLE_LAYOUT, Collection<Mutation>> serialize6 = null;
        if (rs3 != null) {
            serialize3 = rtm.serialize(rs3);
        }
        if (rs6 != null) {
            serialize6 = rtm.serialize(rs6);
        }
        Collection<Mutation> m1 = EntityCentricIndex.createMutations(rs1);
        for (Mutation m : m1) {
            bw.addMutation(m);
        }
        Collection<Mutation> m2 = EntityCentricIndex.createMutations(rs2);
        for (Mutation m : m2) {
            bw.addMutation(m);
        }
        if (serialize3 != null) {
            Collection<Mutation> m3 = EntityCentricIndex.createMutations(rs3);
            for (Mutation m : m3) {
                bw.addMutation(m);
            }
        }
        Collection<Mutation> m4 = EntityCentricIndex.createMutations(rs4);
        for (Mutation m : m4) {
            bw.addMutation(m);
        }
        Collection<Mutation> m5 = EntityCentricIndex.createMutations(rs5);
        for (Mutation m : m5) {
            bw.addMutation(m);
        }
        if (serialize6 != null) {
            Collection<Mutation> m6 = EntityCentricIndex.createMutations(rs6);
            for (Mutation m : m6) {
                bw.addMutation(m);
            }
        }
    }
    // close the writer so all mutations are flushed before querying
    bw.close();
    String q1 = "SELECT ?X ?Y1 ?Y2 ?Y3 "
            + "{"
            + "?X <uri:cf1> ?Y1 ."
            + "?X <uri:cf2> ?Y2 ."
            + "?X <uri:cf3> ?Y3 ."
            + "}";
    String q2 = "SELECT ?X ?Y1 ?Y2 ?Y3 "
            + "{"
            + " GRAPH <uri:hank> { "
            + "?X <uri:cf1> ?Y1 ."
            + "?X <uri:cf2> ?Y2 ."
            + "?X <uri:cf3> ?Y3 ."
            + " } "
            + "}";
    SPARQLParser parser = new SPARQLParser();
    ParsedQuery pq1 = parser.parseQuery(q1, null);
    TupleExpr te1 = pq1.getTupleExpr();
    List<StatementPattern> spList1 = StatementPatternCollector.process(te1);
    Assert.assertTrue(StarQuery.isValidStarQuery(spList1));
    StarQuery sq1 = new StarQuery(spList1);
    AccumuloDocIdIndexer adi = new AccumuloDocIdIndexer(conf);
    Value v1 = RyaToRdfConversions.convertValue(new RyaType(XMLSchema.INTEGER, Integer.toString(5)));
    Value v2 = RyaToRdfConversions.convertValue(new RyaType(XMLSchema.INTEGER, Integer.toString(25)));
    List<BindingSet> bsList = Lists.newArrayList();
    QueryBindingSet b1 = new QueryBindingSet();
    b1.addBinding("X", vf.createURI("uri:5"));
    QueryBindingSet b2 = new QueryBindingSet();
    b2.addBinding("X", vf.createURI("uri:15"));
    QueryBindingSet b3 = new QueryBindingSet();
    b3.addBinding("X", vf.createURI("uri:25"));
    bsList.add(b1);
    bsList.add(b2);
    bsList.add(b3);
    CloseableIteration<BindingSet, QueryEvaluationException> sol1 = adi.queryDocIndex(sq1, bsList);
    System.out.println("**********************TEST 12***********************");
    int results = 0;
    while (sol1.hasNext()) {
        System.out.println(sol1.next());
        results++;
    }
    Assert.assertEquals(6, results);
    ParsedQuery pq2 = parser.parseQuery(q2, null);
    TupleExpr te2 = pq2.getTupleExpr();
    List<StatementPattern> spList2 = StatementPatternCollector.process(te2);
    Assert.assertTrue(StarQuery.isValidStarQuery(spList2));
    StarQuery sq2 = new StarQuery(spList2);
    CloseableIteration<BindingSet, QueryEvaluationException> sol2 = adi.queryDocIndex(sq2, bsList);
    System.out.println("**********************TEST 12***********************");
    results = 0;
    while (sol2.hasNext()) {
        System.out.println(sol2.next());
        results++;
    }
    Assert.assertEquals(3, results);
    adi.close();
}
Also used: ParsedQuery(org.openrdf.query.parser.ParsedQuery) RyaStatement(org.apache.rya.api.domain.RyaStatement) RyaType(org.apache.rya.api.domain.RyaType) StatementPattern(org.openrdf.query.algebra.StatementPattern) QueryBindingSet(org.openrdf.query.algebra.evaluation.QueryBindingSet) BindingSet(org.openrdf.query.BindingSet) SPARQLParser(org.openrdf.query.parser.sparql.SPARQLParser) TupleExpr(org.openrdf.query.algebra.TupleExpr) RyaURI(org.apache.rya.api.domain.RyaURI) QueryEvaluationException(org.openrdf.query.QueryEvaluationException) RyaTableMutationsFactory(org.apache.rya.accumulo.RyaTableMutationsFactory) Value(org.openrdf.model.Value) Collection(java.util.Collection) BatchWriter(org.apache.accumulo.core.client.BatchWriter) Mutation(org.apache.accumulo.core.data.Mutation) Test(org.junit.Test)
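
The four-argument createBatchWriter overload used above (max memory, max latency, write threads) is the older form; the 1.x client deprecates it in favor of BatchWriterConfig. An equivalent sketch, assuming java.util.concurrent.TimeUnit is imported:

BatchWriterConfig cfg = new BatchWriterConfig()
        .setMaxMemory(500L * 1024L * 1024L) // 500 MB write buffer
        .setMaxLatency(Long.MAX_VALUE, TimeUnit.MILLISECONDS) // never flush on a timer
        .setMaxWriteThreads(30);
BatchWriter bw = accCon.createBatchWriter(tableName, cfg);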

Example 100 with BatchWriter

Use of org.apache.accumulo.core.client.BatchWriter in project incubator-rya by apache.

The class AccumuloDocIndexerTest, method testNoContextCommonVarBs2.

@Test
public void testNoContextCommonVarBs2() throws Exception {
    RyaTableMutationsFactory rtm = new RyaTableMutationsFactory(RyaTripleContext.getInstance(conf));
    BatchWriter bw = accCon.createBatchWriter(tableName, 500L * 1024L * 1024L, Long.MAX_VALUE, 30);
    for (int i = 0; i < 30; i++) {
        RyaStatement rs1 = new RyaStatement(new RyaURI("uri:cq1"), new RyaURI("uri:cf1"), new RyaURI("uri:" + i));
        RyaStatement rs2 = new RyaStatement(new RyaURI("uri:" + i), new RyaURI("uri:cf2"), new RyaType(XMLSchema.STRING, "cq2"));
        RyaStatement rs3 = null;
        if (i == 5 || i == 10 || i == 15 || i == 20 || i == 25) {
            rs3 = new RyaStatement(new RyaURI("uri:" + i), new RyaURI("uri:cf3"), new RyaType(XMLSchema.INTEGER, Integer.toString(i)));
        }
        Map<RdfCloudTripleStoreConstants.TABLE_LAYOUT, Collection<Mutation>> serialize1 = rtm.serialize(rs1);
        Map<RdfCloudTripleStoreConstants.TABLE_LAYOUT, Collection<Mutation>> serialize2 = rtm.serialize(rs2);
        Map<RdfCloudTripleStoreConstants.TABLE_LAYOUT, Collection<Mutation>> serialize3 = null;
        if (rs3 != null) {
            serialize3 = rtm.serialize(rs3);
        }
        Collection<Mutation> m1 = EntityCentricIndex.createMutations(rs1);
        for (Mutation m : m1) {
            bw.addMutation(m);
        }
        Collection<Mutation> m2 = EntityCentricIndex.createMutations(rs2);
        for (Mutation m : m2) {
            bw.addMutation(m);
        }
        if (serialize3 != null) {
            Collection<Mutation> m3 = EntityCentricIndex.createMutations(rs3);
            for (Mutation m : m3) {
                bw.addMutation(m);
            }
        }
    }
    // close the writer so all mutations are flushed before querying
    bw.close();
    String q1 = "SELECT ?X ?Y1 ?Y2 ?Y3 "
            + "{"
            + "?Y1 <uri:cf1> ?X ."
            + "?X <uri:cf2> ?Y2 ."
            + "?X <uri:cf3> ?Y3 ."
            + "}";
    SPARQLParser parser = new SPARQLParser();
    ParsedQuery pq1 = parser.parseQuery(q1, null);
    TupleExpr te1 = pq1.getTupleExpr();
    List<StatementPattern> spList1 = StatementPatternCollector.process(te1);
    Assert.assertTrue(StarQuery.isValidStarQuery(spList1));
    StarQuery sq1 = new StarQuery(spList1);
    AccumuloDocIdIndexer adi = new AccumuloDocIdIndexer(conf);
    List<BindingSet> bsList = Lists.newArrayList();
    QueryBindingSet b1 = new QueryBindingSet();
    b1.addBinding("X", vf.createURI("uri:5"));
    QueryBindingSet b2 = new QueryBindingSet();
    b2.addBinding("X", vf.createURI("uri:15"));
    QueryBindingSet b3 = new QueryBindingSet();
    b3.addBinding("X", vf.createURI("uri:25"));
    bsList.add(b1);
    bsList.add(b2);
    bsList.add(b3);
    CloseableIteration<BindingSet, QueryEvaluationException> sol1 = adi.queryDocIndex(sq1, bsList);
    System.out.println("**********************TEST 5***********************");
    int results = 0;
    while (sol1.hasNext()) {
        System.out.println(sol1.next());
        results++;
    }
    Assert.assertEquals(3, results);
    adi.close();
}
Also used: QueryBindingSet(org.openrdf.query.algebra.evaluation.QueryBindingSet) BindingSet(org.openrdf.query.BindingSet) SPARQLParser(org.openrdf.query.parser.sparql.SPARQLParser) ParsedQuery(org.openrdf.query.parser.ParsedQuery) RyaStatement(org.apache.rya.api.domain.RyaStatement) RyaType(org.apache.rya.api.domain.RyaType) TupleExpr(org.openrdf.query.algebra.TupleExpr) RyaURI(org.apache.rya.api.domain.RyaURI) StatementPattern(org.openrdf.query.algebra.StatementPattern) QueryEvaluationException(org.openrdf.query.QueryEvaluationException) RyaTableMutationsFactory(org.apache.rya.accumulo.RyaTableMutationsFactory) Collection(java.util.Collection) BatchWriter(org.apache.accumulo.core.client.BatchWriter) Mutation(org.apache.accumulo.core.data.Mutation) Test(org.junit.Test)
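
Each Collection<Mutation> in these tests is queued with a hand-written loop. BatchWriter also exposes addMutations(Iterable<Mutation>), which queues a whole collection in one call; a sketch of one loop iteration rewritten that way:

bw.addMutations(EntityCentricIndex.createMutations(rs1)); // queue the whole collection at once
bw.addMutations(EntityCentricIndex.createMutations(rs2));
if (rs3 != null) {
    bw.addMutations(EntityCentricIndex.createMutations(rs3));
}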

Aggregations

BatchWriter (org.apache.accumulo.core.client.BatchWriter): 402 usages
Mutation (org.apache.accumulo.core.data.Mutation): 360
Test (org.junit.Test): 264
Value (org.apache.accumulo.core.data.Value): 250
BatchWriterConfig (org.apache.accumulo.core.client.BatchWriterConfig): 246
Text (org.apache.hadoop.io.Text): 194
Key (org.apache.accumulo.core.data.Key): 179
Scanner (org.apache.accumulo.core.client.Scanner): 174
Connector (org.apache.accumulo.core.client.Connector): 169
IteratorSetting (org.apache.accumulo.core.client.IteratorSetting): 81
Authorizations (org.apache.accumulo.core.security.Authorizations): 68
Range (org.apache.accumulo.core.data.Range): 61
Entry (java.util.Map.Entry): 51
Map (java.util.Map): 50
BatchScanner (org.apache.accumulo.core.client.BatchScanner): 46
MutationsRejectedException (org.apache.accumulo.core.client.MutationsRejectedException): 44
TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException): 40
HashMap (java.util.HashMap): 38
ArrayList (java.util.ArrayList): 36
Status (org.apache.accumulo.server.replication.proto.Replication.Status): 32
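
Taken together, the counts show the usual cluster: a Connector produces a BatchWriter (configured via BatchWriterConfig) that queues Mutations, and a Scanner reads the resulting Key/Value entries back under some Authorizations. A minimal round-trip sketch, assuming a 1.8+ client (where Scanner is AutoCloseable) and an illustrative table name:

static void roundTrip(Connector conn) throws Exception {
    String table = "demo"; // illustrative name
    if (!conn.tableOperations().exists(table)) {
        conn.tableOperations().create(table);
    }
    BatchWriter bw = conn.createBatchWriter(table, new BatchWriterConfig());
    try {
        Mutation m = new Mutation(new Text("row1"));
        m.put(new Text("fam"), new Text("qual"), new Value("val".getBytes(StandardCharsets.UTF_8)));
        bw.addMutation(m);
    } finally {
        bw.close(); // flush buffered mutations before reading
    }
    try (Scanner scanner = conn.createScanner(table, Authorizations.EMPTY)) {
        for (Entry<Key, Value> e : scanner) {
            System.out.println(e.getKey() + " -> " + e.getValue());
        }
    }
}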