
Example 66 with Key

Use of org.apache.accumulo.core.data.Key in project hive by apache.

From class AccumuloDefaultIndexScanner, method getIndexRowRanges:

/**
 * Get a list of rowid ranges by scanning a column index.
 *
 * @param column     - the hive column name
 * @param indexRange - Key range to scan on the index table
 * @return List of matching rowid ranges, or null if too many matches are found;
 * if no index values are found, an empty list is returned so that the
 * query can short-circuit
 */
@Override
public List<Range> getIndexRowRanges(String column, Range indexRange) {
    List<Range> rowIds = new ArrayList<Range>();
    Scanner scan = null;
    String col = this.colMap.get(column);
    if (col != null) {
        try {
            LOG.debug("Searching tab=" + indexTable + " column=" + column + " range=" + indexRange);
            Connector conn = getConnector();
            scan = conn.createScanner(indexTable, auths);
            scan.setRange(indexRange);
            Text cf = new Text(col);
            LOG.debug("Using Column Family=" + toString());
            scan.fetchColumnFamily(cf);
            for (Map.Entry<Key, Value> entry : scan) {
                rowIds.add(new Range(entry.getKey().getColumnQualifier()));
                // if we have too many results return null for a full scan
                if (rowIds.size() > maxRowIds) {
                    return null;
                }
            }
            // no hits on the index so return a no match range
            if (rowIds.isEmpty()) {
                LOG.debug("Found 0 index matches");
            } else {
                LOG.debug("Found " + rowIds.size() + " index matches");
            }
            return rowIds;
        } catch (AccumuloException | AccumuloSecurityException | TableNotFoundException e) {
            LOG.error("Failed to scan index table: " + indexTable, e);
        } finally {
            if (scan != null) {
                scan.close();
            }
        }
    }
    // assume the index is bad and do a full scan
    LOG.debug("Index lookup failed for table " + indexTable);
    return null;
}
Also used: Scanner (org.apache.accumulo.core.client.Scanner), Connector (org.apache.accumulo.core.client.Connector), AccumuloException (org.apache.accumulo.core.client.AccumuloException), ArrayList (java.util.ArrayList), Text (org.apache.hadoop.io.Text), Range (org.apache.accumulo.core.data.Range), TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException), Value (org.apache.accumulo.core.data.Value), AccumuloSecurityException (org.apache.accumulo.core.client.AccumuloSecurityException), HashMap (java.util.HashMap), Map (java.util.Map), Key (org.apache.accumulo.core.data.Key)
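
The ranges returned by getIndexRowRanges are meant to drive a follow-up scan of the data table. Below is a minimal sketch of how a caller might consume them with a BatchScanner; the data table name, authorizations, thread count, and the helper itself are illustrative assumptions, not taken from the Hive source.

import java.util.List;
import java.util.Map;

import org.apache.accumulo.core.client.BatchScanner;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.security.Authorizations;

public class IndexRangeConsumer {

    // Hypothetical caller: scan only the rows the index matched.
    static void scanMatchedRows(Connector conn, String dataTable, Authorizations auths,
            List<Range> rowIds) throws TableNotFoundException {
        if (rowIds == null) {
            // too many index matches: fall back to a full table scan instead
            return;
        }
        if (rowIds.isEmpty()) {
            // no index matches: the query short-circuits to an empty result
            return;
        }
        BatchScanner scanner = conn.createBatchScanner(dataTable, auths, 4);
        try {
            scanner.setRanges(rowIds);
            for (Map.Entry<Key, Value> entry : scanner) {
                // process each matched row
                System.out.println(entry.getKey().getRow() + " -> " + entry.getValue());
            }
        } finally {
            scanner.close();
        }
    }
}

A BatchScanner fits here because the index lookup typically yields many disjoint single-row ranges, which it can fetch in parallel.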

Example 67 with Key

Use of org.apache.accumulo.core.data.Key in project apex-malhar by apache.

From class AccumuloTestHelper, method getAccumuloTuple:

public static AccumuloTuple getAccumuloTuple(String row, String colFam, String colName) {
    Authorizations auths = new Authorizations();
    Scanner scan = null;
    try {
        scan = con.createScanner("tab1", auths);
    } catch (TableNotFoundException e) {
        logger.error("error in test helper");
        DTThrowable.rethrow(e);
    }
    scan.setRange(new Range(new Text(row)));
    scan.fetchColumn(new Text(colFam), new Text(colName));
    // assuming only one row
    for (Entry<Key, Value> entry : scan) {
        AccumuloTuple tuple = new AccumuloTuple();
        tuple.setRow(entry.getKey().getRow().toString());
        tuple.setColFamily(entry.getKey().getColumnFamily().toString());
        tuple.setColName(entry.getKey().getColumnQualifier().toString());
        tuple.setColValue(entry.getValue().toString());
        return tuple;
    }
    return null;
}
Also used: Scanner (org.apache.accumulo.core.client.Scanner), TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException), Authorizations (org.apache.accumulo.core.security.Authorizations), Value (org.apache.accumulo.core.data.Value), Text (org.apache.hadoop.io.Text), Range (org.apache.accumulo.core.data.Range), Key (org.apache.accumulo.core.data.Key)
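
For this helper to find anything, the cell it fetches has to be written first. A minimal sketch of populating tab1 with a BatchWriter and Mutation follows; the table name comes from the snippet above, while the helper itself and the default BatchWriterConfig are illustrative assumptions.

import org.apache.accumulo.core.client.BatchWriter;
import org.apache.accumulo.core.client.BatchWriterConfig;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.MutationsRejectedException;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Value;
import org.apache.hadoop.io.Text;

public class AccumuloWriteSketch {

    // Hypothetical helper: write one cell so getAccumuloTuple can find it.
    static void writeCell(Connector con, String row, String colFam,
            String colName, String colValue)
            throws TableNotFoundException, MutationsRejectedException {
        BatchWriter writer = con.createBatchWriter("tab1", new BatchWriterConfig());
        try {
            Mutation m = new Mutation(new Text(row));
            m.put(new Text(colFam), new Text(colName), new Value(colValue.getBytes()));
            writer.addMutation(m);
        } finally {
            writer.close(); // flushes any pending mutations
        }
    }
}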

Example 68 with Key

Use of org.apache.accumulo.core.data.Key in project apex-malhar by apache.

From class AccumuloWindowStore, method getCommittedWindowId:

@Override
public long getCommittedWindowId(String appId, int operatorId) {
    byte[] value = null;
    Authorizations auths = new Authorizations();
    Scanner scan = null;
    String columnKey = appId + "_" + operatorId + "_" + lastWindowColumnName;
    lastWindowColumnBytes = columnKey.getBytes();
    try {
        scan = connector.createScanner(tableName, auths);
    } catch (TableNotFoundException e) {
        logger.error("error getting committed window id", e);
        DTThrowable.rethrow(e);
    }
    scan.setRange(new Range(new Text(rowBytes)));
    scan.fetchColumn(new Text(columnFamilyBytes), new Text(lastWindowColumnBytes));
    for (Entry<Key, Value> entry : scan) {
        value = entry.getValue().get();
    }
    if (value != null) {
        return toLong(value);
    }
    return -1;
}
Also used: Scanner (org.apache.accumulo.core.client.Scanner), TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException), Authorizations (org.apache.accumulo.core.security.Authorizations), Value (org.apache.accumulo.core.data.Value), Text (org.apache.hadoop.io.Text), Range (org.apache.accumulo.core.data.Range), Key (org.apache.accumulo.core.data.Key)
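
The toLong helper is not shown in this snippet. Assuming the committed window id is stored as an 8-byte big-endian value, a plausible stand-in (not necessarily the actual apex-malhar implementation) would be:

import java.nio.ByteBuffer;

public class WindowIdCodec {

    // Plausible stand-in for the unshown helper, assuming the window id
    // was written as an 8-byte big-endian value (ByteBuffer's default order).
    public static long toLong(byte[] value) {
        return ByteBuffer.wrap(value).getLong();
    }
}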

Example 69 with Key

Use of org.apache.accumulo.core.data.Key in project Gaffer by gchq.

From class BloomFilter18IT, method testFilter:

private void testFilter(final AccumuloElementConverter elementConverter, final RangeFactory rangeFactory) throws AccumuloElementConversionException, RangeFactoryException, IOException {
    // Create random data to insert, and sort it
    final Random random = new Random();
    final HashSet<Key> keysSet = new HashSet<>();
    final HashSet<Entity> dataSet = new HashSet<>();
    for (int i = 0; i < 100000; i++) {
        final Entity source = new Entity(TestGroups.ENTITY);
        source.setVertex("type" + random.nextInt(Integer.MAX_VALUE));
        final Entity destination = new Entity(TestGroups.ENTITY);
        destination.setVertex("type" + random.nextInt(Integer.MAX_VALUE));
        dataSet.add(source);
        dataSet.add(destination);
        final Entity sourceEntity = new Entity(source.getGroup());
        sourceEntity.setVertex(source.getVertex());
        final Entity destinationEntity = new Entity(destination.getGroup());
        destinationEntity.setVertex(destination.getVertex());
        final Edge edge = new Edge(TestGroups.EDGE, source.getVertex(), destination.getVertex(), true);
        keysSet.add(elementConverter.getKeyFromEntity(sourceEntity));
        keysSet.add(elementConverter.getKeyFromEntity(destinationEntity));
        final Pair<Key> edgeKeys = elementConverter.getKeysFromEdge(edge);
        keysSet.add(edgeKeys.getFirst());
        keysSet.add(edgeKeys.getSecond());
    }
    final ArrayList<Key> keys = new ArrayList<>(keysSet);
    Collections.sort(keys);
    final Properties property = new Properties();
    property.put(AccumuloPropertyNames.COUNT, 10);
    final Value value = elementConverter.getValueFromProperties(TestGroups.ENTITY, property);
    final Value value2 = elementConverter.getValueFromProperties(TestGroups.EDGE, property);
    // Create Accumulo configuration
    final ConfigurationCopy accumuloConf = new ConfigurationCopy(AccumuloConfiguration.getDefaultConfiguration());
    accumuloConf.set(Property.TABLE_BLOOM_ENABLED, "true");
    accumuloConf.set(Property.TABLE_BLOOM_KEY_FUNCTOR, CoreKeyBloomFunctor.class.getName());
    accumuloConf.set(Property.TABLE_FILE_TYPE, RFile.EXTENSION);
    accumuloConf.set(Property.TABLE_BLOOM_LOAD_THRESHOLD, "1");
    accumuloConf.set(Property.TSERV_BLOOM_LOAD_MAXCONCURRENT, "1");
    // Create Hadoop configuration
    final Configuration conf = CachedConfiguration.getInstance();
    final FileSystem fs = FileSystem.get(conf);
    // Open file
    final String suffix = FileOperations.getNewFileExtension(accumuloConf);
    final String filenameTemp = tempFolder.getRoot().getAbsolutePath();
    final String filename = filenameTemp + "." + suffix;
    final File file = new File(filename);
    if (file.exists()) {
        file.delete();
    }
    final FileSKVWriter writer = FileOperations.getInstance().newWriterBuilder().forFile(filename, fs, conf).withTableConfiguration(accumuloConf).build();
    try {
        // Write data to file
        writer.startDefaultLocalityGroup();
        for (final Key key : keys) {
            if (elementConverter.getElementFromKey(key).getGroup().equals(TestGroups.ENTITY)) {
                writer.append(key, value);
            } else {
                writer.append(key, value2);
            }
        }
    } finally {
        writer.close();
    }
    // Reader
    final FileSKVIterator reader = FileOperations.getInstance().newReaderBuilder().forFile(filename, fs, conf).withTableConfiguration(accumuloConf).seekToBeginning(false).build();
    try {
        // Calculate random look up rate - run it numTrials times and take the best
        final int numTrials = 5;
        double maxRandomRate = -1.0;
        for (int i = 0; i < numTrials; i++) {
            final double rate = calculateRandomLookUpRate(reader, dataSet, random, rangeFactory);
            if (rate > maxRandomRate) {
                maxRandomRate = rate;
            }
        }
        LOGGER.info("Max random rate = " + maxRandomRate);
        // Calculate look up rate for items that were inserted
        double maxCausalRate = -1.0;
        for (int i = 0; i < numTrials; i++) {
            double rate = calculateCausalLookUpRate(reader, dataSet, random, rangeFactory);
            if (rate > maxCausalRate) {
                maxCausalRate = rate;
            }
        }
        LOGGER.info("Max causal rate = " + maxCausalRate);
        // Random look up rate should be much faster
        assertTrue(maxRandomRate > maxCausalRate);
    } finally {
        // Close reader
        reader.close();
    }
}
Also used: Entity (uk.gov.gchq.gaffer.data.element.Entity), FileSKVIterator (org.apache.accumulo.core.file.FileSKVIterator), ConfigurationCopy (org.apache.accumulo.core.conf.ConfigurationCopy), Configuration (org.apache.hadoop.conf.Configuration), AccumuloConfiguration (org.apache.accumulo.core.conf.AccumuloConfiguration), CachedConfiguration (org.apache.accumulo.core.util.CachedConfiguration), FileSKVWriter (org.apache.accumulo.core.file.FileSKVWriter), ArrayList (java.util.ArrayList), Properties (uk.gov.gchq.gaffer.data.element.Properties), Random (java.util.Random), CoreKeyBloomFunctor (uk.gov.gchq.gaffer.accumulostore.key.core.impl.CoreKeyBloomFunctor), FileSystem (org.apache.hadoop.fs.FileSystem), Value (org.apache.accumulo.core.data.Value), Edge (uk.gov.gchq.gaffer.data.element.Edge), File (java.io.File), RFile (org.apache.accumulo.core.file.rfile.RFile), Key (org.apache.accumulo.core.data.Key), HashSet (java.util.HashSet)
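
The test injects the bloom-filter settings into a ConfigurationCopy because it writes and reads RFiles directly. On a live table, the same properties would be set through TableOperations; a minimal sketch under that assumption (the connector, table name, and helper are illustrative):

import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.conf.Property;

import uk.gov.gchq.gaffer.accumulostore.key.core.impl.CoreKeyBloomFunctor;

public class BloomConfigSketch {

    // Hypothetical helper: enable bloom filters on an existing table,
    // mirroring the properties the test sets on its ConfigurationCopy.
    static void enableBloomFilter(Connector conn, String tableName)
            throws AccumuloException, AccumuloSecurityException {
        conn.tableOperations().setProperty(tableName,
                Property.TABLE_BLOOM_ENABLED.getKey(), "true");
        conn.tableOperations().setProperty(tableName,
                Property.TABLE_BLOOM_KEY_FUNCTOR.getKey(),
                CoreKeyBloomFunctor.class.getName());
    }
}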

Example 70 with Key

Use of org.apache.accumulo.core.data.Key in project Gaffer by gchq.

From class ImportKeyValueJavaPairRDDToAccumuloHandlerTest, method checkImportKeyValueJavaPairRDD:

@Test
public void checkImportKeyValueJavaPairRDD() throws OperationException, IOException, InterruptedException {
    final Graph graph1 = new Graph.Builder().addSchema(getClass().getResourceAsStream("/schema/dataSchema.json")).addSchema(getClass().getResourceAsStream("/schema/dataTypes.json")).addSchema(getClass().getResourceAsStream("/schema/storeSchema.json")).addSchema(getClass().getResourceAsStream("/schema/storeTypes.json")).storeProperties(getClass().getResourceAsStream("/store.properties")).build();
    final List<Element> elements = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
        final Entity entity = new Entity(TestGroups.ENTITY);
        entity.setVertex("" + i);
        final Edge edge1 = new Edge(TestGroups.EDGE);
        edge1.setSource("" + i);
        edge1.setDestination("B");
        edge1.setDirected(false);
        edge1.putProperty(TestPropertyNames.COUNT, 2);
        final Edge edge2 = new Edge(TestGroups.EDGE);
        edge2.setSource("" + i);
        edge2.setDestination("C");
        edge2.setDirected(false);
        edge2.putProperty(TestPropertyNames.COUNT, 4);
        elements.add(edge1);
        elements.add(edge2);
        elements.add(entity);
    }
    final User user = new User();
    final SparkConf sparkConf = new SparkConf().setMaster("local").setAppName("testCheckGetCorrectElementsInJavaRDDForEntitySeed").set("spark.serializer", "org.apache.spark.serializer.KryoSerializer").set("spark.kryo.registrator", "uk.gov.gchq.gaffer.spark.serialisation.kryo.Registrator").set("spark.driver.allowMultipleContexts", "true");
    final JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);
    // Create Hadoop configuration and serialise to a string
    final Configuration configuration = new Configuration();
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    configuration.write(new DataOutputStream(baos));
    final String configurationString = new String(baos.toByteArray(), CommonConstants.UTF_8);
    final String outputPath = this.getClass().getResource("/").getPath() + "load";
    final String failurePath = this.getClass().getResource("/").getPath() + "failure";
    final File file = new File(outputPath);
    if (file.exists()) {
        FileUtils.forceDelete(file);
    }
    final ElementConverterFunction func = new ElementConverterFunction(sparkContext.broadcast(new ByteEntityAccumuloElementConverter(graph1.getSchema())));
    final JavaPairRDD<Key, Value> elementJavaRDD = sparkContext.parallelize(elements).flatMapToPair(func);
    final ImportKeyValueJavaPairRDDToAccumulo addRdd = new ImportKeyValueJavaPairRDDToAccumulo.Builder().input(elementJavaRDD).outputPath(outputPath).failurePath(failurePath).build();
    graph1.execute(addRdd, user);
    FileUtils.forceDelete(file);
    // Check all elements were added
    final GetJavaRDDOfAllElements rddQuery = new GetJavaRDDOfAllElements.Builder().javaSparkContext(sparkContext).option(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString).build();
    final JavaRDD<Element> rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> results = new HashSet<>(rdd.collect());
    assertEquals(elements.size(), results.size());
    sparkContext.stop();
}
Also used: Entity (uk.gov.gchq.gaffer.data.element.Entity), User (uk.gov.gchq.gaffer.user.User), Configuration (org.apache.hadoop.conf.Configuration), DataOutputStream (java.io.DataOutputStream), Element (uk.gov.gchq.gaffer.data.element.Element), ArrayList (java.util.ArrayList), GetJavaRDDOfAllElements (uk.gov.gchq.gaffer.spark.operation.javardd.GetJavaRDDOfAllElements), JavaSparkContext (org.apache.spark.api.java.JavaSparkContext), HashSet (java.util.HashSet), ByteArrayOutputStream (org.apache.commons.io.output.ByteArrayOutputStream), ImportKeyValueJavaPairRDDToAccumulo (uk.gov.gchq.gaffer.sparkaccumulo.operation.javardd.ImportKeyValueJavaPairRDDToAccumulo), Graph (uk.gov.gchq.gaffer.graph.Graph), Value (org.apache.accumulo.core.data.Value), ByteEntityAccumuloElementConverter (uk.gov.gchq.gaffer.accumulostore.key.core.impl.byteEntity.ByteEntityAccumuloElementConverter), ElementConverterFunction (uk.gov.gchq.gaffer.sparkaccumulo.operation.utils.java.ElementConverterFunction), Edge (uk.gov.gchq.gaffer.data.element.Edge), SparkConf (org.apache.spark.SparkConf), File (java.io.File), Key (org.apache.accumulo.core.data.Key), Test (org.junit.Test)
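
ImportKeyValueJavaPairRDDToAccumulo writes the pair RDD out as files under outputPath and then loads them into the store. A minimal sketch of the bulk-import step such an operation would typically perform through the standard TableOperations API follows; the table name, the setTime flag, and the helper are illustrative assumptions, not taken from the Gaffer source.

import java.io.IOException;

import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.TableNotFoundException;

public class BulkImportSketch {

    // Hypothetical helper: bulk-import pre-sorted key/value files into a table.
    // Files the tablet servers cannot load are moved to failurePath for inspection.
    static void bulkImport(Connector conn, String tableName,
            String outputPath, String failurePath)
            throws TableNotFoundException, IOException,
            AccumuloException, AccumuloSecurityException {
        conn.tableOperations().importDirectory(tableName, outputPath, failurePath, false);
    }
}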

Aggregations

Key (org.apache.accumulo.core.data.Key): 115 uses
Value (org.apache.accumulo.core.data.Value): 68 uses
Test (org.junit.Test): 66 uses
Edge (uk.gov.gchq.gaffer.data.element.Edge): 44 uses
Range (org.apache.accumulo.core.data.Range): 35 uses
HashMap (java.util.HashMap): 29 uses
Text (org.apache.hadoop.io.Text): 23 uses
Element (uk.gov.gchq.gaffer.data.element.Element): 23 uses
Scanner (org.apache.accumulo.core.client.Scanner): 19 uses
Authorizations (org.apache.accumulo.core.security.Authorizations): 18 uses
Mutation (org.apache.accumulo.core.data.Mutation): 17 uses
Entity (uk.gov.gchq.gaffer.data.element.Entity): 15 uses
Entry (java.util.Map.Entry): 14 uses
TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException): 13 uses
AccumuloException (org.apache.accumulo.core.client.AccumuloException): 11 uses
Connector (org.apache.accumulo.core.client.Connector): 11 uses
IteratorSetting (org.apache.accumulo.core.client.IteratorSetting): 11 uses
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 11 uses
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc): 11 uses
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 11 uses