Search in sources:

Example 16 with TupleWriter

Use of org.apache.drill.exec.vector.accessor.TupleWriter in the Apache Drill project.

From the class TestResultSetLoaderProjection, method testStrictFullMapProjectionWithSchema.

/**
 * Verifies that, with a strict provided schema, projection is driven by
 * both the projection list and the provided schema: a column must appear
 * in both to be materialized in the output.
 */
@Test
public void testStrictFullMapProjectionWithSchema() {
    // From the reader's perspective, m1, m2 and m3 are projected, m4 is not
    // (the reader does not ask m4 to be created).
    // m1.z is not requested; m2.c is requested, but is not projected.
    // None of m3 is projected.
    List<SchemaPath> selection = RowSetTestUtils.projectList("m1", "m2", "m3", "m4");
    ResultSetLoader rsLoader = setupProvidedSchema(true, selection);
    RowSetLoader rootWriter = rsLoader.writer();

    // Verify the projected columns.
    TupleMetadata actualSchema = rootWriter.tupleSchema();

    // m1: fully projected, both declared members materialized.
    TupleWriter m1Writer = rootWriter.tuple("m1");
    assertTrue(m1Writer.isProjected());
    assertEquals(2, m1Writer.tupleSchema().size());
    assertTrue(m1Writer.column("a").isProjected());
    assertTrue(m1Writer.column("b").isProjected());

    // m2: map is projected, but only member d survives the strict schema.
    TupleWriter m2Writer = rootWriter.tuple("m2");
    assertTrue(m2Writer.isProjected());
    assertEquals(2, m2Writer.tupleSchema().size());
    assertFalse(m2Writer.column("c").isProjected());
    assertTrue(m2Writer.column("d").isProjected());

    // m3: the map exists in the writer schema but is entirely unprojected.
    TupleWriter m3Writer = rootWriter.tuple("m3");
    assertFalse(m3Writer.isProjected());
    assertEquals(2, m3Writer.tupleSchema().size());
    assertFalse(m3Writer.column("e").isProjected());
    assertFalse(m3Writer.column("f").isProjected());

    // m4 was never created, so it has no metadata at all.
    assertNull(actualSchema.metadata("m4"));

    // BUG FIX: the original compared a two-row expected result against the
    // harvested batch without ever starting a batch or writing rows, so the
    // harvested batch would be empty and verification could not pass. Write
    // the two rows (values for unprojected columns are discarded by the
    // dummy writers), mirroring testMapProjection().
    rsLoader.startBatch();
    rootWriter
        .addRow(mapValue(1, 2), mapValue(3, 4), mapValue(5, 6))
        .addRow(mapValue(11, 12), mapValue(13, 14), mapValue(15, 16));

    // Verify. Only the projected columns appear in the result set.
    TupleMetadata expectedSchema = new SchemaBuilder()
        .addMap("m1")
          .add("a", MinorType.INT)
          .add("b", MinorType.INT)
          .resumeSchema()
        .addMap("m2")
          .add("d", MinorType.INT)
          .resumeSchema()
        .build();
    SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
        .addRow(mapValue(1, 2), mapValue(4))
        .addRow(mapValue(11, 12), mapValue(14))
        .build();
    RowSetUtilities.verify(expected, fixture.wrap(rsLoader.harvest()));
    rsLoader.close();
}
Also used : SingleRowSet(org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) ResultSetLoader(org.apache.drill.exec.physical.resultSet.ResultSetLoader) SchemaPath(org.apache.drill.common.expression.SchemaPath) TupleWriter(org.apache.drill.exec.vector.accessor.TupleWriter) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) RowSetLoader(org.apache.drill.exec.physical.resultSet.RowSetLoader) SubOperatorTest(org.apache.drill.test.SubOperatorTest) EvfTest(org.apache.drill.categories.EvfTest) Test(org.junit.Test)

Example 17 with TupleWriter

Use of org.apache.drill.exec.vector.accessor.TupleWriter in the Apache Drill project.

From the class TestResultSetLoaderProjection, method testMapProjection.

/**
 * Verifies map projection from the projection list alone: project all of
 * m1, only m2.d within m2, and nothing from m3.
 */
@Test
public void testMapProjection() {
    List<SchemaPath> projSet = RowSetTestUtils.projectList("m1", "m2.d");
    TupleMetadata readerSchema = new SchemaBuilder()
        .addMap("m1")
          .add("a", MinorType.INT)
          .add("b", MinorType.INT)
          .resumeSchema()
        .addMap("m2")
          .add("c", MinorType.INT)
          .add("d", MinorType.INT)
          .resumeSchema()
        .addMap("m3")
          .add("e", MinorType.INT)
          .add("f", MinorType.INT)
          .resumeSchema()
        .buildSchema();
    ResultSetOptions options = new ResultSetOptionBuilder()
        .projection(Projections.parse(projSet))
        .readerSchema(readerSchema)
        .build();
    ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
    RowSetLoader rootWriter = rsLoader.writer();

    // Check projection flags map by map.
    TupleMetadata actualSchema = rootWriter.tupleSchema();

    // m1: fully projected, both members materialized.
    ColumnMetadata m1Meta = actualSchema.metadata("m1");
    TupleWriter m1Writer = rootWriter.tuple("m1");
    assertTrue(m1Meta.isMap());
    assertTrue(m1Writer.isProjected());
    assertEquals(2, m1Meta.tupleSchema().size());
    assertTrue(m1Writer.column("a").isProjected());
    assertTrue(m1Writer.column("b").isProjected());

    // m2: the map itself is projected, but only member d is.
    ColumnMetadata m2Meta = actualSchema.metadata("m2");
    TupleWriter m2Writer = rootWriter.tuple("m2");
    assertTrue(m2Meta.isMap());
    assertTrue(m2Writer.isProjected());
    assertEquals(2, m2Meta.tupleSchema().size());
    assertFalse(m2Writer.column("c").isProjected());
    assertTrue(m2Writer.column("d").isProjected());

    // m3: present in the reader schema, but entirely unprojected.
    ColumnMetadata m3Meta = actualSchema.metadata("m3");
    TupleWriter m3Writer = rootWriter.tuple("m3");
    assertTrue(m3Meta.isMap());
    assertFalse(m3Writer.isProjected());
    assertEquals(2, m3Meta.tupleSchema().size());
    assertFalse(m3Writer.column("e").isProjected());
    assertFalse(m3Writer.column("f").isProjected());

    // Write a couple of rows.
    rsLoader.startBatch();
    rootWriter.start();
    rootWriter
        .addRow(mapValue(1, 2), mapValue(3, 4), mapValue(5, 6))
        .addRow(mapValue(11, 12), mapValue(13, 14), mapValue(15, 16));

    // Verify: only the projected columns appear in the result set.
    TupleMetadata expectedSchema = new SchemaBuilder()
        .addMap("m1")
          .add("a", MinorType.INT)
          .add("b", MinorType.INT)
          .resumeSchema()
        .addMap("m2")
          .add("d", MinorType.INT)
          .resumeSchema()
        .buildSchema();
    SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
        .addRow(mapValue(1, 2), mapValue(4))
        .addRow(mapValue(11, 12), mapValue(14))
        .build();
    RowSetUtilities.verify(expected, fixture.wrap(rsLoader.harvest()));
    rsLoader.close();
}
Also used : ColumnMetadata(org.apache.drill.exec.record.metadata.ColumnMetadata) SingleRowSet(org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) ResultSetLoader(org.apache.drill.exec.physical.resultSet.ResultSetLoader) SchemaPath(org.apache.drill.common.expression.SchemaPath) TupleWriter(org.apache.drill.exec.vector.accessor.TupleWriter) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) RowSetLoader(org.apache.drill.exec.physical.resultSet.RowSetLoader) ResultSetOptions(org.apache.drill.exec.physical.resultSet.impl.ResultSetLoaderImpl.ResultSetOptions) SubOperatorTest(org.apache.drill.test.SubOperatorTest) EvfTest(org.apache.drill.categories.EvfTest) Test(org.junit.Test)

Example 18 with TupleWriter

Use of org.apache.drill.exec.vector.accessor.TupleWriter in the Apache Drill project.

From the class ImageDirectoryProcessor, method processXmpDirectory.

/**
 * Walks all leaf XMP properties of the given directory and writes each one
 * into the Drill row, building nested map/list writers on demand from the
 * property's path.
 * <p>
 * Paths are split on namespace prefixes, '/' and array indices (e.g.
 * "[1]"); each non-index path element becomes a map (or list/list-map for
 * elements followed by an index). Individual property failures are logged
 * and skipped so one bad property does not abort the whole directory.
 *
 * @param writer root map writer for this image's metadata
 * @param directory XMP directory to read; ignored if it has no XMPMeta
 */
protected static void processXmpDirectory(final MapColumnDefn writer, final XmpDirectory directory) {
    XMPMeta xmpMeta = directory.getXMPMeta();
    if (xmpMeta != null) {
        try {
            // Iterate leaf nodes only; structural nodes are implied by paths.
            IteratorOptions iteratorOptions = new IteratorOptions().setJustLeafnodes(true);
            for (final XMPIterator i = xmpMeta.iterator(iteratorOptions); i.hasNext(); ) {
                try {
                    XMPPropertyInfo prop = (XMPPropertyInfo) i.next();
                    String path = prop.getPath();
                    String value = prop.getValue();
                    if (path != null && value != null) {
                        // Handle lang-alt array items: the language qualifier
                        // arrives as the next iterator entry ("/xml:lang").
                        if (prop.getOptions().getHasLanguage()) {
                            XMPPropertyInfo langProp = (XMPPropertyInfo) i.next();
                            if (langProp.getPath().endsWith("/xml:lang")) {
                                String lang = langProp.getValue();
                                // Drop the trailing array index; suffix the
                                // column name with the language unless it is
                                // the default variant.
                                path = path.replaceFirst("\\[\\d+\\]$", "") + (lang.equals("x-default") ? "" : "_" + lang);
                            }
                        }
                        ColumnDefn rootColumn = writer;
                        ColumnWriter subColumn = null;
                        // Strip namespace prefixes ("ns:"), then split on
                        // ':', '/' and before each "[index]" token.
                        String[] elements = path.replaceAll("/\\w+:", "/").split(":|/|(?=\\[)");
                        // 1. Look up and create the nested writer structure,
                        // one path element at a time.
                        for (int j = 1; j < elements.length; j++) {
                            String parent = elements[j - 1];
                            boolean isList = elements[j].startsWith("[");
                            if (!parent.startsWith("[")) {
                                // Index elements (e.g. "[1]") that are not the
                                // last element are skipped as parents.
                                final String formatName = ImageMetadataUtils.formatName(parent);
                                if (isList) {
                                    if (j + 1 == elements.length) {
                                        // Next element is the final index:
                                        // plain list of scalars.
                                        subColumn = rootColumn.addList(formatName);
                                    } else {
                                        // Deeper structure under the index:
                                        // repeated map (list-map).
                                        subColumn = rootColumn.addListMap(formatName);
                                    }
                                    rootColumn = new ListColumnDefn(formatName).builder((ArrayWriter) subColumn);
                                } else {
                                    // Plain nested map.
                                    subColumn = ((MapColumnDefn) rootColumn).addMap(formatName);
                                    // Descend: further elements write into
                                    // this new map.
                                    rootColumn = new MapColumnDefn(formatName).builder((TupleWriter) subColumn);
                                }
                            }
                        }
                        // 2. Write the leaf value through the writer built above.
                        String parent = elements[elements.length - 1];
                        if (parent.startsWith("[")) {
                            // Leaf is an array slot: append the value to the list.
                            subColumn.setObject(new String[] { value });
                        } else {
                            rootColumn.addText(ImageMetadataUtils.formatName(parent)).setString(value);
                            // If we are inside a repeated map, close out this entry.
                            if (subColumn instanceof ArrayWriter) {
                                ((ArrayWriter) subColumn).save();
                            }
                        }
                    }
                } catch (Exception skipped) {
                    // Best-effort: skip just this property and keep iterating.
                    logger.warn("Error in written xmp metadata : {}", skipped.getMessage());
                }
            }
        } catch (XMPException ignored) {
            // Iterator creation/advance failed; give up on this directory.
            logger.warn("Error in processing xmp directory : {}", ignored.getMessage());
        }
    }
}
Also used : XMPIterator(com.adobe.internal.xmp.XMPIterator) MapColumnDefn(org.apache.drill.exec.store.image.ImageBatchReader.MapColumnDefn) ColumnWriter(org.apache.drill.exec.vector.accessor.ColumnWriter) XMPPropertyInfo(com.adobe.internal.xmp.properties.XMPPropertyInfo) IteratorOptions(com.adobe.internal.xmp.options.IteratorOptions) XMPException(com.adobe.internal.xmp.XMPException) ListColumnDefn(org.apache.drill.exec.store.image.ImageBatchReader.ListColumnDefn) MapColumnDefn(org.apache.drill.exec.store.image.ImageBatchReader.MapColumnDefn) ColumnDefn(org.apache.drill.exec.store.image.ImageBatchReader.ColumnDefn) ListColumnDefn(org.apache.drill.exec.store.image.ImageBatchReader.ListColumnDefn) XMPMeta(com.adobe.internal.xmp.XMPMeta) TupleWriter(org.apache.drill.exec.vector.accessor.TupleWriter) XMPException(com.adobe.internal.xmp.XMPException) ArrayWriter(org.apache.drill.exec.vector.accessor.ArrayWriter)

Example 19 with TupleWriter

Use of org.apache.drill.exec.vector.accessor.TupleWriter in the Apache Drill project.

From the class ImageDirectoryProcessor, method processValue.

/**
 * Writes a single metadata tag into the row, converting the value to the
 * closest Drill minor type before handing it to the matching column writer.
 *
 * @see org.apache.drill.exec.vector.accessor.writer.AbstractScalarWriter#setObject(Object)
 * @param writer the map column definition to write into
 * @param name the tag name (becomes the column name)
 * @param value the tag value; a null value is silently ignored
 */
protected static void processValue(final MapColumnDefn writer, final String name, final Object value) {
    if (value == null) {
        return;
    }
    // Primitive wrappers map one-to-one onto Drill numeric minor types.
    if (value instanceof Boolean) {
        writer.addObject(name, MinorType.BIT).setObject(value);
    } else if (value instanceof Byte) {
        writer.addObject(name, MinorType.TINYINT).setObject(value);
    } else if (value instanceof Short) {
        writer.addObject(name, MinorType.SMALLINT).setObject(value);
    } else if (value instanceof Integer) {
        writer.addObject(name, MinorType.INT).setObject(value);
    } else if (value instanceof Long) {
        writer.addObject(name, MinorType.BIGINT).setObject(value);
    } else if (value instanceof Float) {
        writer.addObject(name, MinorType.FLOAT4).setObject(value);
    } else if (value instanceof Double) {
        writer.addObject(name, MinorType.FLOAT8).setObject(value);
    } else if (value instanceof Rational) {
        // A rational collapses to its double approximation.
        writer.addDouble(name).setDouble(((Rational) value).doubleValue());
    } else if (value instanceof StringValue) {
        writer.addText(name).setString(((StringValue) value).toString());
    } else if (value instanceof Date) {
        // Stored as a millisecond-precision timestamp.
        writer.addDate(name).setTimestamp(Instant.ofEpochMilli(((Date) value).getTime()));
    } else if (value instanceof String[]) {
        writer.addList(name).setObject(value);
    } else if (value instanceof byte[]) {
        writer.addListByte(name).setObject(value);
    } else if (value instanceof JpegComponent) {
        // A JPEG component expands into a nested map of its four attributes.
        JpegComponent component = (JpegComponent) value;
        TupleWriter componentMap = writer.addMap(name);
        writer.addIntToMap(componentMap, TagName.JPEGCOMPONENT_CID).setInt(component.getComponentId());
        writer.addIntToMap(componentMap, TagName.JPEGCOMPONENT_HSF).setInt(component.getHorizontalSamplingFactor());
        writer.addIntToMap(componentMap, TagName.JPEGCOMPONENT_VSF).setInt(component.getVerticalSamplingFactor());
        writer.addIntToMap(componentMap, TagName.JPEGCOMPONENT_QTN).setInt(component.getQuantizationTableNumber());
    } else if (value instanceof List<?>) {
        // A generic list becomes a repeated map of key/value entries.
        ArrayWriter listWriter = writer.addListMap(name);
        ListColumnDefn listDefn = new ListColumnDefn(name).builder(listWriter);
        for (Object item : (List<?>) value) {
            if (item instanceof KeyValuePair) {
                listDefn.addText(TagName.KEYVALUEPAIR_K).setString(((KeyValuePair) item).getKey());
                listDefn.addText(TagName.KEYVALUEPAIR_V).setString(((KeyValuePair) item).getValue().toString());
            } else {
                // No key available; record the value alone.
                listDefn.addText(TagName.KEYVALUEPAIR_V).setString(item.toString());
            }
            listWriter.save();
        }
    } else {
        // Fallback: render anything unrecognized as text.
        writer.addText(name).setString(value.toString());
    }
}
Also used : Rational(com.drew.lang.Rational) KeyValuePair(com.drew.lang.KeyValuePair) Date(java.util.Date) ListColumnDefn(org.apache.drill.exec.store.image.ImageBatchReader.ListColumnDefn) JpegComponent(com.drew.metadata.jpeg.JpegComponent) TupleWriter(org.apache.drill.exec.vector.accessor.TupleWriter) StringValue(com.drew.metadata.StringValue) ArrayWriter(org.apache.drill.exec.vector.accessor.ArrayWriter)

Example 20 with TupleWriter

Use of org.apache.drill.exec.vector.accessor.TupleWriter in the Apache Drill project.

From the class HttpdLogRecord, method getWildcardWriter.

/**
 * Resolves the map writer for a wildcard field. For a configuration such as
 * HTTP.URI:request.firstline.uri.query.*, a writer was registered under the
 * root name HTTP.URI:request.firstline.uri.query; an incoming field like
 * HTTP.URI:request.firstline.uri.query.old is matched back to that root.
 * Results are cached in {@code startedWildcards}; the cleaned field
 * extension is cached in {@code cleanExtensions}.
 *
 * @param field full field name, e.g. HTTP.URI:request.firstline.uri.query.old,
 *              where 'old' is one of many possible parameter names
 * @return the writer for this field, or null when no wildcard root matches
 */
private TupleWriter getWildcardWriter(String field) {
    // Fast path: this exact field was resolved before.
    TupleWriter cached = startedWildcards.get(field);
    if (cached != null) {
        return cached;
    }
    // Scan registered wildcard roots for one that prefixes this field.
    for (Map.Entry<String, TupleWriter> entry : wildcards.entrySet()) {
        String root = entry.getKey();
        if (!field.startsWith(root)) {
            continue;
        }
        TupleWriter writer = entry.getValue();
        // Cache the cleaned field extension up front — it may contain
        // characters that are unsafe as a Drill column name.
        if (!cleanExtensions.containsKey(field)) {
            String extension = field.substring(root.length() + 1);
            String cleanExtension = HttpdUtils.drillFormattedFieldName(extension);
            cleanExtensions.put(field, cleanExtension);
            logger.debug("Added extension: field='{}' with cleanExtension='{}'", field, cleanExtension);
        }
        // Register the writer only once per root so it is not started twice.
        if (!wildcardWriters.containsKey(root)) {
            logger.debug("Starting new wildcard field writer: {}", field);
            startedWildcards.put(field, writer);
            wildcardWriters.put(root, writer);
        }
        // First matching root wins.
        return writer;
    }
    // No wildcard root matched.
    return null;
}
Also used : TupleWriter(org.apache.drill.exec.vector.accessor.TupleWriter) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

TupleWriter (org.apache.drill.exec.vector.accessor.TupleWriter)59 TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata)52 SubOperatorTest (org.apache.drill.test.SubOperatorTest)50 Test (org.junit.Test)50 SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder)35 SingleRowSet (org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet)33 RowSetLoader (org.apache.drill.exec.physical.resultSet.RowSetLoader)26 ResultSetLoader (org.apache.drill.exec.physical.resultSet.ResultSetLoader)25 ScalarWriter (org.apache.drill.exec.vector.accessor.ScalarWriter)23 TupleReader (org.apache.drill.exec.vector.accessor.TupleReader)20 RowSet (org.apache.drill.exec.physical.rowSet.RowSet)19 ArrayWriter (org.apache.drill.exec.vector.accessor.ArrayWriter)18 SchemaBuilder (org.apache.drill.test.rowSet.schema.SchemaBuilder)16 SingleRowSet (org.apache.drill.test.rowSet.RowSet.SingleRowSet)15 ResultSetLoader (org.apache.drill.exec.physical.rowSet.ResultSetLoader)14 RowSetLoader (org.apache.drill.exec.physical.rowSet.RowSetLoader)14 RowSet (org.apache.drill.test.rowSet.RowSet)13 ArrayReader (org.apache.drill.exec.vector.accessor.ArrayReader)12 RowSetComparison (org.apache.drill.test.rowSet.RowSetComparison)12 ScalarReader (org.apache.drill.exec.vector.accessor.ScalarReader)10