Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions c_glib/arrow-glib/reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -668,6 +668,35 @@ garrow_record_batch_file_reader_read_record_batch(GArrowRecordBatchFileReader *r
}
}

/**
 * garrow_record_batch_file_reader_get_metadata:
 * @reader: A #GArrowRecordBatchFileReader.
 *
 * Returns: (nullable) (element-type utf8 utf8) (transfer full):
 *   The metadata in the footer, or %NULL if the reader reports no
 *   metadata. The returned table owns copies of every key and value,
 *   so it stays valid independently of @reader. Release it with
 *   g_hash_table_unref().
 *
 * Since: 24.0.0
 */
GHashTable *
garrow_record_batch_file_reader_get_metadata(GArrowRecordBatchFileReader *reader)
{
  auto arrow_reader = garrow_record_batch_file_reader_get_raw(reader);
  auto arrow_metadata = arrow_reader->metadata();

  if (!arrow_metadata) {
    return nullptr;
  }

  // The return value is documented as (transfer full), so the table must
  // own its strings: duplicate them and register g_free() as the key and
  // value destroy functions instead of borrowing the c_str() pointers of
  // the Arrow metadata object.
  auto metadata = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, g_free);
  const auto n = arrow_metadata->size();
  for (int64_t i = 0; i < n; ++i) {
    // With destroy functions registered, inserting a key that already
    // exists frees the passed-in key copy and the replaced value, so
    // repeated keys do not leak.
    g_hash_table_insert(metadata,
                        g_strdup(arrow_metadata->key(i).c_str()),
                        g_strdup(arrow_metadata->value(i).c_str()));
  }
  return metadata;
}

struct GArrowFeatherFileReaderPrivate
{
std::shared_ptr<arrow::ipc::feather::Reader> feather_reader;
Expand Down
4 changes: 4 additions & 0 deletions c_glib/arrow-glib/reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,10 @@ garrow_record_batch_file_reader_read_record_batch(GArrowRecordBatchFileReader *r
guint i,
GError **error);

GARROW_AVAILABLE_IN_24_0
GHashTable *
garrow_record_batch_file_reader_get_metadata(GArrowRecordBatchFileReader *reader);

#define GARROW_TYPE_FEATHER_FILE_READER (garrow_feather_file_reader_get_type())
GARROW_AVAILABLE_IN_ALL
G_DECLARE_DERIVABLE_TYPE(GArrowFeatherFileReader,
Expand Down
42 changes: 39 additions & 3 deletions c_glib/arrow-glib/writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
#include <arrow-glib/array.hpp>
#include <arrow-glib/enums.h>
#include <arrow-glib/error.hpp>
#include <arrow-glib/internal-hash-table.hpp>
#include <arrow-glib/ipc-options.hpp>
#include <arrow-glib/record-batch.hpp>
#include <arrow-glib/schema.hpp>
#include <arrow-glib/table.hpp>
Expand Down Expand Up @@ -288,16 +290,50 @@ GArrowRecordBatchFileWriter *
garrow_record_batch_file_writer_new(GArrowOutputStream *sink,
GArrowSchema *schema,
GError **error)
{
return garrow_record_batch_file_writer_new_full(sink, schema, nullptr, nullptr, error);
}

/**
* garrow_record_batch_file_writer_new_full:
* @sink: The output of the writer.
* @schema: The schema of the writer.
* @options: (nullable): The options for serialization. If %NULL,
* arrow::ipc::IpcWriteOptions::Defaults() is used.
* @metadata: (nullable) (element-type utf8 utf8): The custom metadata in
* the footer. If %NULL, a null metadata pointer is passed to
* arrow::ipc::MakeFileWriter().
* @error: (nullable): Return location for a #GError or %NULL.
*
* Returns: (nullable): A newly created #GArrowRecordBatchFileWriter
* or %NULL on error.
*
* Since: 24.0.0
*/
GArrowRecordBatchFileWriter *
garrow_record_batch_file_writer_new_full(GArrowOutputStream *sink,
GArrowSchema *schema,
GArrowWriteOptions *options,
GHashTable *metadata,
GError **error)
{
auto arrow_sink = garrow_output_stream_get_raw(sink);
auto arrow_schema = garrow_schema_get_raw(schema);
// Start from the default IPC write options; they are replaced only when
// the caller supplied @options.
arrow::ipc::IpcWriteOptions arrow_options = arrow::ipc::IpcWriteOptions::Defaults();
if (options) {
arrow_options = *garrow_write_options_get_raw(options);
}
// Remains a null shared_ptr unless the caller supplied @metadata.
std::shared_ptr<arrow::KeyValueMetadata> arrow_metadata;
if (metadata) {
arrow_metadata = garrow_internal_hash_table_to_metadata(metadata);
}

std::shared_ptr<arrow::ipc::RecordBatchWriter> arrow_writer;
auto arrow_writer_result = arrow::ipc::MakeFileWriter(arrow_sink, arrow_schema);
if (garrow::check(error, arrow_writer_result, "[record-batch-file-writer][open]")) {
auto arrow_writer_result =
arrow::ipc::MakeFileWriter(arrow_sink, arrow_schema, arrow_options, arrow_metadata);
// garrow::check() gates the success branch; the failure branch returns
// a null pointer.
if (garrow::check(error, arrow_writer_result, "[record-batch-file-writer][new]")) {
auto arrow_writer = *arrow_writer_result;
return garrow_record_batch_file_writer_new_raw(&arrow_writer);
} else {
return NULL;
return nullptr;
}
}

Expand Down
9 changes: 9 additions & 0 deletions c_glib/arrow-glib/writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#pragma once

#include <arrow-glib/array.h>
#include <arrow-glib/ipc-options.h>
#include <arrow-glib/record-batch.h>
#include <arrow-glib/schema.h>

Expand Down Expand Up @@ -94,6 +95,14 @@ garrow_record_batch_file_writer_new(GArrowOutputStream *sink,
GArrowSchema *schema,
GError **error);

GARROW_AVAILABLE_IN_24_0
GArrowRecordBatchFileWriter *
garrow_record_batch_file_writer_new_full(GArrowOutputStream *sink,
GArrowSchema *schema,
GArrowWriteOptions *options,
GHashTable *metadata,
GError **error);

/**
* GArrowCSVQuotingStyle:
* @GARROW_CSV_QUOTING_STYLE_NEEDED: Only enclose values in quotes which need them.
Expand Down
32 changes: 32 additions & 0 deletions c_glib/test/test-file-writer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -88,4 +88,36 @@ def test_write_table
input.close
end
end

# Writes an Arrow IPC file whose footer carries custom key/value metadata,
# then reads the file back and checks that the reader reports the same
# metadata.
def test_footer_custom_metadata
  tmp = Tempfile.open("arrow-ipc-file-writer")
  sink = Arrow::FileOutputStream.new(tmp.path, false)

  array = build_boolean_array([true, false, true])
  enabled_field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new)
  schema = Arrow::Schema.new([enabled_field])

  write_options = Arrow::WriteOptions.new
  expected_metadata = {
    "key1" => "value1",
    "key2" => "value2",
  }

  # Write phase: the output stream is closed even when an assertion fails.
  begin
    writer = Arrow::RecordBatchFileWriter.new(sink,
                                              schema,
                                              write_options,
                                              expected_metadata)
    writer.close
    assert { writer.closed? }
  ensure
    sink.close
  end

  # Read phase: reopen the same path and compare the footer metadata.
  source = Arrow::MemoryMappedInputStream.new(tmp.path)
  begin
    reader = Arrow::RecordBatchFileReader.new(source)
    assert_equal(expected_metadata, reader.metadata)
  ensure
    source.close
  end
end
end
1 change: 1 addition & 0 deletions ruby/red-arrow-format/Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,6 @@ gem "red-arrow", path: "../red-arrow"
group :development do
gem "benchmark-driver"
gem "rake"
gem "stringio"
gem "test-unit"
end
2 changes: 2 additions & 0 deletions ruby/red-arrow-format/lib/arrow-format/file-reader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class FileReader
FOOTER_SIZE_SIZE = IO::Buffer.size_of(FOOTER_SIZE_FORMAT)

attr_reader :schema
attr_reader :metadata
def initialize(input)
case input
when IO
Expand All @@ -47,6 +48,7 @@ def initialize(input)

validate
@footer = read_footer
@metadata = read_custom_metadata(@footer.custom_metadata)
@record_batch_blocks = @footer.record_batches || []
@schema = read_schema(@footer.schema)
@dictionaries = read_dictionaries
Expand Down
21 changes: 14 additions & 7 deletions ruby/red-arrow-format/lib/arrow-format/file-writer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -29,26 +29,33 @@ def start(schema)
super
end

# Completes the file: runs the parent class's finish step, writes the
# footer, writes the trailing MAGIC and returns @output.
#
# metadata is optional (defaults to nil). When given, its key/value pairs
# are handed to write_footer to be stored as custom footer metadata.
def finish
super
write_footer
def finish(metadata=nil)
# Explicit empty parentheses: a bare `super` would forward metadata to the
# parent's finish.
super()
write_footer(metadata)
write_data(MAGIC)
@output
end

private
# Builds the footer from the schema, dictionary blocks and record batch
# blocks held in instance variables, and returns the result of
# FB::Footer.serialize.
#
# metadata may be nil; otherwise it is iterated as key/value pairs
# (e.g. a Hash).
def build_footer
def build_footer(metadata)
fb_footer = FB::Footer::Data.new
fb_footer.version = FB::MetadataVersion::V5
fb_footer.schema = @fb_schema
fb_footer.dictionaries = @fb_dictionary_blocks
fb_footer.record_batches = @fb_record_batch_blocks
# Custom metadata is optional: when present, each key/value pair becomes
# one FB::KeyValue::Data entry.
if metadata
fb_footer.custom_metadata = metadata.collect do |key, value|
fb_key_value = FB::KeyValue::Data.new
fb_key_value.key = key
fb_key_value.value = value
fb_key_value
end
end
FB::Footer.serialize(fb_footer)
end

# Writes the serialized footer followed by the footer's size in bytes.
# metadata is passed through to build_footer unchanged.
def write_footer
footer = build_footer
def write_footer(metadata)
footer = build_footer(metadata)
write_data(footer)
# "l<" packs the byte size as a 32-bit signed little-endian integer.
write_data([footer.bytesize].pack("l<"))
end
Expand Down
1 change: 1 addition & 0 deletions ruby/red-arrow-format/test/helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.

require "stringio"
require "tmpdir"

require "test-unit"
Expand Down
32 changes: 26 additions & 6 deletions ruby/red-arrow-format/test/test-reader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -675,18 +675,36 @@ def test_dictionary
end
end

# Tests that only apply to file-format readers, which expose the custom
# metadata stored in the file footer.
module FileReaderTests
  # Saves a table with custom metadata and checks that the reader built
  # from the resulting input returns the same metadata.
  def test_custom_metadata_footer
    Dir.mktmpdir do |dir|
      begin
        expected = {
          "key1" => "value1",
          "key2" => "value2",
        }
        source_table = Arrow::Table.new(value: Arrow::Int8Array.new([1, 2, 3]))
        open_input(source_table, dir, metadata: expected) do |input|
          assert_equal(expected, reader_class.new(input).metadata)
        end
      ensure
        # Runs before the temporary directory block returns, as in the
        # original block-level ensure.
        GC.start
      end
    end
  end
end

# Input helper that hands tests a File opened on a saved table.
module FileInput
# Saves table to "data.<file_extension>" under tmp_dir and opens that file
# in binary read mode, passing it to the given block. Extra keyword
# options are forwarded to table.save.
def open_input(table, tmp_dir, &block)
def open_input(table, tmp_dir, **options, &block)
path = File.join(tmp_dir, "data.#{file_extension}")
table.save(path)
table.save(path, **options)
File.open(path, "rb", &block)
end
end

module PipeInput
def open_input(table, tmp_dir, &block)
def open_input(table, tmp_dir, **options)
buffer = Arrow::ResizableBuffer.new(4096)
table.save(buffer, format: format)
table.save(buffer, format: format, **options)
IO.pipe do |input, output|
write_thread = Thread.new do
output.write(buffer.data.to_s)
Expand All @@ -701,15 +719,16 @@ def open_input(table, tmp_dir, &block)
end

# Input helper that hands tests the saved table as an in-memory string.
module StringInput
# Saves table into a resizable buffer using the including class's format
# and yields buffer.data.to_s. Extra keyword options are forwarded to
# table.save. tmp_dir is accepted but not used here.
def open_input(table, tmp_dir)
def open_input(table, tmp_dir, **options)
buffer = Arrow::ResizableBuffer.new(4096)
table.save(buffer, format: format)
table.save(buffer, format: format, **options)
yield(buffer.data.to_s)
end
end

class TestFileReaderFileInput < Test::Unit::TestCase
include ReaderTests
include FileReaderTests
include FileInput

def file_extension
Expand All @@ -723,6 +742,7 @@ def reader_class

class TestFileReaderStringInput < Test::Unit::TestCase
include ReaderTests
include FileReaderTests
include StringInput

def format
Expand Down
21 changes: 21 additions & 0 deletions ruby/red-arrow-format/test/test-writer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -924,6 +924,26 @@ def test_dictionary
end
end

# Tests that only apply to file-format writers, which can store custom
# metadata in the file footer.
module FileWriterTests
  # Writes a schema-only file with custom footer metadata into a StringIO
  # and checks that Arrow::RecordBatchFileReader reads the same metadata.
  def test_custom_metadata_footer
    expected = {
      "key1" => "value1",
      "key2" => "value2",
    }

    io = StringIO.new(+"".b)
    file_writer = writer_class.new(io)
    value_field = ArrowFormat::Field.new("value", ArrowFormat::BooleanType.new)
    file_writer.start(ArrowFormat::Schema.new([value_field]))
    file_writer.finish(expected)

    written = Arrow::Buffer.new(io.string)
    Arrow::BufferInputStream.open(written) do |input|
      assert_equal(expected,
                   Arrow::RecordBatchFileReader.new(input).metadata)
    end
  end
end

module WriterDictionaryDeltaTests
def build_schema(value_type)
index_type = ArrowFormat::Int32Type.singleton
Expand Down Expand Up @@ -1513,6 +1533,7 @@ def read(path)

# Basic cases: mixes in the shared WriterTests plus FileWriterTests.
sub_test_case("Basic") do
include WriterTests
include FileWriterTests
end

sub_test_case("Dictionary: delta") do
Expand Down
6 changes: 3 additions & 3 deletions ruby/red-arrow/lib/arrow/table-saver.rb
Original file line number Diff line number Diff line change
Expand Up @@ -130,9 +130,9 @@ def open_output_stream(&block)
end
end

def save_raw(writer_class)
def save_raw(writer_class, *args)
open_output_stream do |output|
writer_class.open(output, @table.schema) do |writer|
writer_class.open(output, @table.schema, *args) do |writer|
writer.write_table(@table)
end
end
Expand All @@ -144,7 +144,7 @@ def save_as_arrow

# Saves the table through save_raw using RecordBatchFileWriter.
#
# Two extra writer arguments are passed after the schema: nil and
# @options[:metadata].
# NOTE(review): nil presumably fills the write options argument - confirm
# against the writer's signature.
#
# @since 1.0.0
def save_as_arrow_file
save_raw(RecordBatchFileWriter)
save_raw(RecordBatchFileWriter, nil, @options[:metadata])
end

# @deprecated Use `format: :arrow_batch` instead.
Expand Down
Loading