Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# cpp11 (development version)

* Removed non-API usage of `ATTRIB()` (#481).

* Improved hygiene around using C++ specific C compatibility headers (i.e. by using `<cstring>` rather than `<string.h>` and `<cstddef>` rather than `<stddef.h>`) (#454, @MichaelChirico).

* Fixed an rchk issue related to `std::initializer_list<named_arg>` (#457, @pachadotdev).
Expand Down
119 changes: 100 additions & 19 deletions cpp11test/src/test-data_frame.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,42 +26,123 @@ context("data_frame-C++") {
}

test_that("data_frame::nrow works with 0x0 dfs") {
// From bare list
SEXP x = PROTECT(Rf_allocVector(VECSXP, 0));

cpp11::data_frame df(x);
expect_true(df.nrow() == 0);

cpp11::data_frame x_df(x);
expect_true(x_df.nrow() == 0);
UNPROTECT(1);
}

test_that("data_frame::nrow works with 10x0 dfs") {
using namespace cpp11::literals;
cpp11::writable::list x(0_xl);
x.attr(R_RowNamesSymbol) = {NA_INTEGER, -10};

cpp11::data_frame df(x);
expect_true(df.nrow() == 10);
// From bare list with `R_RowNamesSymbol`
SEXP y = PROTECT(Rf_allocVector(VECSXP, 0));
SEXP y_row_names = PROTECT(Rf_allocVector(INTSXP, 2));
SET_INTEGER_ELT(y_row_names, 0, NA_INTEGER);
SET_INTEGER_ELT(y_row_names, 1, 0);
Rf_setAttrib(y, R_RowNamesSymbol, y_row_names);
cpp11::data_frame y_df(y);
expect_true(y_df.nrow() == 0);
UNPROTECT(2);

// From classed data frame with `R_RowNamesSymbol`
SEXP z = PROTECT(Rf_allocVector(VECSXP, 0));
SEXP z_row_names = PROTECT(Rf_allocVector(INTSXP, 2));
SET_INTEGER_ELT(z_row_names, 0, NA_INTEGER);
SET_INTEGER_ELT(z_row_names, 1, 0);
Rf_setAttrib(z, R_RowNamesSymbol, z_row_names);
SEXP z_class = PROTECT(Rf_allocVector(STRSXP, 1));
SET_STRING_ELT(z_class, 0, Rf_mkChar("data.frame"));
Rf_setAttrib(z, R_ClassSymbol, z_class);
cpp11::data_frame z_df(z);
expect_true(z_df.nrow() == 0);
UNPROTECT(3);
}

test_that("writable::data_frame::nrow works with 0x0 dfs") {
using namespace cpp11::literals;

// From bare list
SEXP x = PROTECT(Rf_allocVector(VECSXP, 0));
cpp11::writable::data_frame x_df(x);
expect_true(x_df.nrow() == 0);
UNPROTECT(1);

cpp11::writable::data_frame df(x);
expect_true(df.nrow() == 0);
// From bare list with `R_RowNamesSymbol`
cpp11::writable::list y(0_xl);
y.attr(R_RowNamesSymbol) = {NA_INTEGER, 0};
cpp11::writable::data_frame y_df(y);
expect_true(y_df.nrow() == 0);

// From classed data frame with `R_RowNamesSymbol`
cpp11::writable::list z(0_xl);
z.attr(R_RowNamesSymbol) = {NA_INTEGER, 0};
z.attr(R_ClassSymbol) = "data.frame";
cpp11::writable::data_frame z_df(z);
expect_true(z_df.nrow() == 0);
}

UNPROTECT(1);
test_that("data_frame::nrow works with 10x0 dfs") {
// From bare list with `R_RowNamesSymbol`
SEXP y = PROTECT(Rf_allocVector(VECSXP, 0));
SEXP y_row_names = PROTECT(Rf_allocVector(INTSXP, 2));
SET_INTEGER_ELT(y_row_names, 0, NA_INTEGER);
SET_INTEGER_ELT(y_row_names, 1, 10);
Rf_setAttrib(y, R_RowNamesSymbol, y_row_names);
cpp11::data_frame y_df(y);
expect_true(y_df.nrow() == 10);
UNPROTECT(2);

// From classed data frame with `R_RowNamesSymbol`
SEXP z = PROTECT(Rf_allocVector(VECSXP, 0));
SEXP z_row_names = PROTECT(Rf_allocVector(INTSXP, 2));
SET_INTEGER_ELT(z_row_names, 0, NA_INTEGER);
SET_INTEGER_ELT(z_row_names, 1, 10);
Rf_setAttrib(z, R_RowNamesSymbol, z_row_names);
SEXP z_class = PROTECT(Rf_allocVector(STRSXP, 1));
SET_STRING_ELT(z_class, 0, Rf_mkChar("data.frame"));
Rf_setAttrib(z, R_ClassSymbol, z_class);
cpp11::data_frame z_df(z);
expect_true(z_df.nrow() == 10);
UNPROTECT(3);
}

test_that("writable::data_frame::nrow works with 10x0 dfs (#272)") {
SEXP x = PROTECT(Rf_allocVector(VECSXP, 0));
using namespace cpp11::literals;

bool is_altrep = false;
R_xlen_t nrow = 10;
// From bare list with `R_RowNamesSymbol`
cpp11::writable::list y(0_xl);
y.attr(R_RowNamesSymbol) = {NA_INTEGER, 10};
cpp11::writable::data_frame y_df(y);
expect_true(y_df.nrow() == 10);

// From classed data frame with `R_RowNamesSymbol`
cpp11::writable::list z(0_xl);
z.attr(R_RowNamesSymbol) = {NA_INTEGER, 10};
z.attr(R_ClassSymbol) = "data.frame";
cpp11::writable::data_frame z_df(z);
expect_true(z_df.nrow() == 10);

// Manually specify `nrow` using special constructor
cpp11::writable::data_frame df(x, is_altrep, nrow);
bool is_altrep = false;
SEXP x = PROTECT(Rf_allocVector(VECSXP, 0));
cpp11::writable::data_frame df(x, is_altrep, 10);
expect_true(df.nrow() == 10);
UNPROTECT(1);
}

test_that("data_frame::nrow works with 0x1 dfs") {
// From bare list
SEXP x = PROTECT(Rf_allocVector(VECSXP, 1));
SET_VECTOR_ELT(x, 0, Rf_allocVector(INTSXP, 0));
cpp11::data_frame x_df(x);
expect_true(x_df.nrow() == 0);
UNPROTECT(1);
}

test_that("writable::data_frame::nrow works with 0x1 dfs") {
// From bare list
SEXP x = PROTECT(Rf_allocVector(VECSXP, 1));
SET_VECTOR_ELT(x, 0, Rf_allocVector(INTSXP, 0));
cpp11::writable::data_frame x_df(x);
expect_true(x_df.nrow() == 0);
UNPROTECT(1);
}

Expand Down
58 changes: 27 additions & 31 deletions inst/include/cpp11/data_frame.hpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#pragma once

#include <cstdlib> // for abs
#include <cstdlib>
#include <cstdlib> // for abs
#include <initializer_list> // for initializer_list
#include <string> // for string, basic_string
#include <utility> // for move
Expand All @@ -24,53 +23,50 @@ class data_frame : public list {

friend class writable::data_frame;

/* we cannot use Rf_getAttrib because it has a special case for c(NA, -n) and creates
* the full vector */
static SEXP get_attrib0(SEXP x, SEXP sym) {
for (SEXP attr = ATTRIB(x); attr != R_NilValue; attr = CDR(attr)) {
if (TAG(attr) == sym) {
return CAR(attr);
}
}

return R_NilValue;
}

static R_xlen_t calc_nrow(SEXP x) {
auto nms = get_attrib0(x, R_RowNamesSymbol);
bool has_short_rownames =
(Rf_isInteger(nms) && Rf_xlength(nms) == 2 && INTEGER(nms)[0] == NA_INTEGER);
if (has_short_rownames) {
return static_cast<R_xlen_t>(abs(INTEGER(nms)[1]));
}

if (!Rf_isNull(nms)) {
return Rf_xlength(nms);
static R_xlen_t calculate_nrow(SEXP x) {
// If there is a `R_RowNamesSymbol`, we take the number of rows from there
// (regardless of whether or not there is a `"data.frame"` class yet!).
//
// As of R >=3.5, `Rf_getAttrib(R_RowNamesSymbol)` returns one of the following:
// - A character vector
// - An integer vector
// - An ALTREP integer compact intrange (converted cheaply from `c(NA, -n)`)
//
// We can take the `Rf_xlength()` of all of these cheaply.
//
// We used to worry about `Rf_getAttrib()` fully expanding `c(NA, -n)`, but with
// ALTREP integer compact intranges that is no longer the case.
SEXP row_names = Rf_getAttrib(x, R_RowNamesSymbol);
if (row_names != R_NilValue) {
return Rf_xlength(row_names);
}

// Otherwise it's a bare list, and we infer the number of rows from the first element
// (i.e. first column). Calling `Rf_xlength()` on the first column isn't 100% right
// (it doesn't dispatch to `length()`, nor does it correctly handle df-cols or
// matrix-cols), but it is close enough and people can use the data_frame constructor
// that allows you to specify `nrow` directly as needed.
if (Rf_xlength(x) == 0) {
return 0;
} else {
return Rf_xlength(VECTOR_ELT(x, 0));
}

return Rf_xlength(VECTOR_ELT(x, 0));
}

public:
/* Adapted from
* https://github.com/wch/r-source/blob/f2a0dfab3e26fb42b8b296fcba40cbdbdbec767d/src/main/attrib.c#L198-L207
*/
R_xlen_t nrow() const { return calc_nrow(*this); }
R_xlen_t nrow() const { return calculate_nrow(*this); }
R_xlen_t ncol() const { return size(); }
};

namespace writable {
class data_frame : public cpp11::data_frame {
private:
writable::list set_data_frame_attributes(writable::list&& x) {
return set_data_frame_attributes(std::move(x), calc_nrow(x));
return set_data_frame_attributes(std::move(x), calculate_nrow(x));
}

writable::list set_data_frame_attributes(writable::list&& x, R_xlen_t nrow) {
// `Rf_setAttrib(R_RowNamesSymbol)` will keep `c(NA, -n)` in compact form
x.attr(R_RowNamesSymbol) = {NA_INTEGER, -static_cast<int>(nrow)};
x.attr(R_ClassSymbol) = "data.frame";
return std::move(x);
Expand Down