Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Manifest.toml
4 changes: 3 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
name = "GroupedArrays"
uuid = "6407cd72-fade-4a84-8a1e-56e431fc1533"
authors = ["matthieugomez <gomez.matthieu@gmail.com>"]
version = "0.3.4"
version = "0.3.5"

[deps]
DataAPI = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"

[compat]
DataAPI = "1"
Missings = "1"
PrecompileTools = "1"
julia = "1.4"

[extras]
Expand Down
28 changes: 22 additions & 6 deletions src/GroupedArrays.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ module GroupedArrays
using Missings
using DataAPI
using Base.Threads
using PrecompileTools

include("spawn.jl")
include("utils.jl")

Expand Down Expand Up @@ -70,7 +72,7 @@ Construct a `GroupedArray` taking on distinct values for the groups formed by el
* `args...`: `AbstractArrays` of same sizes.

### Keyword arguments
* `coalesce::Bool`: should missing values be considered as distinct group indicators?
* `sort::Union{Bool, Nothing}`: should the order of the groups be the sort order? Set to `nothing` for best performance.

### Examples
Expand Down Expand Up @@ -103,7 +105,6 @@ function GroupedArray(args...; coalesce = false, sort = true)
end

# Find index of representative row for each group
# now in fillfirst!
function find_index(g::GroupedArray)
groups, ngroups = g.groups, g.ngroups
idx = Vector{Int}(undef, ngroups)
Expand Down Expand Up @@ -132,7 +133,7 @@ function Base.convert(::Type{GroupedArray{Union{Int, Missing},N}}, g::GroupedArr
return GroupedArray{Union{Int, Missing},N}(g.groups, g.ngroups)
end
"""
    convert(::Type{GroupedArray{Int,N}}, g::GroupedArray{Union{Int,Missing},N})

Convert `g` to a `GroupedArray{Int,N}` that reuses `g.groups` and `g.ngroups`.

# Throws
- `InexactError`: if any entry of `g.groups` is not strictly positive
  (presumably a non-positive code marks a missing element; confirm against the constructor).
"""
function Base.convert(::Type{GroupedArray{Int, N}}, g::GroupedArray{Union{Int, Missing}, N}) where {N}
    # Validate with an explicit throw rather than `@assert`: an assertion may be disabled,
    # and when enabled it would raise `AssertionError` before the `InexactError` callers expect.
    all(>(0), g.groups) || throw(InexactError(:convert, GroupedArray{Int,N}, g))
    return GroupedArray{Int,N}(g.groups, g.ngroups)
end

Expand Down Expand Up @@ -191,17 +192,32 @@ end
# `missing` is a key exactly when the pool's type parameter `T` admits `Missing`.
@inline function Base.haskey(x::GroupedInvRefPool{T}, ::Missing) where {T}
    return Missing <: T
end

# An integer is a key when it lies between 1 and `x.ngroups`, inclusive.
@inline function Base.haskey(x::GroupedInvRefPool, v::Integer)
    return v >= 1 && v <= x.ngroups
end
# Look up the reference code for `missing`, which is always `0`.
# Throws `KeyError(missing)` when the pool's type parameter `T` does not admit `Missing`.
# The check sits under `@boundscheck`, so it may be elided for callers using `@inbounds`.
@inline function Base.getindex(x::GroupedInvRefPool{T}, ::Missing) where {T}
    # The condition must throw explicitly: a bare Bool expression under `@boundscheck`
    # is evaluated and discarded, which checks nothing.
    @boundscheck (T >: Missing) || throw(KeyError(missing))
    return 0
end
# Look up the reference code for an integer key: the key `i` itself.
# Throws `KeyError(i)` when `i` is outside `1:x.ngroups`.
# The check sits under `@boundscheck`, so it may be elided for callers using `@inbounds`.
@inline function Base.getindex(x::GroupedInvRefPool, i::Integer)
    # The condition must throw explicitly: a bare Bool expression under `@boundscheck`
    # is evaluated and discarded, which checks nothing.
    @boundscheck (1 <= i <= x.ngroups) || throw(KeyError(i))
    return i
end
# `get` for `missing`: the code `0` when the pool's type parameter `T` admits `Missing`,
# otherwise `default`.
@inline function Base.get(x::GroupedInvRefPool{T}, ::Missing, default) where {T}
    return T >: Missing ? 0 : default
end

# `get` for an integer key: `i` itself when `1 <= i <= x.ngroups`, otherwise `default`.
# The range test must use the argument `i`; any other name is undefined in this method
# and would raise `UndefVarError` on the first call.
@inline function Base.get(x::GroupedInvRefPool, i::Integer, default)
    return 1 <= i <= x.ngroups ? i : default
end
# Build the inverse reference pool for `g`: a `GroupedInvRefPool` carrying the element
# type parameter of `g` and its number of groups.
function DataAPI.invrefpool(g::GroupedArray{T}) where {T}
    return GroupedInvRefPool{T}(g.ngroups)
end

# Precompilation workload: call `GroupedArray` on small inputs covering integer, string and
# `missing`-bearing vectors, together with the `sort` and `coalesce` keywords.
@compile_workload begin
    # Sample inputs.
    p1 = [1, 2, 3, 2]
    p2 = [1, 1, 2, 2]
    p3 = ["a", "b", "c", "c"]
    p4 = Union{Int,Missing}[1, 2, missing, 2]
    p5 = Union{String,Missing}["a", "b", missing, "c"]

    # One and two integer columns, with the default `sort` and with `sort = nothing`.
    GroupedArray(p1)
    GroupedArray(p1; sort = nothing)
    GroupedArray(p1, p2)
    GroupedArray(p1, p2; sort = nothing)

    # Integer column combined with a string column.
    GroupedArray(p1, p3)

    # Columns whose element type allows `missing`.
    GroupedArray(p4)
    GroupedArray(p4; coalesce = true)
    GroupedArray(p5)
end

export GroupedArray, GroupedVector, GroupedMatrix
end # module
31 changes: 30 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,35 @@ invrefpools = DataAPI.invrefpool(g)
# An input made only of `missing` values: every element of the result must be missing.
g = GroupedArray([missing, missing, missing])
@test all(ismissing(x) for x in g)


# The number of groups must be the same for `sort = true`, `sort = nothing` and `sort = false`.
g_sort = GroupedArray(p1_missing; sort = true)
g_nosort = GroupedArray(p1_missing; sort = nothing)
g_false = GroupedArray(p1_missing; sort = false)
@test g_nosort.ngroups == g_sort.ngroups
@test g_false.ngroups == g_sort.ngroups

# An empty input yields an empty GroupedArray with zero groups.
g = GroupedArray(Int[])
@test length(g) == 0
@test g.ngroups == 0

# `convert` round trip: Int -> Union{Int, Missing} -> Int changes the element type
# while the elements still compare equal to the original.
g = GroupedArray([1, 2, 3, 1])
g_missing = convert(GroupedArray{Union{Int, Missing}, 1}, g)
@test eltype(g_missing) == Union{Int, Missing}
@test all(g .== g_missing)
g_back = convert(GroupedArray{Int, 1}, g_missing)
@test eltype(g_back) == Int
@test all(g .== g_back)
# Converting to the `Int` element type must throw `InexactError` when a missing is present.
g_with_missing = GroupedArray([1, missing, 2])
@test_throws InexactError convert(GroupedArray{Int, 1}, g_with_missing)

# `Base.get` on the inverse reference pool with integer keys: keys 1 and `g.ngroups`
# return themselves, and `g.ngroups + 1` falls back to the default.
g = GroupedArray(PooledArray(p1_missing), p2)
invrefpools = DataAPI.invrefpool(g)
@test get(invrefpools, 1, -1) == 1
@test get(invrefpools, g.ngroups, -1) == g.ngroups
@test get(invrefpools, g.ngroups + 1, -1) == -1


Loading