Skip to content

Commit

Permalink
Merge pull request #14 from JuliaData/anandijain-master
Browse files Browse the repository at this point in the history
Add narrowtypes functionality
  • Loading branch information
quinnj authored Nov 19, 2020
2 parents a16711d + f4c4d50 commit 52d0974
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 0 deletions.
38 changes: 38 additions & 0 deletions src/TableOperations.jl
Original file line number Diff line number Diff line change
Expand Up @@ -410,4 +410,42 @@ Base.eltype(x::RowPartitions) = Vector{MaterializedRow}
return resize!(v, i), y === nothing ? nothing : (y[2],)
end

# Column-access table wrapper produced by `narrowtypes`: pairs the original columns object
# with a schema whose column types were re-inferred from the actual values.
# The fields are read with `getfield` elsewhere in this file rather than dot syntax
# (presumably because `Tables.AbstractColumns` routes property access to column lookup —
# confirm against Tables.jl).
struct NarrowTypes{T} <: Tables.AbstractColumns
# Wrapped columns object; `narrowtypes` stores the result of `Tables.columns(table)` here.
x::T
# Schema carrying the narrowed column types computed by `narrowtypes`.
# NOTE(review): `Tables.Schema` looks not fully parameterized here, which would make this
# field abstractly typed — confirm whether that matters for callers.
schema::Tables.Schema
end

# Internal accessor: return the narrowed schema stored on a `NarrowTypes` wrapper.
# Reads the field through `getfield` rather than dot syntax.
function schema(nt::NarrowTypes)
    return getfield(nt, :schema)
end

# narrowarray(x)
#
# Return the narrowest type that covers every element of `x`, obtained by folding
# `Base.promote_typejoin` over `typeof` of each element.
function narrowarray(x)
    # An empty column has no values to inspect, and `mapreduce` without `init` throws on
    # empty input, so fall back to the declared element type.
    isempty(x) && return eltype(x)
    return mapreduce(typeof, Base.promote_typejoin, x)
end

# narrowarray(T, x)
#
# Return the column `x` with element type `T`. When `x` already has element type `T` it is
# returned as-is (no copy); otherwise its values are copied into a new `Vector{T}`.
narrowarray(::Type{T}, x::AbstractArray{T}) where {T} = x
narrowarray(::Type{T}, x::AbstractArray{S}) where {T, S} = Vector{T}(x)

"""
    TableOperations.narrowtypes(source) => TableOperations.NarrowTypes
    source |> TableOperations.narrowtypes() => TableOperations.NarrowTypes

Take a Tables.jl-compatible source, with potentially "wide" column types, and re-infer the
schema by promoting the types of each actual value for each column. Useful, for example,
when a columnar table source has a column type of `Any`, and a more concrete type is
desired. Uses `Base.promote_typejoin` internally to do actual type promotion.

Every value of every column is inspected when `narrowtypes` is called in order to build the
new schema. The column data itself is only converted to the narrowed element type when a
column is requested through `Tables.getcolumn`.
"""
function narrowtypes(table)
    cols = Tables.columns(table)
    # Fetch the column names once and reuse them for both the type scan and the schema.
    colnames = Tables.columnnames(cols)
    coltypes = [narrowarray(Tables.getcolumn(cols, nm)) for nm in colnames]
    return NarrowTypes(cols, Tables.Schema(colnames, coltypes))
end

# Zero-argument form: return a one-argument function that applies `narrowtypes` to its
# input, so the operation can be used at the end of a pipe (`source |> narrowtypes()`).
function narrowtypes()
    return source -> narrowtypes(source)
end

# Tables.jl interface for `NarrowTypes`: it is a table with column access, it acts as its
# own columns object, and it reports the narrowed schema it was constructed with.
function Tables.istable(::Type{<:NarrowTypes})
    return true
end

function Tables.columnaccess(::Type{<:NarrowTypes})
    return true
end

function Tables.columns(x::NarrowTypes)
    return x
end

function Tables.schema(nt::NarrowTypes)
    return schema(nt)
end

# Column names are taken from the stored (narrowed) schema.
function Tables.columnnames(nt::NarrowTypes)
    sch = schema(nt)
    return sch.names
end

# Column lookup by name: look up the narrowed type for the column in the stored schema,
# fetch the column from the wrapped columns object (first field of the wrapper), and hand
# both to `narrowarray` to obtain the column with the narrowed element type.
function Tables.getcolumn(nt::NarrowTypes, nm::Symbol)
    T = Tables.columntype(schema(nt), nm)
    col = Tables.getcolumn(getfield(nt, 1), nm)
    return narrowarray(T, col)
end

# Column lookup by position: same as the by-name method, using the integer index for both
# the schema lookup and the wrapped column lookup.
function Tables.getcolumn(nt::NarrowTypes, i::Int)
    T = Tables.columntype(schema(nt), i)
    col = Tables.getcolumn(getfield(nt, 1), i)
    return narrowarray(T, col)
end

end # module
12 changes: 12 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -315,4 +315,16 @@ parts = collect(Tables.partitions(TableOperations.makepartitions(rtable2, 3)))
@test length(parts[end]) == 1
@test parts[end][1].a == 20

end

@testset "TableOperations.narrowtypes" begin

    # Every column is declared `Any`; narrowing should recover the concrete column types.
    ctable_type_any = (A=Any[1, missing, 3], B=Any[1.0, 2.0, 3.0], C=Any["hey", "there", "sailor"])

    nt = TableOperations.narrowtypes(ctable_type_any)
    @test Tables.istable(nt)
    @test Tables.columnaccess(nt)
    @test Tables.schema(nt) == Tables.schema(ctable)
    @test Tables.columnnames(nt) == Tables.columnnames(ctable)

    # Column access is where values are actually converted to the narrowed element type,
    # so check both the element type and the contents of the returned columns.
    colA = Tables.getcolumn(nt, :A)
    @test eltype(colA) === Union{Missing, Int}
    @test isequal(colA, [1, missing, 3])

    colB = Tables.getcolumn(nt, :B)
    @test eltype(colB) === Float64
    @test colB == [1.0, 2.0, 3.0]

    colC = Tables.getcolumn(nt, :C)
    @test eltype(colC) === String
    @test colC == ["hey", "there", "sailor"]

    # The zero-argument (pipe-friendly) form must give the same schema.
    piped = ctable_type_any |> TableOperations.narrowtypes()
    @test Tables.schema(piped) == Tables.schema(nt)

end

2 comments on commit 52d0974

@quinnj
Copy link
Member Author

@quinnj quinnj commented on 52d0974 Nov 19, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register()

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/24927

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v1.0.0 -m "<description of version>" 52d0974c0b24f730f3b71e635cec5d9abc82d549
git push origin v1.0.0

Please sign in to comment.