Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Transform interface and example Power transformation #1

Merged
merged 17 commits into from
Feb 2, 2021
94 changes: 94 additions & 0 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
name: CI
# Run nightly on a schedule, on pushes to master, on tags, and on any pull request
on:
schedule:
- cron: '0 2 * * *' # Daily at 2 AM UTC (8 PM CST)
push:
branches: [master]
tags: ["*"]
pull_request:
jobs:
test:
name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
version:
- "1.5" # Invenia Prod version
glennmoy marked this conversation as resolved.
Show resolved Hide resolved
- "1" # Latest Release
os:
- ubuntu-latest
- macOS-latest
- windows-latest
arch:
- x64
- x86
exclude:
# Test 32-bit only on Linux
- os: macOS-latest
arch: x86
- os: windows-latest
arch: x86
include:
# Add a 1.5 job because that's what Invenia actually uses
- os: ubuntu-latest
version: 1.5
arch: x64
steps:
- uses: actions/checkout@v2
- uses: julia-actions/setup-julia@v1
with:
version: ${{ matrix.version }}
arch: ${{ matrix.arch }}
- uses: actions/cache@v2
env:
cache-name: cache-artifacts
with:
path: ~/.julia/artifacts
key: ${{ runner.os }}-${{ matrix.arch }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
restore-keys: |
${{ runner.os }}-${{ matrix.arch }}-test-${{ env.cache-name }}-
${{ runner.os }}-${{ matrix.arch }}-test-
${{ runner.os }}-${{ matrix.arch }}-
${{ runner.os }}-
- uses: julia-actions/julia-buildpkg@latest
- uses: julia-actions/julia-runtest@latest
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v1
with:
file: lcov.info

slack:
name: Notify Slack Failure
needs: test
runs-on: ubuntu-latest
if: always() && github.event_name == 'schedule'
steps:
- uses: technote-space/workflow-conclusion-action@v2
- uses: voxmedia/github-action-slack-notify-build@v1
if: env.WORKFLOW_CONCLUSION == 'failure'
with:
channel: nightly-rse
status: FAILED
color: danger
env:
SLACK_BOT_TOKEN: ${{ secrets.RSE_SLACK_BOT_TOKEN }}

docs:
name: Documentation
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: julia-actions/setup-julia@v1
with:
version: '1'
- run: |
julia --project=docs -e '
using Pkg
Pkg.develop(PackageSpec(path=pwd()))
Pkg.instantiate()
include("docs/make.jl")'
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }}
2 changes: 1 addition & 1 deletion .github/workflows/CompatHelper.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: CompatHelper
on:
schedule:
- cron: 0 0 * * *
- cron: '0 0 * * *' # Every day at midnight
workflow_dispatch:
jobs:
CompatHelper:
Expand Down
29 changes: 29 additions & 0 deletions .github/workflows/JuliaNightly.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
name: JuliaNightly
# Scheduled nightly run against the Julia nightly build
on:
schedule:
- cron: '0 2 * * *' # Daily at 2 AM UTC (8 PM CST)
jobs:
test:
name: Julia Nightly - Ubuntu - x64
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: julia-actions/setup-julia@v1
with:
version: nightly
arch: x64
- uses: actions/cache@v2
env:
cache-name: julia-nightly-cache-artifacts
with:
path: ~/.julia/artifacts
key: ${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
restore-keys: |
${{ env.cache-name }}-
- uses: julia-actions/julia-buildpkg@latest
- uses: julia-actions/julia-runtest@latest
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v1
with:
file: lcov.info
34 changes: 0 additions & 34 deletions .travis.yml

This file was deleted.

16 changes: 13 additions & 3 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,23 @@
name = "Transform"
name = "Transforms"
uuid = "8fd68953-04b8-4117-ac19-158bf6de9782"
authors = ["Invenia Technical Computing Corporation"]
version = "0.1.0"

[deps]
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"

[compat]
julia = "1"
AxisArrays = "0.4"
AxisKeys = "0.1"
DataFrames = "0.22"
Tables = "1.3"
julia = "1.5"

[extras]
AxisArrays = "39de3d68-74b9-583c-8d2d-e117c070f3a9"
AxisKeys = "94b1ba4f-4ee9-5380-92f1-94cde586c3c5"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test"]
test = ["AxisArrays", "AxisKeys", "DataFrames", "Test"]
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Transform
# Transforms

[![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://invenia.github.io/Transform.jl/stable)
[![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://invenia.github.io/Transform.jl/dev)
[![Build Status](https://travis-ci.com/invenia/Transform.jl.svg?branch=master)](https://travis-ci.com/invenia/Transform.jl)
[![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://invenia.github.io/Transforms.jl/stable)
[![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://invenia.github.io/Transforms.jl/dev)
[![Build Status](https://travis-ci.com/invenia/Transforms.jl.svg?branch=master)](https://travis-ci.com/invenia/Transforms.jl)
[![Code Style: Blue](https://img.shields.io/badge/code%20style-blue-4495d1.svg)](https://github.com/invenia/BlueStyle)
[![ColPrac: Contributor's Guide on Collaborative Practices for Community Packages](https://img.shields.io/badge/ColPrac-Contributor's%20Guide-blueviolet)](https://github.com/SciML/ColPrac)
2 changes: 1 addition & 1 deletion docs/Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
deps = ["Distributed", "InteractiveUtils", "Logging", "Random"]
uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[[Transform]]
[[Transforms]]
path = ".."
uuid = "8fd68953-04b8-4117-ac19-158bf6de9782"
version = "0.1.0"
Expand Down
2 changes: 1 addition & 1 deletion docs/Project.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
[deps]
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
Transform = "8fd68953-04b8-4117-ac19-158bf6de9782"
Transforms = "8fd68953-04b8-4117-ac19-158bf6de9782"
12 changes: 6 additions & 6 deletions docs/make.jl
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
using Transform
using Transforms
using Documenter

makedocs(;
modules=[Transform],
modules=[Transforms],
authors="Invenia Technical Computing Corporation",
repo="https://github.com/invenia/Transform.jl/blob/{commit}{path}#L{line}",
sitename="Transform.jl",
repo="https://github.com/invenia/Transforms.jl/blob/{commit}{path}#L{line}",
sitename="Transforms.jl",
format=Documenter.HTML(;
prettyurls=get(ENV, "CI", "false") == "true",
canonical="https://invenia.github.io/Transform.jl",
canonical="https://invenia.github.io/Transforms.jl",
assets=String[],
),
pages=[
Expand All @@ -19,5 +19,5 @@ makedocs(;
)

deploydocs(;
repo="github.com/invenia/Transform.jl",
repo="github.com/invenia/Transforms.jl",
)
6 changes: 3 additions & 3 deletions docs/src/index.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
```@meta
CurrentModule = Transform
CurrentModule = Transforms
```

# Transform
# Transforms

```@index
```

```@autodocs
Modules = [Transform]
Modules = [Transforms]
```
5 changes: 0 additions & 5 deletions src/Transform.jl

This file was deleted.

12 changes: 12 additions & 0 deletions src/Transforms.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Package entry point: loads Tables.jl, declares the exported names and includes the
# implementation files.
module Transforms

using Tables

# Exported types.
export Transform, Power
# Exported pipeline entry points (declared in `transformers.jl`).
export transform, transform!

# `transformers.jl` is included before `power.jl` because `Power` subtypes the `Transform`
# type that is declared there.
include("utils.jl")
include("transformers.jl")
include("power.jl")

end
13 changes: 13 additions & 0 deletions src/power.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
"""
Power(exponent) <: Transform

Raise the data by the given `exponent`.
"""
struct Power <: Transform
exponent::Real
end

"""
    _apply!(x::AbstractArray{<:Real}, P::Power; kwargs...)

Raise every element of `x` to the power `P.exponent`, overwriting `x`, and return `x`.
Any `kwargs` are accepted and ignored.
"""
function _apply!(x::AbstractArray{T}, P::Power; kwargs...) where {T<:Real}
    # Fused in-place broadcast: writes straight into `x` without allocating a temporary
    # array for the right-hand side.
    x .= x .^ P.exponent
    return x
end
86 changes: 86 additions & 0 deletions src/transformers.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@

"""
Transform

Abstract supertype for all Transforms.
"""
abstract type Transform end

# Calling a `Transform` instance on `x` is shorthand for `apply(x, t; kwargs...)`.
function (t::Transform)(x; kwargs...)
    return apply(x, t; kwargs...)
end


"""
transform!(::T, data)

Defines the feature engineering pipeline for some type `T`, which comprises a collection of
[`Transform`](@ref)s to be peformed on the `data`.

`transform!` should be overloaded for custom types `T` that require feature engineering.
"""
function transform! end

"""
transform(::T, data)

Non-mutating version of [`transform!`](@ref).
"""
function transform end
glennmoy marked this conversation as resolved.
Show resolved Hide resolved

"""
Transforms.apply!(data::T, ::Transform; kwargs...) -> T

Applies the [`Transform`](@ref) mutating the input `data`.
Where possible, this should be extended for new data types `T`.
"""
function apply! end

"""
Transforms.apply(data::T, ::Transform; kwargs...) -> T

Non-mutating version of [`apply!`](@ref), which it delegates to by default.
Does not need to be extended unless a mutating [`Transform`](@ref) is not possible.
"""
function apply end

"""
apply!(A::AbstractArray{T}, ::Transform; dims=:, kwargs...) where T <: Real

Applies the [`Transform`](@ref) to each element of `A`.
Optionally specify the `dims` to apply the [`Transform`](@ref) along certain dimensions.
"""
function apply!(
A::AbstractArray{T}, t::Transform; dims=:, kwargs...
) where T <: Real
dims == Colon() && return _apply!(A, t; kwargs...)

for x in eachslice(A; dims=dims)
_apply!(x, t; kwargs...)
end

return A
end

# Default non-mutating path: duplicate `x` with `_try_copy`, then hand the duplicate to
# `apply!` so the caller's data is left untouched.
function apply(x, t::Transform; kwargs...)
    return apply!(_try_copy(x), t; kwargs...)
end

"""
Transforms.apply!(table::T, ::Transform; cols=nothing)::T where T

Applies the [`Transform`](@ref) to each of the specified columns in the `table`.
If no `cols` are specified, then the [`Transform`](@ref) is applied to all columns.
"""
function apply!(table::T, t::Transform; cols=nothing)::T where T
# TODO: We could probably handle iterators of tables here
Tables.istable(table) || throw(MethodError(apply!, (table, t)))

# Extract a columns iterator that we should be able to use to mutate the data.
# NOTE: Mutation is not guaranteed for all table types, but it avoid copying the data
columntable = Tables.columns(table)

cnames = cols === nothing ? propertynames(columntable) : cols
for cname in cnames
apply!(getproperty(columntable, cname), t)
end

return table
end
13 changes: 13 additions & 0 deletions src/utils.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
"""
_try_copy(data)

Try to `copy` the data, fallback to `deepcopy` if not supported.
Not all objects support `copy`, but we should use it to improve performance if possible.
"""
function _try_copy(data)
try
copy(data)
catch
deepcopy(data)
end
end
Loading