Skip to content

Commit

Permalink
add docs
Browse files Browse the repository at this point in the history
  • Loading branch information
pdeffebach committed Dec 18, 2023
1 parent 8573e3c commit 28b7a4b
Show file tree
Hide file tree
Showing 5 changed files with 147 additions and 14 deletions.
10 changes: 5 additions & 5 deletions docs/src/dplyr.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,22 +136,22 @@ Similarly, to select the first column, use the syntax `$1`.
@select msleep $1
```

To select all the columns *except* a specific column, use the `Not` function for inverse selection. We also need to wrap `Not` in the `$` sign, because it is not a symbol.
To select all the columns *except* a specific column, use the `Not` function for inverse selection.

```@repl 1
@select msleep $(Not(:name))
@select msleep Not(:name)
```

To select a range of columns by name, use the `Between` operator:

```@repl 1
@select msleep $(Between(:name, :order))
@select msleep Between(:name, :order)
```

To select all columns that start with the character string `"sl"` use [regular expressions](https://regexone.com/):
To select all columns that start with the character string `"sl"` use [regular expressions](https://regexone.com/) in conjunction with `Cols`.

```@repl 1
@select msleep $(r"^sl")
@select msleep Cols(r"^sl")
```

Regular expressions are powerful, but can be difficult for new users to understand. Here are some quick tips.
Expand Down
16 changes: 10 additions & 6 deletions docs/src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,6 @@ but exported by DataFramesMeta for convenience.

# Provided macros

!!! note

Newer versions of DataFrames.jl support the operators `Between`, `All`, `Cols`,
and `Not` when selecting and transforming columns. DataFramesMeta does not currently
support this syntax.

## `@select` and `@select!`

Column selections and transformations. Only newly created columns are kept.
Expand All @@ -79,6 +73,16 @@ gd = groupby(df, :x);
@select!(gd, :y = 2 .* :y .* first(:y))
```

To select or de-select multiple columns, use `Not`, `Between`, `All`, and `Cols`.
These multi-column selectors are all re-exported from DataFrames.jl.

```julia
@select df Not(:x)
@select df Between(:x, :y)
@select df All()
@select df Cols(r"x") # Regular expressions.
```

## `@transform` and `@transform!`

Add additional columns based on keyword-like arguments. Operates on both a
Expand Down
20 changes: 18 additions & 2 deletions src/macros.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1816,7 +1816,7 @@ end
# Builds and returns a quoted `select(...)` call from the macro arguments.
# `x` and `args...` are handed to `get_df_args_kwargs` (with `wrap_byrow = false`), and
# its result is unpacked into the data argument, the selection expressions, the outer
# flags, and the keyword arguments.
# NOTE(review): presumably this is the expansion helper behind `@select` — confirm
# against the macro definition, which is not visible in this hunk.
function select_helper(x, args...)
x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = false)

# NOTE(review): this first assignment is overwritten by the next line before it is used;
# it looks like the pre-change line of the diff shown next to its replacement.
t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs)
# Lazily translate each selection expression with `fun_to_vec`. `allow_multicol = true`
# opts in to the multi-column selector handling (`All()`, `Between(...)`, `Cols(...)`,
# `Not(...)`) described in `fun_to_vec`'s classification comments.
t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags, allow_multicol = true) for ex in exprs)
quote
# Splice the data argument, the translated selections, and the keyword arguments
# into a single `select` call.
$select($x, $(t...); $(kw...))
end
end
Expand Down Expand Up @@ -1881,6 +1881,14 @@ transformations by row, `@select` allows `@byrow` at the
beginning of a block of selections (i.e. `@byrow begin... end`).
All transformations in the block will operate by row.
To select many columns at once use the tools `Not`, `Between`, `All`, and `Cols`.
* `@select df Not(:a)` keeps all columns except for `:a`
* `@select df Between(:a, :z)` keeps all columns between `:a` and `:z`, inclusive
* `@select df All()` keeps all columns
* `@select df Cols(...)` can be used to combine many different selectors, as well as use
regular expressions. For example `Cols(r"a")` selects all columns whose names contain `"a"`.
$ASTABLE_MACRO_FLAG_DOCS
$ASTABLE_RHS_SELECT_TRANSFORM_DOCS
Expand All @@ -1899,7 +1907,7 @@ When inputs are given in "block" format, the last lines may be written
```
@select gd begin
:a
@select copycols = false
@kwarg copycols = false
end
```
Expand Down Expand Up @@ -2054,6 +2062,14 @@ transformations by row, `@select!` allows `@byrow` at the
beginning of a block of selections (i.e. `@byrow begin... end`).
All transformations in the block will operate by row.
To select many columns at once use the tools `Not`, `Between`, `All`, and `Cols`.
* `@select! df Not(:a)` keeps all columns except for `:a`
* `@select! df Between(:a, :z)` keeps all columns between `:a` and `:z`, inclusive
* `@select! df All()` keeps all columns
* `@select! df Cols(...)` can be used to combine many different selectors, as well as use
regular expressions. For example `Cols(r"a")` selects all columns whose names contain `"a"`.
$ASTABLE_MACRO_FLAG_DOCS
$ASTABLE_RHS_SELECT_TRANSFORM_DOCS
Expand Down
25 changes: 24 additions & 1 deletion src/parsing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,14 @@ function get_column_expr(e::Expr)
Base.depwarn("cols is deprecated use $DOLLAR to escape column names instead", :cols)
return e.args[2]
end
if e.head === :call
e1 = e.args[1]
if e1 === :All || e1 === :Not || e1 === :Between || e1 == :Cols
s = "Multi-column references outside of @select, @rselect, @select!" *
" and @rselect! must be wrapped in AsTable"
throw(ArgumentError(s))
end
end
return nothing
end
get_column_expr(x::QuoteNode) = x
Expand Down Expand Up @@ -314,10 +322,12 @@ end
function fun_to_vec(ex::Expr;
gensym_names::Bool=false,
outer_flags::NamedTuple=deepcopy(DEFAULT_FLAGS),
no_dest::Bool=false)
no_dest::Bool=false,
allow_multicol::Bool=false)
# classify the type of expression
# :x # handled via dispatch
# $:x # handled as though above
# All(), Between(...), Cols(...), Not(...), requires allow_multicol (only true in select)
# f(:x) # requires no_dest, for `@with` and `@subset` in future
# :y = :x # Simple pair
# :y = $:x # Extract and return simple pair (no function)
Expand All @@ -342,6 +352,19 @@ function fun_to_vec(ex::Expr;
# :x
# handled below via dispatch on ::QuoteNode

# Multi-column selectors (`All()`, `Not(...)`, `Cols(...)`, `Between(...)`) are
# passed through untouched, but only when the caller opts in with
# `allow_multicol = true`.
# Every other call expression, and every call when `allow_multicol` is false,
# falls through to the regular handling below and is treated as a normal function.
if allow_multicol && ex.head === :call
    # The function being called is `ex.args[1]`. `ex.head` is always `:call` in this
    # branch, so comparing the head against selector names could never match, and
    # the comparison must actually gate the `return`.
    callee = ex.args[1]
    if callee === :All || callee === :Not || callee === :Cols || callee === :Between
        return ex
    end
end

ex_col = get_column_expr(ex)
if ex_col !== nothing
return ex_col
Expand Down
90 changes: 90 additions & 0 deletions test/multicol.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
module TestMultiCol

using Test
using DataFrames
using DataFramesMeta

# Module-level `df` so that the `@eval`-ed macro calls below, which are evaluated at
# module scope, can refer to a data frame by this name.
df = DataFrame(A = 1, AA = 2, B = 3)

# Multi-column selectors used directly as arguments to `@select`.
@testset "select_multi" begin
    df = DataFrame(A = 1, AA = 2, B = 3)

    t = @select df Not(:A)
    @test t == DataFrame(AA = 2, B = 3)

    t = @select df All()
    @test t == DataFrame(A = 1, AA = 2, B = 3)

    # An unanchored regex matches every column whose name contains "A".
    t = @select df Cols(r"A")
    @test t == DataFrame(A = 1, AA = 2)

    t = @select df Between(:AA, :B)
    @test t == DataFrame(AA = 2, B = 3)
end

# Multi-column selectors are rejected by macros other than the `@select` family.
# The error is raised during macro expansion, hence `@eval` and `LoadError`.
@testset "othermacros_multi" begin
    df = DataFrame(A = 1, AA = 2, B = 3)

    @test_throws LoadError @eval @with df Not(:A)

    @test_throws LoadError @eval @with df All()

    @test_throws LoadError @eval @with df Cols(r"A")

    @test_throws LoadError @eval @with df Between(:AA, :B)

    @test_throws LoadError @eval @with(df, begin
        1
        Not(:A)
    end)

    @test_throws LoadError @eval @with df begin
        1
        All()
    end

    @test_throws LoadError @eval @with df begin
        1
        Cols(r"A")
    end

    @test_throws LoadError @eval @with df begin
        1
        Between(:AA, :B)
    end
end

# Multi-column selectors are also rejected inside `@select` when they appear on the
# right-hand side of a `:y = ...` transformation rather than as a bare selector.
# Named distinctly from the testset above so that failure reports are unambiguous.
@testset "select_multi_rhs" begin
    df = DataFrame(A = 1, AA = 2, B = 3)

    @test_throws LoadError @eval @select df :y = Not(:A)

    @test_throws LoadError @eval @select df :y = All()

    @test_throws LoadError @eval @select df :y = Cols(r"A")

    @test_throws LoadError @eval @select df :y = Between(:AA, :B)

    @test_throws LoadError @eval @select(df, :y = begin
        1
        Not(:A)
    end)

    @test_throws LoadError @eval @select df :y = begin
        1
        All()
    end

    @test_throws LoadError @eval @select df :y = begin
        1
        Cols(r"A")
    end

    @test_throws LoadError @eval @select df :y = begin
        1
        Between(:AA, :B)
    end
end


end # module

0 comments on commit 28b7a4b

Please sign in to comment.