From aad7f90e21c0b313fc53327f93634a439fa00e5e Mon Sep 17 00:00:00 2001 From: jkrumbiegel <22495855+jkrumbiegel@users.noreply.github.com> Date: Tue, 21 Jun 2022 19:16:09 +0200 Subject: [PATCH] Use begin block and allow variable export (#50) --- CHANGELOG.md | 23 ++++++++++++++++++ Project.toml | 2 +- README.md | 39 +++++++++++++++++++++--------- src/Chain.jl | 23 ++++++++++++------ test/runtests.jl | 63 ++++++++++++++++++++++++++++++++++++++++-------- 5 files changed, 120 insertions(+), 30 deletions(-) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..bc75556 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,23 @@ +# v0.5 + +**Breaking**: The `@chain` macro now creates a `begin` block, not a `let` block. +This means that variables that are assigned within the macro are available outside. +Technically, situations are imaginable where this could lead to overwritten variables if someone used large expressions with intermediate variable names in begin blocks spliced into the chain. +It is however quite unlikely for the normal way that `@chain` is intended to be used. + +Additionally, it is now possible to use the syntax `variable = some_expression` to make use of the feature that variables can be exported. +The `some_expression` part is handled exactly like before. +This enables you to carry parts of a computation forward to a later step in the chain or outside of it: + +```julia +@chain df begin + transform(...) + select(...) + intermediate = subset(...) + groupby(...) + combine(...) + join(intermediate) +end + +@show intermediate +``` \ No newline at end of file diff --git a/Project.toml b/Project.toml index 8f676d3..002cf3d 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Chain" uuid = "8be319e6-bccf-4806-a6f7-6fae938471bc" authors = ["Julius Krumbiegel"] -version = "0.4.10" +version = "0.5.0" [compat] julia = "1" diff --git a/README.md b/README.md index e7c470e..91f8926 100644 --- a/README.md +++ b/README.md @@ -107,14 +107,14 @@ result = @chain df begin end ``` -The pipeless block is equivalent to this: +The chain block is equivalent to this: ```julia -result = let - var1 = dropmissing(df) - var2 = filter(r -> r.weight < 6, var1) - var3 = groupby(var2, :group) - var4 = combine(var3, :weight => sum => :total_weight) +result = begin + local var"##1" = dropmissing(df) + local var"##2" = filter(r -> r.weight < 6, var"##1") + local var"##3" = groupby(var"##2", :group) + local var"##4" = combine(var"##3", :weight => sum => :total_weight) end ``` @@ -151,6 +151,21 @@ This works well for short sequences that are still easy to parse visually withou @chain 1:10 filter(isodd, _) sum sqrt ``` +## Variable assignments in the chain + +You can prefix any of the expressions that Chain.jl can handle with a variable assignment. +The previous value will be spliced into the right-hand-side expression and the result will be available afterwards under the chosen variable name. + +```julia +@chain 1:10 begin + _ * 3 + filtered = filter(iseven, _) + sum +end + +filtered == [6, 12, 18, 24, 30] +``` + ## The `@aside` macro For debugging, it's often useful to look at values in the middle of a pipeline. @@ -172,12 +187,12 @@ end Which is again equivalent to this: ```julia -result = let - var1 = dropmissing(df) - var2 = filter(r -> r.weight < 6, var1) - var3 = groupby(var2, :group) - println("There are $(length(var3)) groups after step 3.") - var4 = combine(var3, :weight => sum => :total_weight) +result = begin + local var"##1" = dropmissing(df) + local var"##2" = filter(r -> r.weight < 6, var"##1") + local var"##3" = groupby(var"##2", :group) + println("There are $(length(var"##3")) groups after step 3.") + local var"##4" = combine(var"##3", :weight => sum => :total_weight) end ``` diff --git a/src/Chain.jl b/src/Chain.jl index be144fa..74f43cb 100644 --- a/src/Chain.jl +++ b/src/Chain.jl @@ -6,8 +6,8 @@ is_aside(x) = false is_aside(x::Expr) = x.head == :macrocall && x.args[1] == Symbol("@aside") -insert_first_arg(symbol::Symbol, firstarg) = Expr(:call, symbol, firstarg) -insert_first_arg(any, firstarg) = insertionerror(any) +insert_first_arg(symbol::Symbol, firstarg; assignment = false) = Expr(:call, symbol, firstarg) +insert_first_arg(any, firstarg; assignment = false) = insertionerror(any) function insertionerror(expr) error( @@ -35,12 +35,21 @@ function is_moduled_symbol(e::Expr) e.args[2].value isa Symbol end -function insert_first_arg(e::Expr, firstarg) +function insert_first_arg(e::Expr, firstarg; assignment = false) head = e.head args = e.args + # variable = ... + # set assignment = true and rerun with right hand side + if !assignment && head == :(=) && length(args) == 2 + if !(args[1] isa Symbol) + error("You can only use assignment syntax with a Symbol as a variable name, not $(args[1]).") + end + variable = args[1] + righthandside = insert_first_arg(args[2], firstarg; assignment = true) + :($variable = $righthandside) # Module.SubModule.symbol - if is_moduled_symbol(e) + elseif is_moduled_symbol(e) Expr(:call, e, firstarg) # f(args...) --> f(firstarg, args...) @@ -104,7 +113,7 @@ function rewrite(expr, replacement) new_expr = insert_first_arg(new_expr, replacement) end replacement = gensym() - new_expr = Expr(Symbol("="), replacement, new_expr) + new_expr = :(local $replacement = $new_expr) end (new_expr, replacement) @@ -188,7 +197,7 @@ function rewrite_chain_block(block) # we just do the firstvar transformation for the first non LineNumberNode # we encounter if !(did_first || expr isa LineNumberNode) - expr = Expr(Symbol("="), firstvar, expr) + expr = :(local $firstvar = $expr) did_first = true push!(rewritten_exprs, expr) continue @@ -198,7 +207,7 @@ function rewrite_chain_block(block) push!(rewritten_exprs, rewritten) end - result = Expr(:let, Expr(:block), Expr(:block, rewritten_exprs..., replacement)) + result = Expr(:block, rewritten_exprs..., replacement) :($(esc(result))) end diff --git a/test/runtests.jl b/test/runtests.jl index a5a2542..49e994c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -109,15 +109,6 @@ end sum end end) - - # variable defined in chain block doesn't leak out - z = @chain [1, 2, 3] begin - @aside inside_var = 5 - @aside @test inside_var == 5 - sum(_) + inside_var - end - @test z == 11 - @test_throws UndefVarError inside_var end @testset "nested chains" begin @@ -441,4 +432,56 @@ end @test 36 == @chain 1:3 begin @chain _ sum _ ^ 2 end -end \ No newline at end of file +end + +@testset "variable assignment syntax" begin + result = @chain 1:10 begin + x = filter(iseven, _) + y = sum + sqrt + end + @test x == filter(iseven, 1:10) + @test y == sum(x) + @test result == sqrt(y) +end + +module TestModule + using Chain +end + +@testset "no variable leaks" begin + + allnames() = Set(names(TestModule, all = true)) + _names = allnames() + + TestModule.eval(quote + @chain 1:10 begin + sum + sqrt + end + end) + + @test setdiff(allnames(), _names) == Set() + + TestModule.eval(quote + @chain begin + 1:10 + sum(_) + sqrt(_) + end + end) + + @test setdiff(allnames(), _names) == Set() + + TestModule.eval(quote + @chain begin + 1:10 + x = sum(_) + y = sqrt(_) + end + end) + + @test setdiff(allnames(), _names) == Set([:x, :y]) +end + +