Skip to content

Commit

Permalink
Improve polish names (#15)
Browse files Browse the repository at this point in the history
* Use Unicode.normalize in _preprocess_name

* Improve _preprocess_name to replace % and # with letters

* Require Julia v1.7 as new minimum
  • Loading branch information
TheRoniOne authored Apr 7, 2024
1 parent cd3469b commit 212fe7d
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 8 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ jobs:
fail-fast: false
matrix:
version:
- "1.6"
- "1.9"
- "1.7"
- "1.10"
- "nightly"
os:
- ubuntu-latest
Expand Down
4 changes: 3 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@ version = "1.0.6"
[deps]
PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"

[compat]
PrettyTables = "1, 2"
Tables = "1"
julia = "1.6"
Unicode = "1"
julia = "1.7"
13 changes: 8 additions & 5 deletions src/polish_names.jl
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
using Base: String
using Unicode: normalize

# Singleton tag type: the requested naming style lives in the type parameter `T`
# (for example `Style{:snake_case}`), so a method can be selected by dispatching
# on the style.
struct Style{T} end

# Lift a style symbol into the type domain: `Style(:camelCase)` builds `Style{:camelCase}()`.
Style(s::Symbol) = Style{s}()

# Character class matching a single whitespace character or one of the listed
# punctuation characters. Bound exactly once: a second `const` binding of the same
# name with a different pattern would redefine a constant.
const SPECIAL_CHARS = r"[\s\-\.\_\/\:\\\*\?\"\'\>\<\|\!\,\$\@\^\[\]\{\}]"

"""
polish_names!(table::CleanTable; style::Symbol=:snake_case)
Expand Down Expand Up @@ -43,7 +44,7 @@ Return a vector of symbols containing new names that are unique and formatted usi
"""
function generate_polished_names(names; style::Symbol=:snake_case)
    # Step 1: run every raw name through `_preprocess_name` (broadcast, so the
    # container handling is whatever broadcasting does for `names`).
    preprocessed = _preprocess_name.(names)

    # Step 2: turn the style symbol into a `Style{style}` tag and delegate to the
    # method that takes the tag as its second argument.
    style_tag = Style(style)
    return generate_polished_names(preprocessed, style_tag)
end

Expand Down Expand Up @@ -78,13 +79,15 @@ function generate_polished_names(names, ::Style)
end

"""
    _preprocess_name(name)

Return `name` as a `String` prepared for style conversion.

Processing steps, in order:

1. Convert `name` to a `String` and Unicode-normalize it with `stripmark=true`,
   which removes diacritical marks (`"NOËL"` becomes `"NOEL"`).
2. If the result contains at least one uppercase letter and no lowercase letter
   (an all-caps name such as `"ID"`, `"NOEL%"` or `"NOEL #"`), lowercase it.
   Mixed-case names such as `"FirstName"` are left untouched so their word
   boundaries survive.
3. Replace every `%` with `Percent` and every `#` with `Number`.
"""
function _preprocess_name(name)
    cleaned = normalize(String(name); stripmark=true)

    # Decide "all caps" from the letters themselves. An alternation such as
    # `^[[:upper:]]+|%|#$` would match any name that merely starts with a capital
    # or contains a `%`, and would then lowercase mixed-case names by mistake.
    has_upper = any(isuppercase, cleaned)
    has_lower = any(islowercase, cleaned)
    if has_upper && !has_lower
        cleaned = lowercase(cleaned)
    end

    # Applied on every path (no early return), so all-caps names get their symbols
    # spelled out too. Multi-pair `replace` on strings needs Julia 1.7 or newer.
    return replace(cleaned, "%" => "Percent", "#" => "Number")
end

Expand Down
8 changes: 8 additions & 0 deletions test/test_polish_names.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ using DataFrames: DataFrame
" _aName with_loTsOfProblems_1",
" _aName with_loTsOfProblems_1_a/b'c",
"ID",
"NOËL%",
"NOEL #",
];
style=:snake_case,
) == Vector{Symbol}([
Expand All @@ -42,6 +44,8 @@ using DataFrames: DataFrame
:a_name_with_lo_ts_of_problems_1_1,
:a_name_with_lo_ts_of_problems_1_a_b_c,
:id,
:noel_percent,
:noel_number,
])

@test generate_polished_names(
Expand All @@ -52,6 +56,8 @@ using DataFrames: DataFrame
" _aNameABC with_loTsOfProblemsDEF",
" _aNameABC with_loTsOfProblemsDEF_a/b'c",
"ID",
"NOËL%",
"NOEL #",
];
style=:camelCase,
) == Vector{Symbol}([
Expand All @@ -61,6 +67,8 @@ using DataFrames: DataFrame
:aNameABCWithLoTsOfProblemsDEF,
:aNameABCWithLoTsOfProblemsDEFABC,
:id,
:noelPercent,
:noelNumber,
])

let err = nothing
Expand Down

0 comments on commit 212fe7d

Please sign in to comment.