Skip to content

Commit

Permalink
Merge pull request #59 from GregFa/main
Browse files Browse the repository at this point in the history
fixed tests and added more tests
  • Loading branch information
GregFa authored Aug 24, 2024
2 parents 6c20a6d + befb05f commit 6a3cf7c
Show file tree
Hide file tree
Showing 10 changed files with 199 additions and 70 deletions.
6 changes: 3 additions & 3 deletions src/BigRiverQTL.jl
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,6 @@ module BigRiverQTL
#########
# Plots #
#########
include("plots/plots_helpers.jl")
export gmap2df, pmap2df

include("plots/plots_qtl.jl")
export plot_QTL

Expand All @@ -89,6 +86,9 @@ module BigRiverQTL
#########
# Utils #
#########
include("utils/convert_utils.jl")
export gmap2df, pmap2df

include("utils/subset_utils.jl")
export select_sample, select_marker

Expand Down
20 changes: 19 additions & 1 deletion src/io/export_to_type.jl
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,25 @@ function get_gmap(filename::String)
# relative position
pos = [group.pos for group in gdf]

return Gmap(chr, marker, pos)
# unit
unit = ""
f = open(gmapfile, "r")

s = readline(f)
s_lower = lowercase(s) # Convert the input string to lowercase
if occursin("mbp", s_lower)
unit = "Mbp"
elseif occursin("mb", s_lower)
unit = "Mb"
elseif occursin("cm", s_lower)
unit = "cM"
else
@warn "No unit detected!"
end

close(f)

return Gmap(chr, marker, pos, unit)
end


Expand Down
54 changes: 0 additions & 54 deletions src/plots/plots_helpers.jl

This file was deleted.

1 change: 1 addition & 0 deletions src/plots/plots_qtl.jl
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ function plot_QTL(vLOD::Vector{<:AbstractFloat}, dfgInfo::Gmap; kwargs...)
return plot_QTL(vLOD, gmap2df(dfgInfo); kwargs...)
end


# Convenience method for scan results paired with a `Gmap`: the map is flattened
# with `gmap2df` and the call is forwarded, keyword arguments untouched, to the
# `plot_QTL` method that accepts the flattened table.
plot_QTL(scanresult::NamedTuple, dfgInfo::Gmap; kwargs...) =
    plot_QTL(scanresult, gmap2df(dfgInfo); kwargs...)
Expand Down
2 changes: 2 additions & 0 deletions src/struct/datastructure.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@
* `chr` contains chromosomes names.
* `marker_name` contains the marker names for each chromosome.
* `pos` is a vector of vectors containing the relative positions of the markers in each chromosome.
* `unit` contains unit for the chromosome length.
"""
struct Gmap
# Chromosome names, one entry per chromosome.
chr::Vector{AbstractString}
# Marker names grouped by chromosome: one inner vector per chromosome.
marker_name::Vector{Vector{AbstractString}}
# Relative marker positions, grouped by chromosome like `marker_name`.
pos::Vector{Vector{Float64}}
# Unit label for the positions (e.g. "cM", "Mb" or "Mbp"); may be empty when unknown.
unit::AbstractString
end


Expand Down
81 changes: 81 additions & 0 deletions src/utils/convert_utils.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
"""
    map2df(gmap::Union{Gmap, Pmap}) -> DataFrame

Convert a genetic (`Gmap`) or physical (`Pmap`) map object to a `DataFrame` with one
row per marker.

# Arguments
* `gmap`: a `Gmap` or `Pmap` object. Three of its fields are read:
    * `chr`: chromosome identifiers, one entry per chromosome.
    * `marker_name`: nested vector; `marker_name[i]` holds the marker names of `chr[i]`.
    * `pos`: nested vector laid out like `marker_name`, holding the marker positions.

# Returns
* `DataFrame` with three columns:
    * `Locus`: all marker names, flattened chromosome by chromosome.
    * `Chr`: the chromosome identifier of each marker.
    * `Pos`: all marker positions, flattened in the same order as `Locus`.

# Throws
* `DimensionMismatch` if `chr` and `marker_name` do not have the same number of entries.
"""
function map2df(gmap::Union{Gmap, Pmap})
    n_chr = length(gmap.chr)
    n_groups = length(gmap.marker_name)
    # Every marker group needs exactly one chromosome identifier; a mismatch would
    # otherwise leave some markers without a real chromosome label.
    n_chr == n_groups || throw(DimensionMismatch(
        "`chr` has $n_chr entries but `marker_name` has $n_groups marker groups",
    ))

    # One chromosome label per marker, in the same order as the flattened marker names.
    chr_col = Vector{String}(undef, sum(length.(gmap.marker_name)))
    offset = 0
    for (chr_name, markers) in zip(gmap.chr, gmap.marker_name)
        n_markers = length(markers)
        chr_col[(offset + 1):(offset + n_markers)] .= chr_name
        offset += n_markers
    end

    return DataFrame(
        Locus = reduce(vcat, gmap.marker_name),
        Chr = chr_col,
        Pos = reduce(vcat, gmap.pos),
    )
end


"""
    gmap2df(gmap::Gmap) -> DataFrame

Flatten a genetic map (`Gmap`) into a `DataFrame`. This is a thin wrapper that
forwards to `map2df`.

# Arguments
- `gmap`: genetic map object providing the fields
    - `chr`: chromosome identifiers,
    - `marker_name`: marker names, nested by chromosome,
    - `pos`: marker positions, nested by chromosome.

# Returns
- `DataFrame`: one row per marker, with the columns `Locus` (marker name),
  `Chr` (chromosome identifier) and `Pos` (marker position).
"""
gmap2df(gmap::Gmap) = map2df(gmap)


"""
    pmap2df(pmap::Pmap) -> DataFrame

Flatten a physical map (`Pmap`) into a `DataFrame`. This is a thin wrapper that
forwards to `map2df`.

# Arguments
- `pmap`: physical map object providing the fields
    - `chr`: chromosome identifiers,
    - `marker_name`: marker names, nested by chromosome,
    - `pos`: marker positions, nested by chromosome.

# Returns
- `DataFrame`: one row per marker, with the columns `Locus` (marker name),
  `Chr` (chromosome identifier) and `Pos` (marker position).
"""
pmap2df(pmap::Pmap) = map2df(pmap)
2 changes: 1 addition & 1 deletion src/utils/subset_utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ function subset_gmap(
# relative position
pos = [group.Pos for group in gdf]

return Gmap(chr, marker, pos)
return Gmap(chr, marker, pos, gmap.unit)
end


Expand Down
21 changes: 13 additions & 8 deletions test/plots_test.jl
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@

###########################
# Test Plotting functions #
###########################
@testset "Testing plotting function" begin
########
data_dir = joinpath(@__DIR__, "data/BXD/");
file = joinpath(data_dir, "bxd.json");


# Transforming data to an optimised and accessible data type
data = get_geneticstudydata(file);

Expand Down Expand Up @@ -51,24 +54,26 @@
nperms = 1000,
);


#########
# Plots #
#########


# QTL plots
p1 = plot_QTL(single_results_perms, gInfo, mbColname = "Pos");
p1a = plot_QTL(single_results_perms, gInfo, mbColname = "Pos");
p1b = plot_QTL(single_results_perms.lod, gInfo, mbColname = "Pos");

# Manhattan plots
p2 = plot_manhattan(single_results_perms, gInfo, mbColname = "Pos");
p2a = plot_manhattan(single_results_perms, gInfo, mbColname = "Pos");
p2b = plot_manhattan(single_results_perms.lod, gInfo, mbColname = "Pos");

@testset "QTL plot Tests" begin
@test isa(p1[1][4], Plots.Series)
@test isa(p1a[1][4], Plots.Series)
@test isa(p1b[1][3], Plots.Series)
end

@testset "Mahattan plot Tests" begin
@test isa(p2[1][2], Plots.Series)
@testset "Manhattan plot Tests" begin
@test isa(p2a[1][2], Plots.Series)
@test isa(p2b[1][1], Plots.Series)
end

end
12 changes: 12 additions & 0 deletions test/test_gf.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,18 @@ using Plots

data_dir = joinpath(@__DIR__, "data/BXD/");
file = joinpath(data_dir, "bxd.json");

# Transforming data to an optimised and accessible data type
data = get_geneticstudydata(file);

df_g = gmap2df(data.gmap);

df_p1 = pmap2df(data.pmap);

df_p2 = pmap2df(data.pmap);



jsondict = BigRiverQTL.parse_json(file)

# function get_geno(filename::String)
Expand Down
70 changes: 67 additions & 3 deletions test/utils_test.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
###############
# Get dataset #
###############

# get path
data_dir = joinpath(@__DIR__, "data/BXD/");
file = joinpath(data_dir, "bxd.json");

# Transforming data to an optimised and accessible data type
data = get_geneticstudydata(file);


########################
# Test encode_genotype #
Expand Down Expand Up @@ -64,9 +75,21 @@ end
end


##################
# Test selection #
##################
# Exercise `get_data_completecases` on the dataset loaded at the top of this file
@testset "Testing get_data_completecases function" begin
    # Keep complete cases only, then re-summarise missingness on the cleaned genotypes
    complete = get_data_completecases(data)
    missing_tbls = summary_missing(complete.geno; issorted = true)

    # No missing values should remain, row wise or column wise
    @test sum(missing_tbls[1].percentage) == 0
    @test sum(missing_tbls[2].percentage) == 0

    # The cleaned genotypes must still line up with the phenotypes and the genetic map
    @test complete.geno.sample_id == complete.pheno.sample_id
    @test complete.geno.marker_name == complete.gmap.marker_name
end

###############
# Test subset #
###############

# Test the `select_marker` function
@testset "Testing select_marker function" begin
Expand Down Expand Up @@ -94,3 +117,44 @@ end
@test geno_subset_2.sample_id == ["sample1", "sample2"] # Check sample selection
@test size(geno_subset_2.val[1], 1) == 2 # Check size of the geno matrix
end



###################
# Test converting #
###################

# Small genetic-map fixture: 3 markers on chromosome "1" and 4 on chromosome "2"
test_gmap = Gmap(
    string.(1:2),
    [["marker$i" for i in 1:3], ["marker$i" for i in 4:7]],
    [[1.58, 2.75, 3.59], [0.58, 3.67, 8.87, 12.03]],
    "cM",
)

# Check that `gmap2df` flattens the fixture into one row per marker
@testset "Testing gmap2df function" begin
    gmap_table = gmap2df(test_gmap)

    @test gmap_table isa DataFrame      # output type
    @test size(gmap_table) == (7, 3)    # 7 markers, 3 columns in the gmap table
end

# Small `Pmap` fixture mirroring the layout of the genetic-map fixture:
# 3 markers on chromosome "1" and 4 on chromosome "2"
test_pmap = Pmap(
    string.(1:2),
    [["marker$i" for i in 1:3], ["marker$i" for i in 4:7]],
    [[1.58, 2.75, 3.59], [0.58, 3.67, 8.87, 12.03]],
    "cM",
)

# Check that `pmap2df` flattens the fixture into one row per marker
@testset "Testing pmap2df function" begin
    pmap_table = pmap2df(test_pmap)

    @test pmap_table isa DataFrame      # output type
    @test size(pmap_table) == (7, 3)    # 7 markers, 3 columns in the pmap table
end

0 comments on commit 6a3cf7c

Please sign in to comment.