Merge pull request #59 from GregFa/main

fixed tests and added more tests
senresearch · Aug 24, 2024 · 6a3cf7c · 6a3cf7c
2 parents 6c20a6d + befb05f
commit 6a3cf7c
Show file tree

Hide file tree

Showing 10 changed files with 199 additions and 70 deletions.
diff --git a/src/BigRiverQTL.jl b/src/BigRiverQTL.jl
@@ -74,9 +74,6 @@ module BigRiverQTL
     #########
     # Plots #
     #########
-    include("plots/plots_helpers.jl")
-    export gmap2df, pmap2df
-
     include("plots/plots_qtl.jl")
     export plot_QTL 
 
@@ -89,6 +86,9 @@ module BigRiverQTL
     #########
     # Utils #
     #########
+    include("utils/convert_utils.jl")
+    export gmap2df, pmap2df
+
     include("utils/subset_utils.jl")
     export  select_sample, select_marker
 

diff --git a/src/io/export_to_type.jl b/src/io/export_to_type.jl
@@ -43,7 +43,25 @@ function get_gmap(filename::String)
 	# relative position
 	pos = [group.pos for group in gdf]
 
-	return Gmap(chr, marker, pos)
+		# unit
+		unit = ""
+		f = open(gmapfile, "r")
+
+		s = readline(f)
+		s_lower = lowercase(s)  # Convert the input string to lowercase
+		if occursin("mbp", s_lower)
+			unit = "Mbp"
+		elseif occursin("mb", s_lower)
+			unit = "Mb"
+		elseif occursin("cm", s_lower)
+			unit = "cM"
+		else
+			@warn "No unit detected!"
+		end
+
+		close(f)
+
+	return Gmap(chr, marker, pos,  unit)
 end
 
 

diff --git a/src/plots/plots_helpers.jl b/src/plots/plots_helpers.jl
diff --git a/src/plots/plots_qtl.jl b/src/plots/plots_qtl.jl
@@ -35,6 +35,7 @@ function plot_QTL(vLOD::Vector{<:AbstractFloat}, dfgInfo::Gmap; kwargs...)
     return plot_QTL(vLOD, gmap2df(dfgInfo); kwargs...)
 end
 
+
 function plot_QTL(scanresult::NamedTuple, dfgInfo::Gmap; kwargs...)
     return plot_QTL(scanresult, gmap2df(dfgInfo); kwargs...)
 end

diff --git a/src/struct/datastructure.jl b/src/struct/datastructure.jl
@@ -5,11 +5,13 @@
 * `chr` contains chromosomes names.
 * `marker_name` contains marker name's names for each chromosome.
 * `pos` is a vector of vector  containing relative position of marker_name in each chromosome.
+* `unit` contains unit for the chromosome length.
 """
 struct Gmap
 	chr::Vector{AbstractString}
 	marker_name::Vector{Vector{AbstractString}}
 	pos::Vector{Vector{Float64}}
+	unit::AbstractString
 end
 
 

diff --git a/src/utils/convert_utils.jl b/src/utils/convert_utils.jl
@@ -0,0 +1,81 @@
+"""
+    map2df(gmap::Union{Gmap, Pmap}) -> DataFrame
+
+Convert a genetic or physical map object to a DataFrame.
+
+# Arguments
+* `gmap`: A genetic map (`Gmap`) or physical map (`Pmap`) object that contains mapping information. 
+  The object is expected to have at least three fields:
+  * `chr`: An array where each element is a chromosome identifier corresponding to markers.
+  * `marker_name`: A nested array where each sub-array contains the names of markers for a corresponding chromosome.
+  * `pos`: A nested array similar to `marker_name`, but containing the positions of markers.
+
+# Returns
+* `DataFrame`: A DataFrame with three columns:
+  * `Locus`: A flat list of all marker names.
+  * `Chr`: A list of chromosome identifiers corresponding to each marker.
+  * `Pos`: A flat list of all marker positions.
+
+"""
+function map2df(gmap::Union{Gmap, Pmap})
+    start=0
+    Chr =repeat(["0"],sum(length.(gmap.marker_name)))
+
+    for i in eachindex(gmap.chr)
+        l_i=length(gmap.marker_name[i])
+        Chr[start+1:l_i+start] .= gmap.chr[i]
+        start=start+l_i
+    end
+    df=DataFrame(
+        Locus = reduce(vcat,gmap.marker_name),
+        Chr = Chr,
+        Pos= reduce(vcat,gmap.pos)
+    )
+    return df
+end
+
+
+"""
+    gmap2df(gmap::Gmap) -> DataFrame
+
+Convert a genetic map object (`Gmap`) into a DataFrame.
+
+# Arguments
+- `gmap`: A genetic map object containing mapping information. 
+  This object should conform to the expected structure, which includes:
+  - `chr`: An array of chromosome identifiers.
+  - `marker_name`: A nested array of marker names organized by chromosome.
+  - `pos`: A nested array of marker positions, also organized by chromosome.
+
+# Returns
+- `DataFrame`: A DataFrame representing the genetic map data, with columns 
+for marker names (`Locus`), chromosome identifiers (`Chr`), and 
+marker positions (`Pos`).
+
+"""
+function gmap2df(gmap::Gmap)
+    return map2df(gmap)
+end
+
+
+"""
+    pmap2df(pmap::Pmap) -> DataFrame
+
+Convert a physical map object (`Pmap`) into a DataFrame.
+
+# Arguments
+- `pmap`: A physical map object containing mapping information. 
+  This object should conform to the expected structure, which includes:
+  - `chr`: An array of chromosome identifiers.
+  - `marker_name`: A nested array of marker names organized by chromosome.
+  - `pos`: A nested array of marker positions, also organized by chromosome.
+
+# Returns
+- `DataFrame`: A DataFrame representing the physical map data, with columns 
+for marker names (`Locus`), chromosome identifiers (`Chr`), and 
+marker positions (`Pos`).
+
+"""
+function pmap2df(pmap::Pmap)
+    return map2df(pmap)
+end
diff --git a/src/utils/subset_utils.jl b/src/utils/subset_utils.jl
@@ -158,7 +158,7 @@ function subset_gmap(
 	# relative position
 	pos = [group.Pos for group in gdf]
 
-	return Gmap(chr, marker, pos)
+	return Gmap(chr, marker, pos, gmap.unit)
 end
 
 

diff --git a/test/plots_test.jl b/test/plots_test.jl
@@ -1,9 +1,12 @@
+
+###########################
+# Test Plotting functions #
+###########################
 @testset "Testing plotting function" begin
 	########
 	data_dir = joinpath(@__DIR__, "data/BXD/");
 	file = joinpath(data_dir, "bxd.json");
 
-
 	# Transforming data to a optimised and accessible data type
 	data = get_geneticstudydata(file);
 
@@ -51,24 +54,26 @@
 		nperms = 1000,
 	);
 
-
 	#########
 	# Plots #
 	#########
 
-
 	# QTL plots
-	p1 = plot_QTL(single_results_perms, gInfo, mbColname = "Pos");
+	p1a = plot_QTL(single_results_perms, gInfo, mbColname = "Pos");
+	p1b = plot_QTL(single_results_perms.lod, gInfo, mbColname = "Pos");
 
 	# Manhattan plots
-	p2 = plot_manhattan(single_results_perms, gInfo, mbColname = "Pos");
+	p2a = plot_manhattan(single_results_perms, gInfo, mbColname = "Pos");
+	p2b = plot_manhattan(single_results_perms.lod, gInfo, mbColname = "Pos");
 
 	@testset "QTL plot Tests" begin
-		@test isa(p1[1][4], Plots.Series)
+		@test isa(p1a[1][4], Plots.Series)
+		@test isa(p1b[1][3], Plots.Series)
 	end
 
-	@testset "Mahattan plot Tests" begin
-		@test isa(p2[1][2], Plots.Series)
+	@testset "Manhattan plot Tests" begin
+		@test isa(p2a[1][2], Plots.Series)
+		@test isa(p2b[1][1], Plots.Series)
 	end
 
 end
diff --git a/test/test_gf.jl b/test/test_gf.jl
@@ -13,6 +13,18 @@ using Plots
 
 data_dir = joinpath(@__DIR__, "data/BXD/");
 file = joinpath(data_dir, "bxd.json");
+
+# Transforming data to a optimised and accessible data type
+data = get_geneticstudydata(file);
+
+df_g = gmap2df(data.gmap);
+
+df_p1 = pmap2df(data.pmap);
+
+df_p2 = pmap2df(data.pmap);
+
+
+
 jsondict = BigRiverQTL.parse_json(file)
 
 # function get_geno(filename::String)

diff --git a/test/utils_test.jl b/test/utils_test.jl
@@ -1,3 +1,14 @@
+###############
+# Get dataset #
+###############
+
+# get path
+data_dir = joinpath(@__DIR__, "data/BXD/");
+file = joinpath(data_dir, "bxd.json");
+
+# Transforming data to a optimised and accessible data type
+data = get_geneticstudydata(file);
+
 
 ########################
 # Test encode_genotype #
@@ -64,9 +75,21 @@ end
 end
 
 
-##################
-# Test selection #
-##################
+# Test the `get_data_completecases` function
+@testset "Testing get_data_completecases function" begin
+    # Remove the  missing data
+    data_no_missing = get_data_completecases(data);
+    tbl_missing = summary_missing(data_no_missing.geno, issorted = true);
+
+    @test sum(tbl_missing[1].percentage) == 0 # Check no missing row wise
+    @test sum(tbl_missing[2].percentage) == 0 # Check no missing column wise
+    @test data_no_missing.geno.sample_id == data_no_missing.pheno.sample_id # Ensure samples align  with pheno
+    @test data_no_missing.geno.marker_name == data_no_missing.gmap.marker_name # Ensure loci align  with gmap
+end
+
+###############
+# Test subset #
+###############
 
 # Test the `select_marker` function
 @testset "Testing select_marker function" begin
@@ -94,3 +117,44 @@ end
     @test geno_subset_2.sample_id == ["sample1", "sample2"] # Check sample selection
     @test size(geno_subset_2.val[1], 1) == 2 # Check size of the geno matrix
 end
+
+
+
+###################
+# Test converting #
+###################
+
+test_gmap = Gmap(
+    ["1", "2"],
+    [["marker1", "marker2", "marker3"], 
+     ["marker4", "marker5", "marker6", "marker7"]],
+     [[1.58, 2.75, 3.59], 
+     [0.58, 3.67, 8.87, 12.03]],
+     "cM"
+);
+
+# Test the `gmap2df` function
+@testset "Testing gmap2df function" begin
+    df_g = gmap2df(test_gmap);
+
+    @test isa(df_g, DataFrame) # Check type of output
+    @test size(df_g) == (7, 3) # Check size of the gmap dataframe
+end
+
+test_pmap = Pmap(
+    ["1", "2"],
+    [["marker1", "marker2", "marker3"], 
+     ["marker4", "marker5", "marker6", "marker7"]],
+     [[1.58, 2.75, 3.59], 
+     [0.58, 3.67, 8.87, 12.03]],
+     "cM"
+);
+
+
+# Test the `pmap2df` function
+@testset "Testing pmap2df function" begin
+    df_p = pmap2df(test_pmap);
+
+    @test isa(df_p, DataFrame) # Check type of output
+    @test size(df_p) == (7, 3) # Check size of the pmap dataframe
+end
-Original file line number
+Diff line change
@@ Expand Up @@
         return plot_QTL(vLOD, gmap2df(dfgInfo); kwargs...)
     end
     function plot_QTL(scanresult::NamedTuple, dfgInfo::Gmap; kwargs...)
         return plot_QTL(scanresult, gmap2df(dfgInfo); kwargs...)
     end
@@ Expand Down @@