From ead29a5b9c6a871b8b3d6e4c3f5e77bcdf7f6c8d Mon Sep 17 00:00:00 2001 From: HLWeil Date: Wed, 29 May 2024 21:31:22 +0200 Subject: [PATCH] add comment column header and process converter --- src/Core/Conversion.fs | 35 +++++++++++++++---- src/Core/Helper/Regex.fs | 12 +++++++ src/Core/Table/CompositeHeader.fs | 15 +++++++- src/Json/Table/CompositeHeader.fs | 3 ++ .../AnnotationTable/CompositeHeader.fs | 7 ++++ tests/Core/ArcJsonConversion.Tests.fs | 18 ++++++++++ tests/Core/CompositeHeader.Tests.fs | 3 +- 7 files changed, 85 insertions(+), 8 deletions(-) diff --git a/src/Core/Conversion.fs b/src/Core/Conversion.fs index 929ebb23..23907298 100644 --- a/src/Core/Conversion.fs +++ b/src/Core/Conversion.fs @@ -317,6 +317,14 @@ module ProcessParsing = |> Some | _ -> None + let tryGetCommentGetter (generalI : int) (header : CompositeHeader) = + match header with + | CompositeHeader.Comment c -> + fun (matrix : System.Collections.Generic.Dictionary<(int * int),CompositeCell>) i -> + Comment(c,matrix.[generalI,i].AsFreeText) + |> Some + | _ -> None + /// Given the header sequence of an ArcTable, returns a function for parsing each row of the table to a process let getProcessGetter (processNameRoot : string) (headers : CompositeHeader seq) = @@ -366,6 +374,11 @@ module ProcessParsing = headers |> Seq.tryPick (fun (generalI,header) -> tryGetProtocolVersionGetter generalI header) + let commentGetters = + headers + |> Seq.choose (fun (generalI,header) -> tryGetCommentGetter generalI header) + |> Seq.toList + // This is a little more complex, as data and material objects can't contain characteristics. So in the case where the input of the table is a data object but characteristics exist. An additional sample object with the same name is created to contain the characteristics. 
let inputGetter = match headers |> Seq.tryPick (fun (generalI,header) -> tryGetInputGetter generalI header) with @@ -418,6 +431,8 @@ module ProcessParsing = let paramvalues = parameterValueGetters |> List.map (fun f -> f matrix i) |> Option.fromValueWithDefault [] let parameters = paramvalues |> Option.map (List.map (fun pv -> pv.Category.Value)) + let comments = commentGetters |> List.map (fun f -> f matrix i) |> Option.fromValueWithDefault [] + let protocol : Protocol option = Protocol.make None @@ -462,7 +477,7 @@ module ProcessParsing = None (Some inputs) (Some outputs) - None + comments /// Groups processes by their name, or by the name of the protocol they execute /// @@ -492,20 +507,20 @@ module ProcessParsing = processes |> List.groupBy (fun x -> if x.Name.IsSome && (x.Name.Value |> Process.decomposeName |> snd).IsSome then - (x.Name.Value |> Process.decomposeName |> fst), HashCodes.boxHashOption x.ExecutesProtocol, x.ParameterValues |> Option.map HashCodes.boxHashSeq + (x.Name.Value |> Process.decomposeName |> fst), HashCodes.boxHashOption x.ExecutesProtocol, x.ParameterValues |> Option.map HashCodes.boxHashSeq, x.Comments |> Option.map HashCodes.boxHashSeq elif x.ExecutesProtocol.IsSome && x.ExecutesProtocol.Value.Name.IsSome then - x.ExecutesProtocol.Value.Name.Value, HashCodes.boxHashOption x.ExecutesProtocol, x.ParameterValues |> Option.map HashCodes.boxHashSeq + x.ExecutesProtocol.Value.Name.Value, HashCodes.boxHashOption x.ExecutesProtocol, x.ParameterValues |> Option.map HashCodes.boxHashSeq, x.Comments |> Option.map HashCodes.boxHashSeq else - Identifier.createMissingIdentifier(), HashCodes.boxHashOption x.ExecutesProtocol, x.ParameterValues |> Option.map HashCodes.boxHashSeq + Identifier.createMissingIdentifier(), HashCodes.boxHashOption x.ExecutesProtocol, x.ParameterValues |> Option.map HashCodes.boxHashSeq, x.Comments |> Option.map HashCodes.boxHashSeq ) |> fun l -> l - |> List.mapi (fun i ((n,_,_),processes) -> + |> List.mapi (fun i 
((n,_,_,_),processes) -> let pVs = processes.[0].ParameterValues let inputs = processes |> List.collect (fun p -> p.Inputs |> Option.defaultValue []) |> Option.fromValueWithDefault [] let outputs = processes |> List.collect (fun p -> p.Outputs |> Option.defaultValue []) |> Option.fromValueWithDefault [] let n = if l.Length > 1 then Process.composeName n i else n - Process.create(Name = n,?ExecutesProtocol = processes.[0].ExecutesProtocol,?ParameterValues = pVs,?Inputs = inputs,?Outputs = outputs) + Process.create(Name = n,?ExecutesProtocol = processes.[0].ExecutesProtocol,?ParameterValues = pVs,?Inputs = inputs,?Outputs = outputs,?Comments = processes.[0].Comments) ) @@ -530,6 +545,13 @@ module ProcessParsing = if prot.Version.IsSome then CompositeHeader.ProtocolVersion, CompositeCell.FreeText prot.Version.Value ] | None -> [] + let comments = + p.Comments + |> Option.defaultValue [] + |> List.map (fun c -> + CompositeHeader.Comment (Option.defaultValue "" c.Name), + CompositeCell.FreeText (Option.defaultValue "" c.Value) + ) // zip the inputs and outpus so they are aligned as rows p.Outputs |> Option.defaultValue [] |> List.zip (p.Inputs |> Option.defaultValue []) @@ -565,6 +587,7 @@ module ProcessParsing = yield JsonTypes.decomposeProcessInput inputForType yield! protVals yield! vals + yield! comments yield JsonTypes.decomposeProcessOutput outputForType ] ) diff --git a/src/Core/Helper/Regex.fs b/src/Core/Helper/Regex.fs index c2018283..bb6c3830 100644 --- a/src/Core/Helper/Regex.fs +++ b/src/Core/Helper/Regex.fs @@ -30,6 +30,9 @@ module Pattern = [] let iotype = "iotype" + [] + let commentKey = "commentKey" + /// This pattern is only used to remove any leftover #id attributes from previous Swate version. /// `"Parameter [biological replicate#2]"` This #id is deprecated but the pattern can still be used to remove any from files. /// Was deprecated before 2023. 
@@ -95,6 +98,9 @@ module Pattern = /// This pattern is used to match Output column and capture the IOType as `iotype` group. let OutputPattern = $@"Output\s\[(?<{MatchGroups.iotype}>.+)\]" + /// This pattern is used to match Comment column and capture the comment key as `commentKey` group. + let CommentPattern = $@"Comment\s\[(?<{MatchGroups.commentKey}>.+)\]" + /// This pattern matches any column header starting with some text, followed by one whitespace and a term name inside squared brackets. /// /// Captures column type as named group: "termcolumntype" (e.g. Component, Characteristic .. ). @@ -263,6 +269,12 @@ module ActivePatterns = r.Groups.["number"].Value |> int |> Some | _ -> None + let (|Comment|_|) input = + match input with + | Regex Pattern.CommentPattern r -> + Some r.Groups.[Pattern.MatchGroups.commentKey].Value + | _ -> None + open Pattern open ActivePatterns open System diff --git a/src/Core/Table/CompositeHeader.fs b/src/Core/Table/CompositeHeader.fs index 97bea594..67c53790 100644 --- a/src/Core/Table/CompositeHeader.fs +++ b/src/Core/Table/CompositeHeader.fs @@ -151,6 +151,7 @@ type CompositeHeader = | Output of IOType // single - fallback | FreeText of string + | Comment of string with @@ -179,7 +180,7 @@ type CompositeHeader = // iotype as input | 11 | 12 -> 2 // string as input - | 13 -> 3 + | 13 | 14 -> 3 | anyElse -> failwithf "Cannot assign input `Tag` (%i) to `CompositeHeader`" anyElse override this.ToString() = @@ -197,6 +198,7 @@ type CompositeHeader = | Date -> "Date" | Input io -> io.asInput | Output io -> io.asOutput + | Comment key -> $"Comment [{key}]" | FreeText str -> str /// If the column is a term column, returns the term as `OntologyAnnotation`. Otherwise returns an `OntologyAnnotation` with only the name. 
@@ -215,6 +217,7 @@ type CompositeHeader = | Date -> OntologyAnnotation.create (this.ToString()) // use owl ontology in the future | Input _ -> OntologyAnnotation.create (this.ToString()) // use owl ontology in the future | Output _ -> OntologyAnnotation.create (this.ToString()) // use owl ontology in the future + | Comment _ -> OntologyAnnotation.create (this.ToString()) // use owl ontology in the future | FreeText _ -> OntologyAnnotation.create (this.ToString()) // use owl ontology in the future // owl ontology: https://github.com/nfdi4plants/ARC_ontology/blob/main/ARC_v2.0.owl @@ -231,6 +234,8 @@ type CompositeHeader = | Regex.ActivePatterns.Regex Regex.Pattern.OutputPattern r -> let iotype = r.Groups.[Regex.Pattern.MatchGroups.iotype].Value Output <| IOType.ofString (iotype) + | Regex.ActivePatterns.Regex Regex.Pattern.CommentPattern r -> + Comment r.Groups.[Regex.Pattern.MatchGroups.commentKey].Value // Is term column | Regex.ActivePatterns.TermColumn r -> match r.TermColumnType with @@ -332,6 +337,7 @@ type CompositeHeader = match this with | FreeText _ | Input _ | Output _ + | Comment _ | ProtocolREF | ProtocolDescription | ProtocolUri | ProtocolVersion | Performer | Date -> true | anythingElse -> false @@ -414,6 +420,11 @@ type CompositeHeader = | Date -> true | anythingElse -> false + member this.isComment = + match this with + | Comment _ -> true + | anythingElse -> false + member this.isFreeText = match this with | FreeText _ -> true @@ -475,6 +486,8 @@ type CompositeHeader = "Only one output column per table. E.g. experimental samples or files." 
| U2.Case1 (FreeText _) | U2.Case2 "FreeText" -> "Placeholder" + | U2.Case1 (Comment _) | U2.Case2 "Comment" -> + "Comment" | _ -> failwith $"Unable to parse combination to existing CompositeHeader: `{header}`" #if FABLE_COMPILER diff --git a/src/Json/Table/CompositeHeader.fs b/src/Json/Table/CompositeHeader.fs index 3b285ac2..36009f81 100644 --- a/src/Json/Table/CompositeHeader.fs +++ b/src/Json/Table/CompositeHeader.fs @@ -14,6 +14,7 @@ module CompositeHeader = let t, v = match ch with | CompositeHeader.FreeText s -> s, [] + | CompositeHeader.Comment c -> "Comment", [Encode.string c] | CompositeHeader.Parameter oa -> "Parameter", [oaToJsonString oa] | CompositeHeader.Factor oa -> "Factor", [oaToJsonString oa] | CompositeHeader.Characteristic oa -> "Characteristic", [oaToJsonString oa] @@ -37,6 +38,7 @@ module CompositeHeader = let headerType = get.Required.Field HeaderType Decode.string let oa() = get.Required.Field HeaderValues (Decode.index 0 OntologyAnnotation.decoder) let io() = get.Required.Field HeaderValues (Decode.index 0 IOType.decoder) + let c() = get.Required.Field HeaderValues (Decode.index 0 Decode.string) (* the encoder writes the comment key as a one-element array, so read index 0 like oa() and io() *) match headerType with | "Characteristic" -> oa() |> CompositeHeader.Characteristic | "Parameter" -> oa() |> CompositeHeader.Parameter @@ -51,6 +53,7 @@ module CompositeHeader = | "ProtocolVersion" -> CompositeHeader.ProtocolVersion | "Performer" -> CompositeHeader.Performer | "Date" -> CompositeHeader.Date + | "Comment" -> c() |> CompositeHeader.Comment | anyelse -> CompositeHeader.FreeText anyelse ) diff --git a/src/Spreadsheet/AnnotationTable/CompositeHeader.fs b/src/Spreadsheet/AnnotationTable/CompositeHeader.fs index 31921c68..f0c7aa2d 100644 --- a/src/Spreadsheet/AnnotationTable/CompositeHeader.fs +++ b/src/Spreadsheet/AnnotationTable/CompositeHeader.fs @@ -98,6 +98,12 @@ module ActivePattern = |> Some | _ -> None + let (|Comment|_|) (cells : FsCell list) = + let cellValues = cells |> List.map (fun c -> c.ValueAsString()) + match cellValues with + | [Comment 
key] -> Some (CompositeHeader.Comment key, CompositeCell.freeTextFromFsCells) + | _ -> None + let (|ProtocolType|_|) (cells : FsCell list) = let parser s = if s = "Protocol Type" then Some s else None let header _ = CompositeHeader.ProtocolType @@ -136,6 +142,7 @@ let fromFsCells (cells : list) : CompositeHeader*(FsCell list -> Composi | Output o -> o | ProtocolType pt -> pt | ProtocolHeader ph -> ph + | Comment c -> c | FreeText ft -> ft | _ -> failwithf "Could not parse header group %O" cells diff --git a/tests/Core/ArcJsonConversion.Tests.fs b/tests/Core/ArcJsonConversion.Tests.fs index 4a731a36..4c337fb4 100644 --- a/tests/Core/ArcJsonConversion.Tests.fs +++ b/tests/Core/ArcJsonConversion.Tests.fs @@ -410,6 +410,24 @@ let private tests_ArcTableProcess = let t' = ArcTable.fromProcesses tableName1 processes Expect.arcTableEqual t' t "Table should be equal" ) + testCase "SingleRowIOAndComment GetAndFromProcesses" (fun () -> + let t = ArcTable.init(tableName1) + let commentKey = "MyCommentKey" + let commentValue = "MyCommentValue" + t.AddColumn(CompositeHeader.Input(IOType.Source),[|CompositeCell.createFreeText "Source"|]) + t.AddColumn(CompositeHeader.Comment(commentKey), [|CompositeCell.createFreeText commentValue|]) + t.AddColumn(CompositeHeader.Output(IOType.Sample),[|CompositeCell.createFreeText "Sample"|]) + let processes = t.GetProcesses() + Expect.hasLength processes 1 "" + let comments = Expect.wantSome processes.[0].Comments "" + Expect.hasLength comments 1 "" + let comment = comments.[0] + Expect.equal comment (Comment(commentKey,commentValue)) "" + let table = ArcTable.fromProcesses tableName1 processes + let expectedTable = t + Expect.arcTableEqual table expectedTable "Table should be equal" + ) + ] let private tests_ArcTablesProcessSeq = diff --git a/tests/Core/CompositeHeader.Tests.fs b/tests/Core/CompositeHeader.Tests.fs index 25437a5b..f29637dd 100644 --- a/tests/Core/CompositeHeader.Tests.fs +++ b/tests/Core/CompositeHeader.Tests.fs @@ -51,7 +51,7 
@@ let private tests_compositeHeader = testList "CompositeHeader" [ testCase "Cases" <| fun _ -> let count = CompositeHeader.Cases.Length - Expect.equal count 14 "count" + Expect.equal count 15 "count" testCase "getExplanation" <| fun _ -> let cases = CompositeHeader.Cases |> Array.map snd for case in cases do @@ -318,6 +318,7 @@ let tests_ToTerm = testList "ToTerm" [ CompositeHeader.Input IOType.Source CompositeHeader.Output IOType.Sample CompositeHeader.FreeText "Hello World" + CompositeHeader.Comment "MyComment" ] |> List.distinct testCase "Ensure all headers listed" <| fun _ ->