Skip to content

Commit

Permalink
add comment column header and process converter
Browse files Browse the repository at this point in the history
  • Loading branch information
HLWeil committed May 29, 2024
1 parent 7da50e4 commit ead29a5
Show file tree
Hide file tree
Showing 7 changed files with 85 additions and 8 deletions.
35 changes: 29 additions & 6 deletions src/Core/Conversion.fs
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,14 @@ module ProcessParsing =
|> Some
| _ -> None

/// If the given header is a `Comment` column, returns a getter that builds the `Comment` for one row of the table. Otherwise returns `None`.
///
/// The getter looks up the cell at column `generalI` and the given row index in the cell matrix.
/// The comment key comes from the header, the comment value from the cell's `AsFreeText`.
let tryGetCommentGetter (generalI : int) (header : CompositeHeader) =
    match header with
    | CompositeHeader.Comment key ->
        let getComment (matrix : System.Collections.Generic.Dictionary<(int * int),CompositeCell>) rowIndex =
            let cell = matrix.[generalI,rowIndex]
            Comment(key,cell.AsFreeText)
        Some getComment
    | _ -> None

/// Given the header sequence of an ArcTable, returns a function for parsing each row of the table to a process
let getProcessGetter (processNameRoot : string) (headers : CompositeHeader seq) =

Expand Down Expand Up @@ -366,6 +374,11 @@ module ProcessParsing =
headers
|> Seq.tryPick (fun (generalI,header) -> tryGetProtocolVersionGetter generalI header)

// One getter per `Comment` column, kept in header order. Headers of any other kind are dropped.
let commentGetters =
    [
        for (generalI,header) in headers do
            match tryGetCommentGetter generalI header with
            | Some getter -> yield getter
            | None -> ()
    ]

// This is a little more complex, as data and material objects can't contain characteristics. So in the case where the input of the table is a data object but characteristics exist, an additional sample object with the same name is created to contain the characteristics.
let inputGetter =
match headers |> Seq.tryPick (fun (generalI,header) -> tryGetInputGetter generalI header) with
Expand Down Expand Up @@ -418,6 +431,8 @@ module ProcessParsing =
let paramvalues = parameterValueGetters |> List.map (fun f -> f matrix i) |> Option.fromValueWithDefault []
let parameters = paramvalues |> Option.map (List.map (fun pv -> pv.Category.Value))

let comments = commentGetters |> List.map (fun f -> f matrix i) |> Option.fromValueWithDefault []

let protocol : Protocol option =
Protocol.make
None
Expand Down Expand Up @@ -462,7 +477,7 @@ module ProcessParsing =
None
(Some inputs)
(Some outputs)
None
comments

/// Groups processes by their name, or by the name of the protocol they execute
///
Expand Down Expand Up @@ -492,20 +507,20 @@ module ProcessParsing =
processes
|> List.groupBy (fun x ->
if x.Name.IsSome && (x.Name.Value |> Process.decomposeName |> snd).IsSome then
(x.Name.Value |> Process.decomposeName |> fst), HashCodes.boxHashOption x.ExecutesProtocol, x.ParameterValues |> Option.map HashCodes.boxHashSeq
(x.Name.Value |> Process.decomposeName |> fst), HashCodes.boxHashOption x.ExecutesProtocol, x.ParameterValues |> Option.map HashCodes.boxHashSeq, x.Comments |> Option.map HashCodes.boxHashSeq
elif x.ExecutesProtocol.IsSome && x.ExecutesProtocol.Value.Name.IsSome then
x.ExecutesProtocol.Value.Name.Value, HashCodes.boxHashOption x.ExecutesProtocol, x.ParameterValues |> Option.map HashCodes.boxHashSeq
x.ExecutesProtocol.Value.Name.Value, HashCodes.boxHashOption x.ExecutesProtocol, x.ParameterValues |> Option.map HashCodes.boxHashSeq, x.Comments |> Option.map HashCodes.boxHashSeq
else
Identifier.createMissingIdentifier(), HashCodes.boxHashOption x.ExecutesProtocol, x.ParameterValues |> Option.map HashCodes.boxHashSeq
Identifier.createMissingIdentifier(), HashCodes.boxHashOption x.ExecutesProtocol, x.ParameterValues |> Option.map HashCodes.boxHashSeq, x.Comments |> Option.map HashCodes.boxHashSeq
)
|> fun l ->
l
|> List.mapi (fun i ((n,_,_),processes) ->
|> List.mapi (fun i ((n,_,_,_),processes) ->
let pVs = processes.[0].ParameterValues
let inputs = processes |> List.collect (fun p -> p.Inputs |> Option.defaultValue []) |> Option.fromValueWithDefault []
let outputs = processes |> List.collect (fun p -> p.Outputs |> Option.defaultValue []) |> Option.fromValueWithDefault []
let n = if l.Length > 1 then Process.composeName n i else n
Process.create(Name = n,?ExecutesProtocol = processes.[0].ExecutesProtocol,?ParameterValues = pVs,?Inputs = inputs,?Outputs = outputs)
Process.create(Name = n,?ExecutesProtocol = processes.[0].ExecutesProtocol,?ParameterValues = pVs,?Inputs = inputs,?Outputs = outputs,?Comments = processes.[0].Comments)
)


Expand All @@ -530,6 +545,13 @@ module ProcessParsing =
if prot.Version.IsSome then CompositeHeader.ProtocolVersion, CompositeCell.FreeText prot.Version.Value
]
| None -> []
// One (header, cell) pair per comment of the process. A missing name or value is written as an empty string.
let comments =
    [
        for c in p.Comments |> Option.defaultValue [] do
            let key = c.Name |> Option.defaultValue ""
            let value = c.Value |> Option.defaultValue ""
            yield (CompositeHeader.Comment key, CompositeCell.FreeText value)
    ]
// zip the inputs and outputs so they are aligned as rows
p.Outputs |> Option.defaultValue []
|> List.zip (p.Inputs |> Option.defaultValue [])
Expand Down Expand Up @@ -565,6 +587,7 @@ module ProcessParsing =
yield JsonTypes.decomposeProcessInput inputForType
yield! protVals
yield! vals
yield! comments
yield JsonTypes.decomposeProcessOutput outputForType
]
)
Expand Down
12 changes: 12 additions & 0 deletions src/Core/Helper/Regex.fs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ module Pattern =
[<Literal>]
let iotype = "iotype"

/// Name of the regex capture group that holds the key of a comment column header.
[<Literal>]
let commentKey = "commentKey"

/// This pattern is only used to remove any leftover #id attributes from previous Swate version.
/// `"Parameter [biological replicate#2]"` This #id is deprecated but the pattern can still be used to remove any from files.
/// Was deprecated before 2023.
Expand Down Expand Up @@ -95,6 +98,9 @@ module Pattern =
/// This pattern is used to match Output column and capture the IOType as `iotype` group.
let OutputPattern = $@"Output\s\[(?<{MatchGroups.iotype}>.+)\]"

/// This pattern is used to match a Comment column header and capture the comment key as the `commentKey` group.
///
/// Example: `"Comment [MyKey]"` captures `MyKey`.
///
/// The pattern itself is not anchored, and the key must contain at least one character (`.+`).
let CommentPattern = $@"Comment\s\[(?<{MatchGroups.commentKey}>.+)\]"

/// This pattern matches any column header starting with some text, followed by one whitespace and a term name inside squared brackets.
///
/// Captures column type as named group: "termcolumntype" (e.g. Component, Characteristic .. ).
Expand Down Expand Up @@ -263,6 +269,12 @@ module ActivePatterns =
r.Groups.["number"].Value |> int |> Some
| _ -> None

/// Matches a comment column header against `Pattern.CommentPattern`.
///
/// Returns the captured comment key on a match, otherwise `None`.
let (|Comment|_|) input =
    match input with
    | Regex Pattern.CommentPattern m ->
        m.Groups.[Pattern.MatchGroups.commentKey].Value
        |> Some
    | _ -> None

open Pattern
open ActivePatterns
open System
Expand Down
15 changes: 14 additions & 1 deletion src/Core/Table/CompositeHeader.fs
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ type CompositeHeader =
| Output of IOType
// single - fallback
| FreeText of string
| Comment of string

with

Expand Down Expand Up @@ -179,7 +180,7 @@ type CompositeHeader =
// iotype as input
| 11 | 12 -> 2
// string as input
| 13 -> 3
| 13 | 14 -> 3
| anyElse -> failwithf "Cannot assign input `Tag` (%i) to `CompositeHeader`" anyElse

override this.ToString() =
Expand All @@ -197,6 +198,7 @@ type CompositeHeader =
| Date -> "Date"
| Input io -> io.asInput
| Output io -> io.asOutput
| Comment key -> $"Comment [{key}]"
| FreeText str -> str

/// If the column is a term column, returns the term as `OntologyAnnotation`. Otherwise returns an `OntologyAnnotation` with only the name.
Expand All @@ -215,6 +217,7 @@ type CompositeHeader =
| Date -> OntologyAnnotation.create (this.ToString()) // use owl ontology in the future
| Input _ -> OntologyAnnotation.create (this.ToString()) // use owl ontology in the future
| Output _ -> OntologyAnnotation.create (this.ToString()) // use owl ontology in the future
| Comment _ -> OntologyAnnotation.create (this.ToString()) // use owl ontology in the future
| FreeText _ -> OntologyAnnotation.create (this.ToString()) // use owl ontology in the future
// owl ontology: https://github.com/nfdi4plants/ARC_ontology/blob/main/ARC_v2.0.owl

Expand All @@ -231,6 +234,8 @@ type CompositeHeader =
| Regex.ActivePatterns.Regex Regex.Pattern.OutputPattern r ->
let iotype = r.Groups.[Regex.Pattern.MatchGroups.iotype].Value
Output <| IOType.ofString (iotype)
| Regex.ActivePatterns.Regex Regex.Pattern.CommentPattern r ->
Comment r.Groups.[Regex.Pattern.MatchGroups.commentKey].Value
// Is term column
| Regex.ActivePatterns.TermColumn r ->
match r.TermColumnType with
Expand Down Expand Up @@ -332,6 +337,7 @@ type CompositeHeader =
match this with
| FreeText _
| Input _ | Output _
| Comment _
| ProtocolREF | ProtocolDescription | ProtocolUri | ProtocolVersion | Performer | Date -> true
| anythingElse -> false

Expand Down Expand Up @@ -414,6 +420,11 @@ type CompositeHeader =
| Date -> true
| anythingElse -> false

/// Is true if this header is a `Comment` column, false for every other case.
member this.isComment =
    match this with
    | Comment _ -> true
    | _ -> false

member this.isFreeText =
match this with
| FreeText _ -> true
Expand Down Expand Up @@ -475,6 +486,8 @@ type CompositeHeader =
"Only one output column per table. E.g. experimental samples or files."
| U2.Case1 (FreeText _) | U2.Case2 "FreeText" ->
"Placeholder"
| U2.Case1 (Comment _) | U2.Case2 "Comment" ->
"Comment"
| _ -> failwith $"Unable to parse combination to existing CompositeHeader: `{header}`"

#if FABLE_COMPILER
Expand Down
3 changes: 3 additions & 0 deletions src/Json/Table/CompositeHeader.fs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ module CompositeHeader =
let t, v =
match ch with
| CompositeHeader.FreeText s -> s, []
| CompositeHeader.Comment c -> "Comment", [Encode.string c]
| CompositeHeader.Parameter oa -> "Parameter", [oaToJsonString oa]
| CompositeHeader.Factor oa -> "Factor", [oaToJsonString oa]
| CompositeHeader.Characteristic oa -> "Characteristic", [oaToJsonString oa]
Expand All @@ -37,6 +38,7 @@ module CompositeHeader =
let headerType = get.Required.Field HeaderType Decode.string
let oa() = get.Required.Field HeaderValues (Decode.index 0 OntologyAnnotation.decoder)
let io() = get.Required.Field HeaderValues (Decode.index 0 IOType.decoder)
// The encoder writes the comment key as the single element of the header values array,
// so it has to be read from index 0 (like `oa()` and `io()`), not as a bare string.
let c() = get.Required.Field HeaderValues (Decode.index 0 Decode.string)
match headerType with
| "Characteristic" -> oa() |> CompositeHeader.Characteristic
| "Parameter" -> oa() |> CompositeHeader.Parameter
Expand All @@ -51,6 +53,7 @@ module CompositeHeader =
| "ProtocolVersion" -> CompositeHeader.ProtocolVersion
| "Performer" -> CompositeHeader.Performer
| "Date" -> CompositeHeader.Date
| "Comment" -> c() |> CompositeHeader.Comment
| anyelse -> CompositeHeader.FreeText anyelse
)

Expand Down
7 changes: 7 additions & 0 deletions src/Spreadsheet/AnnotationTable/CompositeHeader.fs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,12 @@ module ActivePattern =
|> Some
| _ -> None

/// Matches a header group consisting of exactly one cell whose text is a comment column header.
///
/// Returns the `CompositeHeader.Comment` with the parsed key, paired with `CompositeCell.freeTextFromFsCells` as the cell builder.
let (|Comment|_|) (cells : FsCell list) =
    // The inner `Comment` pattern is the one already in scope; this definition is not recursive.
    match cells |> List.map (fun cell -> cell.ValueAsString()) with
    | [Comment key] ->
        (CompositeHeader.Comment key, CompositeCell.freeTextFromFsCells)
        |> Some
    | _ -> None

let (|ProtocolType|_|) (cells : FsCell list) =
let parser s = if s = "Protocol Type" then Some s else None
let header _ = CompositeHeader.ProtocolType
Expand Down Expand Up @@ -136,6 +142,7 @@ let fromFsCells (cells : list<FsCell>) : CompositeHeader*(FsCell list -> Composi
| Output o -> o
| ProtocolType pt -> pt
| ProtocolHeader ph -> ph
| Comment c -> c
| FreeText ft -> ft
| _ -> failwithf "Could not parse header group %O" cells

Expand Down
18 changes: 18 additions & 0 deletions tests/Core/ArcJsonConversion.Tests.fs
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,24 @@ let private tests_ArcTableProcess =
let t' = ArcTable.fromProcesses tableName1 processes
Expect.arcTableEqual t' t "Table should be equal"
)
// Roundtrip check: a table with Input, Comment and Output columns is converted to processes and back.
testCase "SingleRowIOAndComment GetAndFromProcesses" (fun () ->
let t = ArcTable.init(tableName1)
let commentKey = "MyCommentKey"
let commentValue = "MyCommentValue"
// Single row: Source input, one comment column, Sample output
t.AddColumn(CompositeHeader.Input(IOType.Source),[|CompositeCell.createFreeText "Source"|])
t.AddColumn(CompositeHeader.Comment(commentKey), [|CompositeCell.createFreeText commentValue|])
t.AddColumn(CompositeHeader.Output(IOType.Sample),[|CompositeCell.createFreeText "Sample"|])
// Table -> processes: the single row must yield one process carrying exactly one comment
let processes = t.GetProcesses()
Expect.hasLength processes 1 ""
let comments = Expect.wantSome processes.[0].Comments ""
Expect.hasLength comments 1 ""
let comment = comments.[0]
// Key comes from the column header, value from the cell
Expect.equal comment (Comment(commentKey,commentValue)) ""
// Processes -> table: the result must equal the original table
let table = ArcTable.fromProcesses tableName1 processes
let expectedTable = t
Expect.arcTableEqual table expectedTable "Table should be equal"
)

]

let private tests_ArcTablesProcessSeq =
Expand Down
3 changes: 2 additions & 1 deletion tests/Core/CompositeHeader.Tests.fs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ let private tests_compositeHeader =
testList "CompositeHeader" [
testCase "Cases" <| fun _ ->
let count = CompositeHeader.Cases.Length
Expect.equal count 14 "count"
Expect.equal count 15 "count"
testCase "getExplanation" <| fun _ ->
let cases = CompositeHeader.Cases |> Array.map snd
for case in cases do
Expand Down Expand Up @@ -318,6 +318,7 @@ let tests_ToTerm = testList "ToTerm" [
CompositeHeader.Input IOType.Source
CompositeHeader.Output IOType.Sample
CompositeHeader.FreeText "Hello World"
CompositeHeader.Comment "MyComment"
]
|> List.distinct
testCase "Ensure all headers listed" <| fun _ ->
Expand Down

0 comments on commit ead29a5

Please sign in to comment.