-
Notifications
You must be signed in to change notification settings - Fork 0
/
custom-markup.SAITOWAEI_EP.wok
43 lines (39 loc) · 1.39 KB
/
custom-markup.SAITOWAEI_EP.wok
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#!/usr/bin/wok -f
/** custom-markup.SAITOWAEI_EP.wok
*
* This script modifies the search result for a word in NEW Saito Waei Daijiten (converted
* to Epwing by dessed). The following changes will be applied to the data:
*
* 1. Split pos and sense by `<br>` when those are connected.
* 2. Add a class `pos` to lines that start with "〈".
* 3. Add a class `sense1` to lines that start with "NUMBER. " (e.g., "1. ").
* 4. Normalize the indicators "◆" and "◇", inserted before example sentences, to "・".
*
* Usage:
* java -jar wok-0.1.0.jar -f custom-markup.SAITOWAEI_EP.wok -v idx=0 input > output
*
* Option:
* -v idx: The index number of the target column.
*
*/
// Separator/quoting settings; by wok's awk-style naming FS/FQ presumably govern
// input and OFS/OFQ govern output (confirm against the wok documentation).
FS = '\t'
FQ = Quote.Min
OFS = '\t'
OFQ = Quote.All

// Ordered rewrite rules as (pattern, replacement) pairs. They are applied top to
// bottom; the order matters because the first rule inserts the `<br>` that the
// following rules use as a line boundary.
val rules = Seq(
  // A "〈...〉" tag immediately followed by a numbered sense: break between them.
  """<br>(〈[^〉]+〉)(<span class="ebbo">[0-9]{1,2}</span>\.)""".r ->
    """<br>$1<br>$2""",
  // A line beginning with "〈" (up to the next `<br>`): wrap it in class `pos`.
  """<br>(〈((?!<br>).)*)""".r ->
    """<br><span class="pos">$1</span>""",
  // A line beginning with a numbered sense marker: wrap it in class `sense1`.
  """<br>( ?<span class="ebbo">[0-9]{1,2}</span>\.((?!<br>).)*)""".r ->
    """<br><span class="sense1">$1</span>""",
  // A leading "◆" or "◇" on a line becomes "・".
  """<br>(◆|◇)""".r ->
    """<br>・"""
)

// For every non-empty row: make sure column `idx` exists, run all rules over that
// column in order, and print the row with the rewritten column.
In { _
  .filter (_.size > 0)
  .map (_.padTo(idx+1, ""))
  .map { fields =>
    val rewritten = rules.foldLeft(fields(idx)) { case (text, (pattern, replacement)) =>
      pattern.replaceAllIn(text, replacement)
    }
    fields.updated(idx, rewritten)
  }
  .foreach (fields => println(fields: _*))
}