-
Notifications
You must be signed in to change notification settings - Fork 0
/
custom-markup.KQNEWEJ6.wok
49 lines (45 loc) · 2.01 KB
/
custom-markup.KQNEWEJ6.wok
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/usr/bin/wok -f
/** custom-markup.KQNEWEJ6.wok
*
* This script modifies the search result for a word in Kenkyusha Shin EiWa Cyujiten (converted
* to Epwing by dessed). The following changes will be applied to the data:
*
* 1. Fix the problem of too many `ebkw` spans.
* 2. Add a class `pos` to lines that start with the image file of zB125.
* 3. Split `sense1` and `sense2` when they are connected.
* 4. Normalize the indicators "◆" and "◇", inserted before example sentences, to "・".
*
* Usage:
* java -jar wok-0.1.0.jar -f custom-markup.KQNEWEJ6.wok -v idx=0 input > output
*
* Option:
* -v idx: The index number of the target column.
*
*/
// Reader/writer settings supplied by wok. The names follow awk's FS/OFS convention:
// presumably FS/OFS are the input/output field separators (tab here) and FQ/OFQ the
// quoting modes for reading (Quote.Min) and writing (Quote.All).
// TODO confirm the exact semantics against the wok documentation.
FS = '\t'
FQ = Quote.Min
OFS = '\t'
OFQ = Quote.All
// Patterns applied, in numeric order, to the target column by the In { ... } block below.
// `((?!<br>).)*` consumes characters up to (but not including) the next `<br>`,
// i.e. the remainder of the current `<br>`-delimited line.

// An `ebin` span whose `ebkw` span is empty and is followed by an `ebul` span;
// group 1 captures from that `ebul` span to the end of the line.
val p1 = """<span class="ebin"><span class="ebkw"></span>(<span class="ebul">((?!<br>).)*)""".r
// A "━" character and everything after it up to the end of the line (group 1).
val p2 = """(━((?!<br>).)*)""".r
// A line consisting of an empty `ebkw` span and an `ebul` span holding a 1-2 digit
// number immediately followed by the letter "a" (e.g. "1a"); group 1 is the number.
val p3 = """<br><span class="ebkw"></span><span class="ebul">([0-9]{1,2})a</span>""".r
// A line that starts with an empty `ebkw` span followed by an `ebul` span holding a
// 1-2 digit number; group 1 is everything after the empty `ebkw` span.
val p4 = """<br><span class="ebkw"></span>(<span class="ebul">[0-9]{1,2}</span>((?!<br>).)*)""".r
// Same shape as p4, but the `ebul` span holds a single lowercase letter.
val p5 = """<br><span class="ebkw"></span>(<span class="ebul">[a-z]</span>((?!<br>).)*)""".r
// Same shape as p4, but the numbered `ebul` span is wrapped in literal parentheses.
val p6 = """<br><span class="ebkw"></span>(\(<span class="ebul">[0-9]{1,2}</span>\)((?!<br>).)*)""".r
// Any empty `ebkw` span.
val p7 = """<span class="ebkw"></span>""".r
// Main pipeline: for every non-empty input row, rewrite the markup held in column
// `idx` and print the whole row.
//
// NOTE(review): the file header also lists normalizing "◆"/"◇" to "・", but none of
// the rules below performs that replacement — confirm whether that step is still wanted.
In { rows =>
  // (pattern, replacement) pairs. Order matters: p3 must split "Na" entries before
  // p4/p5 tag them, and p7 must run last because p3-p6 match on the empty `ebkw` span
  // that p7 removes.
  val rules = Seq(
    // Move the rest of the line inside the (previously empty) `ebkw` span.
    (p1, """<span class="ebin"><span class="ebkw">$1</span>"""),
    // Wrap "━..." runs in a `pos` span.
    (p2, """<span class="pos">$1</span>"""),
    // Split a combined "Na" entry into a number entry followed by an "a" entry.
    (p3, """<br><span class="ebkw"></span><span class="ebul">$1</span><br><span class="ebkw"></span><span class="ebul">a</span>"""),
    // Tag numbered, lettered and parenthesized-number lines.
    (p4, """<br><span class="sense1">$1</span>"""),
    (p5, """<br><span class="sense2">$1</span>"""),
    (p6, """<br><span class="sense3">$1</span>"""),
    // Drop every remaining empty `ebkw` span.
    (p7, "")
  )

  rows
    .filter(row => row.size > 0)
    .map { row =>
      // Pad short rows so that column `idx` always exists.
      val padded = row.padTo(idx + 1, "")
      val markup = rules.foldLeft(padded(idx)) { case (text, (pattern, replacement)) =>
        pattern.replaceAllIn(text, replacement)
      }
      padded.updated(idx, markup)
    }
    .foreach(row => println(row: _*))
}