-
Notifications
You must be signed in to change notification settings - Fork 0
/
build-SVL-deck.wok
141 lines (117 loc) · 6.77 KB
/
build-SVL-deck.wok
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#!/usr/bin/wok -f
/** build-SVL-deck.wok
*
* Usage:
* java -jar wok-0.1.0.jar -f build-SVL-deck.wok
*
*/
/* Output settings of this script: tab as OFS and Quote.All as OFQ.
 * NOTE(review): presumably OFS is Wok's output field separator and OFQ its output quoting mode - confirm
 * against the Wok documentation.
 * The original assigned this identical pair twice in a row; the redundant repetition has been dropped.
 * NOTE(review): if the first pair was meant to configure different variables, restore it with the
 * intended names. */
OFS = '\t'
OFQ = Quote.All
/** The source file of the deck.
 * The SVL word set contains several words that cannot be searched for as they are. To handle this problem,
 * the TSV file has two fields, `word` and `fixed word`, at its beginning. Anki also has the problem that it
 * treats the first field of the TSV as the key of the card, so once the file has been imported into Anki
 * there is no way to change the key to another field.
 * Both problems can be solved by using `fixed word` instead of `word`.
 *
 * Note: If you need to duplicate a field, you can do it simply with `col-copy.wok`.
 *
 * Fixed words are:
 * `Mr.` -> `Mr`
 * `Mrs.` -> `Mrs`
 * `Messrs.` -> `Messrs`
 * `Ms.` -> `Ms`
 * `footlight` -> `footlights`
 */
val SVL = "SVL.tsv"
/** The COCA 60K corpus file.
 * `wordlist_60k_***.txt` is almost a valid TSV file, except for a header of around twenty lines. That header
 * has to be cut off to obtain a strict TSV file before it can be used as the corpus here.
 * The corpus is available for download from http://www.wordfrequency.info/.
 */
val COCA = "COCA.tsv"
/* The output directory of media files. */
val media = "SVL.media"
/* `-v@rawstr` option pairs that hand the corpus file and the media directory to a Wok script. */
val corpus = "-v@rawstr" :: s"corpus=$COCA" :: Nil
val mediaDir = "-v@rawstr" :: s"media=$media" :: Nil
/* Directory that holds every EPWING dictionary and EBWin map file listed below.
 * It used to be repeated in each entry; change this single value when the dictionaries live elsewhere. */
val dictionaryRoot = raw"Z:\BTSync\rubyu\dictionary"

/* Builds the option pair `-v@rawstr dic=<dictionaryRoot>\<name>` for one dictionary directory. */
def dicVar(name: String): List[String] =
  List("-v@rawstr", "dic=" + dictionaryRoot + "\\" + name)

/* Builds the option pair `-v@rawstr ebmap=<dictionaryRoot>\<name>.map` for one map file. */
def ebmapVar(name: String): List[String] =
  List("-v@rawstr", "ebmap=" + dictionaryRoot + "\\" + name + ".map")

/* EPWING dictionaries. */
val SRD = dicVar("SRD")
val OALD7 = dicVar("OALD7")
val LDOCE5 = dicVar("LDOCE5")
val LDOCE5IN = dicVar("LDOCE5-INLINE")
val KENE7J5 = dicVar("KENE7J5")
val GENIUS = dicVar("GENIUS")
val GENIUD = dicVar("GENIUD")
val KQNEWEJ6 = dicVar("KQNEWEJ6")
val READERS2 = dicVar("READERS2")
val SAITOWAEI = dicVar("SAITOWAEI_EP")
val EIJIRO = dicVar("EIJIRO")

/* EBWin map files. Note that there is no separate map entry for LDOCE5-INLINE. */
val SRDMap = ebmapVar("SRD")
val OALD7Map = ebmapVar("OALD7")
val LDOCE5Map = ebmapVar("LDOCE5")
val GENIUSMap = ebmapVar("GENIUS")
val GENIUDMap = ebmapVar("GENIUD")
val KENE7J5Map = ebmapVar("KENE7J5")
val KQNEWEJ6Map = ebmapVar("KQNEWEJ6")
val READERS2Map = ebmapVar("READERS2")
val SAITOWAEIMap = ebmapVar("SAITOWAEI_EP")
val EIJIROMap = ebmapVar("EIJIRO")
/* The leading arguments shared by every command that runs a Wok script:
 * the script file name is appended right after the trailing "-f". */
val wok = "java" :: "-jar" :: "wok-0.1.0.jar" :: "-f" :: Nil
/* Definition of the deck: the ordered list of commands, one per processing step.
 * The steps are run in this order, so the order of the entries matters. */
val definition = {
  /* Expands assignments such as "src=1" into the option pairs "-v", "src=1". */
  def vars(assignments: String*) =
    assignments.toList.flatMap(a => List("-v", a))

  /* One audio step. All audio steps pass src=1 and dst=3; only idx, update and the dictionary vary. */
  def voice(idx: Int, update: Boolean, dic: List[String]) =
    wok ::: "eb-voice-html-mp3.wok" ::
      vars("src=1", "dst=3", s"idx=$idx", s"update=$update") ::: dic ::: mediaDir

  /* One content section: a dictionary step writing to `dst`, followed by the custom markup
   * script that is given the same number as `idx`. */
  def content(dst: Int, dic: List[String], ebmap: List[String], markup: String,
              lookup: String = "eb-html-min.wok") = List(
    wok ::: lookup :: vars("src=1", s"dst=$dst", "update=true") ::: dic ::: ebmap ::: mediaDir,
    wok ::: markup :: vars(s"idx=$dst")
  )

  /* A copied and normalized field for improvement of the quality of the query result. */
  //wok ::: List("col-copy.wok", "-v", "src=0", "-v", "dst=1"),

  /* COCA */
  val coca = wok ::: "coca-html.wok" :: vars("src=1", "dst=2", "update=true") ::: corpus

  /* Audio */
  val audio = List(
    voice(idx = 1, update = true, dic = LDOCE5),
    voice(idx = 1, update = false, dic = OALD7),
    voice(idx = 0, update = false, dic = KENE7J5),
    voice(idx = 0, update = false, dic = GENIUS)
  )

  /* Content 1 to Content 8, in that order. */
  val contents =
    content(4, SRD, SRDMap, "custom-markup.SRD.wok") :::
    content(5, KQNEWEJ6, KQNEWEJ6Map, "custom-markup.KQNEWEJ6.wok", lookup = "eb-html-min-KQNEWEJ6.wok") :::
    content(6, READERS2, READERS2Map, "custom-markup.READERS2.wok") :::
    content(7, EIJIRO, EIJIROMap, "custom-markup.EIJIRO.wok") :::
    content(8, LDOCE5, LDOCE5Map, "custom-markup.LDOCE5.wok") :::
    content(9, OALD7, OALD7Map, "custom-markup.OALD7.wok") :::
    content(10, SAITOWAEI, SAITOWAEIMap, "custom-markup.SAITOWAEI_EP.wok") :::
    /* The inline LDOCE5 dictionary is combined with the regular LDOCE5 map and markup script. */
    content(11, LDOCE5IN, LDOCE5Map, "custom-markup.LDOCE5.wok")

  /* Limit the size of a row. */
  val limit = wok ::: List("col-limit-by.wok", "-v@str", "length=300K")

  /* Detach the content of a row and save it as a HTML file. */
  val detach =
    wok ::: List("col-detach.wok", "-v@str", "src=1,2,3,4,5,6,7,8,9,10,11", "-v", "dst=1") ::: mediaDir

  (coca :: audio) ::: contents ::: List(limit, detach)
}
/* Name of the file read by step `n`: the deck source `SVL` for step 0,
 * otherwise the file written by step `n - 1`. */
def inputFileName(n: Int) = n match {
  case 0 => SVL
  case _ => outputFileName(n - 1)
}
/* Name of the file written by step `n`: "SVL.complete.tsv" for the last entry of `definition`,
 * otherwise a temporary file numbered `n + 1` with two digits (step 0 writes "SVL.temp.01.tsv"). */
def outputFileName(n: Int) = {
  val lastStep = definition.size - 1
  val ordinal = n + 1
  if (n != lastStep) f"SVL.temp.$ordinal%02d.tsv"
  else "SVL.complete.tsv"
}
/* Runs every step of `definition` in order.
 * Step i gets `inputFileName(i)` appended as its last argument and has its standard output
 * redirected to `outputFileName(i)`, so each step consumes the file written by the previous one.
 * The run is aborted as soon as a command exits with a non-zero code: the original ignored the
 * exit code, which let the remaining steps run against a missing or incomplete input file. */
definition
  .zipWithIndex
  .foreach { case (step, i) =>
    val input = inputFileName(i)
    // NOTE(review): `fileOption` is not defined in this file; presumably it is an Option-like
    // conversion provided by Wok - confirm that `.get` cannot fail for these file names.
    val output = outputFileName(i).fileOption.get
    val command = step ::: List(input)
    println(f"$i%02d: $command #> $output !")
    // Explicit method call instead of the postfix `!` operator, so that the exit code can be kept.
    val exitCode = (command #> output).!
    if (exitCode != 0)
      sys.error(f"$i%02d: command failed with exit code $exitCode: $command")
  }