-
Notifications
You must be signed in to change notification settings - Fork 0
/
parser.pl
executable file
·45 lines (36 loc) · 1.13 KB
/
parser.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# Reweigh the words
# Sample: word="",f=8671269 to word="",f=200
# Source: command line argument
# Output to terminal
# Biggest value = # of lines.
# Divide this by 205, round down and add 1 to get the divider.
# For each line, divide the number of lines left in the list by that divider,
# round down and add 50.
# All values should now be between 50 and 254.
# Open original file
use strict;
use warnings;
use utf8;
# First pass: count the lines of the file named by the first command-line
# argument. The three-argument open with an explicit '<' mode stops a file
# name such as ">out" or "cmd |" from being treated as a mode or a pipe.
my $input_path = $ARGV[0];
defined $input_path or die "Usage: $0 FILE\n";
open my $count_fh, '<', $input_path
    or die "Cannot open '$input_path': $!\n";
my $count = 0;
# Count the # of lines
$count++ while <$count_fh>;
close $count_fh;
# Calculate the divider to ensure results between 50 and 254:
# the divider is strictly greater than $count / 205, so int(n / $divider)
# is at most 204 for every n up to $count, and 50 is added afterwards.
my $divider = int($count / 205) + 1;
# Report whether the first argument is made up only of digits, optionally
# preceded by a single "+" or "-" sign ("12", "+3" and "-7" qualify;
# "1.5" and "abc" do not).
sub is_integer {
    my ($candidate) = @_;
    return $candidate =~ /^[+-]?\d+$/;
}
# Second pass: re-open the source file, decoding it as UTF-8, and print every
# accepted word line with its f= weight replaced by the new weight.
open my $words_fh, '<:encoding(utf8)', $ARGV[0]
    or die "Cannot re-open input file: $!\n";
while (my $line = <$words_fh>) {
    # $count now holds the number of lines still to come after this one,
    # so earlier lines receive larger weights.
    $count--;

    # Only lines carrying an f= field are word lines; everything else is
    # dropped from the output.
    # NOTE(review): an earlier comment said such lines are printed without
    # changes, but the code has always skipped them; that behaviour is kept.
    next unless $line =~ /f=/;

    my $weighed = int($count / $divider) + 50;

    # Text between the first "=" and the last "," on the line (greedy match).
    my ($name) = $line =~ m/=(.*),/;

    # Skip lines without such a field, names of at most one character, and
    # names that are plain integers.
    next unless defined $name;
    next unless length($name) > 1 && !is_integer($name);

    # Replace only the number that follows f=. Replacing every number on the
    # line would also overwrite digits inside the word itself (e.g. "mp3")
    # and any other numeric attribute.
    $line =~ s/(\bf=)(?:\d*[.])?\d+/$1$weighed/;

    # The line was decoded on input; encode it back to UTF-8 bytes for output.
    utf8::encode($line);
    print $line;
}
close $words_fh;