Skip to content
This repository was archived by the owner on Mar 19, 2024. It is now read-only.

Commit 3e4a901

Browse files
Corrected brace escaping in perl script
Summary: In newer versions of Perl, literal braces in a regex must be escaped.
Reviewed By: EdouardGrave
Differential Revision: D4351672
fbshipit-source-id: dee6e24c705025a29612758547262d3695f349e7
1 parent 09d1156 commit 3e4a901

File tree

2 files changed

+6
-6
lines changed

2 files changed

+6
-6
lines changed

wikifil.pl

+2-2
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@
3131
s/\[\[category:([^|\]]*)[^]]*\]\]/[[$1]]/ig; # show categories without markup
3232
s/\[\[[a-z\-]*:[^\]]*\]\]//g; # remove links to other languages
3333
s/\[\[[^\|\]]*\|/[[/g; # remove wiki url, preserve visible text
34-
s/{{[^}]*}}//g; # remove {{icons}} and {tables}
35-
s/{[^}]*}//g;
34+
s/\{\{[^\}]*\}\}//g; # remove {{icons}} and {tables}
35+
s/\{[^\}]*\}//g;
3636
s/\[//g; # remove [ and ]
3737
s/\]//g;
3838
s/&[^;]*;/ /g; # remove URL encoded chars

word-vector-example.sh

+4-4
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,11 @@ DATADIR=data
1414
mkdir -p "${RESULTDIR}"
1515
mkdir -p "${DATADIR}"
1616

17-
if [ ! -f "${DATADIR}/text9" ]
17+
if [ ! -f "${DATADIR}/fil9" ]
1818
then
1919
wget -c http://mattmahoney.net/dc/enwik9.zip -P "${DATADIR}"
2020
unzip "${DATADIR}/enwik9.zip" -d "${DATADIR}"
21-
perl wikifil.pl "${DATADIR}/enwik9" > "${DATADIR}"/text9
21+
perl wikifil.pl "${DATADIR}/enwik9" > "${DATADIR}"/fil9
2222
fi
2323

2424
if [ ! -f "${DATADIR}/rw/rw.txt" ]
@@ -29,12 +29,12 @@ fi
2929

3030
make
3131

32-
./fasttext skipgram -input "${DATADIR}"/text9 -output "${RESULTDIR}"/text9 -lr 0.025 -dim 100 \
32+
./fasttext skipgram -input "${DATADIR}"/fil9 -output "${RESULTDIR}"/fil9 -lr 0.025 -dim 100 \
3333
-ws 5 -epoch 1 -minCount 5 -neg 5 -loss ns -bucket 2000000 \
3434
-minn 3 -maxn 6 -thread 4 -t 1e-4 -lrUpdateRate 100
3535

3636
cut -f 1,2 "${DATADIR}"/rw/rw.txt | awk '{print tolower($0)}' | tr '\t' '\n' > "${DATADIR}"/queries.txt
3737

38-
cat "${DATADIR}"/queries.txt | ./fasttext print-vectors "${RESULTDIR}"/text9.bin > "${RESULTDIR}"/vectors.txt
38+
cat "${DATADIR}"/queries.txt | ./fasttext print-vectors "${RESULTDIR}"/fil9.bin > "${RESULTDIR}"/vectors.txt
3939

4040
python eval.py -m "${RESULTDIR}"/vectors.txt -d "${DATADIR}"/rw/rw.txt

0 commit comments

Comments
 (0)