vadimkantorov
diff --git a/‎repro/hollywood-2/dot_kernel.py renamed to ‎repro/dot_kernel.py
+2-1 b/‎repro/hollywood-2/dot_kernel.py renamed to ‎repro/dot_kernel.py
+2-1
diff --git a/‎repro/hmdb-51/Makefile
+90 b/‎repro/hmdb-51/Makefile
+90
diff --git a/‎repro/hmdb-51/README.md
+47 b/‎repro/hmdb-51/README.md
+47
@@ -1,11 +1,12 @@
 #! /usr/bin/env python
 
+import os
 import sys
 import itertools
 import numpy as np
 import numpy.linalg as linalg
 
-IN = sys.argv[1:]
+IN = [os.path.join(sys.argv[1], x[:-1] + '.txt') for x in open(sys.argv[2])]
 
 skipComments = lambda path: itertools.ifilter(lambda x: not x.startswith('#'), open(path))
 ks = [None]*len(list(skipComments(IN[0])))
 
@@ -0,0 +1,90 @@
+# Parameter settings
+# FV_GMM_NCOMPONENTS is passed to gmm_train.py as --gmm_ncomponents,
+# FASTFV_KNN is passed to the Fisher vector tool as --knn.
+FV_GMM_NCOMPONENTS = 256
+FASTFV_KNN = 5
+
+# Uncomment the next three lines to enable FLANN.
+# While they stay commented, $(FASTFV_QUANTIZER) is undefined and expands to nothing.
+#FASTFV_FLANN_NTREES = 4
+#FASTFV_FLANN_NCOMPARISONS = 32
+#FASTFV_QUANTIZER = --enableflann $(FASTFV_FLANN_NTREES) $(FASTFV_FLANN_NCOMPARISONS)
+
+# Comment the next line to disable spatio-temporal (s-t) grids.
+FV_SPATIO_TEMPORAL_GRIDS = --enablespatiotemporalgrids
+
+# Comment the next line and uncomment the one after it to use yael.
+# FV is a recursively expanded variable, so referring to $(FASTFV) before it is
+# defined below is fine: it is only expanded when a recipe uses $(FV).
+FV = $(FASTFV)
+#FV = ../yaelfv.py
+
+# Tool paths (relative to this directory)
+FASTVIDEOFEAT = ../../bin/fastvideofeat
+FASTFV = ../../bin/fastfv
+YAELDIR = ../../bin/dependencies/yael/yael
+# Prepend the yael directory (made absolute with pwd) to PYTHONPATH and export
+# it to every recipe.
+export PYTHONPATH := $(shell pwd)/$(YAELDIR):$(PYTHONPATH)
+
+# Repro pipeline below.
+# clipFile lists the clips to process; allClips is its content, split on whitespace.
+clipFile = allClips.txt
+allClips = $(shell cat $(clipFile))
+ 
+# Dataset locations: original videos, evaluation splits, re-encoded videos.
+videoDir = data/dataset/avi
+evalDir = data/dataset/testTrainMulti_7030_splits
+recodedDir = data/dataset/avi_recoded
+
+# Per-clip file lists derived from $(allClips), and the single-file artefacts.
+# dataset:        $(videoDir)/<clip>
+# features:       data/features/<clip>.txt
+# fisher_vectors: data/fisher_vectors/<clip>.txt
+# vocabs:         four GMM files named <prefix>.<name>.gmm; the prefix (e.g. 10-105)
+#                 is what the vocab macro below extracts
+#                 (presumably a descriptor column range - confirm against the tools).
+dataset = $(addprefix $(videoDir)/, $(allClips))
+features = $(addprefix data/features/, $(addsuffix .txt, $(allClips)))
+features_sampled = data/features_sampled.txt
+vocabs = $(addprefix data/vocabs/, 10-105.hog.gmm 106-213.hog.gmm 214-309.mbhx.gmm 310-405.mbhy.gmm)
+fisher_vectors = $(addprefix data/fisher_vectors/, $(addsuffix .txt, $(allClips)))
+kernel = data/kernel.txt
+classification = data/classification.txt
+report = data/report.txt
+# Directories created on demand by the mkdir rule and removed by clean.
+artefactDirs = data/features data/vocabs data/fisher_vectors data/dataset/avi_recoded logs/features logs/vocabs logs/fisher_vectors logs/recoding
+
+# log: log file for the current target, i.e. the target path with every
+# occurrence of `data` replaced by `logs`, plus a .log suffix. The spaces after
+# the commas are part of the subst arguments, so the expansion carries leading
+# blanks; this is harmless where it is used (after a shell redirection).
+log = $(subst data, logs, $@).log
+# vocab: $(call vocab, path) expands to two words - the file name of `path`
+# with its last two suffixes removed, followed by `path` itself.
+vocab = $(basename $(basename $(notdir $(1)))) $(1)
+
+# Default goal: build the report, then print it under a short header.
+# Declared phony so that a file or directory named `all` can never make the
+# goal look up to date. The recipe lines are silenced with `@` so that only the
+# report text is printed, not the echo/cat commands themselves.
+.PHONY: all
+all: $(report)
+	@echo "Report on HMDB-51 classification task."
+	@echo "Reported accuracies are accuracies on dataset splits."
+	@echo ""
+	@cat $(report)
+
+# Create any of the output/log directories (and data/dataset) on demand;
+# $@ is the directory currently being built.
+$(artefactDirs) data/dataset:
+	mkdir -p $@
+
+# The clips themselves have no recipe: they appear once the $(videoDir) rule
+# has downloaded and unpacked the dataset.
+$(dataset): $(videoDir)
+
+# Extract features for one clip. $(basename $(notdir $@)) turns the target
+# data/features/<clip>.txt back into the clip file name <clip>.
+# Every feature file lists all of $(dataset) as prerequisites; the artefact
+# directories are order-only prerequisites (they only have to exist).
+# Step 1: re-encode the clip with ffmpeg (libxvid, no audio, overwrite output)
+#         into $(recodedDir); all ffmpeg output goes to the matching logs/recoding log.
+# Step 2: run fastvideofeat on the re-encoded clip; stdout becomes the feature
+#         file, stderr goes to the logs/features log.
+$(features): $(dataset) | $(artefactDirs)
+	ffmpeg -y -an -i $(videoDir)/$(basename $(notdir $@)) -vcodec libxvid -q:v 1 $(recodedDir)/$(basename $(notdir $@)) > $(subst features,recoding, $(log)) 2>&1
+	$(FASTVIDEOFEAT) $(recodedDir)/$(basename $(notdir $@)) > $@ 2> $(log)
+
+# Sample the features: keep each line of every file in data/features with
+# probability 0.01. NOTE(review): there is no srand() call, so most awk
+# implementations will reproduce the same sample on every run - confirm this is intended.
+$(features_sampled): $(features)
+	awk '{ if (rand() <= .01) print }' data/features/* > $@
+
+# Train one GMM vocabulary per target; the recipe runs once for each file in
+# $(vocabs). gmm_train.py reads the sampled features on stdin and receives
+# --vocab followed by the two words produced by the vocab macro
+# (<prefix> <path of the .gmm file>). All output goes to the log.
+$(vocabs): $(features_sampled)
+	cat $(features_sampled) | ../../src/gmm_train.py --gmm_ncomponents $(FV_GMM_NCOMPONENTS) --vocab $(call vocab, $@) > $(log) 2>&1
+
+# Compute the Fisher vector of one clip. The input is the feature file with the
+# same name (the target path with `fisher_vectors` replaced by `features`), and
+# one --vocab <prefix> <path> pair is passed for every vocabulary.
+# $(FASTFV_QUANTIZER) expands to nothing unless FLANN is enabled above.
+# stdout becomes the Fisher vector file, stderr goes to the log.
+$(fisher_vectors): $(features) $(vocabs)
+	cat $(subst fisher_vectors,features, $@) | $(FV) --xpos 0 --ypos 1 --tpos 2 --knn $(FASTFV_KNN) $(FASTFV_QUANTIZER) $(FV_SPATIO_TEMPORAL_GRIDS) $(foreach g, $(vocabs), --vocab $(call vocab, $(g))) > $@ 2> $(log)
+
+# Compute the kernel over all clips. dot_kernel.py receives the Fisher vector
+# directory and the clip list and reads <directory>/<clip>.txt for each listed clip.
+$(kernel) : $(fisher_vectors)
+	../dot_kernel.py data/fisher_vectors $(clipFile) > $@
+
+# Classify: classify.py reads the kernel on stdin and is given the directory
+# with the evaluation splits and the clip list.
+$(classification) : $(kernel)
+	cat $(kernel) | ./classify.py $(evalDir) $(clipFile) > $@
+
+# Write the final report. report.py takes no arguments here
+# (presumably it locates the logs and classification results itself - confirm).
+$(report): $(classification)
+	../report.py > $@
+
+# Download and unpack the dataset into $(videoDir).
+# data/dataset is an order-only prerequisite, so it only has to exist.
+# Steps, in recipe order:
+#   1. fetch the rar tools tarball, the HMDB-51 video archive and the splits archive;
+#   2. create $(videoDir) and unpack the rar tools under data/dataset
+#      (unrar is then run from data/dataset/rar/);
+#   3. extract the splits into data/dataset and the video archive into $(videoDir);
+#   4. extract every .rar file that is now in $(videoDir) (the quoted glob is
+#      passed to unrar unexpanded);
+#   5. move every .avi found under $(videoDir) to its top level, replacing each
+#      of the characters ] [ ; ( ) & ? ! in the file name with `_`
+#      (error output of this step is discarded);
+#   6. delete the downloaded archives and the .rar files left in $(videoDir).
+$(videoDir): | data/dataset
+	wget http://www.rarlab.com/rar/rarlinux-3.9.1.tar.gz
+	wget http://serre-lab.clps.brown.edu/wp-content/uploads/2013/10/hmdb51_org.rar
+	wget http://serre-lab.clps.brown.edu/wp-content/uploads/2013/10/test_train_splits.rar
+	mkdir -p $(videoDir)
+	tar -C data/dataset -xf rarlinux-3.9.1.tar.gz
+	data/dataset/rar/unrar x test_train_splits.rar data/dataset
+	data/dataset/rar/unrar x hmdb51_org.rar $(videoDir)
+	data/dataset/rar/unrar x '$(videoDir)/*.rar' $(videoDir)
+	find $(videoDir) -name '*.avi' -exec bash -c 'mv "{}" $(videoDir)/$$(echo $$(basename "{}") | sed "s/[][;()&?!]/_/g")' \; 2> /dev/null
+	rm rarlinux-3.9.1.tar.gz hmdb51_org.rar test_train_splits.rar $(videoDir)/*.rar
+
+# Remove everything the pipeline produced: the artefact and log directories
+# (including the re-encoded videos) and the single-file outputs. The downloaded
+# videos in $(videoDir) and the evaluation splits are not touched.
+# Declared phony so that a file or directory named `clean` cannot disable it.
+.PHONY: clean
+clean:
+	rm -rf $(artefactDirs) $(features_sampled) $(kernel) $(classification) $(report)
@@ -0,0 +1,47 @@
+# Reproducing HMDB-51 results
+
+## Prerequisites
+ - 64-bit *nix, 800 GB of free disk space, several gigabytes of RAM
+ - Python, NumPy, scikit-learn, PyYAML, ffmpeg with libxvid
+
+## Instructions
+
+Make sure all prerequisites are in place. Put the tool binaries in *bin* (you can get them from the [releases page](http://github.com/vadimkantorov/cvpr2014/releases)), and then run the repro with:
+> $ make --jobs 8
+
+The script will automatically download the HMDB-51 dataset and rar decompressor that are required for evaluation.
+You can adjust the number of cores used for parallel execution. In about an hour the scripts will fill the data and logs directories. After execution you will see a report like:
+```
+Report on HMDB-51 classification task.
+Reported accuracies are accuracies on dataset splits.
+
+Average frame count: 95
+Average frame size: 366x240
+Average descriptor count: 62462
+
+All fps are reported without taking file reading and writing into account, however, video decoding is included.
+
+Features (HOF, HOG, MBH enabled):
+  Average total fps: 775.76
+  Average HOG fps: 3807.06
+  Average HOF fps: 4364.18
+  Average MBH fps: 2840.89
+
+Fisher vectors (components: 256, s-t grids enabled: True, knn: 5, second order enabled: False, FLANN trees: -1, FLANN comparisons: -1):
+  Average total fps: 632.27
+
+Classification:
+  split_0           0.4588
+  split_1           0.4359
+  split_2           0.4595
+
+  mean: 0.4514
+```
+
+To remove all produced items (no worry, it will not remove the downloaded HMDB-51 dataset), run:
+> $ make clean
+
+## Notes
+The code recodes HMDB-51 videos with libxvid to produce reasonable motion vectors, as original videos in HMDB-51 are quite messy.
+Key parameters are specified at the top of the Makefile and explained in the [Performance section](https://github.com/vadimkantorov/cvpr2014/#performance). You could play with them, hopefully everything will still work.
+Features and Fisher vectors are not compressed for the sake of script clarity. For practical usage, feel free to modify the script to use gzip compression or modify the tools to output floats in binary format.