
Commit 90e1c8c

updated readme
1 parent f605031 commit 90e1c8c

File tree

5 files changed: +86 -38 lines


.idea/workspace.xml

+28-30 (generated file; diff not rendered)

models.pyc

0 Bytes (binary file, not shown)

readme.md

+52-4
@@ -1,10 +1,10 @@
-# Robust Least Squares
-Fitting a known model robustly to data. The two implementations use
+# Robust Least Squares and Outlier Detection
+Fitting a known model robustly to data using Bayesian iteration. The two implementations use
 * RANSAC
 * M-Estimates
 
 The robust part is implemented, fitting the function is not. Model
-fitting is borrowed from the scipy.minimize.
+fitting is borrowed from scipy.optimize.minimize. Feel free to use a different model-fitting method.
 
 ## Pre-requisites
 **numpy** is the only pre-requisite for **robust_lsq.py**.
@@ -27,10 +27,58 @@ such as **scipy.optimize.minimize**. Please see example
 
 ## Setup
 Please run **test.py** for an example of fitting a straight line
-to data robustly.
+to data robustly with Bayesian sampling.
 
 ## How does it work?
+The key idea is to determine the samples that fit the model best.
+Bayesian updates are used. Bayes' rule is:
+
+P(data|model) = P(model|data) * P(data) / P(model)
+
+P(data|model) := normalization(P(model|data) * P(data))
+
+Note:
+1. P(model) is a constant and can be ignored.
+1. In the next iteration P(data|model) becomes P(data).
+
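As a concrete illustration of this update (the numbers here are invented for the example), one pass over four data points looks like:

```python
import numpy as np

p_data = np.array([0.25, 0.25, 0.25, 0.25])           # current P(data), uniform to start
p_model_given_data = np.array([0.9, 0.8, 0.7, 0.1])   # how well each point agrees with the fitted model
p_data = p_model_given_data * p_data                   # P(model|data) * P(data)
p_data /= p_data.sum()                                 # normalization -> [0.36, 0.32, 0.28, 0.04]
```

The fourth point, which disagrees with the model, loses probability mass with every such pass.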
+### ALGORITHM
+From an implementation perspective, these are the steps (a sketch in code follows the list):
+1. Build P(data) as a uniform distribution (or one encoding prior knowledge) over the data.
+1. Sample n points from the data distribution.
+1. Fit the model to the selected n points.
+   Essentially we are selecting (sampling) the best model given the data.
+   This is the P(model|data) step.
+1. Estimate a probability distribution P(data|model).
+   1. These are the errors of the data given the selected model.
+   1. It is wise to use a function such as arctan(1/errors)
+      so errors are not amplified and do not produce a useless probability distribution.
+1. Compute P(data) with the update: P(data|model) = normalize(P(model|data) * P(data)).
+   1. Normalize the probability distribution.
+   1. This is the Bayesian update step.
+
+1. Go to step 2 and iterate until the desired convergence of P(data).
+
+
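The sketch below is one minimal numpy realization of these steps; the straight-line model and all names are illustrative and are not the API of robust_lsq.py.

```python
import numpy as np

def fit_line(points):
    # Least-squares line fit to the sampled (x, y) points; any fitter works here.
    return np.polyfit(points[:, 0], points[:, 1], 1)

def line_errors(points, params):
    # Absolute residuals of every point under the current model.
    return np.abs(np.polyval(params, points[:, 0]) - points[:, 1])

def bayesian_robust_fit(X, iterations=100, n_samples=10, seed=0):
    rng = np.random.default_rng(seed)
    probabilities = np.ones(len(X)) / len(X)                 # 1. uniform P(data)
    for _ in range(iterations):
        idx = rng.choice(len(X), size=n_samples,
                         replace=False, p=probabilities)     # 2. sample n points from P(data)
        params = fit_line(X[idx])                            # 3. fit the model: the P(model|data) step
        errors = line_errors(X, params)                      # 4. errors of all data under the model
        likelihood = np.arctan(1.0 / (errors + 1e-12))       #    bounded score, large for small errors
        probabilities *= likelihood                          # 5. Bayesian update ...
        probabilities /= probabilities.sum()                 #    ... and normalization
    return params, probabilities                             # 6. after convergence
```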
 ### RANSAC
+For a RANSAC flavor of Bayesian robust fitting, k samples are selected to fit the model.
+#### In classical RANSAC:
+1. The minimum number of samples (k) needed to fit the model is used.
+1. k samples are randomly selected p times.
+1. The set of samples whose model best fits all the data is selected.
+
+#### In this Bayesian flavor:
+1. k samples are selected and fit using least squares (or anything else).
+1. Samples are drawn from a probability distribution estimated with Bayesian updates.
+
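On the sampling side, the only change from classical RANSAC is where the k indices come from; a small sketch with dummy data and illustrative names:

```python
import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 2))               # dummy data, for illustration only
probabilities = np.ones(len(X)) / len(X)    # current P(data) from the Bayesian updates
k = 2                                       # minimum number of samples for a line

# Classical RANSAC: every point is equally likely to be drawn.
classical_idx = rng.choice(len(X), size=k, replace=False)

# Bayesian flavor: draw from P(data), so probable inliers are picked
# more and more often as the updates sharpen the distribution.
bayesian_idx = rng.choice(len(X), size=k, replace=False, p=probabilities)
```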
+### M-Estimates
+This is similar to RANSAC except that, when fitting the model, all samples are used
+but are weighted according to their probability distribution.
+The probability distribution (the weights) is updated using Bayesian updates.
+
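For the weighting itself, any fitter that accepts per-sample weights will do; for example np.polyfit (toy data, not the repository's fitting code):

```python
import numpy as np

rng = np.random.default_rng(1)
x = np.linspace(0.0, 1.0, 50)
y = 2.0 * x + 1.0 + rng.normal(scale=0.1, size=x.size)   # toy line data
probabilities = np.ones(x.size) / x.size                  # current P(data), used as weights

# Every sample takes part in the fit, weighted by its probability.
params = np.polyfit(x, y, 1, w=probabilities)
```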
+### Outlier detection
+The probability distribution over the data, P(data), provides a way to
+perform outlier detection: simply apply a threshold to this distribution.
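For instance (the threshold value here is arbitrary and data dependent):

```python
import numpy as np

probabilities = np.ones(100) / 100        # P(data) after the iterations above
threshold = 0.5 / len(probabilities)      # e.g. half of the uniform mass
outliers = probabilities < threshold      # boolean mask of suspected outliers
inlier_idx = np.flatnonzero(~outliers)
```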
 
 
 ## license

robust_lsq.py

+6-4
@@ -7,7 +7,7 @@
 import numpy as np
 
 def robust_lsq_ransac(model_error_func, model_fit_func, X,
-                      iterations=1000, fit_samples=2, fit_with_best_n=None, priors=None):
+                      iterations=1000, fit_samples=2, fit_with_best_n=None, priors=None, norm_func=np.arctan):
 
     if priors == None:
         probabilities = np.ones(len(X))
@@ -25,7 +25,7 @@ def robust_lsq_ransac(model_error_func, model_fit_func, X,
         params = model_fit_func(X_subset)
         errors = model_error_func(X, params)
 
-        current_prob[:] = 1 / np.arctan(1 + errors[:])
+        current_prob[:] = 1 / norm_func(1 + errors[:])
         probabilities *= current_prob
         probabilities /= np.sum(probabilities)
 
@@ -38,7 +38,7 @@ def robust_lsq_ransac(model_error_func, model_fit_func, X,
 
 
 def robust_lsq_m_estimates(model_error_func, model_fit_func, X,
-                           iterations=1000, priors=None):
+                           iterations=1000, priors=None, norm_func=lambda x: 1 / (1 + x ** 0.1)):
     if priors == None:
         probabilities = np.ones(len(X))
     else:
@@ -54,7 +54,9 @@ def robust_lsq_m_estimates(model_error_func, model_fit_func, X,
         if np.sum(errors) < best_errors:
             best_param = params
             best_errors = np.sum(errors)
-        current_prob[:] = 1 / (1 + errors[:] ** 0.1)
+        # current_prob[:] = 1 / (1 + errors[:] ** 0.1)
+
+        current_prob[:] = norm_func(errors[:])
         # current_prob = 1/np.arctan(1+errors)
         probabilities *= current_prob
         probabilities /= np.sum(probabilities)
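A hedged usage sketch of the new norm_func keyword: the line model, the data, and np.log1p as an alternative scoring function are all made up for illustration; the call signature matches the diff above, but the return value is not part of this commit, so it is left unpacked.

```python
import numpy as np
from robust_lsq import robust_lsq_ransac

rng = np.random.default_rng(0)
x = np.linspace(0.0, 10.0, 100)
y = 3.0 * x - 2.0 + rng.normal(scale=0.5, size=x.size)
y[::10] += 20.0                                   # inject a few gross outliers
X = np.column_stack([x, y])

def line_fit(points):
    # model_fit_func: fit a line to the sampled subset and return its parameters.
    return np.polyfit(points[:, 0], points[:, 1], 1)

def line_error(points, params):
    # model_error_func: per-sample residuals over the whole data set.
    return np.abs(np.polyval(params, points[:, 0]) - points[:, 1])

# norm_func maps (1 + error) to a score that is inverted inside the loop;
# np.arctan is the new default, np.log1p is a slower-growing alternative.
result = robust_lsq_ransac(line_error, line_fit, X,
                           iterations=200, fit_samples=2,
                           norm_func=np.log1p)
# What robust_lsq_ransac returns is defined in robust_lsq.py, not in this diff.
```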

robust_lsq.pyc

227 Bytes (binary file, not shown)
