function [munew,sigmanew,err,Xtilde] = EM(X,itlim) % usage: [munew,sigmanew,err,Xtilde] = EM(X,itlim) % description: converts the input data matrix X % with some (not all!) NaN entries in various % columns into a full matrix Xtilde with % conditional estimates for the missing data % computed from MLE estimates of mean and covariance. % Iteration limit itlim is optional with default % value of 5 and optional output argument err is % sum of infinity norms of differences in % successive mean vectors and covariance matrices. % Optional outputs mu, sigma are MLE of mean, covar. % Note: sigmanew is converted from MLE to unbiased % estimator by multiplying it by n/(n-1), which % is what is done to the returned value. It is % assumed that data has a multivariate normal % distribution. % programmer: t. shores last rev.: 8/10/07 [n,p] = size(X); if (nargin == 1) itlim = 1; end % initial parameter estimate Xtilde = X; err = 0; % create arrays of absent, present data ndxabsent = cell(n,1); ndxpresent = cell(n,1); % compute ndx and replace NaNs by mean of present for k = 1:p x = Xtilde(:,k); if (length(find(isnan(x)>0))) x(isnan(x)) = mean(x(~isnan(x))); Xtilde(:,k) = x; end end munew = mean(Xtilde)'; % use biased estimate of covariance for MLE sigmanew = (n-1)/n*cov(Xtilde); % main loop for m = 1:itlim muold = munew; sigmaold = sigmanew; T1 = zeros(size(muold)); T2 = zeros(size(sigmaold)); for k = 1:n x = X(k,:)'; tmp = find(isnan(x)); if (length(tmp)>0) ndxabsent{k,1} = tmp; end tmp = find(~isnan(x)); if (length(tmp)>0) ndxpresent{k,1} = tmp; end if (length(ndxabsent{k,1}) ~= 0) sigma11 = sigmaold(ndxabsent{k,1},ndxabsent{k,1}); sigma12 = sigmaold(ndxabsent{k,1},ndxpresent{k,1}); sigma22 = sigmaold(ndxpresent{k,1},ndxpresent{k,1}); x2 = x(ndxpresent{k,1}); x1 = muold(ndxabsent{k,1}) + sigma12*(sigma22\(x2 - muold(ndxpresent{k,1}))); Xtilde(k,ndxabsent{k,1}) = x1'; end T1 = T1 + Xtilde(k,:)'; T2 = T2 + Xtilde(k,:)'*Xtilde(k,:); if (length(ndxabsent{k,1}) ~= 0) T2(ndxabsent{k,1},ndxabsent{k,1}) = T2(ndxabsent{k,1},ndxabsent{k,1}) ... + sigma11 - sigma12*inv(sigma22)*sigma12'; end end munew = 1/n*T1; sigmanew = 1/n*T2-munew*munew'; end if (itlim > 0) err = norm(munew-muold,inf)+norm(sigmanew-sigmaold,inf); else err = norm(munew,inf)+norm(sigmanew,inf) % not really error end munew = munew'; sigmanew = n/(n-1)*sigmanew;