function [postMeanMu, sigmaSq, lambdaSq, Gma, Ala, rho, theta, m, clusts, glocs, B_bar, muSim] = clusterBayes(M, Y, X, clusts, noBatch, a0, b0, c0, d0) %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%% Performs the Gibbs sampler for the region of interest %%% %%% modified from DuBois Bowman's code by Brian Caffo %%% %%% on 12/06 %%% %%% %%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%--------load the data--------------%%% % Y = an n x v matrix of observed Voxel values where % v is the number of voxels (with data) and n is the number of % subjects % X = the design matrix, ASSUMED TO BE FULL RANK AND POPULATED ONLY % WITH ZEROS AND ONES!!! should be of dimension n x p where p is % the number of groups. %clusts = a structure containing everything about the ROI definitions % clusts.locs - a v x 1 vector that contains ints with each integer % corresponding to a voxel in a cluster. So, for example, if the % first 3 voxels are in the first cluster, clusts.locs should start % with [1 1 1 ...]; % M = the total number of iterations %nobatch = number of batches batchSize = M / noBatch; % this processing just gets everything into an appropriate form clusts.labs = unique(clusts.locs); G = length(clusts.labs); % redefine the cluster labels to the integers 1 : G. for g = 1 : G; locs = find(clusts.locs == clusts.labs(g)); glocs(g).clusts = locs; clusts.Vg(g) = length(locs); end; % define some constants, these mostly follow the notation from the % document maxVg = max([clusts.Vg]); G = length(clusts.Vg); K = size(X, 1); J = size(X,2); V = size(Y,2); % the number of subjects for each condition nj = sum(X, 1); % Contrast avereraged over subjects by group B_bar = inv(X'*X)*X'*Y; %-----Automated starting values-----------% % sigma for j = 1 : J; temp = (1 : K); % idxs is the indices of the subjects from each group idxs(j).value = temp(X(: , j) == 1); alphaStart(j).value = zeros(G, nj(j)); for g = 1 : G; sigmaSqStart(j).value(1, g) = mean(var(Y(idxs(j).value, glocs(g).clusts))); lambdaSqStart(j).value(1, g) = var(B_bar(j, glocs(g).clusts)); muStart(j).value(g).value = mean(Y(idxs(j).value, glocs(g).clusts, 1)); alphaStart(j).value(g, :) = mean(Y(idxs(j).value, glocs(g).clusts), 2); end; GammaStart(j).value = cov(alphaStart(j).value'); end; %-----Hyperparameters-------% % Wishart params for Gamma_j h0 = nj(j); %H0 = diag(diag(GammaStart(1).value)) ./ 2 + diag(diag(GammaStart(2).value)) ./ 2; H0 = GammaStart(1).value ./ 2 + GammaStart(2).value ./ 2; % hyperparameter for mu for j = 1 : J; for g = 1 : G; %mu0(j).value(g) = mean(muStart(j).value(g).value); %set it equal to zero. Only do this if you've done some sort of %within group scaling! mu0(j).value(g) = 0; end; end; for j = 1 : J; % the following group of code calculates the necessary % mean for updating alpha and puts it in a convenient form % the mean for that clusts for that subject clear temp; %in case it's already in use for i = 1 : K; temp(i).value = zeros(G, 1); for g = 1 : G; temp(i).value(g, 1) = mean(Y(i, glocs(g).clusts)); end; end; % now get it into the form needed, a structured list % for each subject with this particular value of j iter = 1; for i = idxs(j).value; B_bar_ij(iter).value = temp(i).value; iter = iter + 1; end; % Note that the parameter values do not contain a j index, parms.sigmasq = sigmaSqStart(j).value; parms.lambdasq = lambdaSqStart(j).value; parms.alpha = alphaStart(j).value; parms.Gamma = GammaStart(j).value; parms.mu = muStart(j).value; % this is needed in the first iteration, i.e. to have an empty matrix % to fill in muBar_j = zeros(G, 1); %% set up what we want to save and put in the first iteration's value postMeanMu(j).value = zeros(V, 1); % you can only save all of the mu simulations if you are running a % small number of jobs (or have a lot of memory) muSim(j).value = zeros(noBatch, V); for g = 1 : G; postMeanMu(j).value(glocs(g).clusts, 1) = parms.mu(g).value; %muSim(j).value(1, glocs(g).clusts) = parms.mu(g).value; end; sigmaSq(j).value = zeros(M + 1, G); sigmaSq(j).value(1, :) = parms.sigmasq; lambdaSq(j).value = zeros(M + 1, G); lambdaSq(j).value(1, :) = parms.lambdasq; Gma(j).value = zeros(M + 1, G ^ 2); Gma(j).value(1, :) = reshape(parms.Gamma, 1, G ^ 2); Ala(j).value = zeros(M + 1, G * nj(j)); Ala(j).value(1, :) = reshape(parms.alpha, 1, G * nj(j)); theta(j).value = zeros(M + 1, G); rho(j).value = zeros(M + 1, G); for g = 1 : G; theta(j).value(1, g) = sum(parms.mu(g).value); rho(j).value(1, g) = parms.Gamma(g, g) / (parms.Gamma(g, g) + parms.sigmasq(g)); end; %%%----------------------------------------------------%%% %%% Start the simulation %%% %%%----------------------------------------------------%%% for m = 1 : M; %%----------------- Gamma -------------%% H1 = 0; for i = 1 : nj(j); H1 = H1 + parms.alpha(: , i) * parms.alpha(: , i)'; end; center = h0 * H0 + H1; center = inv(center); % occasionally a numerical inverse of a symmetric matrix % is not symmetric. Below forces it to be symmetric. % This is a really poor solution. In contrast, one could % use a matrix inversion program specifically for positive % definite symmetric matrices center = (center + center') / 2; % note we need gammaInv later gammaInv = wishrnd(center, h0 + nj(j)); parms.Gamma = inv(gammaInv); for g = 1 : G; %%------------- mu ----------------------%% Vg = clusts.Vg(g); mu = parms.mu(g).value; omega = 1 / (1 / parms.lambdasq(g) + nj(j) / parms.sigmasq(g)); alpha_bar_gj = mean(parms.alpha(g, :)); condMean = omega * (mu0(j).value(g) / parms.lambdasq(g) + nj(j) * B_bar(j, glocs(g).clusts)'); parms.mu(g).value = normrnd(condMean, sqrt(omega)); %%----------------sigmasq----------------%% % I think that this is quite slow, and there are portions % that could be done at start up mat1 = Y(idxs(j).value, glocs(g).clusts); mat2 = repmat(parms.mu(g).value', nj(j), 1); mat3 = repmat(parms.alpha(g, :)', 1, Vg); temp = (mat1 - mat2 - mat3); Ugj = sum(sum(temp .^ 2)); parms.sigmasq(g) = 1 / gamrnd(a0 + nj(j) * Vg / 2, (1 / b0 + Ugj / 2) ^ (-1)); %%-------------------lambda--------------%% muDiff = (parms.mu(g).value - mu0(j).value(g)); muDiff = sum(muDiff .^ 2) / 2; ilambdasq = gamrnd(c0 + Vg / 2, (1 / d0 + muDiff)^(-1)); parms.lambdasq(g) = 1 / ilambdasq; % this is needed for the mean for alpha muBar_j(g) = mean(parms.mu(g).value); % save the mu simulations, it's easier to do this here, when % we're already looping over G postMeanMu(j).value(glocs(g).clusts, 1) * m / (m + 1) + parms.mu(g).value / (m + 1); if (mod(m, batchSize) == 0); %saves only every now and again muSim(j).value(m / batchSize, glocs(g).clusts) = parms.mu(g).value; end; theta(j).value(m + 1, g) = sum(parms.mu(g).value) / Vg; rho(j).value(m + 1, g) = parms.Gamma(g, g) / (parms.Gamma(g, g) + parms.sigmasq(g)); end; %%---------------alpha-----------------%% Dinv = diag(clusts.Vg ./ parms.lambdasq); psi = inv(gammaInv + Dinv); % this forces symmetry, which can be lost by numerical rounding as % the inversion function doesn't force it % psi = (psi + psi') / 2; % could eliminate this loop with by combining everything into % matrices, but it's working and I don't want to mess with it for i = 1 : nj(j); condMean = psi * Dinv * (B_bar_ij(i).value - muBar_j); parms.alpha(: , i) = mvnrnd(condMean, psi); end; %%---update all of the parmaters---%% % (note that mu is done above) sigmaSq(j).value(m + 1, :) = parms.sigmasq; lambdaSq(j).value(m + 1, :) = parms.lambdasq; Gma(j).value(m + 1, :) = reshape(parms.Gamma, 1, G ^ 2); Ala(j).value(m + 1, :) = reshape(parms.alpha, 1, G * nj(j)); %%---save in progress---%% if (mod(m, 100) == 0); disp(['Ending iteration ' num2str(m)]); % disp(['Saving for iteration ' m]); %save 'simvals' postMeanMu sigmaSq lambdaSq Gma Ala m j; end; end; end;