clear all
clc

%% download HapMap genetic maps (Only have to do for the first run!)
% For every autosome (1..22) two genetic map files are fetched into
% ./recomb_hotspots: the build-36 map (plain text on the server) and the
% build-35 map (gzipped on the server). After this cell both builds are
% present as a .txt file together with a .txt.gz file.
mkdir('recomb_hotspots');
for ii = 1:22
    ii  % deliberately unsuppressed: echoes the chromosome being fetched

    % build 36: download the text file, then create a gzipped copy of it
    b36_url = sprintf('ftp://ftp.ncbi.nlm.nih.gov/hapmap/recombination/2008-03_rel22_B36/rates/genetic_map_chr%d_b36.txt',ii);
    b36_txt = sprintf('./recomb_hotspots/genetic_map_chr%d_b36.txt',ii);
    urlwrite(b36_url, b36_txt);
    gzip(b36_txt);

    % build 35: download the gzipped file, then unpack it
    b35_url = sprintf('ftp://ftp.ncbi.nlm.nih.gov/hapmap/recombination/2006-10_rel21_phaseI+II/rates/genetic_map_chr%d.txt.gz',ii);
    b35_gz = sprintf('./recomb_hotspots/genetic_map_chr%d_b35.txt.gz',ii);
    urlwrite(b35_url, b35_gz);
    gunzip(b35_gz);
end

%% save genetic maps as matlab workspace (Only have to do for the first run!)
% For each build (b36 first, then b35) and each autosome (1..22):
%   1. parse ./recomb_hotspots/genetic_map_chr<N>_<build>.txt
%      (space delimited, one header line),
%   2. save the three columns as pos0 (column 1), rt0 (column 2) and
%      cm0 (column 3) in the matching .mat file,
%   3. delete the text file.
% The file is closed explicitly after parsing: leaving it open leaks one
% handle per chromosome and makes the subsequent delete fail on platforms
% that refuse to remove open files.
builds = {'b36','b35'};
for bb = 1:numel(builds)
    for ii = 1:22
        ii  % deliberately unsuppressed: echoes the chromosome being converted
        stem = sprintf('./recomb_hotspots/genetic_map_chr%d_%s',ii,builds{bb});
        fn = [stem,'.txt'];

        fid = fopen(fn);
        if fid == -1
            error('Could not open genetic map file: %s',fn);
        end
        try
            C = textscan(fid,'%s%s%s','delimiter',' ','headerlines',1);
        catch readErr
            fclose(fid);  % do not leak the handle when parsing fails
            rethrow(readErr);
        end
        fclose(fid);      % must be closed before the file is deleted below

        % Columns are read as text and converted here, so entries that are
        % not numeric become NaN instead of aborting the parse.
        pos0 = str2double(C{1});
        cm0 = str2double(C{3});
        rt0 = str2double(C{2});
        save([stem,'.mat'],'pos0','cm0','rt0');
        delete(fn);
    end
end

%% set parameters

% P value threshold: the algorithm looks at loci with P value below this threshold
alph = 0.01;

% Pruning distance in 0.1 centimorgan units
cm = 2;

% Number of P value bins
hm = 50;

% log10(r-square) range for your phenotype
lr2_range = [-5, -2.5];

%% your data needs to be read in here

% Replace "your_data" with your data file.
% It should contain the following variables:
%
% p         : List of P values
% chr       : List of chromosome numbers (same size as p)
% pos       : List of physical positions (same size as p)
% n         : List of sample sizes (for each SNP)
%
% Only these four variables are loaded, so that other variables stored in
% the data file cannot silently overwrite the parameters set above
% (alph, cm, hm, ...).
load('your_data.mat','p','chr','pos','n');

% p, chr and pos are indexed with the same index vector below, so they
% have to be aligned element by element.
if numel(chr) ~= numel(p) || numel(pos) ~= numel(p)
    error('p, chr and pos must have the same number of elements');
end

n0 = median(n);   % single representative sample size, used by the estimation step
pMin = min(p);    % smallest P value, used to place the last bin edge

% Indices of all SNPs below the P value threshold
suz = find(p<alph);

% Prune the hits; cm is given in 0.1 cM units, hence cm/10.
ss = hit2loci_cM36(p(suz),chr(suz),pos(suz),cm/10); % If your "pos" values refer to NCBI-build36

% If your positions are on build 35 use the following command instead:
% ss = hit2loci_cM35(p(suz),chr(suz),pos(suz),cm/10);

suz = suz(ss);

% suz       : output variable contains the indices of independent hits below "alph"

%% estimating all hits
% Build hm+1 descending P value bin edges: hm edges equally spaced on the
% log(-log10(p)) scale, starting at the value corresponding to alph and
% ending 0.1 beyond the value corresponding to pMin, followed by a final
% edge at 0. fo(k) is the number of retained hits strictly between edge k
% and edge k+1 (a P value exactly equal to an edge is counted in no bin).
pMinLim = log(-log10(alph));
upperLim = log(-log10(pMin))+.1;
p_lim = [10.^-exp(linspace(pMinLim,upperLim,hm)), 0];

pHits = p(suz);   % P values of the retained hits
fo = zeros(hm,1);
for k = 1:hm
    insideBin = (pHits < p_lim(k)) & (pHits > p_lim(k+1));
    fo(k) = sum(insideBin);
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Load the estimated number of hits for ~2.5M HapMap SNPs at the chosen
% alpha level and cm pruning parameter. The file name is derived from
% alph and cm; fo0 and fso0 used below are presumably provided by this
% file (they are not defined anywhere else in this script).
% NOTE: If the number of your SNPs is not ~2.5M, or the general LD
% structure differs from that of HapMap2-CEU, you need to calculate
% these values by phenotype permutation based on your own data.

nullFile = sprintf('null_data_alpha%1.5f_0p%dcM.mat',alph,cm);
load(nullFile)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% NOTE: "est_totvar_theor" is a simplified version of the original
% function used in the paper. Its power calculations do not account for
% the pruning procedure, hence it overestimates the total explained
% variance (by underestimating the power). In the original function the
% power matrix P was estimated by simulations. Results of the two
% methods may differ by a couple of percent of total explained variance.

[x, se, r2, invH] = est_totvar_theor(fo, fo0, fso0, n0);
x0 = log10(r2);   % r-square values on a log10 scale, used for plotting
mu = r2*x;        % point estimate reported below as a percentage

% A complex standard error is reported as a binning problem.
if imag(se)~=0
    error('Too many bins');
end

% Point estimate and interval of +/- 1.96 standard errors, in percent
ciLower = mu-1.96*se;
ciUpper = mu+1.96*se;
fprintf('%2.3f%% CI=[%2.3f%%-%2.3f%%]\n',100*mu,100*ciLower,100*ciUpper);

% Bar chart of the estimates x against log10(r^2)
bar(x0, x);
set(gca, 'fontsize', 12);
xlabel('log_1_0 (r^2)');
ylabel('count');


