chars = ['a' 'c' 'g' 't']; % the dinucs array shows the frequency of observing the character in the % row followed by the character in the column % these values show strong preference for c-c dinucs = [2, 1, 2, 0; 0, 8, 0, 1; 2, 0, 2, 0; 1, 0, 0, 1]; % these values restrict transitions more %dinucs = [2, 0, 2, 0; 0, 8, 0, 0; 2, 0, 2, 0; 1, 1, 0, 1]; % calculate mononucleotide frequencies only as the probability of % starting with each nucleotide monocounts = sum(dinucs,2); monofreqs = monocounts/sum(monocounts); cmonofreqs = cumsum(monofreqs); % calculate dinucleotide frequencies and cumulative dinuc freqs freqs = dinucs./repmat(monocounts,1,4); cfreqs = cumsum(freqs,2); disp('Dinucleotide frequencies (transition probabilities)'); fprintf(' %c %c %c %c\n',chars) for i=1:4 fprintf('%c %f %f %f %f\n',chars(i),freqs(i,:)) end nseq = 10; for ntries=1:20 rnums = rand(nseq,1); % start sequence using mononucleotide frequencies seq(1) = min(find(cmonofreqs>=rnums(1))); for i=2:nseq % extend it using the appropriate row from the dinuc freqs seq(i) = min(find(cfreqs(seq(i-1),:)>=rnums(i))); end output=chars(seq); disp(strvcat(output)); end % force emission to match state (normal Markov model, not hidden) emit = diag(repmat(1,4,1)); [seq2,states]=hmmgenerate(10,freqs,emit) output2=chars(seq2); disp(strvcat(output2));