-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmt_plot_seq_effects.m
More file actions
113 lines (95 loc) · 3.02 KB
/
mt_plot_seq_effects.m
File metadata and controls
113 lines (95 loc) · 3.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
%MT_PLOT_SEQ_EFFECTS - Plots sequence effects for all arrays
%
% MT_PLOT_SEQ_EFFECTS(PROBES,VARARGIN)
%
% INPUT
% PROBES Probe structure for which to calculate cor factors
% VARARGIN 'chars' : nucelotides to count ('A','C', 'G', and/or 'T').
% default [2,3] (i.e. 'C' and 'G')
% 'difference' : Show difference values w.r.t mean instead of absolute values
% 'seq_range' : Use only part of probes sequences (default: 1:25)
% 'limit' : If for certain nucelotide count, probe count < limit, then
% do not show that nucleotide count. Default: 100
%
%
% DESCRIPTION
% Displays single-nucleotide sequence effects for probes with a
% specific amount of certain nucleotides.
%
% SEE ALSO
% MT_COR_HYBAMP, MT_PLOT_ARRAY, MT_PLOT_AMP_EFFECTS, MT_PLOT_GENE
%
% (c) Marc Hulsman, 2009
% Information & Communication Theory Group
% Faculty of Electrical Engineering, Mathematics and Computer Science
% Delft University of Technology, Mekelweg 4, 2628 CD Delft, The Netherlands
function mt_plot_seqeffects(probes,varargin)
chars = [2,3];
seq_range = 1:25;
limit = 100;
sel_arrays = 1:size(probes.pm,1);
for i = 1:length(varargin)
if(isstr(varargin{i}))
switch(varargin{i})
case 'chars',
chars = varargin{i+1};
i = i + 1;
case 'sel_arrays',
i = i + 1;
sel_arrays = varargin{i};
case 'difference',
difference = 1;
case 'seq_range',
i = i + 1;
seq_range = varargin{i};
case 'limit',
i = i + 1;
limit = varargin{i};
end;
end;
end;
dna_letters = ['A','C','G','T'];
q = upper(char(probes.sequence));
q = q(:,seq_range);
nprobe = size(probes.pm,2);
nucleo_count = zeros(nprobe,4);
for i = 1:length(dna_letters)
t = (q == dna_letters(i));
nucleo_count(:,i) = sum(t,2);
end;
if(length(chars) > 1)
gc_content = sum(nucleo_count(:,chars),2);
else
gc_content = nucleo_count(:,chars);
end;
gc_u = unique(gc_content)';
gc_nr = histc(gc_content,gc_u)';
[gc_u; gc_nr]
remove = gc_nr < limit;
gc_u = gc_u(~remove);
gc_nr = gc_nr(~remove);
narray = size(probes.pm,1);
signal = mt_real_signal(probes);
data = zeros(narray,length(gc_u));
for i = 1:length(gc_u)
data(:,i) = median(signal(:,find(gc_content == gc_u(i))),2);
end;
x = jet(length(sel_arrays));
formats = {'-', ':', '-.', '--'};
if(exist('difference'))
data = data - repmat(mean(data),narray,1);
end;
for a = 1:length(sel_arrays)
i = sel_arrays(a);
plot(data(i,:),formats{mod(i, 4) + 1},'Color',x(a,:));
hold on;
end;
hold off;
set(gca,'XTick',1:length(gc_u))
set(gca,'XTickLabel',gc_u)
xlabel('Number of specified nucleotides in probe')
ylabel('Log_2 median expression intensity')
title(strcat('Sequence effect for nucleotide(s): ', dna_letters(chars)));
if(length(sel_arrays) < 30)
legend(probes.array_names(sel_arrays), 'Location','EastOutside')
end;