-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcheckTree.js
More file actions
executable file
·306 lines (286 loc) · 9.13 KB
/
checkTree.js
File metadata and controls
executable file
·306 lines (286 loc) · 9.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
#!/usr/bin/env node
// Script setup: load dependencies, read command-line options, and open the id file.
//
// Command-line options (parsed by minimist):
//   --swagger <url>  server URL stored in global.gramene.defaultServer
//                    (falls back to the vitis1 swagger URL below when absent/falsy)
//   --ids <file>     path handed to fs.createReadStream
var through2 = require('through2');
var byline = require('byline');
var fs = require('fs');
var argv = require('minimist')(process.argv.slice(2));
var url = argv.swagger || 'https://data.gramene.org/vitis1/swagger';
// NOTE(review): no check that --ids was supplied; idFile is passed to
// fs.createReadStream as-is.
var idFile = argv.ids;
// global.gramene is assigned BEFORE gramene-search-client is required.
// Presumably the client reads defaultServer when it is loaded - confirm before reordering.
global.gramene = {defaultServer: url};
// `gramene` is used below as a thenable that resolves to the API client.
var gramene = require('gramene-search-client').client.grameneClient;
let GrameneTrees = require('gramene-trees-client');
// byline wraps the file stream; each chunk it emits is later converted with
// toString() and used as a single id (presumably one id per line - confirm against byline docs).
var reader = byline(fs.createReadStream(idFile));
// Object-mode transform stream: receives one id per chunk, requests the
// matching gene tree(s) from the 'Data access' / 'genetrees' endpoint and
// pushes the response's `obj` payload downstream.
//
// A failed request (or a failure to obtain the client) is reported through the
// transform callback so the stream emits an error instead of never calling
// `done` and leaving the pipeline stalled.
var fetcher = through2.obj(function(id, enc, done) {
  var that = this;
  id = id.toString();
  gramene
    .then(function(client) {
      // rows: -1 is passed through unchanged from the original request.
      return client['Data access']['genetrees']({idList: [id], rows: -1});
    })
    .then(
      function(res) {
        that.push(res.obj);
        done();
      },
      function(err) {
        // Always hand the stream a real Error so `done` signals failure even
        // when the rejection value is empty or a plain response object.
        done(err instanceof Error
          ? err
          : new Error('genetree fetch failed for id ' + id + ': ' + String(err)));
      }
    );
});
var checkTree = function checkTree() {
var results = [];
var transform = function(tree, enc, done) {
let genetree = GrameneTrees.genetree.tree(tree);
GrameneTrees.extensions.addConsensus(genetree);
// find the topmost nodes matching a predicate, e.g. Zea (taxon_id 4575) speciation nodes
// (recursive depth-first traversal; the walk does not descend below a matching node)
// compare each protein (leaf) in the matched subtree to the consensus at that node
// for each protein output one tab-separated row: gene id, taxon id, and the similarity score against the consensus
// Depth-first search for the topmost nodes accepted by `test`.
//   node    - subtree root; must provide hasChildren() and, when that is true, `children`
//   test    - predicate called with a node
//   process - callback invoked with each accepted node
// Once a node is accepted its descendants are not visited; siblings still are.
function walkTo(node, test, process) {
  if (test(node)) {
    process(node);
    return;
  }
  if (!node.hasChildren()) {
    return;
  }
  for (const child of node.children) {
    walkTo(child, test, process);
  }
}
// Predicate: does this node root a maize-only subtree?
//   - true when the node's taxon_name matches /zea/i
//   - for a node whose taxon_name is exactly "unknown": true only when every
//     child is itself a maize subtree (a node with an empty children list
//     therefore yields true)
//   - false otherwise
// Always returns a strict boolean; the previous `&=` accumulation returned the
// numbers 0/1 for "unknown" nodes and visited every child even after a miss.
function isMaizeSubtree(node) {
  const taxonName = node.model.taxon_name;
  if (/zea/i.test(taxonName)) {
    return true;
  }
  if (taxonName !== "unknown") {
    return false;
  }
  return node.children.every(function(childNode) {
    return isMaizeSubtree(childNode);
  });
}
// Predicate: does this node root a sorghum-only subtree?
//   - true when the node's taxon_name matches /sorghum/i
//   - for a node whose taxon_name is exactly "unknown": true only when every
//     child is itself a sorghum subtree (a node with an empty children list
//     therefore yields true)
//   - false otherwise
// Always returns a strict boolean; the previous `&=` accumulation returned the
// numbers 0/1 for "unknown" nodes and visited every child even after a miss.
function isSorghumSubtree(node) {
  const taxonName = node.model.taxon_name;
  if (/sorghum/i.test(taxonName)) {
    return true;
  }
  if (taxonName !== "unknown") {
    return false;
  }
  return node.children.every(function(childNode) {
    return isSorghumSubtree(childNode);
  });
}
// Predicate: does this node root an Oryza-only subtree?
//   - true when the node's taxon_name matches /oryza/i
//   - for a node whose taxon_name is exactly "unknown": true only when every
//     child is itself an Oryza subtree (a node with an empty children list
//     therefore yields true)
//   - false otherwise
// Always returns a strict boolean; the previous `&=` accumulation returned the
// numbers 0/1 for "unknown" nodes and visited every child even after a miss.
function isOryzaSubtree(node) {
  const taxonName = node.model.taxon_name;
  if (/oryza/i.test(taxonName)) {
    return true;
  }
  if (taxonName !== "unknown") {
    return false;
  }
  return node.children.every(function(childNode) {
    return isOryzaSubtree(childNode);
  });
}
// Predicate: true for a node whose node_type is "speciation" and whose
// taxon_name does NOT match /sorghum/i.
function isNotSorghumSpeciation(node) {
  const model = node.model;
  if (model.node_type !== "speciation") {
    return false;
  }
  const mentionsSorghum = /sorghum/i.test(model.taxon_name);
  return !mentionsSorghum;
}
// Predicate: true for a node whose node_type is "speciation" and whose
// taxon_name matches /sorghum/i.
function isSorghumSpeciation(node) {
  const model = node.model;
  return model.node_type !== "speciation"
    ? false
    : /sorghum/i.test(model.taxon_name);
}
// Predicate: true for a speciation node whose taxon_name is exactly "Zea".
function isZeaSpeciation(node) {
  const model = node.model;
  if (model.taxon_name !== "Zea") {
    return false;
  }
  return model.node_type === "speciation";
}
// Predicate: true for a speciation node whose taxon_name is exactly "Andropogoneae".
function isAndropogoneaeSpeciation(node) {
  const model = node.model;
  const isAndropogoneae = model.taxon_name === "Andropogoneae";
  return isAndropogoneae ? model.node_type === "speciation" : false;
}
// Predicate: true for a speciation node whose taxon_name is exactly "Poaceae".
function isPoaceaeSpeciation(node) {
  const model = node.model;
  if (model.taxon_name === "Poaceae") {
    return model.node_type === "speciation";
  }
  return false;
}
// Predicate: true for a leaf (no children) whose taxon_id is the number 29760.
// NOTE(review): the meaning of "PN" and of taxon 29760 is not stated in this file - confirm.
function isPN(node) {
  if (node.model.taxon_id !== 29760) {
    return false;
  }
  return !node.hasChildren();
}
// Predicate: true for a leaf (no children) whose taxon_id is one of
// 29760, 297600000 or 4558 (compared as numbers).
// NOTE(review): why these three ids are "flagged" is not stated in this file - confirm.
function isFlagged(node) {
  if (node.hasChildren()) {
    return false;
  }
  const flaggedTaxonIds = [29760, 297600000, 4558];
  return flaggedTaxonIds.includes(node.model.taxon_id);
}
// Predicate: true for a speciation node whose taxon_name is exactly "rosids".
function isRosidSpeciation(node) {
  const model = node.model;
  if (model.taxon_name !== "rosids") {
    return false;
  }
  return model.node_type === "speciation";
}
// Weighted similarity of b's consensus sequence against a's consensus.
//
//   a, b - nodes exposing model.consensus.sequence (indexable, elements are
//          compared against the char code of '-'); `a` also supplies
//          model.consensus.frequency and model.consensus.nSeqs.
//
// Each column i where A is not a gap carries weight frequency[i] / nSeqs.
// The score adds that weight when B has the same residue, subtracts it when B
// has a different residue, and ignores the column when B has a gap. The sum is
// divided by the total weight of A's non-gap columns.
//
// Returns 0 when the sum is negative, and also 0 when A has no weighted
// non-gap columns (previously this case divided 0 by 0 and returned NaN).
function coverage_similarity(a, b) {
  const consensusA = a.model.consensus;
  const seqA = consensusA.sequence;
  const seqB = b.model.consensus.sequence;
  const frequency = consensusA.frequency;
  const nSeqs = consensusA.nSeqs;
  const gapCode = '-'.charCodeAt(0);

  let totalA = 0;
  let matches = 0;
  for (let i = 0; i < seqA.length; i++) {
    if (seqA[i] === gapCode) {
      continue;
    }
    // normalise the column frequency by the number of sequences in the msa
    const weight = frequency[i] / nSeqs;
    totalA += weight;
    if (seqB[i] === gapCode) {
      continue;
    }
    if (seqB[i] === seqA[i]) {
      matches += weight;
    } else {
      matches -= weight;
    }
  }

  if (totalA === 0 || matches < 0) {
    return 0;
  }
  return matches / totalA;
}
// function coverage_similarity(a,b) {
// let seqA = a.model.consensus.sequence;
// let seqB = b.model.consensus.sequence;
// let total=0;
// let aligned=0;
// const gapCode = '-'.charCodeAt(0);
// for(var i=0; i<seqA.length; i++) {
// if (seqA[i] !== gapCode) {
// total++;
// if (seqB[i] !== gapCode) {
// aligned++;
// }
// }
// }
// return aligned/total;
// }
// Appends one tab-separated row to `results` for every leaf visited by
// node.all(), scoring the leaf against the consensus stored on `node`.
// Columns: tree stable id, tree taxon id, node id, node consensus nSeqs,
// leaf taxon id, leaf system name, coverage_similarity(node, leaf),
// node_type of the leaf's parent, leaf gene stable id.
function compareToConsensus(node) {
  const emitRow = function(candidate) {
    // only leaves produce a row
    if (candidate.hasChildren()) {
      return;
    }
    const columns = [
      genetree.model.tree_stable_id,
      genetree.model.taxon_id,
      node.model.node_id,
      node.model.consensus.nSeqs,
      candidate.model.taxon_id,
      candidate.model.system_name,
      coverage_similarity(node, candidate),
      candidate.parent.model.node_type,
      candidate.model.gene_stable_id
    ];
    results.push(columns.join("\t"));
  };
  node.all(emitRow);
}
// Scores every leaf of the non-sorghum child of `nodeA` against the
// "Sorghum bicolor" child of `nodeA`.
//
// Among nodeA's direct children, the one whose taxon_name is exactly
// "Sorghum bicolor" becomes the reference; any other child is treated as the
// maize side (if there are several, the last one wins). Nothing is emitted
// unless both sides are present.
//
// For each leaf under the maize side one tab-separated row is appended to
// `results`: tree stable id, nodeA id, nodeA consensus nSeqs, leaf taxon name,
// coverage_similarity(sorghum, leaf), GrameneTrees.extensions.identity(leaf,
// sorghum), leaf gene stable id.
//
// Fix: the similarity helper was called under a misspelled name, which threw a
// ReferenceError as soon as a row was produced.
function compareToConsensusOfSorghum(nodeA) {
  let sorghum_node;
  let maize_node;
  nodeA.children.forEach(function(childNode) {
    if (childNode.model.taxon_name === "Sorghum bicolor") {
      sorghum_node = childNode;
    }
    else {
      maize_node = childNode;
    }
  });
  if (!maize_node || !sorghum_node) {
    return;
  }
  maize_node.all(function(leaf) {
    if (leaf.hasChildren()) {
      return;
    }
    results.push([
      genetree.model.tree_stable_id,
      nodeA.model.node_id,
      nodeA.model.consensus.nSeqs,
      leaf.model.taxon_name,
      coverage_similarity(sorghum_node, leaf),
      GrameneTrees.extensions.identity(leaf, sorghum_node),
      leaf.model.gene_stable_id
    ].join("\t"));
  });
}
// Scores every leaf of the non-Arabidopsis child of `nodeA` against the child
// whose taxon_id is 3702.
//
// Among nodeA's direct children, the one with taxon_id === 3702 becomes the
// reference (`ath_node`); any other child is treated as the grape side (if
// there are several, the last one wins). Nothing is emitted unless both sides
// are present.
//
// For each leaf under the grape side one tab-separated row is appended to
// `results`: tree stable id, nodeA id, nodeA consensus nSeqs, leaf taxon id,
// coverage_similarity(ath, leaf), GrameneTrees.extensions.identity(leaf, ath),
// leaf gene stable id.
//
// Fixes: the similarity helper was called under a misspelled name (a
// ReferenceError at runtime), and a leftover console.log wrote child taxon ids
// to stdout, the same stream the report rows are piped to.
function compareToConsensusOfArabidopsis(nodeA) {
  let ath_node;
  let grape_node;
  nodeA.children.forEach(function(childNode) {
    if (childNode.model.taxon_id === 3702) {
      ath_node = childNode;
    }
    else {
      grape_node = childNode;
    }
  });
  if (!ath_node || !grape_node) {
    return;
  }
  grape_node.all(function(leaf) {
    if (leaf.hasChildren()) {
      return;
    }
    results.push([
      genetree.model.tree_stable_id,
      nodeA.model.node_id,
      nodeA.model.consensus.nSeqs,
      leaf.model.taxon_id,
      coverage_similarity(ath_node, leaf),
      GrameneTrees.extensions.identity(leaf, ath_node),
      leaf.model.gene_stable_id
    ].join("\t"));
  });
}
// Scores sorghum leaves against a rice node found under `poaceae_node`.
//
// walkTo() is used twice on poaceae_node: once for taxon_id 39947 (kept as the
// rice node) and once for taxon_id 4558 (kept as the sorghum node). When walkTo
// reports several matches the last one reported is kept. Nothing is emitted
// unless both nodes are found.
//
// For each leaf visited by sorghum_node.all() one tab-separated row is
// appended to `results`: tree stable id, poaceae node id, poaceae consensus
// nSeqs, leaf taxon name, coverage_similarity(rice, leaf), leaf gene stable id.
function compareSorghumToConsensusOfRice(poaceae_node) {
  const locateByTaxonId = function(taxonId) {
    let found;
    walkTo(
      poaceae_node,
      function(candidate) {
        return (candidate.model.taxon_id === taxonId);
      },
      function(candidate) {
        found = candidate;
      }
    );
    return found;
  };

  const rice_node = locateByTaxonId(39947);
  const sorghum_node = locateByTaxonId(4558);
  if (!rice_node || !sorghum_node) {
    return;
  }

  sorghum_node.all(function(leaf) {
    if (leaf.hasChildren()) {
      return;
    }
    const columns = [
      genetree.model.tree_stable_id,
      poaceae_node.model.node_id,
      poaceae_node.model.consensus.nSeqs,
      leaf.model.taxon_name,
      coverage_similarity(rice_node, leaf),
      leaf.model.gene_stable_id
    ];
    results.push(columns.join("\t"));
  });
}
// Climbs from `grape_gene` through .parent links until it reaches a node whose
// taxon_id is 3398 or a node without a parent, then appends one tab-separated
// row to `results`: the starting gene's stable id followed by the node id,
// taxon id and taxon name of the node where the climb stopped.
// NOTE(review): the significance of taxon 3398 is not stated in this file - confirm.
function getSince(grape_gene) {
  let ancestor = grape_gene;
  for (;;) {
    const atTop = !ancestor.parent;
    if (atTop || ancestor.model.taxon_id === 3398) {
      break;
    }
    ancestor = ancestor.parent;
  }
  const columns = [
    grape_gene.model.gene_stable_id,
    ancestor.model.node_id,
    ancestor.model.taxon_id,
    ancestor.model.taxon_name
  ];
  results.push(columns.join("\t"));
}
// walkTo(genetree, isPoaceaeSpeciation, compareSorghumToConsensusOfRice);
// walkTo(genetree, isRosidSpeciation, compareToConsensus);
// walkTo(genetree, isSorghumSpeciation, compareToConsensus);
// walkTo(genetree, isSorghumSubtree, compareToConsensus);
// walkTo(genetree, isMaizeSubtree, compareToConsensus);
walkTo(genetree, isOryzaSubtree, compareToConsensus);
done();
};
var flush = function(done) {
this.push(results.join("\n"));
done();
}
return through2.obj(transform, flush);
};
// Stream wiring: ids from the reader go through the fetcher, the fetched trees
// go through a fresh checkTree() transform, and its output is written to stdout.
var fetchedTrees = reader.pipe(fetcher);
var report = fetchedTrees.pipe(checkTree());
report.pipe(process.stdout);