-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcfg_transform.cpp
More file actions
356 lines (330 loc) · 13 KB
/
cfg_transform.cpp
File metadata and controls
356 lines (330 loc) · 13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
#include <cassert>
#include "cfg_transform.h"
#include <algorithm>
#include <iostream>
#include <queue>
#include <unordered_map>
#include "highlevel.h"
#include "x86_64.h"
ControlFlowGraphTransform::ControlFlowGraphTransform(ControlFlowGraph* cfg)
: m_cfg(cfg) {}
ControlFlowGraphTransform::~ControlFlowGraphTransform() {}
ControlFlowGraph* ControlFlowGraphTransform::get_orig_cfg() {
return m_cfg;
}
ControlFlowGraph* ControlFlowGraphTransform::transform_cfg() {
auto result = new ControlFlowGraph();
//auto v = new LiveVregs(m_cfg);
//v->execute();
// map of basic blocks of original CFG to basic blocks in transformed CFG
std::map<BasicBlock*, BasicBlock*> block_map;
// iterate over all basic blocks, transforming each one
for (auto i = m_cfg->bb_begin(); i != m_cfg->bb_end(); ++i) {
BasicBlock* orig = *i;
//// transform the instructions
//m_live_set = v->get_fact_at_end_of_block(orig);
//LiveVregsControlFlowGraphPrinter printer(m_cfg, v);
//printer.print_basic_block(orig);
InstructionSequence* result_iseq = transform_basic_block(orig);
// create result basic block
BasicBlock* result_bb = result->create_basic_block(orig->get_kind(), orig->get_label());
block_map[orig] = result_bb;
// copy instructions into result basic block
for (auto j = result_iseq->cbegin(); j != result_iseq->cend(); ++j) {
result_bb->add_instruction((*j)->duplicate());
}
delete result_iseq;
}
// add edges to transformed CFG
for (auto i = m_cfg->bb_begin(); i != m_cfg->bb_end(); ++i) {
BasicBlock* orig = *i;
const ControlFlowGraph::EdgeList& outgoing_edges = m_cfg->get_outgoing_edges(orig);
for (auto j = outgoing_edges.cbegin(); j != outgoing_edges.cend(); ++j) {
Edge* orig_edge = *j;
BasicBlock* transformed_source = block_map[orig_edge->get_source()];
BasicBlock* transformed_target = block_map[orig_edge->get_target()];
result->create_edge(transformed_source, transformed_target, orig_edge->get_kind());
}
}
result = prune(result);
return result;
}
ControlFlowGraph* ControlFlowGraphTransform::prune(ControlFlowGraph* cfg) {
m_cfg = cfg;
m_live_vregs = new LiveVregs(cfg);
m_live_vregs->execute();
auto result = new ControlFlowGraph();
// map of basic blocks of original CFG to basic blocks in transformed CFG
std::map<BasicBlock*, BasicBlock*> block_map;
// iterate over all basic blocks, transforming each one
for (auto i = cfg->bb_begin(); i != cfg->bb_end(); ++i) {
BasicBlock* orig = *i;
//// transform the instructions
InstructionSequence* result_iseq = prune_basic_block(orig);
// create result basic block
BasicBlock* result_bb = result->create_basic_block(orig->get_kind(), orig->get_label());
block_map[orig] = result_bb;
// copy instructions into result basic block
for (auto j = result_iseq->cbegin(); j != result_iseq->cend(); ++j) {
result_bb->add_instruction((*j)->duplicate());
}
delete result_iseq;
}
// add edges to transformed CFG
for (auto i = cfg->bb_begin(); i != cfg->bb_end(); ++i) {
BasicBlock* orig = *i;
const ControlFlowGraph::EdgeList& outgoing_edges = cfg->get_outgoing_edges(orig);
for (auto j = outgoing_edges.cbegin(); j != outgoing_edges.cend(); ++j) {
Edge* orig_edge = *j;
BasicBlock* transformed_source = block_map[orig_edge->get_source()];
BasicBlock* transformed_target = block_map[orig_edge->get_target()];
result->create_edge(transformed_source, transformed_target, orig_edge->get_kind());
}
}
return result;
}
InstructionSequence* ControlFlowGraphTransform::prune_basic_block(BasicBlock* iseq) {
if (iseq->get_length() == 0) return iseq;
std::queue<int> mregs({
MREG_RAX, MREG_RBX, MREG_RCX, MREG_RDX,
MREG_R8, MREG_R9, MREG_R11, MREG_R12,
MREG_R13, MREG_R14, MREG_R15,
});
std::unordered_map<int, int> vreg_to_mreg;
LiveVregsControlFlowGraphPrinter printer(m_cfg, m_live_vregs);
auto result = new InstructionSequence();
for (unsigned i = 0; i < iseq->get_length(); i++) {
auto ins = iseq->get_instruction(i)->duplicate();
const auto set = m_live_vregs->get_fact_at_end_of_block(iseq);
if (ins->get_opcode() == HINS_LOAD_ICONST)
if (!set.test(ins->get_operand(0).get_base_reg()))
continue;
if (ins->get_opcode() == HINS_MOV)
if (!set.test(ins->get_operand(0).get_base_reg()) && ins->get_operand(1).get_kind() == OPERAND_INT_LITERAL)
continue;
result->add_instruction(ins);
}
return result;
}
///////////////////////////////////
// High Level CFG Transformation //
///////////////////////////////////
InstructionSequence* HighLevelControlFlowGraphTransform::transform_basic_block(InstructionSequence* iseq) {
if (iseq->get_length() == 0) return iseq;
//////////////////
// https://stackoverflow.com/questions/11408934/using-a-stdtuple-as-key-for-stdunordered-map
using key_t = std::tuple<int, int, int>;
struct key_hash : std::unary_function<key_t, std::size_t> {
std::size_t operator()(const key_t& k) const {
return std::get<0>(k) ^ std::get<1>(k) ^ std::get<2>(k);
}
};
struct key_equal : public std::binary_function<key_t, key_t, bool> {
bool operator()(const key_t& v0, const key_t& v1) const {
return (
std::get<0>(v0) == std::get<0>(v1) &&
std::get<1>(v0) == std::get<1>(v1) &&
std::get<2>(v0) == std::get<2>(v1)
);
}
};
using map_t = std::unordered_map<key_t, int, key_hash, key_equal>;
/////////////////
auto result = new InstructionSequence();
int lvn = 1;
std::unordered_map<int, int> const_to_vn;
std::unordered_map<int, int> vn_to_const;
std::unordered_map<int, int> vreg_to_vn;
std::unordered_map<int, std::vector<int>> vn_to_vregs;
map_t op_to_vn;
for (unsigned i = 0; i < iseq->get_length(); i++) {
auto ins = iseq->get_instruction(i)->duplicate();
const int opcode = ins->get_opcode();
switch (opcode) {
case HINS_LOAD_ICONST:
{
const int ival = ins->get_operand(1).get_int_value();
const int vreg = ins->get_operand(0).get_base_reg();
// if the left op has been seen before
if (vreg_to_vn.find(vreg) != vreg_to_vn.end()) {
// erase it from the respective vn's vregs
const int left_vn = vreg_to_vn[vreg];
std::vector<int>& vregs = vn_to_vregs[left_vn];
vregs.erase(std::remove(vregs.begin(), vregs.end(), vreg), vregs.end());
}
// if we've seen this const before
if (const_to_vn.find(ival) != const_to_vn.end()) {
// only add it to the vregs that hold it
const int vn = const_to_vn[ival];
vreg_to_vn[vreg] = vn;
vn_to_vregs[vn].push_back(vreg);
}
else {
const int vn = lvn++;
const_to_vn[ival] = vn;
vn_to_const[vn] = ival;
vreg_to_vn[vreg] = vn;
vn_to_vregs[vn].push_back(vreg);
}
result->add_instruction(ins);
break;
}
case HINS_MOV:
{
const Operand left_op = ins->get_operand(0);
const Operand right_op = ins->get_operand(1);
const int right_vn = vreg_to_vn.find(right_op.get_base_reg()) != vreg_to_vn.end()
? vreg_to_vn[right_op.get_base_reg()]
: lvn++;
// if the left op has been seen before
if (vreg_to_vn.find(left_op.get_base_reg()) != vreg_to_vn.end()) {
// erase it from the respective vn's vregs
const int left_vn = vreg_to_vn[left_op.get_base_reg()];
std::vector<int>& vregs = vn_to_vregs[left_vn];
vregs.erase(std::remove(vregs.begin(), vregs.end(), left_op.get_base_reg()), vregs.end());
}
// if the right op has been seen before
if (vreg_to_vn.find(right_op.get_base_reg()) != vreg_to_vn.end()) {
// replace it with the first instance of the op
std::vector<int>& vregs = vn_to_vregs[right_vn];
ins->set_operand(1, Operand(OPERAND_VREG, vregs.front()));
}
vreg_to_vn[left_op.get_base_reg()] = right_vn;
vn_to_vregs[right_vn].push_back(left_op.get_base_reg());
result->add_instruction(ins);
break;
}
case HINS_LOCALADDR:
{
const Operand left_op = ins->get_operand(0);
// if the left op has been seen before
if (vreg_to_vn.find(left_op.get_base_reg()) != vreg_to_vn.end()) {
// erase it from the respective vn's vregs
const int left_vn = vreg_to_vn[left_op.get_base_reg()];
std::vector<int>& vregs = vn_to_vregs[left_vn];
vregs.erase(std::remove(vregs.begin(), vregs.end(), left_op.get_base_reg()), vregs.end());
}
const int left_vn = lvn++;
vreg_to_vn[left_op.get_base_reg()] = left_vn;
vn_to_vregs[left_vn].push_back(left_op.get_base_reg());
result->add_instruction(ins);
break;
}
case HINS_INT_ADD:
case HINS_INT_MUL:
{
const Operand dest_op = ins->get_operand(0);
const Operand left_op = ins->get_operand(1);
const Operand right_op = ins->get_operand(2);
int left_vn, right_vn;
// we have to check if the operands are ints specifically due to localaddr computations
if (left_op.get_kind() == OPERAND_INT_LITERAL)
left_vn = const_to_vn.find(left_op.get_int_value()) != const_to_vn.end()
? const_to_vn[left_op.get_int_value()]
: lvn++;
else
left_vn = vreg_to_vn.find(left_op.get_base_reg()) != vreg_to_vn.end()
? vreg_to_vn[left_op.get_base_reg()]
: lvn++;
if (right_op.get_kind() == OPERAND_INT_LITERAL)
right_vn = const_to_vn.find(right_op.get_int_value()) != const_to_vn.end()
? const_to_vn[right_op.get_int_value()]
: lvn++;
else
right_vn = vreg_to_vn.find(right_op.get_base_reg()) != vreg_to_vn.end()
? vreg_to_vn[right_op.get_base_reg()]
: lvn++;
// if the left op has been seen before
if (!vn_to_vregs[left_vn].empty()) {
// replace it with the first instance of the op
std::vector<int>& vregs = vn_to_vregs[left_vn];
ins->set_operand(1, Operand(OPERAND_VREG, vregs.front()));
}
// if the right op has been seen before
if (!vn_to_vregs[right_vn].empty()) {
// replace it with the first instance of the op
std::vector<int>& vregs = vn_to_vregs[right_vn];
ins->set_operand(2, Operand(OPERAND_VREG, vregs.front()));
}
key_t op_key = std::make_tuple(ins->get_opcode(), std::min(left_vn, right_vn),
std::max(left_vn, right_vn));
const int dest_vn = op_to_vn.find(op_key) != op_to_vn.end() ? op_to_vn[op_key] : lvn++;
// if this instruction has not been seen before
if (op_to_vn.find(op_key) == op_to_vn.end()) op_to_vn[op_key] = dest_vn;
vreg_to_vn[dest_op.get_base_reg()] = dest_vn;
vn_to_vregs[dest_vn].push_back(dest_op.get_base_reg());
result->add_instruction(ins);
break;
}
case HINS_INT_SUB:
case HINS_INT_DIV:
case HINS_INT_MOD:
{
const Operand dest_op = ins->get_operand(0);
const Operand left_op = ins->get_operand(1);
const Operand right_op = ins->get_operand(2);
const int left_vn = vreg_to_vn.find(left_op.get_base_reg()) != vreg_to_vn.end()
? vreg_to_vn[left_op.get_base_reg()]
: lvn++;
const int right_vn = vreg_to_vn.find(right_op.get_base_reg()) != vreg_to_vn.end()
? vreg_to_vn[right_op.get_base_reg()]
: lvn++;
// if the left op has been seen before
if (vreg_to_vn.find(left_op.get_base_reg()) != vreg_to_vn.end()) {
// replace it with the first instance of the op
std::vector<int>& vregs = vn_to_vregs[left_vn];
ins->set_operand(1, Operand(OPERAND_VREG, vregs.front()));
}
// if the right op has been seen before
if (vreg_to_vn.find(right_op.get_base_reg()) != vreg_to_vn.end()) {
// replace it with the first instance of the op
std::vector<int>& vregs = vn_to_vregs[right_vn];
ins->set_operand(2, Operand(OPERAND_VREG, vregs.front()));
}
key_t op_key = std::make_tuple(ins->get_opcode(), left_vn, right_vn);
const int dest_vn = op_to_vn.find(op_key) != op_to_vn.end() ? op_to_vn[op_key] : lvn++;
// if this instruction has not been seen before
if (op_to_vn.find(op_key) == op_to_vn.end()) op_to_vn[op_key] = dest_vn;
vreg_to_vn[dest_op.get_base_reg()] = dest_vn;
vn_to_vregs[dest_vn].push_back(dest_op.get_base_reg());
result->add_instruction(ins);
break;
}
case HINS_INT_NEGATE:
{
break;
}
case HINS_READ_INT:
{
const Operand dest_op = ins->get_operand(0);
const int dest_vn = lvn++;
vreg_to_vn[dest_op.get_base_reg()] = dest_vn;
vn_to_vregs[dest_vn].push_back(dest_op.get_base_reg());
result->add_instruction(ins);
break;
}
default:
result->add_instruction(ins);
break;
}
ins = result->get_last();
for (unsigned j = 1; j < ins->get_num_operands(); j++) {
auto op = ins->get_operand(j);
if (op.get_kind() == OPERAND_LABEL || op.get_kind() == OPERAND_INT_LITERAL) continue;
int vreg = op.get_base_reg();
if (vreg_to_vn.find(vreg) != vreg_to_vn.end()) {
int vn = vreg_to_vn[vreg];
// if it's a constant
if (vn_to_const.find(vn) != vn_to_const.end())
ins->set_operand(j, Operand(OPERAND_INT_LITERAL, vn_to_const[vn]));
}
}
}
return result;
}
///////////////////////////////
// X86_64 CFG Transformation //
///////////////////////////////
InstructionSequence* X86_64ControlFlowGraphTransform::transform_basic_block(InstructionSequence* iseq) {
return iseq;
}