emit.c (14695B)
1 #include "all.h" 2 3 4 #define CMP(X) \ 5 X(Ciule, "be") \ 6 X(Ciult, "b") \ 7 X(Cisle, "le") \ 8 X(Cislt, "l") \ 9 X(Cisgt, "g") \ 10 X(Cisge, "ge") \ 11 X(Ciugt, "a") \ 12 X(Ciuge, "ae") \ 13 X(Cieq, "z") \ 14 X(Cine, "nz") \ 15 X(NCmpI+Cfle, "be") \ 16 X(NCmpI+Cflt, "b") \ 17 X(NCmpI+Cfgt, "a") \ 18 X(NCmpI+Cfge, "ae") \ 19 X(NCmpI+Cfeq, "z") \ 20 X(NCmpI+Cfne, "nz") \ 21 X(NCmpI+Cfo, "np") \ 22 X(NCmpI+Cfuo, "p") 23 24 enum { 25 SLong = 0, 26 SWord = 1, 27 SShort = 2, 28 SByte = 3, 29 30 Ki = -1, /* matches Kw and Kl */ 31 Ka = -2, /* matches all classes */ 32 }; 33 34 /* Instruction format strings: 35 * 36 * if the format string starts with -, the instruction 37 * is assumed to be 3-address and is put in 2-address 38 * mode using an extra mov if necessary 39 * 40 * if the format string starts with +, the same as the 41 * above applies, but commutativity is also assumed 42 * 43 * %k is used to set the class of the instruction, 44 * it'll expand to "l", "q", "ss", "sd", depending 45 * on the instruction class 46 * %0 designates the first argument 47 * %1 designates the second argument 48 * %= designates the result 49 * 50 * if %k is not used, a prefix to 0, 1, or = must be 51 * added, it can be: 52 * M - memory reference 53 * L - long (64 bits) 54 * W - word (32 bits) 55 * H - short (16 bits) 56 * B - byte (8 bits) 57 * S - single precision float 58 * D - double precision float 59 */ 60 static struct { 61 short op; 62 short cls; 63 char *fmt; 64 } omap[] = { 65 { Oadd, Ka, "+add%k %1, %=" }, 66 { Osub, Ka, "-sub%k %1, %=" }, 67 { Oand, Ki, "+and%k %1, %=" }, 68 { Oor, Ki, "+or%k %1, %=" }, 69 { Oxor, Ki, "+xor%k %1, %=" }, 70 { Osar, Ki, "-sar%k %B1, %=" }, 71 { Oshr, Ki, "-shr%k %B1, %=" }, 72 { Oshl, Ki, "-shl%k %B1, %=" }, 73 { Omul, Ki, "+imul%k %1, %=" }, 74 { Omul, Ks, "+mulss %1, %=" }, 75 { Omul, Kd, "+mulsd %1, %=" }, 76 { Odiv, Ka, "-div%k %1, %=" }, 77 { Ostorel, Ka, "movq %L0, %M1" }, 78 { Ostorew, Ka, "movl %W0, %M1" }, 79 { Ostoreh, Ka, "movw %H0, %M1" }, 80 { Ostoreb, Ka, "movb %B0, %M1" }, 81 { Ostores, Ka, "movss %S0, %M1" }, 82 { Ostored, Ka, "movsd %D0, %M1" }, 83 { Oload, Ka, "mov%k %M0, %=" }, 84 { Oloadsw, Kl, "movslq %M0, %L=" }, 85 { Oloadsw, Kw, "movl %M0, %W=" }, 86 { Oloaduw, Ki, "movl %M0, %W=" }, 87 { Oloadsh, Ki, "movsw%k %M0, %=" }, 88 { Oloaduh, Ki, "movzw%k %M0, %=" }, 89 { Oloadsb, Ki, "movsb%k %M0, %=" }, 90 { Oloadub, Ki, "movzb%k %M0, %=" }, 91 { Oextsw, Kl, "movslq %W0, %L=" }, 92 { Oextuw, Kl, "movl %W0, %W=" }, 93 { Oextsh, Ki, "movsw%k %H0, %=" }, 94 { Oextuh, Ki, "movzw%k %H0, %=" }, 95 { Oextsb, Ki, "movsb%k %B0, %=" }, 96 { Oextub, Ki, "movzb%k %B0, %=" }, 97 98 { Oexts, Kd, "cvtss2sd %0, %=" }, 99 { Otruncd, Ks, "cvtsd2ss %0, %=" }, 100 { Ostosi, Ki, "cvttss2si%k %0, %=" }, 101 { Odtosi, Ki, "cvttsd2si%k %0, %=" }, 102 { Oswtof, Ka, "cvtsi2%k %W0, %=" }, 103 { Osltof, Ka, "cvtsi2%k %L0, %=" }, 104 { Ocast, Ki, "movq %D0, %L=" }, 105 { Ocast, Ka, "movq %L0, %D=" }, 106 107 { Oaddr, Ki, "lea%k %M0, %=" }, 108 { Oswap, Ki, "xchg%k %0, %1" }, 109 { Osign, Kl, "cqto" }, 110 { Osign, Kw, "cltd" }, 111 { Oxdiv, Ki, "div%k %0" }, 112 { Oxidiv, Ki, "idiv%k %0" }, 113 { Oxcmp, Ks, "ucomiss %S0, %S1" }, 114 { Oxcmp, Kd, "ucomisd %D0, %D1" }, 115 { Oxcmp, Ki, "cmp%k %0, %1" }, 116 { Oxtest, Ki, "test%k %0, %1" }, 117 #define X(c, s) \ 118 { Oflag+c, Ki, "set" s " %B=\n\tmovzb%k %B=, %=" }, 119 CMP(X) 120 #undef X 121 { NOp, 0, 0 } 122 }; 123 124 static char *rname[][4] = { 125 [RAX] = {"rax", "eax", "ax", "al"}, 126 [RBX] = {"rbx", "ebx", "bx", "bl"}, 127 [RCX] = {"rcx", "ecx", "cx", "cl"}, 128 [RDX] = {"rdx", "edx", "dx", "dl"}, 129 [RSI] = {"rsi", "esi", "si", "sil"}, 130 [RDI] = {"rdi", "edi", "di", "dil"}, 131 [RBP] = {"rbp", "ebp", "bp", "bpl"}, 132 [RSP] = {"rsp", "esp", "sp", "spl"}, 133 [R8 ] = {"r8" , "r8d", "r8w", "r8b"}, 134 [R9 ] = {"r9" , "r9d", "r9w", "r9b"}, 135 [R10] = {"r10", "r10d", "r10w", "r10b"}, 136 [R11] = {"r11", "r11d", "r11w", "r11b"}, 137 [R12] = {"r12", "r12d", "r12w", "r12b"}, 138 [R13] = {"r13", "r13d", "r13w", "r13b"}, 139 [R14] = {"r14", "r14d", "r14w", "r14b"}, 140 [R15] = {"r15", "r15d", "r15w", "r15b"}, 141 }; 142 143 144 static int 145 slot(Ref r, Fn *fn) 146 { 147 int s; 148 149 s = rsval(r); 150 assert(s <= fn->slot); 151 /* specific to NAlign == 3 */ 152 if (s < 0) 153 return -4 * s; 154 else if (fn->vararg) 155 return -176 + -4 * (fn->slot - s); 156 else 157 return -4 * (fn->slot - s); 158 } 159 160 static void 161 emitcon(Con *con, FILE *f) 162 { 163 char *p, *l; 164 165 switch (con->type) { 166 case CAddr: 167 l = str(con->sym.id); 168 p = l[0] == '"' ? "" : T.assym; 169 if (con->sym.type == SThr) { 170 if (T.apple) 171 fprintf(f, "%s%s@TLVP", p, l); 172 else 173 fprintf(f, "%%fs:%s%s@tpoff", p, l); 174 } else 175 fprintf(f, "%s%s", p, l); 176 if (con->bits.i) 177 fprintf(f, "%+"PRId64, con->bits.i); 178 break; 179 case CBits: 180 fprintf(f, "%"PRId64, con->bits.i); 181 break; 182 default: 183 die("unreachable"); 184 } 185 } 186 187 static char * 188 regtoa(int reg, int sz) 189 { 190 static char buf[6]; 191 192 assert(reg <= XMM15); 193 if (reg >= XMM0) { 194 sprintf(buf, "xmm%d", reg-XMM0); 195 return buf; 196 } else 197 return rname[reg][sz]; 198 } 199 200 static Ref 201 getarg(char c, Ins *i) 202 { 203 switch (c) { 204 case '0': 205 return i->arg[0]; 206 case '1': 207 return i->arg[1]; 208 case '=': 209 return i->to; 210 default: 211 die("invalid arg letter %c", c); 212 } 213 } 214 215 static void emitins(Ins, Fn *, FILE *); 216 217 static void 218 emitcopy(Ref r1, Ref r2, int k, Fn *fn, FILE *f) 219 { 220 Ins icp; 221 222 icp.op = Ocopy; 223 icp.arg[0] = r2; 224 icp.to = r1; 225 icp.cls = k; 226 emitins(icp, fn, f); 227 } 228 229 static void 230 emitf(char *s, Ins *i, Fn *fn, FILE *f) 231 { 232 static char clstoa[][3] = {"l", "q", "ss", "sd"}; 233 char c; 234 int sz; 235 Ref ref; 236 Mem *m; 237 Con off; 238 239 switch (*s) { 240 case '+': 241 if (req(i->arg[1], i->to)) { 242 ref = i->arg[0]; 243 i->arg[0] = i->arg[1]; 244 i->arg[1] = ref; 245 } 246 /* fall through */ 247 case '-': 248 assert((!req(i->arg[1], i->to) || req(i->arg[0], i->to)) && 249 "cannot convert to 2-address"); 250 emitcopy(i->to, i->arg[0], i->cls, fn, f); 251 s++; 252 break; 253 } 254 255 fputc('\t', f); 256 Next: 257 while ((c = *s++) != '%') 258 if (!c) { 259 fputc('\n', f); 260 return; 261 } else 262 fputc(c, f); 263 switch ((c = *s++)) { 264 case '%': 265 fputc('%', f); 266 break; 267 case 'k': 268 fputs(clstoa[i->cls], f); 269 break; 270 case '0': 271 case '1': 272 case '=': 273 sz = KWIDE(i->cls) ? SLong : SWord; 274 s--; 275 goto Ref; 276 case 'D': 277 case 'S': 278 sz = SLong; /* does not matter for floats */ 279 Ref: 280 c = *s++; 281 ref = getarg(c, i); 282 switch (rtype(ref)) { 283 case RTmp: 284 assert(isreg(ref)); 285 fprintf(f, "%%%s", regtoa(ref.val, sz)); 286 break; 287 case RSlot: 288 fprintf(f, "%d(%%rbp)", slot(ref, fn)); 289 break; 290 case RMem: 291 Mem: 292 m = &fn->mem[ref.val]; 293 if (rtype(m->base) == RSlot) { 294 off.type = CBits; 295 off.bits.i = slot(m->base, fn); 296 addcon(&m->offset, &off, 1); 297 m->base = TMP(RBP); 298 } 299 if (m->offset.type != CUndef) 300 emitcon(&m->offset, f); 301 fputc('(', f); 302 if (!req(m->base, R)) 303 fprintf(f, "%%%s", regtoa(m->base.val, SLong)); 304 else if (m->offset.type == CAddr) 305 fprintf(f, "%%rip"); 306 if (!req(m->index, R)) 307 fprintf(f, ", %%%s, %d", 308 regtoa(m->index.val, SLong), 309 m->scale 310 ); 311 fputc(')', f); 312 break; 313 case RCon: 314 fputc('$', f); 315 emitcon(&fn->con[ref.val], f); 316 break; 317 default: 318 die("unreachable"); 319 } 320 break; 321 case 'L': 322 sz = SLong; 323 goto Ref; 324 case 'W': 325 sz = SWord; 326 goto Ref; 327 case 'H': 328 sz = SShort; 329 goto Ref; 330 case 'B': 331 sz = SByte; 332 goto Ref; 333 case 'M': 334 c = *s++; 335 ref = getarg(c, i); 336 switch (rtype(ref)) { 337 case RMem: 338 goto Mem; 339 case RSlot: 340 fprintf(f, "%d(%%rbp)", slot(ref, fn)); 341 break; 342 case RCon: 343 off = fn->con[ref.val]; 344 emitcon(&off, f); 345 if (off.type == CAddr) 346 if (off.sym.type != SThr || T.apple) 347 fprintf(f, "(%%rip)"); 348 break; 349 case RTmp: 350 assert(isreg(ref)); 351 fprintf(f, "(%%%s)", regtoa(ref.val, SLong)); 352 break; 353 default: 354 die("unreachable"); 355 } 356 break; 357 default: 358 die("invalid format specifier %%%c", c); 359 } 360 goto Next; 361 } 362 363 static void *negmask[4] = { 364 [Ks] = (uint32_t[4]){ 0x80000000 }, 365 [Kd] = (uint64_t[2]){ 0x8000000000000000 }, 366 }; 367 368 static void 369 emitins(Ins i, Fn *fn, FILE *f) 370 { 371 Ref r; 372 int64_t val; 373 int o, t0; 374 Ins ineg; 375 Con *con; 376 char *sym; 377 378 switch (i.op) { 379 default: 380 Table: 381 /* most instructions are just pulled out of 382 * the table omap[], some special cases are 383 * detailed below */ 384 for (o=0;; o++) { 385 /* this linear search should really be a binary 386 * search */ 387 if (omap[o].op == NOp) 388 die("no match for %s(%c)", 389 optab[i.op].name, "wlsd"[i.cls]); 390 if (omap[o].op == i.op) 391 if (omap[o].cls == i.cls 392 || (omap[o].cls == Ki && KBASE(i.cls) == 0) 393 || (omap[o].cls == Ka)) 394 break; 395 } 396 emitf(omap[o].fmt, &i, fn, f); 397 break; 398 case Onop: 399 /* just do nothing for nops, they are inserted 400 * by some passes */ 401 break; 402 case Omul: 403 /* here, we try to use the 3-addresss form 404 * of multiplication when possible */ 405 if (rtype(i.arg[1]) == RCon) { 406 r = i.arg[0]; 407 i.arg[0] = i.arg[1]; 408 i.arg[1] = r; 409 } 410 if (KBASE(i.cls) == 0 /* only available for ints */ 411 && rtype(i.arg[0]) == RCon 412 && rtype(i.arg[1]) == RTmp) { 413 emitf("imul%k %0, %1, %=", &i, fn, f); 414 break; 415 } 416 goto Table; 417 case Osub: 418 /* we have to use the negation trick to handle 419 * some 3-address subtractions */ 420 if (req(i.to, i.arg[1]) && !req(i.arg[0], i.to)) { 421 ineg = (Ins){Oneg, i.cls, i.to, {i.to}}; 422 emitins(ineg, fn, f); 423 emitf("add%k %0, %=", &i, fn, f); 424 break; 425 } 426 goto Table; 427 case Oneg: 428 if (!req(i.to, i.arg[0])) 429 emitf("mov%k %0, %=", &i, fn, f); 430 if (KBASE(i.cls) == 0) 431 emitf("neg%k %=", &i, fn, f); 432 else 433 fprintf(f, 434 "\txorp%c %sfp%d(%%rip), %%%s\n", 435 "xxsd"[i.cls], 436 T.asloc, 437 stashbits(negmask[i.cls], 16), 438 regtoa(i.to.val, SLong) 439 ); 440 break; 441 case Odiv: 442 /* use xmm15 to adjust the instruction when the 443 * conversion to 2-address in emitf() would fail */ 444 if (req(i.to, i.arg[1])) { 445 i.arg[1] = TMP(XMM0+15); 446 emitf("mov%k %=, %1", &i, fn, f); 447 emitf("mov%k %0, %=", &i, fn, f); 448 i.arg[0] = i.to; 449 } 450 goto Table; 451 case Ocopy: 452 /* copies are used for many things; see my note 453 * to understand how to load big constants: 454 * https://c9x.me/notes/2015-09-19.html */ 455 assert(rtype(i.to) != RMem); 456 if (req(i.to, R) || req(i.arg[0], R)) 457 break; 458 if (req(i.to, i.arg[0])) 459 break; 460 t0 = rtype(i.arg[0]); 461 if (i.cls == Kl 462 && t0 == RCon 463 && fn->con[i.arg[0].val].type == CBits) { 464 val = fn->con[i.arg[0].val].bits.i; 465 if (isreg(i.to)) 466 if (val >= 0 && val <= UINT32_MAX) { 467 emitf("movl %W0, %W=", &i, fn, f); 468 break; 469 } 470 if (rtype(i.to) == RSlot) 471 if (val < INT32_MIN || val > INT32_MAX) { 472 emitf("movl %0, %=", &i, fn, f); 473 emitf("movl %0>>32, 4+%=", &i, fn, f); 474 break; 475 } 476 } 477 if (isreg(i.to) 478 && t0 == RCon 479 && fn->con[i.arg[0].val].type == CAddr) { 480 emitf("lea%k %M0, %=", &i, fn, f); 481 break; 482 } 483 if (rtype(i.to) == RSlot 484 && (t0 == RSlot || t0 == RMem)) { 485 i.cls = KWIDE(i.cls) ? Kd : Ks; 486 i.arg[1] = TMP(XMM0+15); 487 emitf("mov%k %0, %1", &i, fn, f); 488 emitf("mov%k %1, %=", &i, fn, f); 489 break; 490 } 491 /* conveniently, the assembler knows if it 492 * should use movabsq when reading movq */ 493 emitf("mov%k %0, %=", &i, fn, f); 494 break; 495 case Oaddr: 496 if (!T.apple 497 && rtype(i.arg[0]) == RCon 498 && fn->con[i.arg[0].val].sym.type == SThr) { 499 /* derive the symbol address from the TCB 500 * address at offset 0 of %fs */ 501 assert(isreg(i.to)); 502 con = &fn->con[i.arg[0].val]; 503 sym = str(con->sym.id); 504 emitf("movq %%fs:0, %L=", &i, fn, f); 505 fprintf(f, "\tleaq %s%s@tpoff", 506 sym[0] == '"' ? "" : T.assym, sym); 507 if (con->bits.i) 508 fprintf(f, "%+"PRId64, con->bits.i); 509 fprintf(f, "(%%%s), %%%s\n", 510 regtoa(i.to.val, SLong), 511 regtoa(i.to.val, SLong)); 512 break; 513 } 514 goto Table; 515 case Ocall: 516 /* calls simply have a weird syntax in AT&T 517 * assembly... */ 518 switch (rtype(i.arg[0])) { 519 case RCon: 520 fprintf(f, "\tcallq "); 521 emitcon(&fn->con[i.arg[0].val], f); 522 fprintf(f, "\n"); 523 break; 524 case RTmp: 525 emitf("callq *%L0", &i, fn, f); 526 break; 527 default: 528 die("invalid call argument"); 529 } 530 break; 531 case Osalloc: 532 /* there is no good reason why this is here 533 * maybe we should split Osalloc in 2 different 534 * instructions depending on the result 535 */ 536 emitf("subq %L0, %%rsp", &i, fn, f); 537 if (!req(i.to, R)) 538 emitcopy(i.to, TMP(RSP), Kl, fn, f); 539 break; 540 case Oswap: 541 if (KBASE(i.cls) == 0) 542 goto Table; 543 /* for floats, there is no swap instruction 544 * so we use xmm15 as a temporary 545 */ 546 emitcopy(TMP(XMM0+15), i.arg[0], i.cls, fn, f); 547 emitcopy(i.arg[0], i.arg[1], i.cls, fn, f); 548 emitcopy(i.arg[1], TMP(XMM0+15), i.cls, fn, f); 549 break; 550 case Odbgloc: 551 emitdbgloc(i.arg[0].val, i.arg[1].val, f); 552 break; 553 } 554 } 555 556 static uint64_t 557 framesz(Fn *fn) 558 { 559 uint64_t i, o, f; 560 561 /* specific to NAlign == 3 */ 562 for (i=0, o=0; i<NCLR; i++) 563 o ^= 1 & (fn->reg >> amd64_sysv_rclob[i]); 564 f = fn->slot; 565 f = (f + 3) & -4; 566 return 4*f + 8*o + 176*fn->vararg; 567 } 568 569 void 570 amd64_emitfn(Fn *fn, FILE *f) 571 { 572 static char *ctoa[] = { 573 #define X(c, s) [c] = s, 574 CMP(X) 575 #undef X 576 }; 577 static int id0; 578 Blk *b, *s; 579 Ins *i, itmp; 580 int *r, c, o, n, lbl; 581 uint64_t fs; 582 583 emitfnlnk(fn->name, &fn->lnk, f); 584 fputs("\tendbr64\n\tpushq %rbp\n\tmovq %rsp, %rbp\n", f); 585 fs = framesz(fn); 586 if (fs) 587 fprintf(f, "\tsubq $%"PRIu64", %%rsp\n", fs); 588 if (fn->vararg) { 589 o = -176; 590 for (r=amd64_sysv_rsave; r<&amd64_sysv_rsave[6]; r++, o+=8) 591 fprintf(f, "\tmovq %%%s, %d(%%rbp)\n", rname[*r][0], o); 592 for (n=0; n<8; ++n, o+=16) 593 fprintf(f, "\tmovaps %%xmm%d, %d(%%rbp)\n", n, o); 594 } 595 for (r=amd64_sysv_rclob; r<&amd64_sysv_rclob[NCLR]; r++) 596 if (fn->reg & BIT(*r)) { 597 itmp.arg[0] = TMP(*r); 598 emitf("pushq %L0", &itmp, fn, f); 599 fs += 8; 600 } 601 602 for (lbl=0, b=fn->start; b; b=b->link) { 603 if (lbl || b->npred > 1) 604 fprintf(f, "%sbb%d:\n", T.asloc, id0+b->id); 605 for (i=b->ins; i!=&b->ins[b->nins]; i++) 606 emitins(*i, fn, f); 607 lbl = 1; 608 switch (b->jmp.type) { 609 case Jhlt: 610 fprintf(f, "\tud2\n"); 611 break; 612 case Jret0: 613 if (fn->dynalloc) 614 fprintf(f, 615 "\tmovq %%rbp, %%rsp\n" 616 "\tsubq $%"PRIu64", %%rsp\n", 617 fs 618 ); 619 for (r=&amd64_sysv_rclob[NCLR]; r>amd64_sysv_rclob;) 620 if (fn->reg & BIT(*--r)) { 621 itmp.arg[0] = TMP(*r); 622 emitf("popq %L0", &itmp, fn, f); 623 } 624 fprintf(f, 625 "\tleave\n" 626 "\tret\n" 627 ); 628 break; 629 case Jjmp: 630 Jmp: 631 if (b->s1 != b->link) 632 fprintf(f, "\tjmp %sbb%d\n", 633 T.asloc, id0+b->s1->id); 634 else 635 lbl = 0; 636 break; 637 default: 638 c = b->jmp.type - Jjf; 639 if (0 <= c && c <= NCmp) { 640 if (b->link == b->s2) { 641 s = b->s1; 642 b->s1 = b->s2; 643 b->s2 = s; 644 } else 645 c = cmpneg(c); 646 fprintf(f, "\tj%s %sbb%d\n", ctoa[c], 647 T.asloc, id0+b->s2->id); 648 goto Jmp; 649 } 650 die("unhandled jump %d", b->jmp.type); 651 } 652 } 653 id0 += fn->nblk; 654 if (!T.apple) 655 elf_emitfnfin(fn->name, f); 656 }