qbe

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

sysv.c (15023B)


      1 #include "all.h"
      2 
      3 typedef struct AClass AClass;
      4 typedef struct RAlloc RAlloc;
      5 
      6 struct AClass {
      7 	Typ *type;
      8 	int inmem;
      9 	int align;
     10 	uint size;
     11 	int cls[2];
     12 	Ref ref[2];
     13 };
     14 
     15 struct RAlloc {
     16 	Ins i;
     17 	RAlloc *link;
     18 };
     19 
     20 static void
     21 classify(AClass *a, Typ *t, uint s)
     22 {
     23 	Field *f;
     24 	int *cls;
     25 	uint n, s1;
     26 
     27 	for (n=0, s1=s; n<t->nunion; n++, s=s1)
     28 		for (f=t->fields[n]; f->type!=FEnd; f++) {
     29 			assert(s <= 16);
     30 			cls = &a->cls[s/8];
     31 			switch (f->type) {
     32 			case FEnd:
     33 				die("unreachable");
     34 			case FPad:
     35 				/* don't change anything */
     36 				s += f->len;
     37 				break;
     38 			case Fs:
     39 			case Fd:
     40 				if (*cls == Kx)
     41 					*cls = Kd;
     42 				s += f->len;
     43 				break;
     44 			case Fb:
     45 			case Fh:
     46 			case Fw:
     47 			case Fl:
     48 				*cls = Kl;
     49 				s += f->len;
     50 				break;
     51 			case FTyp:
     52 				classify(a, &typ[f->len], s);
     53 				s += typ[f->len].size;
     54 				break;
     55 			}
     56 		}
     57 }
     58 
     59 static void
     60 typclass(AClass *a, Typ *t)
     61 {
     62 	uint sz, al;
     63 
     64 	sz = t->size;
     65 	al = 1u << t->align;
     66 
     67 	/* the ABI requires sizes to be rounded
     68 	 * up to the nearest multiple of 8, moreover
     69 	 * it makes it easy load and store structures
     70 	 * in registers
     71 	 */
     72 	if (al < 8)
     73 		al = 8;
     74 	sz = (sz + al-1) & -al;
     75 
     76 	a->type = t;
     77 	a->size = sz;
     78 	a->align = t->align;
     79 
     80 	if (t->isdark || sz > 16 || sz == 0) {
     81 		/* large or unaligned structures are
     82 		 * required to be passed in memory
     83 		 */
     84 		a->inmem = 1;
     85 		return;
     86 	}
     87 
     88 	a->cls[0] = Kx;
     89 	a->cls[1] = Kx;
     90 	a->inmem = 0;
     91 	classify(a, t, 0);
     92 }
     93 
     94 static int
     95 retr(Ref reg[2], AClass *aret)
     96 {
     97 	static int retreg[2][2] = {{RAX, RDX}, {XMM0, XMM0+1}};
     98 	int n, k, ca, nr[2];
     99 
    100 	nr[0] = nr[1] = 0;
    101 	ca = 0;
    102 	for (n=0; (uint)n*8<aret->size; n++) {
    103 		k = KBASE(aret->cls[n]);
    104 		reg[n] = TMP(retreg[k][nr[k]++]);
    105 		ca += 1 << (2 * k);
    106 	}
    107 	return ca;
    108 }
    109 
    110 static void
    111 selret(Blk *b, Fn *fn)
    112 {
    113 	int j, k, ca;
    114 	Ref r, r0, reg[2];
    115 	AClass aret;
    116 
    117 	j = b->jmp.type;
    118 
    119 	if (!isret(j) || j == Jret0)
    120 		return;
    121 
    122 	r0 = b->jmp.arg;
    123 	b->jmp.type = Jret0;
    124 
    125 	if (j == Jretc) {
    126 		typclass(&aret, &typ[fn->retty]);
    127 		if (aret.inmem) {
    128 			assert(rtype(fn->retr) == RTmp);
    129 			emit(Ocopy, Kl, TMP(RAX), fn->retr, R);
    130 			emit(Oblit1, 0, R, INT(aret.type->size), R);
    131 			emit(Oblit0, 0, R, r0, fn->retr);
    132 			ca = 1;
    133 		} else {
    134 			ca = retr(reg, &aret);
    135 			if (aret.size > 8) {
    136 				r = newtmp("abi", Kl, fn);
    137 				emit(Oload, Kl, reg[1], r, R);
    138 				emit(Oadd, Kl, r, r0, getcon(8, fn));
    139 			}
    140 			emit(Oload, Kl, reg[0], r0, R);
    141 		}
    142 	} else {
    143 		k = j - Jretw;
    144 		if (KBASE(k) == 0) {
    145 			emit(Ocopy, k, TMP(RAX), r0, R);
    146 			ca = 1;
    147 		} else {
    148 			emit(Ocopy, k, TMP(XMM0), r0, R);
    149 			ca = 1 << 2;
    150 		}
    151 	}
    152 
    153 	b->jmp.arg = CALL(ca);
    154 }
    155 
    156 static int
    157 argsclass(Ins *i0, Ins *i1, AClass *ac, int op, AClass *aret, Ref *env)
    158 {
    159 	int varc, envc, nint, ni, nsse, ns, n, *pn;
    160 	AClass *a;
    161 	Ins *i;
    162 
    163 	if (aret && aret->inmem)
    164 		nint = 5; /* hidden argument */
    165 	else
    166 		nint = 6;
    167 	nsse = 8;
    168 	varc = 0;
    169 	envc = 0;
    170 	for (i=i0, a=ac; i<i1; i++, a++)
    171 		switch (i->op - op + Oarg) {
    172 		case Oarg:
    173 			if (KBASE(i->cls) == 0)
    174 				pn = &nint;
    175 			else
    176 				pn = &nsse;
    177 			if (*pn > 0) {
    178 				--*pn;
    179 				a->inmem = 0;
    180 			} else
    181 				a->inmem = 2;
    182 			a->align = 3;
    183 			a->size = 8;
    184 			a->cls[0] = i->cls;
    185 			break;
    186 		case Oargc:
    187 			n = i->arg[0].val;
    188 			typclass(a, &typ[n]);
    189 			if (a->inmem)
    190 				continue;
    191 			ni = ns = 0;
    192 			for (n=0; (uint)n*8<a->size; n++)
    193 				if (KBASE(a->cls[n]) == 0)
    194 					ni++;
    195 				else
    196 					ns++;
    197 			if (nint >= ni && nsse >= ns) {
    198 				nint -= ni;
    199 				nsse -= ns;
    200 			} else
    201 				a->inmem = 1;
    202 			break;
    203 		case Oarge:
    204 			envc = 1;
    205 			if (op == Opar)
    206 				*env = i->to;
    207 			else
    208 				*env = i->arg[0];
    209 			break;
    210 		case Oargv:
    211 			varc = 1;
    212 			break;
    213 		default:
    214 			die("unreachable");
    215 		}
    216 
    217 	if (varc && envc)
    218 		err("sysv abi does not support variadic env calls");
    219 
    220 	return ((varc|envc) << 12) | ((6-nint) << 4) | ((8-nsse) << 8);
    221 }
    222 
    223 int amd64_sysv_rsave[] = {
    224 	RDI, RSI, RDX, RCX, R8, R9, R10, R11, RAX,
    225 	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
    226 	XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, -1
    227 };
    228 int amd64_sysv_rclob[] = {RBX, R12, R13, R14, R15, -1};
    229 
    230 MAKESURE(sysv_arrays_ok,
    231 	sizeof amd64_sysv_rsave == (NGPS+NFPS+1) * sizeof(int) &&
    232 	sizeof amd64_sysv_rclob == (NCLR+1) * sizeof(int)
    233 );
    234 
    235 /* layout of call's second argument (RCall)
    236  *
    237  *  29     12    8    4  3  0
    238  *  |0...00|x|xxxx|xxxx|xx|xx|                  range
    239  *          |    |    |  |  ` gp regs returned (0..2)
    240  *          |    |    |  ` sse regs returned   (0..2)
    241  *          |    |    ` gp regs passed         (0..6)
    242  *          |    ` sse regs passed             (0..8)
    243  *          ` 1 if rax is used to pass data    (0..1)
    244  */
    245 
    246 bits
    247 amd64_sysv_retregs(Ref r, int p[2])
    248 {
    249 	bits b;
    250 	int ni, nf;
    251 
    252 	assert(rtype(r) == RCall);
    253 	b = 0;
    254 	ni = r.val & 3;
    255 	nf = (r.val >> 2) & 3;
    256 	if (ni >= 1)
    257 		b |= BIT(RAX);
    258 	if (ni >= 2)
    259 		b |= BIT(RDX);
    260 	if (nf >= 1)
    261 		b |= BIT(XMM0);
    262 	if (nf >= 2)
    263 		b |= BIT(XMM1);
    264 	if (p) {
    265 		p[0] = ni;
    266 		p[1] = nf;
    267 	}
    268 	return b;
    269 }
    270 
    271 bits
    272 amd64_sysv_argregs(Ref r, int p[2])
    273 {
    274 	bits b;
    275 	int j, ni, nf, ra;
    276 
    277 	assert(rtype(r) == RCall);
    278 	b = 0;
    279 	ni = (r.val >> 4) & 15;
    280 	nf = (r.val >> 8) & 15;
    281 	ra = (r.val >> 12) & 1;
    282 	for (j=0; j<ni; j++)
    283 		b |= BIT(amd64_sysv_rsave[j]);
    284 	for (j=0; j<nf; j++)
    285 		b |= BIT(XMM0+j);
    286 	if (p) {
    287 		p[0] = ni + ra;
    288 		p[1] = nf;
    289 	}
    290 	return b | (ra ? BIT(RAX) : 0);
    291 }
    292 
    293 static Ref
    294 rarg(int ty, int *ni, int *ns)
    295 {
    296 	if (KBASE(ty) == 0)
    297 		return TMP(amd64_sysv_rsave[(*ni)++]);
    298 	else
    299 		return TMP(XMM0 + (*ns)++);
    300 }
    301 
    302 static void
    303 selcall(Fn *fn, Ins *i0, Ins *i1, RAlloc **rap)
    304 {
    305 	Ins *i;
    306 	AClass *ac, *a, aret;
    307 	int ca, ni, ns, al;
    308 	uint stk, off;
    309 	Ref r, r1, r2, reg[2], env;
    310 	RAlloc *ra;
    311 
    312 	env = R;
    313 	ac = alloc((i1-i0) * sizeof ac[0]);
    314 
    315 	if (!req(i1->arg[1], R)) {
    316 		assert(rtype(i1->arg[1]) == RType);
    317 		typclass(&aret, &typ[i1->arg[1].val]);
    318 		ca = argsclass(i0, i1, ac, Oarg, &aret, &env);
    319 	} else
    320 		ca = argsclass(i0, i1, ac, Oarg, 0, &env);
    321 
    322 	for (stk=0, a=&ac[i1-i0]; a>ac;)
    323 		if ((--a)->inmem) {
    324 			if (a->align > 4)
    325 				err("sysv abi requires alignments of 16 or less");
    326 			stk += a->size;
    327 			if (a->align == 4)
    328 				stk += stk & 15;
    329 		}
    330 	stk += stk & 15;
    331 	if (stk) {
    332 		r = getcon(-(int64_t)stk, fn);
    333 		emit(Osalloc, Kl, R, r, R);
    334 	}
    335 
    336 	if (!req(i1->arg[1], R)) {
    337 		if (aret.inmem) {
    338 			/* get the return location from eax
    339 			 * it saves one callee-save reg */
    340 			r1 = newtmp("abi", Kl, fn);
    341 			emit(Ocopy, Kl, i1->to, TMP(RAX), R);
    342 			ca += 1;
    343 		} else {
    344 			/* todo, may read out of bounds.
    345 			 * gcc did this up until 5.2, but
    346 			 * this should still be fixed.
    347 			 */
    348 			if (aret.size > 8) {
    349 				r = newtmp("abi", Kl, fn);
    350 				aret.ref[1] = newtmp("abi", aret.cls[1], fn);
    351 				emit(Ostorel, 0, R, aret.ref[1], r);
    352 				emit(Oadd, Kl, r, i1->to, getcon(8, fn));
    353 			}
    354 			aret.ref[0] = newtmp("abi", aret.cls[0], fn);
    355 			emit(Ostorel, 0, R, aret.ref[0], i1->to);
    356 			ca += retr(reg, &aret);
    357 			if (aret.size > 8)
    358 				emit(Ocopy, aret.cls[1], aret.ref[1], reg[1], R);
    359 			emit(Ocopy, aret.cls[0], aret.ref[0], reg[0], R);
    360 			r1 = i1->to;
    361 		}
    362 		/* allocate return pad */
    363 		ra = alloc(sizeof *ra);
    364 		/* specific to NAlign == 3 */
    365 		al = aret.align >= 2 ? aret.align - 2 : 0;
    366 		ra->i = (Ins){Oalloc+al, Kl, r1, {getcon(aret.size, fn)}};
    367 		ra->link = (*rap);
    368 		*rap = ra;
    369 	} else {
    370 		ra = 0;
    371 		if (KBASE(i1->cls) == 0) {
    372 			emit(Ocopy, i1->cls, i1->to, TMP(RAX), R);
    373 			ca += 1;
    374 		} else {
    375 			emit(Ocopy, i1->cls, i1->to, TMP(XMM0), R);
    376 			ca += 1 << 2;
    377 		}
    378 	}
    379 
    380 	emit(Ocall, i1->cls, R, i1->arg[0], CALL(ca));
    381 
    382 	if (!req(R, env))
    383 		emit(Ocopy, Kl, TMP(RAX), env, R);
    384 	else if ((ca >> 12) & 1) /* vararg call */
    385 		emit(Ocopy, Kw, TMP(RAX), getcon((ca >> 8) & 15, fn), R);
    386 
    387 	ni = ns = 0;
    388 	if (ra && aret.inmem)
    389 		emit(Ocopy, Kl, rarg(Kl, &ni, &ns), ra->i.to, R); /* pass hidden argument */
    390 
    391 	for (i=i0, a=ac; i<i1; i++, a++) {
    392 		if (i->op >= Oarge || a->inmem)
    393 			continue;
    394 		r1 = rarg(a->cls[0], &ni, &ns);
    395 		if (i->op == Oargc) {
    396 			if (a->size > 8) {
    397 				r2 = rarg(a->cls[1], &ni, &ns);
    398 				r = newtmp("abi", Kl, fn);
    399 				emit(Oload, a->cls[1], r2, r, R);
    400 				emit(Oadd, Kl, r, i->arg[1], getcon(8, fn));
    401 			}
    402 			emit(Oload, a->cls[0], r1, i->arg[1], R);
    403 		} else
    404 			emit(Ocopy, i->cls, r1, i->arg[0], R);
    405 	}
    406 
    407 	if (!stk)
    408 		return;
    409 
    410 	r = newtmp("abi", Kl, fn);
    411 	for (i=i0, a=ac, off=0; i<i1; i++, a++) {
    412 		if (i->op >= Oarge || !a->inmem)
    413 			continue;
    414 		r1 = newtmp("abi", Kl, fn);
    415 		if (i->op == Oargc) {
    416 			if (a->align == 4)
    417 				off += off & 15;
    418 			emit(Oblit1, 0, R, INT(a->type->size), R);
    419 			emit(Oblit0, 0, R, i->arg[1], r1);
    420 		} else
    421 			emit(Ostorel, 0, R, i->arg[0], r1);
    422 		emit(Oadd, Kl, r1, r, getcon(off, fn));
    423 		off += a->size;
    424 	}
    425 	emit(Osalloc, Kl, r, getcon(stk, fn), R);
    426 }
    427 
    428 static int
    429 selpar(Fn *fn, Ins *i0, Ins *i1)
    430 {
    431 	AClass *ac, *a, aret;
    432 	Ins *i;
    433 	int ni, ns, s, al, fa;
    434 	Ref r, env;
    435 
    436 	env = R;
    437 	ac = alloc((i1-i0) * sizeof ac[0]);
    438 	curi = &insb[NIns];
    439 	ni = ns = 0;
    440 
    441 	if (fn->retty >= 0) {
    442 		typclass(&aret, &typ[fn->retty]);
    443 		fa = argsclass(i0, i1, ac, Opar, &aret, &env);
    444 	} else
    445 		fa = argsclass(i0, i1, ac, Opar, 0, &env);
    446 	fn->reg = amd64_sysv_argregs(CALL(fa), 0);
    447 
    448 	for (i=i0, a=ac; i<i1; i++, a++) {
    449 		if (i->op != Oparc || a->inmem)
    450 			continue;
    451 		if (a->size > 8) {
    452 			r = newtmp("abi", Kl, fn);
    453 			a->ref[1] = newtmp("abi", Kl, fn);
    454 			emit(Ostorel, 0, R, a->ref[1], r);
    455 			emit(Oadd, Kl, r, i->to, getcon(8, fn));
    456 		}
    457 		a->ref[0] = newtmp("abi", Kl, fn);
    458 		emit(Ostorel, 0, R, a->ref[0], i->to);
    459 		/* specific to NAlign == 3 */
    460 		al = a->align >= 2 ? a->align - 2 : 0;
    461 		emit(Oalloc+al, Kl, i->to, getcon(a->size, fn), R);
    462 	}
    463 
    464 	if (fn->retty >= 0 && aret.inmem) {
    465 		r = newtmp("abi", Kl, fn);
    466 		emit(Ocopy, Kl, r, rarg(Kl, &ni, &ns), R);
    467 		fn->retr = r;
    468 	}
    469 
    470 	for (i=i0, a=ac, s=4; i<i1; i++, a++) {
    471 		switch (a->inmem) {
    472 		case 1:
    473 			if (a->align > 4)
    474 				err("sysv abi requires alignments of 16 or less");
    475 			if (a->align == 4)
    476 				s = (s+3) & -4;
    477 			fn->tmp[i->to.val].slot = -s;
    478 			s += a->size / 4;
    479 			continue;
    480 		case 2:
    481 			emit(Oload, i->cls, i->to, SLOT(-s), R);
    482 			s += 2;
    483 			continue;
    484 		}
    485 		if (i->op == Opare)
    486 			continue;
    487 		r = rarg(a->cls[0], &ni, &ns);
    488 		if (i->op == Oparc) {
    489 			emit(Ocopy, a->cls[0], a->ref[0], r, R);
    490 			if (a->size > 8) {
    491 				r = rarg(a->cls[1], &ni, &ns);
    492 				emit(Ocopy, a->cls[1], a->ref[1], r, R);
    493 			}
    494 		} else
    495 			emit(Ocopy, i->cls, i->to, r, R);
    496 	}
    497 
    498 	if (!req(R, env))
    499 		emit(Ocopy, Kl, env, TMP(RAX), R);
    500 
    501 	return fa | (s*4)<<12;
    502 }
    503 
    504 static Blk *
    505 split(Fn *fn, Blk *b)
    506 {
    507 	Blk *bn;
    508 
    509 	++fn->nblk;
    510 	bn = newblk();
    511 	bn->nins = &insb[NIns] - curi;
    512 	idup(&bn->ins, curi, bn->nins);
    513 	curi = &insb[NIns];
    514 	bn->visit = ++b->visit;
    515 	strf(bn->name, "%s.%d", b->name, b->visit);
    516 	bn->loop = b->loop;
    517 	bn->link = b->link;
    518 	b->link = bn;
    519 	return bn;
    520 }
    521 
    522 static void
    523 chpred(Blk *b, Blk *bp, Blk *bp1)
    524 {
    525 	Phi *p;
    526 	uint a;
    527 
    528 	for (p=b->phi; p; p=p->link) {
    529 		for (a=0; p->blk[a]!=bp; a++)
    530 			assert(a+1<p->narg);
    531 		p->blk[a] = bp1;
    532 	}
    533 }
    534 
    535 static void
    536 selvaarg(Fn *fn, Blk *b, Ins *i)
    537 {
    538 	Ref loc, lreg, lstk, nr, r0, r1, c4, c8, c16, c, ap;
    539 	Blk *b0, *bstk, *breg;
    540 	int isint;
    541 
    542 	c4 = getcon(4, fn);
    543 	c8 = getcon(8, fn);
    544 	c16 = getcon(16, fn);
    545 	ap = i->arg[0];
    546 	isint = KBASE(i->cls) == 0;
    547 
    548 	/* @b [...]
    549 	       r0 =l add ap, (0 or 4)
    550 	       nr =l loadsw r0
    551 	       r1 =w cultw nr, (48 or 176)
    552 	       jnz r1, @breg, @bstk
    553 	   @breg
    554 	       r0 =l add ap, 16
    555 	       r1 =l loadl r0
    556 	       lreg =l add r1, nr
    557 	       r0 =w add nr, (8 or 16)
    558 	       r1 =l add ap, (0 or 4)
    559 	       storew r0, r1
    560 	   @bstk
    561 	       r0 =l add ap, 8
    562 	       lstk =l loadl r0
    563 	       r1 =l add lstk, 8
    564 	       storel r1, r0
    565 	   @b0
    566 	       %loc =l phi @breg %lreg, @bstk %lstk
    567 	       i->to =(i->cls) load %loc
    568 	*/
    569 
    570 	loc = newtmp("abi", Kl, fn);
    571 	emit(Oload, i->cls, i->to, loc, R);
    572 	b0 = split(fn, b);
    573 	b0->jmp = b->jmp;
    574 	b0->s1 = b->s1;
    575 	b0->s2 = b->s2;
    576 	if (b->s1)
    577 		chpred(b->s1, b, b0);
    578 	if (b->s2 && b->s2 != b->s1)
    579 		chpred(b->s2, b, b0);
    580 
    581 	lreg = newtmp("abi", Kl, fn);
    582 	nr = newtmp("abi", Kl, fn);
    583 	r0 = newtmp("abi", Kw, fn);
    584 	r1 = newtmp("abi", Kl, fn);
    585 	emit(Ostorew, Kw, R, r0, r1);
    586 	emit(Oadd, Kl, r1, ap, isint ? CON_Z : c4);
    587 	emit(Oadd, Kw, r0, nr, isint ? c8 : c16);
    588 	r0 = newtmp("abi", Kl, fn);
    589 	r1 = newtmp("abi", Kl, fn);
    590 	emit(Oadd, Kl, lreg, r1, nr);
    591 	emit(Oload, Kl, r1, r0, R);
    592 	emit(Oadd, Kl, r0, ap, c16);
    593 	breg = split(fn, b);
    594 	breg->jmp.type = Jjmp;
    595 	breg->s1 = b0;
    596 
    597 	lstk = newtmp("abi", Kl, fn);
    598 	r0 = newtmp("abi", Kl, fn);
    599 	r1 = newtmp("abi", Kl, fn);
    600 	emit(Ostorel, Kw, R, r1, r0);
    601 	emit(Oadd, Kl, r1, lstk, c8);
    602 	emit(Oload, Kl, lstk, r0, R);
    603 	emit(Oadd, Kl, r0, ap, c8);
    604 	bstk = split(fn, b);
    605 	bstk->jmp.type = Jjmp;
    606 	bstk->s1 = b0;
    607 
    608 	b0->phi = alloc(sizeof *b0->phi);
    609 	*b0->phi = (Phi){
    610 		.cls = Kl, .to = loc,
    611 		.narg = 2,
    612 		.blk = vnew(2, sizeof b0->phi->blk[0], PFn),
    613 		.arg = vnew(2, sizeof b0->phi->arg[0], PFn),
    614 	};
    615 	b0->phi->blk[0] = bstk;
    616 	b0->phi->blk[1] = breg;
    617 	b0->phi->arg[0] = lstk;
    618 	b0->phi->arg[1] = lreg;
    619 	r0 = newtmp("abi", Kl, fn);
    620 	r1 = newtmp("abi", Kw, fn);
    621 	b->jmp.type = Jjnz;
    622 	b->jmp.arg = r1;
    623 	b->s1 = breg;
    624 	b->s2 = bstk;
    625 	c = getcon(isint ? 48 : 176, fn);
    626 	emit(Ocmpw+Ciult, Kw, r1, nr, c);
    627 	emit(Oloadsw, Kl, nr, r0, R);
    628 	emit(Oadd, Kl, r0, ap, isint ? CON_Z : c4);
    629 }
    630 
    631 static void
    632 selvastart(Fn *fn, int fa, Ref ap)
    633 {
    634 	Ref r0, r1;
    635 	int gp, fp, sp;
    636 
    637 	gp = ((fa >> 4) & 15) * 8;
    638 	fp = 48 + ((fa >> 8) & 15) * 16;
    639 	sp = fa >> 12;
    640 	r0 = newtmp("abi", Kl, fn);
    641 	r1 = newtmp("abi", Kl, fn);
    642 	emit(Ostorel, Kw, R, r1, r0);
    643 	emit(Oadd, Kl, r1, TMP(RBP), getcon(-176, fn));
    644 	emit(Oadd, Kl, r0, ap, getcon(16, fn));
    645 	r0 = newtmp("abi", Kl, fn);
    646 	r1 = newtmp("abi", Kl, fn);
    647 	emit(Ostorel, Kw, R, r1, r0);
    648 	emit(Oadd, Kl, r1, TMP(RBP), getcon(sp, fn));
    649 	emit(Oadd, Kl, r0, ap, getcon(8, fn));
    650 	r0 = newtmp("abi", Kl, fn);
    651 	emit(Ostorew, Kw, R, getcon(fp, fn), r0);
    652 	emit(Oadd, Kl, r0, ap, getcon(4, fn));
    653 	emit(Ostorew, Kw, R, getcon(gp, fn), ap);
    654 }
    655 
    656 void
    657 amd64_sysv_abi(Fn *fn)
    658 {
    659 	Blk *b;
    660 	Ins *i, *i0, *ip;
    661 	RAlloc *ral;
    662 	int n, fa;
    663 
    664 	for (b=fn->start; b; b=b->link)
    665 		b->visit = 0;
    666 
    667 	/* lower parameters */
    668 	for (b=fn->start, i=b->ins; i<&b->ins[b->nins]; i++)
    669 		if (!ispar(i->op))
    670 			break;
    671 	fa = selpar(fn, b->ins, i);
    672 	n = b->nins - (i - b->ins) + (&insb[NIns] - curi);
    673 	i0 = alloc(n * sizeof(Ins));
    674 	ip = icpy(ip = i0, curi, &insb[NIns] - curi);
    675 	ip = icpy(ip, i, &b->ins[b->nins] - i);
    676 	b->nins = n;
    677 	b->ins = i0;
    678 
    679 	/* lower calls, returns, and vararg instructions */
    680 	ral = 0;
    681 	b = fn->start;
    682 	do {
    683 		if (!(b = b->link))
    684 			b = fn->start; /* do it last */
    685 		if (b->visit)
    686 			continue;
    687 		curi = &insb[NIns];
    688 		selret(b, fn);
    689 		for (i=&b->ins[b->nins]; i!=b->ins;)
    690 			switch ((--i)->op) {
    691 			default:
    692 				emiti(*i);
    693 				break;
    694 			case Ocall:
    695 				for (i0=i; i0>b->ins; i0--)
    696 					if (!isarg((i0-1)->op))
    697 						break;
    698 				selcall(fn, i0, i, &ral);
    699 				i = i0;
    700 				break;
    701 			case Ovastart:
    702 				selvastart(fn, fa, i->arg[0]);
    703 				break;
    704 			case Ovaarg:
    705 				selvaarg(fn, b, i);
    706 				break;
    707 			case Oarg:
    708 			case Oargc:
    709 				die("unreachable");
    710 			}
    711 		if (b == fn->start)
    712 			for (; ral; ral=ral->link)
    713 				emiti(ral->i);
    714 		b->nins = &insb[NIns] - curi;
    715 		idup(&b->ins, curi, b->nins);
    716 	} while (b != fn->start);
    717 
    718 	if (debug['A']) {
    719 		fprintf(stderr, "\n> After ABI lowering:\n");
    720 		printfn(fn, stderr);
    721 	}
    722 }