/* -----------------------------------------------------------------------
   tile.S - Copyright (c) 2011 Tilera Corp.

   Tilera TILEPro and TILE-Gx Foreign Function Interface

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   ``Software''), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to
   permit persons to whom the Software is furnished to do so, subject to
   the following conditions:

   The above copyright notice and this permission notice shall be included
   in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
   DEALINGS IN THE SOFTWARE.
   ----------------------------------------------------------------------- */

#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>

/* Number of bytes in a register. */
#define REG_SIZE FFI_SIZEOF_ARG

/* Number of bytes in stack linkage area for backtracing.

   A note about the ABI: on entry to a procedure, sp points to a stack
   slot where it must spill the return address if it's not a leaf.
   REG_SIZE bytes beyond that is a slot owned by the caller which
   contains the sp value that the caller had when it was originally
   entered (i.e. the caller's frame pointer). */
#define LINKAGE_SIZE (2 * REG_SIZE)

/* The first 10 registers are used to pass arguments and return values. */
#define NUM_ARG_REGS 10

/* Mnemonics that are spelled differently on TILE-Gx and TILEPro.  The
   rest of this file uses only these aliases:
     SW   - store one register to memory (the file advances pointers by
            REG_SIZE after each such store),
     LW   - load one register from memory,
     BGZT - the branch used to keep looping while a byte count is still
            greater than zero. */
#ifdef __tilegx__
#define SW st
#define LW ld
#define BGZT bgtzt
#else
#define SW sw
#define LW lw
#define BGZT bgzt
#endif


/* void ffi_call_tile (int_reg_t reg_args[NUM_ARG_REGS],
                       const int_reg_t *stack_args,
                       unsigned long stack_args_bytes,
                       void (*fnaddr)(void));

   On entry, REG_ARGS (r0) points to the outgoing register values,
   STACK_ARGS (r1) points to STACK_ARG_BYTES (r2) bytes of additional
   values to be passed on the stack, and r3 holds the address of the
   function to call.  If STACK_ARG_BYTES is zero, then STACK_ARGS is
   ignored.

   When the invoked function returns, the values of r0-r9 are
   blindly stored back into REG_ARGS for the caller to examine.

   Frame layout while the callee runs (r52 = sp on entry to this routine,
   sp = r52 - FRAME_SIZE):

     r52 + 0              saved lr (our return address)
     r52 - REG_SIZE       saved incoming r52
     r52 - 2 * REG_SIZE   saved REG_ARGS pointer
     sp + LINKAGE_SIZE    start of the copied outgoing stack arguments
     sp + REG_SIZE        linkage slot holding r52 (our incoming sp)
     sp + 0               slot where the callee may spill its return address  */

        .section .text.ffi_call_tile, "ax", @progbits
        .align  8
        .globl  ffi_call_tile
        FFI_HIDDEN(ffi_call_tile)
ffi_call_tile:

/* Incoming arguments. */
#define REG_ARGS                r0
#define INCOMING_STACK_ARGS     r1
#define STACK_ARG_BYTES         r2
#define ORIG_FNADDR             r3

/* Temporary values. */
#define FRAME_SIZE              r10
#define TMP                     r11
#define TMP2                    r12
#define OUTGOING_STACK_ARGS     r13
#define REG_ADDR_PTR            r14
#define RETURN_REG_ADDR         r15
#define FNADDR                  r16

        .cfi_startproc
        {
         /* Save return address. */
         SW      sp, lr
         .cfi_offset lr, 0
         /* Prepare to spill incoming r52. */
         addi    TMP, sp, -REG_SIZE
         /* Increase frame size to have room to spill r52 and REG_ARGS.
            The +7 is to round up mod 8. */
         addi    FRAME_SIZE, STACK_ARG_BYTES, \
                 REG_SIZE + REG_SIZE + LINKAGE_SIZE + 7
        }
        {
         /* Round stack frame size to a multiple of 8 to satisfy ABI. */
         andi    FRAME_SIZE, FRAME_SIZE, -8
         /* Compute where to spill REG_ARGS value. */
         addi    TMP2, sp, -(REG_SIZE * 2)
        }
        {
         /* Spill incoming r52. */
         SW      TMP, r52
         .cfi_offset r52, -REG_SIZE
         /* Set up our frame pointer. */
         move    r52, sp
         .cfi_def_cfa_register r52
         /* Push stack frame. */
         sub     sp, sp, FRAME_SIZE
        }
        {
         /* Prepare to set up stack linkage. */
         addi    TMP, sp, REG_SIZE
         /* Prepare to memcpy stack args. */
         addi    OUTGOING_STACK_ARGS, sp, LINKAGE_SIZE
         /* Save REG_ARGS which we will need after we call the subroutine. */
         SW      TMP2, REG_ARGS
        }
        {
         /* Set up linkage info to hold incoming stack pointer. */
         SW      TMP, r52
        }
        {
         /* Skip stack args memcpy if we don't have any stack args (common). */
         blezt   STACK_ARG_BYTES, .Ldone_stack_args_memcpy
        }

        /* Copy loop: moves one register-sized value per iteration and
           counts STACK_ARG_BYTES down by REG_SIZE until it is no longer
           positive. */
.Lmemcpy_stack_args:
        {
         /* Load incoming argument from stack_args. */
         LW      TMP, INCOMING_STACK_ARGS
         addi    INCOMING_STACK_ARGS, INCOMING_STACK_ARGS, REG_SIZE
        }
        {
         /* Store stack argument into outgoing stack argument area. */
         SW      OUTGOING_STACK_ARGS, TMP
         addi    OUTGOING_STACK_ARGS, OUTGOING_STACK_ARGS, REG_SIZE
         addi    STACK_ARG_BYTES, STACK_ARG_BYTES, -REG_SIZE
        }
        {
         BGZT    STACK_ARG_BYTES, .Lmemcpy_stack_args
        }
.Ldone_stack_args_memcpy:

        {
         /* Copy aside ORIG_FNADDR so we can overwrite its register. */
         move    FNADDR, ORIG_FNADDR
         /* Prepare to load argument registers (r0 is still REG_ARGS here;
            REG_ADDR_PTR starts at the second array element). */
         addi    REG_ADDR_PTR, r0, REG_SIZE
         /* Load outgoing r0.  This bundle relies on the addi above seeing
            the old r0 even though the load replaces it. */
         LW      r0, r0
        }

/* Load up argument registers from the REG_ARGS array.
   LOAD_REG loads REG from the address in PTR, then advances PTR by one
   register slot. */
#define LOAD_REG(REG, PTR) \
        { \
         LW      REG, PTR ; \
         addi    PTR, PTR, REG_SIZE \
        }

        LOAD_REG(r1, REG_ADDR_PTR)
        LOAD_REG(r2, REG_ADDR_PTR)
        LOAD_REG(r3, REG_ADDR_PTR)
        LOAD_REG(r4, REG_ADDR_PTR)
        LOAD_REG(r5, REG_ADDR_PTR)
        LOAD_REG(r6, REG_ADDR_PTR)
        LOAD_REG(r7, REG_ADDR_PTR)
        LOAD_REG(r8, REG_ADDR_PTR)
        LOAD_REG(r9, REG_ADDR_PTR)

        {
         /* Call the subroutine. */
         jalr    FNADDR
        }

        {
         /* Restore original lr. */
         LW      lr, r52
         /* Prepare to recover ARGS, which we spilled earlier. */
         addi    TMP, r52, -(2 * REG_SIZE)
        }
        {
         /* Restore ARGS, so we can fill it in with the return regs r0-r9. */
         LW      RETURN_REG_ADDR, TMP
         /* Prepare to restore original r52. */
         addi    TMP, r52, -REG_SIZE
        }

        {
         /* Pop stack frame. */
         move    sp, r52
         /* Restore original r52. */
         LW      r52, TMP
        }
        /* r52 no longer holds our frame base: track the CFA through sp
           again (same offset) and mark r52 as holding its entry value, so
           the unwind info stays correct for the rest of the routine. */
        .cfi_def_cfa_register sp
        .cfi_restore r52

/* STORE_REG stores REG to the address in PTR, then advances PTR by one
   register slot. */
#define STORE_REG(REG, PTR) \
        { \
         SW      PTR, REG ; \
         addi    PTR, PTR, REG_SIZE \
        }

        /* Return all register values by reference. */
        STORE_REG(r0, RETURN_REG_ADDR)
        STORE_REG(r1, RETURN_REG_ADDR)
        STORE_REG(r2, RETURN_REG_ADDR)
        STORE_REG(r3, RETURN_REG_ADDR)
        STORE_REG(r4, RETURN_REG_ADDR)
        STORE_REG(r5, RETURN_REG_ADDR)
        STORE_REG(r6, RETURN_REG_ADDR)
        STORE_REG(r7, RETURN_REG_ADDR)
        STORE_REG(r8, RETURN_REG_ADDR)
        STORE_REG(r9, RETURN_REG_ADDR)

        {
         jrp     lr
        }

        .cfi_endproc
        .size ffi_call_tile, .-ffi_call_tile

/* ffi_closure_tile(...)

   Entry state, as set up by the trampoline template at the end of this
   file:

     TILEPro:  lr points to the closure plus 8 bytes, and r10 contains
               the actual return address.
     TILE-Gx:  r11 contains the address of the closure and lr still
               contains the actual return address (the trampoline jumps
               here through r10 without linking).

   This function simply dumps all register parameters into a stack array
   and passes the closure, the registers array, and the stack arguments
   to C code (ffi_closure_tile_inner) that does all of the actual closure
   processing.  On return from the C code, r0-r9 are reloaded from the
   second register array in the frame, located NUM_ARG_REGS * REG_SIZE
   bytes past the spilled incoming registers. */

        .section .text.ffi_closure_tile, "ax", @progbits
        .align  8
        .globl  ffi_closure_tile
        FFI_HIDDEN(ffi_closure_tile)

        .cfi_startproc
/* Room for two arrays of NUM_ARG_REGS registers (the spilled incoming
   registers, followed by the array the return registers are reloaded
   from), plus frame linkage, rounded up to a multiple of 8. */
#define CLOSURE_FRAME_SIZE (((NUM_ARG_REGS * REG_SIZE * 2 + LINKAGE_SIZE) + 7) & -8)
ffi_closure_tile:
        {
#ifdef __tilegx__
         /* Save return address. */
         st      sp, lr
         .cfi_offset lr, 0
#else
         /* Save return address (in r10 due to closure stub wrapper). */
         SW      sp, r10
         .cfi_return_column r10
         .cfi_offset r10, 0
#endif
         /* Compute address for stack frame linkage. */
         addli   r10, sp, -(CLOSURE_FRAME_SIZE - REG_SIZE)
        }
        {
         /* Save incoming stack pointer in linkage area. */
         SW      r10, sp
         .cfi_offset sp, -(CLOSURE_FRAME_SIZE - REG_SIZE)
         /* Push a new stack frame. */
         addli   sp, sp, -CLOSURE_FRAME_SIZE
         .cfi_adjust_cfa_offset CLOSURE_FRAME_SIZE
        }

        {
         /* Create pointer to where to start spilling registers. */
         addi    r10, sp, LINKAGE_SIZE
        }

        /* Spill all the incoming registers. */
        STORE_REG(r0, r10)
        STORE_REG(r1, r10)
        STORE_REG(r2, r10)
        STORE_REG(r3, r10)
        STORE_REG(r4, r10)
        STORE_REG(r5, r10)
        STORE_REG(r6, r10)
        STORE_REG(r7, r10)
        STORE_REG(r8, r10)
        {
         /* Save r9. */
         SW      r10, r9
#ifdef __tilegx__
         /* Pointer to closure is passed in r11. */
         move    r0, r11
#else
         /* Compute pointer to the closure object. Because the closure
            starts with a "jal ffi_closure_tile", we can just take the
            value of lr (a phony return address pointing into the closure)
            and subtract 8. */
         addi    r0, lr, -8
#endif
         /* Compute a pointer to the register arguments we just spilled. */
         addi    r1, sp, LINKAGE_SIZE
        }
        {
         /* Compute a pointer to the extra stack arguments (if any):
            LINKAGE_SIZE bytes above the stack pointer we were entered
            with. */
         addli   r2, sp, CLOSURE_FRAME_SIZE + LINKAGE_SIZE
         /* Call C code to deal with all of the grotty details. */
         jal     ffi_closure_tile_inner
        }
        {
         /* Address of the slot where the return address was saved on
            entry (the incoming sp). */
         addli   r10, sp, CLOSURE_FRAME_SIZE
        }
        {
         /* Restore the return address. */
         LW      lr, r10
         /* Compute pointer to registers array. */
         addli   r10, sp, LINKAGE_SIZE + (NUM_ARG_REGS * REG_SIZE)
        }
        /* Return all the register values, which C code may have set. */
        LOAD_REG(r0, r10)
        LOAD_REG(r1, r10)
        LOAD_REG(r2, r10)
        LOAD_REG(r3, r10)
        LOAD_REG(r4, r10)
        LOAD_REG(r5, r10)
        LOAD_REG(r6, r10)
        LOAD_REG(r7, r10)
        LOAD_REG(r8, r10)
        LOAD_REG(r9, r10)
        {
         /* Pop the frame. */
         addli   sp, sp, CLOSURE_FRAME_SIZE
         jrp     lr
        }

        .cfi_endproc
        .size   ffi_closure_tile, . - ffi_closure_tile


/* What follows are code template instructions that get copied to the
   closure trampoline by ffi_prep_closure_loc.  The zeroed operands
   get replaced by their proper values at runtime.

   The template establishes the entry state that ffi_closure_tile
   expects: on TILE-Gx, r11 = address of the closure and control reaches
   ffi_closure_tile through r10 with lr untouched; on TILEPro, r10 = the
   caller's lr and the jal leaves lr pointing into the closure. */

        .section .text.ffi_template_tramp_tile, "ax", @progbits
        .align  8
        .globl  ffi_template_tramp_tile
        FFI_HIDDEN(ffi_template_tramp_tile)
ffi_template_tramp_tile:
#ifdef __tilegx__
        {
          moveli r11, 0 /* backpatched to address of containing closure. */
          moveli r10, 0 /* backpatched to ffi_closure_tile. */
        }
        /* Note: the following bundle gets generated multiple times
           depending on the pointer value (esp. useful for -m32 mode). */
        { shl16insli r11, r11, 0 ; shl16insli r10, r10, 0 }
        { info 2+8 /* for backtracer: -> pc in lr, frame size 0 */ ; jr r10 }
#else
        /* 'jal .' yields a PC-relative offset of zero so we can OR in the
           right offset at runtime. */
        { move r10, lr ; jal . /* ffi_closure_tile */ }
#endif

        .size ffi_template_tramp_tile, . - ffi_template_tramp_tile