/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
  Art assembly interpreter notes:

  First validate assembly code by implementing ExecuteXXXImpl() style body (doesn't
  handle invoke, allows higher-level code to create frame & shadow frame.

  Once that's working, support direct entry code & eliminate shadow frame (and
  excess locals allocation.

  Some (hopefully) temporary ugliness.  We'll treat rFP as pointing to the
  base of the vreg array within the shadow frame.  Access the other fields,
  dex_pc_, method_ and number_of_vregs_ via negative offsets.  For now, we'll continue
  the shadow frame mechanism of double-storing object references - via rFP &
  number_of_vregs_.

 */

/*
 * NOTE: this is an mterp generator template, not plain assembly.  "$$" is the
 * template escape that expands to a literal "$" in the generated .S file, and
 * "${handler_size_bits}" is substituted by the generator (see its use in
 * GOTO_OPCODE below).  Do not "fix" the doubled dollar signs.
 */

#include "asm_support.h"

/*
 * Derive feature macros from the compiler-provided MIPS ISA revision.
 * The #ifs nest so each later revision implies the earlier ones:
 * r6 => r5 (FPU64) => r2.
 */
#if (__mips==32) && (__mips_isa_rev>=2)
#define MIPS32REVGE2    /* mips32r2 and greater */
#if (__mips==32) && (__mips_isa_rev>=5)
#define FPU64           /* 64 bit FPU */
#if (__mips==32) && (__mips_isa_rev>=6)
#define MIPS32REVGE6    /* mips32r6 and greater */
#endif
#endif
#endif

/* MIPS definitions and declarations

   reg  nick      purpose
   s0   rPC       interpreted program counter, used for fetching instructions
   s1   rFP       interpreted frame pointer, used for accessing locals and args
   s2   rSELF     self (Thread) pointer
   s3   rIBASE    interpreted instruction base pointer, used for computed goto
   s4   rINST     first 16-bit code unit of current instruction
   s6   rREFS     base of object references in shadow frame (ideally, we'll get rid of this later).
*/

/* single-purpose registers, given names for clarity */
#define rPC s0
#define rFP s1
#define rSELF s2
#define rIBASE s3
#define rINST s4
#define rOBJ s5
#define rREFS s6
#define rTEMP s7

/* argument/result aliases following the o32 calling convention */
#define rARG0 a0
#define rARG1 a1
#define rARG2 a2
#define rARG3 a3
#define rRESULT0 v0
#define rRESULT1 v1

/* GP register definitions */
#define zero $$0      /* always zero */
#define AT   $$at     /* assembler temp */
#define v0   $$2      /* return value */
#define v1   $$3
#define a0   $$4      /* argument registers */
#define a1   $$5
#define a2   $$6
#define a3   $$7
#define t0   $$8      /* temp registers (not saved across subroutine calls) */
#define t1   $$9
#define t2   $$10
#define t3   $$11
#define t4   $$12
#define t5   $$13
#define t6   $$14
#define t7   $$15
#define ta0  $$12     /* alias */
#define ta1  $$13
#define ta2  $$14
#define ta3  $$15
#define s0   $$16     /* saved across subroutine calls (callee saved) */
#define s1   $$17
#define s2   $$18
#define s3   $$19
#define s4   $$20
#define s5   $$21
#define s6   $$22
#define s7   $$23
#define t8   $$24     /* two more temp registers */
#define t9   $$25
#define k0   $$26     /* kernel temporary */
#define k1   $$27
#define gp   $$28     /* global pointer */
#define sp   $$29     /* stack pointer */
#define s8   $$30     /* one more callee saved */
#define ra   $$31     /* return address */

/* FP register definitions.  Each "Nf" name is the odd-numbered partner used
 * for the high half of a double on 32-bit-FPU configurations. */
#define fv0  $$f0
#define fv0f $$f1
#define fv1  $$f2
#define fv1f $$f3
#define fa0  $$f12
#define fa0f $$f13
#define fa1  $$f14
#define fa1f $$f15
#define ft0  $$f4
#define ft0f $$f5
#define ft1  $$f6
#define ft1f $$f7
#define ft2  $$f8
#define ft2f $$f9
#define ft3  $$f10
#define ft3f $$f11
#define ft4  $$f16
#define ft4f $$f17
#define ft5  $$f18
#define ft5f $$f19
#define fs0  $$f20
#define fs0f $$f21
#define fs1  $$f22
#define fs1f $$f23
#define fs2  $$f24
#define fs2f $$f25
#define fs3  $$f26
#define fs3f $$f27
#define fs4  $$f28
#define fs4f $$f29
#define fs5  $$f30
#define fs5f $$f31

/* FP condition-code registers exist only before mips32r6 (r6 removed them). */
#ifndef MIPS32REVGE6
#define fcc0 $$fcc0
#define fcc1 $$fcc1
#endif

/*
 * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
 * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
 */
#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)

#define MTERP_PROFILE_BRANCHES 1
#define MTERP_LOGGING 0

/*
 * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
 * be done *before* something throws.
 *
 * It's okay to do this more than once.
 *
 * NOTE: the fast interpreter keeps track of dex pc as a direct pointer to the mapped
 * dex byte codes.  However, the rest of the runtime expects dex pc to be an instruction
 * offset into the code_items_[] array.  For effiency, we will "export" the
 * current dex pc as a direct pointer using the EXPORT_PC macro, and rely on GetDexPC
 * to convert to a dex pc when needed.
 */
#define EXPORT_PC() \
    sw        rPC, OFF_FP_DEX_PC_PTR(rFP)

/*
 * Export a numeric dex pc: (rPC - code_item insns base) >> 1 code units,
 * stored to the shadow frame's dex_pc_ field.  Clobbers "tmp".
 *
 * NOTE(review): unlike the other multi-line macros in this file, the
 * continuation lines here are not separated by ';' — after preprocessing they
 * would fold onto a single assembler line.  Confirm whether this macro is
 * actually used anywhere, or whether the missing semicolons are a latent bug.
 */
#define EXPORT_DEX_PC(tmp) \
    lw   tmp, OFF_FP_CODE_ITEM(rFP) \
    sw   rPC, OFF_FP_DEX_PC_PTR(rFP) \
    addu tmp, CODEITEM_INSNS_OFFSET \
    subu tmp, rPC, tmp \
    sra  tmp, tmp, 1 \
    sw   tmp, OFF_FP_DEX_PC(rFP)

/*
 * Fetch the next instruction from rPC into rINST.  Does not advance rPC.
 */
#define FETCH_INST() lhu rINST, (rPC)

/*
 * Fetch the next instruction from the specified offset.  Advances rPC
 * to point to the next instruction.  "_count" is in 16-bit code units.
 *
 * This must come AFTER anything that can throw an exception, or the
 * exception catch may miss.  (This also implies that it must come after
 * EXPORT_PC().)
 */
#define FETCH_ADVANCE_INST(_count) lhu rINST, ((_count)*2)(rPC); \
    addu rPC, rPC, ((_count) * 2)

/*
 * The operation performed here is similar to FETCH_ADVANCE_INST, except the
 * src and dest registers are parameterized (not hard-wired to rPC and rINST).
 */
#define PREFETCH_ADVANCE_INST(_dreg, _sreg, _count) \
    lhu _dreg, ((_count)*2)(_sreg) ; \
    addu _sreg, _sreg, (_count)*2

/*
 * Similar to FETCH_ADVANCE_INST, but does not update rPC.  Used to load
 * rINST ahead of possible exception point.  Be sure to manually advance rPC
 * later.
 */
#define PREFETCH_INST(_count) lhu rINST, ((_count)*2)(rPC)

/* Advance rPC by some number of code units. */
#define ADVANCE(_count) addu rPC, rPC, ((_count) * 2)

/*
 * Fetch the next instruction from an offset specified by rd.  Updates
 * rPC to point to the next instruction.  "rd" must specify the distance
 * in bytes, *not* 16-bit code units, and may be a signed value.
 */
#define FETCH_ADVANCE_INST_RB(rd) addu rPC, rPC, rd; \
    lhu rINST, (rPC)

/*
 * Fetch a half-word code unit from an offset past the current PC.  The
 * "_count" value is in 16-bit code units.  Does not advance rPC.
 *
 * The "_S" variant works the same but treats the value as signed.
 */
#define FETCH(rd, _count) lhu rd, ((_count) * 2)(rPC)
#define FETCH_S(rd, _count) lh rd, ((_count) * 2)(rPC)

/*
 * Fetch one byte from an offset past the current PC.  Pass in the same
 * "_count" as you would for FETCH, and an additional 0/1 indicating which
 * byte of the halfword you want (lo/hi).
 */
#define FETCH_B(rd, _count, _byte) lbu rd, ((_count) * 2 + _byte)(rPC)

/*
 * Put the instruction's opcode field into the specified register.
 */
#define GET_INST_OPCODE(rd) and rd, rINST, 0xFF

/*
 * Put the prefetched instruction's opcode field into the specified register.
 */
#define GET_PREFETCHED_OPCODE(dreg, sreg) andi dreg, sreg, 255

/*
 * Begin executing the opcode in rd.  Each handler occupies a fixed-size slot
 * (1 << ${handler_size_bits} bytes) starting at the dispatch base, so the
 * target address is base + (opcode << ${handler_size_bits}).
 */
#define GOTO_OPCODE(rd) sll rd, rd, ${handler_size_bits}; \
    addu rd, rIBASE, rd; \
    jalr zero, rd

/* Same as GOTO_OPCODE but with an explicit handler base instead of rIBASE. */
#define GOTO_OPCODE_BASE(_base, rd) sll rd, rd, ${handler_size_bits}; \
    addu rd, _base, rd; \
    jalr zero, rd

/*
 * Get/set the 32-bit value from a Dalvik register.
 *
 * The SET_* variants store the value into the vreg array (rFP) and clear (or,
 * for objects, duplicate) the matching slot in the reference array (rREFS),
 * maintaining the double-storing scheme described at the top of this file.
 * They clobber AT and t8.
 */
#define GET_VREG(rd, rix) LOAD_eas2(rd, rFP, rix)

#define GET_VREG_F(rd, rix) EAS2(AT, rFP, rix); \
    .set noat; l.s rd, (AT); .set at

#define SET_VREG(rd, rix) .set noat; \
    sll AT, rix, 2; \
    addu t8, rFP, AT; \
    sw rd, 0(t8); \
    addu t8, rREFS, AT; \
    .set at; \
    sw zero, 0(t8)

#define SET_VREG64(rlo, rhi, rix) .set noat; \
    sll AT, rix, 2; \
    addu t8, rFP, AT; \
    sw rlo, 0(t8); \
    sw rhi, 4(t8); \
    addu t8, rREFS, AT; \
    .set at; \
    sw zero, 0(t8); \
    sw zero, 4(t8)

#ifdef FPU64
/* With a 64-bit FPU, "rlo" holds the full double; mfhc1 extracts the high
 * word ("rhi" is unused).  Without FPU64, the double lives in an even/odd
 * single-precision register pair. */
#define SET_VREG64_F(rlo, rhi, rix) .set noat; \
    sll AT, rix, 2; \
    addu t8, rREFS, AT; \
    sw zero, 0(t8); \
    sw zero, 4(t8); \
    addu t8, rFP, AT; \
    mfhc1 AT, rlo; \
    sw AT, 4(t8); \
    .set at; \
    s.s rlo, 0(t8)
#else
#define SET_VREG64_F(rlo, rhi, rix) .set noat; \
    sll AT, rix, 2; \
    addu t8, rFP, AT; \
    s.s rlo, 0(t8); \
    s.s rhi, 4(t8); \
    addu t8, rREFS, AT; \
    .set at; \
    sw zero, 0(t8); \
    sw zero, 4(t8)
#endif

/* Object references are stored to BOTH rFP and rREFS slots (GC visibility). */
#define SET_VREG_OBJECT(rd, rix) .set noat; \
    sll AT, rix, 2; \
    addu t8, rFP, AT; \
    sw rd, 0(t8); \
    addu t8, rREFS, AT; \
    .set at; \
    sw rd, 0(t8)

/* Combination of the SET_VREG and GOTO_OPCODE functions to save 1 instruction.
 * noreorder: the final sw sits in the jalr branch delay slot intentionally. */
#define SET_VREG_GOTO(rd, rix, dst) .set noreorder; \
    sll dst, dst, ${handler_size_bits}; \
    addu dst, rIBASE, dst; \
    .set noat; \
    sll AT, rix, 2; \
    addu t8, rFP, AT; \
    sw rd, 0(t8); \
    addu t8, rREFS, AT; \
    .set at; \
    jalr zero, dst; \
    sw zero, 0(t8); \
    .set reorder

/* Combination of the SET_VREG64 and GOTO_OPCODE functions to save 1 instruction */
#define SET_VREG64_GOTO(rlo, rhi, rix, dst) .set noreorder; \
    sll dst, dst, ${handler_size_bits}; \
    addu dst, rIBASE, dst; \
    .set noat; \
    sll AT, rix, 2; \
    addu t8, rFP, AT; \
    sw rlo, 0(t8); \
    sw rhi, 4(t8); \
    addu t8, rREFS, AT; \
    .set at; \
    sw zero, 0(t8); \
    jalr zero, dst; \
    sw zero, 4(t8); \
    .set reorder

#define SET_VREG_F(rd, rix) .set noat; \
    sll AT, rix, 2; \
    addu t8, rFP, AT; \
    s.s rd, 0(t8); \
    addu t8, rREFS, AT; \
    .set at; \
    sw zero, 0(t8)

/* Extract instruction operand fields from rINST:
 * GET_OPA = bits 15:8 (vAA), GET_OPA4 = bits 11:8 (vA), GET_OPB = bits 15:12 (vB). */
#define GET_OPA(rd) srl rd, rINST, 8
#ifdef MIPS32REVGE2
#define GET_OPA4(rd) ext rd, rINST, 8, 4
#else
#define GET_OPA4(rd) GET_OPA(rd); and rd, 0xf
#endif
#define GET_OPB(rd) srl rd, rINST, 12

/*
 * Form an Effective Address rd = rbase + roff<<n;
 * Uses reg AT
 */
#define EASN(rd, rbase, roff, rshift) .set noat; \
    sll AT, roff, rshift; \
    addu rd, rbase, AT; \
    .set at

#define EAS1(rd, rbase, roff) EASN(rd, rbase, roff, 1)
#define EAS2(rd, rbase, roff) EASN(rd, rbase, roff, 2)
#define EAS3(rd, rbase, roff) EASN(rd, rbase, roff, 3)
#define EAS4(rd, rbase, roff) EASN(rd, rbase, roff, 4)

/*
 * Form an Effective Shift Right rd = rbase + roff>>n;
 * Uses reg AT
 */
#define ESRN(rd, rbase, roff, rshift) .set noat; \
    srl AT, roff, rshift; \
    addu rd, rbase, AT; \
    .set at

/* Word load/store through a scaled index (rbase + roff*4).  Clobber AT. */
#define LOAD_eas2(rd, rbase, roff) EAS2(AT, rbase, roff); \
    .set noat; lw rd, 0(AT); .set at

#define STORE_eas2(rd, rbase, roff) EAS2(AT, rbase, roff); \
    .set noat; sw rd, 0(AT); .set at

#define LOAD_RB_OFF(rd, rbase, off) lw rd, off(rbase)
#define STORE_RB_OFF(rd, rbase, off) sw rd, off(rbase)

/* 64-bit integer load/store as a lo/hi word pair (little-endian layout). */
#define STORE64_off(rlo, rhi, rbase, off) sw rlo, off(rbase); \
    sw rhi, (off+4)(rbase)
#define LOAD64_off(rlo, rhi, rbase, off) lw rlo, off(rbase); \
    lw rhi, (off+4)(rbase)

#define STORE64(rlo, rhi, rbase) STORE64_off(rlo, rhi, rbase, 0)
#define LOAD64(rlo, rhi, rbase) LOAD64_off(rlo, rhi, rbase, 0)

#ifdef FPU64
/* FPU64: one 64-bit FP register holds the double; move the high word through
 * AT with mfhc1/mthc1.  "rhi" is unused in this configuration.  Clobbers AT. */
#define STORE64_off_F(rlo, rhi, rbase, off) s.s rlo, off(rbase); \
    .set noat; \
    mfhc1 AT, rlo; \
    sw AT, (off+4)(rbase); \
    .set at
#define LOAD64_off_F(rlo, rhi, rbase, off) l.s rlo, off(rbase); \
    .set noat; \
    lw AT, (off+4)(rbase); \
    mthc1 AT, rlo; \
    .set at
#else
#define STORE64_off_F(rlo, rhi, rbase, off) s.s rlo, off(rbase); \
    s.s rhi, (off+4)(rbase)
#define LOAD64_off_F(rlo, rhi, rbase, off) l.s rlo, off(rbase); \
    l.s rhi, (off+4)(rbase)
#endif

#define STORE64_F(rlo, rhi, rbase) STORE64_off_F(rlo, rhi, rbase, 0)
#define LOAD64_F(rlo, rhi, rbase) LOAD64_off_F(rlo, rhi, rbase, 0)


#define LOAD_base_offMirrorArray_length(rd, rbase) LOAD_RB_OFF(rd, rbase, MIRROR_ARRAY_LENGTH_OFFSET)

/* Native stack frame helpers. */
#define STACK_STORE(rd, off) sw rd, off(sp)
#define STACK_LOAD(rd, off) lw rd, off(sp)
#define CREATE_STACK(n) subu sp, sp, n
#define DELETE_STACK(n) addu sp, sp, n

#define LOAD_ADDR(dest, addr) la dest, addr
#define LOAD_IMM(dest, imm) li dest, imm
#define MOVE_REG(dest, src) move dest, src
#define STACK_SIZE 128

/* o32 outgoing-argument slots (args 4..7) and gp save slot within the frame. */
#define STACK_OFFSET_ARG04 16
#define STACK_OFFSET_ARG05 20
#define STACK_OFFSET_ARG06 24
#define STACK_OFFSET_ARG07 28
#define STACK_OFFSET_GP 84

#define JAL(n) jal n
#define BAL(n) bal n

/*
 * FP register usage restrictions:
 * 1)  We don't use the callee save FP registers so we don't have to save them.
 * 2)  We don't use the odd FP registers so we can share code with mips32r6.
 */
/* Allocate STACK_SIZE bytes and save ra/s8/s0-s7; STACK_LOAD_FULL is the
 * mirror-image restore (it additionally reloads gp, which callers are
 * expected to have stored at STACK_OFFSET_GP). */
#define STACK_STORE_FULL() CREATE_STACK(STACK_SIZE); \
    STACK_STORE(ra, 124); \
    STACK_STORE(s8, 120); \
    STACK_STORE(s0, 116); \
    STACK_STORE(s1, 112); \
    STACK_STORE(s2, 108); \
    STACK_STORE(s3, 104); \
    STACK_STORE(s4, 100); \
    STACK_STORE(s5, 96); \
    STACK_STORE(s6, 92); \
    STACK_STORE(s7, 88);

#define STACK_LOAD_FULL() STACK_LOAD(gp, STACK_OFFSET_GP); \
    STACK_LOAD(s7, 88); \
    STACK_LOAD(s6, 92); \
    STACK_LOAD(s5, 96); \
    STACK_LOAD(s4, 100); \
    STACK_LOAD(s3, 104); \
    STACK_LOAD(s2, 108); \
    STACK_LOAD(s1, 112); \
    STACK_LOAD(s0, 116); \
    STACK_LOAD(s8, 120); \
    STACK_LOAD(ra, 124); \
    DELETE_STACK(STACK_SIZE)