   1/*
   2 * Copyright 2013 Tilera Corporation. All Rights Reserved.
   3 *
   4 *   This program is free software; you can redistribute it and/or
   5 *   modify it under the terms of the GNU General Public License
   6 *   as published by the Free Software Foundation, version 2.
   7 *
   8 *   This program is distributed in the hope that it will be useful, but
   9 *   WITHOUT ANY WARRANTY; without even the implied warranty of
  10 *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
  11 *   NON INFRINGEMENT.  See the GNU General Public License for
  12 *   more details.
  13 *
  14 * A code-rewriter that handles unaligned exceptions.
  15 */
  16
  17#include <linux/smp.h>
  18#include <linux/ptrace.h>
  19#include <linux/slab.h>
  20#include <linux/thread_info.h>
  21#include <linux/uaccess.h>
  22#include <linux/mman.h>
  23#include <linux/types.h>
  24#include <linux/err.h>
  25#include <linux/module.h>
  26#include <linux/compat.h>
  27#include <linux/prctl.h>
  28#include <asm/cacheflush.h>
  29#include <asm/traps.h>
  30#include <asm/uaccess.h>
  31#include <asm/unaligned.h>
  32#include <arch/abi.h>
  33#include <arch/spr_def.h>
  34#include <arch/opcode.h>
  35
  36
  37/*
  38 * This file handles unaligned exceptions for tile-Gx. The tilepro's
  39 * unaligned exceptions are handled in single_step.c.
  40 */
  41
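    /* Set via the "unaligned_printk=" boot parameter; when non-zero, each unaligned-access fixup is logged. */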
  42int unaligned_printk;
  43
  44static int __init setup_unaligned_printk(char *str)
  45{
  46	long val;
  47	if (kstrtol(str, 0, &val) != 0)
  48		return 0;
  49	unaligned_printk = val;
  50	pr_info("Printk for each unaligned data access is %s\n",
  51		unaligned_printk ? "enabled" : "disabled");
  52	return 1;
  53}
  54__setup("unaligned_printk=", setup_unaligned_printk);
  55
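    /* Running count of fixed-up unaligned accesses; incremented cheaply, so only approximate. */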
  56unsigned int unaligned_fixup_count;
  57
  58#ifdef __tilegx__
  59
  60/*
  61 * Unaligned data JIT fixup code fragment. The reserved space is 128 bytes:
  62 * the first 64-bit word saves the fault PC address, the second word is the
  63 * fault instruction bundle, followed by 14 JIT bundles.
  64 */
  65
  66struct unaligned_jit_fragment {
  67	unsigned long       pc;
  68	tilegx_bundle_bits  bundle;
  69	tilegx_bundle_bits  insn[14];
  70};
  71
  72/*
  73 * Check if a nop or fnop is at the bundle's X0 pipeline.
  74 */
  75
  76static bool is_bundle_x0_nop(tilegx_bundle_bits bundle)
  77{
  78	return (((get_UnaryOpcodeExtension_X0(bundle) ==
  79		  NOP_UNARY_OPCODE_X0) &&
  80		 (get_RRROpcodeExtension_X0(bundle) ==
  81		  UNARY_RRR_0_OPCODE_X0) &&
  82		 (get_Opcode_X0(bundle) ==
  83		  RRR_0_OPCODE_X0)) ||
  84		((get_UnaryOpcodeExtension_X0(bundle) ==
  85		  FNOP_UNARY_OPCODE_X0) &&
  86		 (get_RRROpcodeExtension_X0(bundle) ==
  87		  UNARY_RRR_0_OPCODE_X0) &&
  88		 (get_Opcode_X0(bundle) ==
  89		  RRR_0_OPCODE_X0)));
  90}
  91
  92/*
  93 * Check if a nop or fnop is at the bundle's X1 pipeline.
  94 */
  95
  96static bool is_bundle_x1_nop(tilegx_bundle_bits bundle)
  97{
  98	return (((get_UnaryOpcodeExtension_X1(bundle) ==
  99		  NOP_UNARY_OPCODE_X1) &&
 100		 (get_RRROpcodeExtension_X1(bundle) ==
 101		  UNARY_RRR_0_OPCODE_X1) &&
 102		 (get_Opcode_X1(bundle) ==
 103		  RRR_0_OPCODE_X1)) ||
 104		((get_UnaryOpcodeExtension_X1(bundle) ==
 105		  FNOP_UNARY_OPCODE_X1) &&
 106		 (get_RRROpcodeExtension_X1(bundle) ==
 107		  UNARY_RRR_0_OPCODE_X1) &&
 108		 (get_Opcode_X1(bundle) ==
 109		  RRR_0_OPCODE_X1)));
 110}
 111
 112/*
 113 * Check if a nop or fnop is at the bundle's Y0 pipeline.
 114 */
 115
 116static bool is_bundle_y0_nop(tilegx_bundle_bits bundle)
 117{
 118	return (((get_UnaryOpcodeExtension_Y0(bundle) ==
 119		  NOP_UNARY_OPCODE_Y0) &&
 120		 (get_RRROpcodeExtension_Y0(bundle) ==
 121		  UNARY_RRR_1_OPCODE_Y0) &&
 122		 (get_Opcode_Y0(bundle) ==
 123		  RRR_1_OPCODE_Y0)) ||
 124		((get_UnaryOpcodeExtension_Y0(bundle) ==
 125		  FNOP_UNARY_OPCODE_Y0) &&
 126		 (get_RRROpcodeExtension_Y0(bundle) ==
 127		  UNARY_RRR_1_OPCODE_Y0) &&
 128		 (get_Opcode_Y0(bundle) ==
 129		  RRR_1_OPCODE_Y0)));
 130}
 131
 132/*
 133 * Check if a nop or fnop is at the bundle's Y1 pipeline.
 134 */
 135
 136static bool is_bundle_y1_nop(tilegx_bundle_bits bundle)
 137{
 138	return (((get_UnaryOpcodeExtension_Y1(bundle) ==
 139		  NOP_UNARY_OPCODE_Y1) &&
 140		 (get_RRROpcodeExtension_Y1(bundle) ==
 141		  UNARY_RRR_1_OPCODE_Y1) &&
 142		 (get_Opcode_Y1(bundle) ==
 143		  RRR_1_OPCODE_Y1)) ||
 144		((get_UnaryOpcodeExtension_Y1(bundle) ==
 145		  FNOP_UNARY_OPCODE_Y1) &&
 146		 (get_RRROpcodeExtension_Y1(bundle) ==
 147		  UNARY_RRR_1_OPCODE_Y1) &&
 148		 (get_Opcode_Y1(bundle) ==
 149		  RRR_1_OPCODE_Y1)));
 150}
 151
 152/*
 153 * Test if a bundle's y0 and y1 pipelines are both nop or fnop.
 154 */
 155
 156static bool is_y0_y1_nop(tilegx_bundle_bits bundle)
 157{
 158	return is_bundle_y0_nop(bundle) && is_bundle_y1_nop(bundle);
 159}
 160
 161/*
 162 * Test if a bundle's x0 and x1 pipelines are both nop or fnop.
 163 */
 164
 165static bool is_x0_x1_nop(tilegx_bundle_bits bundle)
 166{
 167	return is_bundle_x0_nop(bundle) && is_bundle_x1_nop(bundle);
 168}
 169
 170/*
 171 * Find the destination and source registers of the faulting unaligned access
 172 * instruction at X1 or Y2. Also, allocate up to 3 scratch registers clob1,
 173 * clob2 and clob3, which are guaranteed to differ from any register used in
 174 * the fault bundle. *r_alias reports whether instructions other than the
 175 * unaligned load/store share a register with ra, rb or rd.
 176 */
 177
 178static void find_regs(tilegx_bundle_bits bundle, uint64_t *rd, uint64_t *ra,
 179		      uint64_t *rb, uint64_t *clob1, uint64_t *clob2,
 180		      uint64_t *clob3, bool *r_alias)
 181{
 182	int i;
 183	uint64_t reg;
 184	uint64_t reg_map = 0, alias_reg_map = 0, map;
 185	bool alias = false;
 186
 187	/*
 188	 * Parse the fault bundle, find potentially used registers and mark
 189	 * the corresponding bits in reg_map and alias_reg_map. These two bit
 190	 * maps are used to find the scratch registers and to determine if
 191	 * there is a register alias.
 192	 */
 193	if (bundle & TILEGX_BUNDLE_MODE_MASK) {  /* Y Mode Bundle. */
 194
 195		reg = get_SrcA_Y2(bundle);
 196		reg_map |= 1ULL << reg;
 197		*ra = reg;
 198		reg = get_SrcBDest_Y2(bundle);
 199		reg_map |= 1ULL << reg;
 200
 201		if (rd) {
 202			/* Load. */
 203			*rd = reg;
 204			alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
 205		} else {
 206			/* Store. */
 207			*rb = reg;
 208			alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
 209		}
 210
 211		if (!is_bundle_y1_nop(bundle)) {
 212			reg = get_SrcA_Y1(bundle);
 213			reg_map |= (1ULL << reg);
 214			map = (1ULL << reg);
 215
 216			reg = get_SrcB_Y1(bundle);
 217			reg_map |= (1ULL << reg);
 218			map |= (1ULL << reg);
 219
 220			reg = get_Dest_Y1(bundle);
 221			reg_map |= (1ULL << reg);
 222			map |= (1ULL << reg);
 223
 224			if (map & alias_reg_map)
 225				alias = true;
 226		}
 227
 228		if (!is_bundle_y0_nop(bundle)) {
 229			reg = get_SrcA_Y0(bundle);
 230			reg_map |= (1ULL << reg);
 231			map = (1ULL << reg);
 232
 233			reg = get_SrcB_Y0(bundle);
 234			reg_map |= (1ULL << reg);
 235			map |= (1ULL << reg);
 236
 237			reg = get_Dest_Y0(bundle);
 238			reg_map |= (1ULL << reg);
 239			map |= (1ULL << reg);
 240
 241			if (map & alias_reg_map)
 242				alias = true;
 243		}
 244	} else	{ /* X Mode Bundle. */
 245
 246		reg = get_SrcA_X1(bundle);
 247		reg_map |= (1ULL << reg);
 248		*ra = reg;
 249		if (rd)	{
 250			/* Load. */
 251			reg = get_Dest_X1(bundle);
 252			reg_map |= (1ULL << reg);
 253			*rd = reg;
 254			alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
 255		} else {
 256			/* Store. */
 257			reg = get_SrcB_X1(bundle);
 258			reg_map |= (1ULL << reg);
 259			*rb = reg;
 260			alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
 261		}
 262
 263		if (!is_bundle_x0_nop(bundle)) {
 264			reg = get_SrcA_X0(bundle);
 265			reg_map |= (1ULL << reg);
 266			map = (1ULL << reg);
 267
 268			reg = get_SrcB_X0(bundle);
 269			reg_map |= (1ULL << reg);
 270			map |= (1ULL << reg);
 271
 272			reg = get_Dest_X0(bundle);
 273			reg_map |= (1ULL << reg);
 274			map |= (1ULL << reg);
 275
 276			if (map & alias_reg_map)
 277				alias = true;
 278		}
 279	}
 280
 281	/*
 282	 * "alias" indicates whether the unaligned access registers collide
 283	 * with others in the same bundle. We simply test the all-register-
 284	 * operands case (RRR) and ignore the case with an immediate. If a
 285	 * bundle has no register alias, we can do the fixup in a simpler and
 286	 * faster manner. So if an immediate field happens to match a register,
 287	 * we may end up falling back to the generic handling.
 288	 */
 289
 290	*r_alias = alias;
 291
 292	/* Flip bits on reg_map. */
 293	reg_map ^= -1ULL;
 294
 295	/* Scan the lower 54 (TREG_SP) bits of reg_map to find 3 set bits. */
 296	for (i = 0; i < TREG_SP; i++) {
 297		if (reg_map & (0x1ULL << i)) {
 298			if (*clob1 == -1) {
 299				*clob1 = i;
 300			} else if (*clob2 == -1) {
 301				*clob2 = i;
 302			} else if (*clob3 == -1) {
 303				*clob3 = i;
 304				return;
 305			}
 306		}
 307	}
 308}
 309
 310/*
 311 * Sanity check for registers ra, rb, rd and clob1/2/3. Return true if any of
 312 * them is unexpected.
 313 */
 314
 315static bool check_regs(uint64_t rd, uint64_t ra, uint64_t rb,
 316		       uint64_t clob1, uint64_t clob2,  uint64_t clob3)
 317{
 318	bool unexpected = false;
 319	if ((ra >= 56) && (ra != TREG_ZERO))
 320		unexpected = true;
 321
 322	if ((clob1 >= 56) || (clob2 >= 56) || (clob3 >= 56))
 323		unexpected = true;
 324
 325	if (rd != -1) {
 326		if ((rd >= 56) && (rd != TREG_ZERO))
 327			unexpected = true;
 328	} else {
 329		if ((rb >= 56) && (rb != TREG_ZERO))
 330			unexpected = true;
 331	}
 332	return unexpected;
 333}
 334
 335
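    /*
     * Bit masks selecting one pipeline slot of a 64-bit instruction bundle.
     * They are used below to splice a template instruction into a freshly
     * built JIT bundle.
     */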
 336#define  GX_INSN_X0_MASK   ((1ULL << 31) - 1)
 337#define  GX_INSN_X1_MASK   (((1ULL << 31) - 1) << 31)
 338#define  GX_INSN_Y0_MASK   ((0xFULL << 27) | (0xFFFFFULL))
 339#define  GX_INSN_Y1_MASK   (GX_INSN_Y0_MASK << 31)
 340#define  GX_INSN_Y2_MASK   ((0x7FULL << 51) | (0x7FULL << 20))
 341
 342#ifdef __LITTLE_ENDIAN
 343#define  GX_INSN_BSWAP(_bundle_)    (_bundle_)
 344#else
 345#define  GX_INSN_BSWAP(_bundle_)    swab64(_bundle_)
 346#endif /* __LITTLE_ENDIAN */
 347
 348/*
 349 * __JIT_CODE(.) creates template bundles in the .rodata.unalign_data section.
 350 * The corresponding static function jit_x#_###(.) generates a partial or
 351 * whole bundle based on the template and the given arguments.
 352 */
 353
 354#define __JIT_CODE(_X_)						\
 355	asm (".pushsection .rodata.unalign_data, \"a\"\n"	\
 356	     _X_"\n"						\
 357	     ".popsection\n")
 358
 359__JIT_CODE("__unalign_jit_x1_mtspr:   {mtspr 0,  r0}");
 360static tilegx_bundle_bits jit_x1_mtspr(int spr, int reg)
 361{
 362	extern  tilegx_bundle_bits __unalign_jit_x1_mtspr;
 363	return (GX_INSN_BSWAP(__unalign_jit_x1_mtspr) & GX_INSN_X1_MASK) |
 364		create_MT_Imm14_X1(spr) | create_SrcA_X1(reg);
 365}
 366
 367__JIT_CODE("__unalign_jit_x1_mfspr:   {mfspr r0, 0}");
 368static tilegx_bundle_bits  jit_x1_mfspr(int reg, int spr)
 369{
 370	extern  tilegx_bundle_bits __unalign_jit_x1_mfspr;
 371	return (GX_INSN_BSWAP(__unalign_jit_x1_mfspr) & GX_INSN_X1_MASK) |
 372		create_MF_Imm14_X1(spr) | create_Dest_X1(reg);
 373}
 374
 375__JIT_CODE("__unalign_jit_x0_addi:   {addi  r0, r0, 0; iret}");
 376static tilegx_bundle_bits  jit_x0_addi(int rd, int ra, int imm8)
 377{
 378	extern  tilegx_bundle_bits __unalign_jit_x0_addi;
 379	return (GX_INSN_BSWAP(__unalign_jit_x0_addi) & GX_INSN_X0_MASK) |
 380		create_Dest_X0(rd) | create_SrcA_X0(ra) |
 381		create_Imm8_X0(imm8);
 382}
 383
 384__JIT_CODE("__unalign_jit_x1_ldna:   {ldna  r0, r0}");
 385static tilegx_bundle_bits  jit_x1_ldna(int rd, int ra)
 386{
 387	extern  tilegx_bundle_bits __unalign_jit_x1_ldna;
 388	return (GX_INSN_BSWAP(__unalign_jit_x1_ldna) &  GX_INSN_X1_MASK) |
 389		create_Dest_X1(rd) | create_SrcA_X1(ra);
 390}
 391
 392__JIT_CODE("__unalign_jit_x0_dblalign:   {dblalign r0, r0 ,r0}");
 393static tilegx_bundle_bits  jit_x0_dblalign(int rd, int ra, int rb)
 394{
 395	extern  tilegx_bundle_bits __unalign_jit_x0_dblalign;
 396	return (GX_INSN_BSWAP(__unalign_jit_x0_dblalign) & GX_INSN_X0_MASK) |
 397		create_Dest_X0(rd) | create_SrcA_X0(ra) |
 398		create_SrcB_X0(rb);
 399}
 400
 401__JIT_CODE("__unalign_jit_x1_iret:   {iret}");
 402static tilegx_bundle_bits  jit_x1_iret(void)
 403{
 404	extern  tilegx_bundle_bits __unalign_jit_x1_iret;
 405	return GX_INSN_BSWAP(__unalign_jit_x1_iret) & GX_INSN_X1_MASK;
 406}
 407
 408__JIT_CODE("__unalign_jit_x01_fnop:   {fnop;fnop}");
 409static tilegx_bundle_bits  jit_x0_fnop(void)
 410{
 411	extern  tilegx_bundle_bits __unalign_jit_x01_fnop;
 412	return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X0_MASK;
 413}
 414
 415static tilegx_bundle_bits  jit_x1_fnop(void)
 416{
 417	extern  tilegx_bundle_bits __unalign_jit_x01_fnop;
 418	return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X1_MASK;
 419}
 420
 421__JIT_CODE("__unalign_jit_y2_dummy:   {fnop; fnop; ld zero, sp}");
 422static tilegx_bundle_bits  jit_y2_dummy(void)
 423{
 424	extern  tilegx_bundle_bits __unalign_jit_y2_dummy;
 425	return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y2_MASK;
 426}
 427
 428static tilegx_bundle_bits  jit_y1_fnop(void)
 429{
 430	extern  tilegx_bundle_bits __unalign_jit_y2_dummy;
 431	return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y1_MASK;
 432}
 433
 434__JIT_CODE("__unalign_jit_x1_st1_add:  {st1_add r1, r0, 0}");
 435static tilegx_bundle_bits  jit_x1_st1_add(int ra, int rb, int imm8)
 436{
 437	extern  tilegx_bundle_bits __unalign_jit_x1_st1_add;
 438	return (GX_INSN_BSWAP(__unalign_jit_x1_st1_add) &
 439		(~create_SrcA_X1(-1)) &
 440		GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
 441		create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
 442}
 443
 444__JIT_CODE("__unalign_jit_x1_st:  {crc32_8 r1, r0, r0; st  r0, r0}");
 445static tilegx_bundle_bits  jit_x1_st(int ra, int rb)
 446{
 447	extern  tilegx_bundle_bits __unalign_jit_x1_st;
 448	return (GX_INSN_BSWAP(__unalign_jit_x1_st) & GX_INSN_X1_MASK) |
 449		create_SrcA_X1(ra) | create_SrcB_X1(rb);
 450}
 451
 452__JIT_CODE("__unalign_jit_x1_st_add:  {st_add  r1, r0, 0}");
 453static tilegx_bundle_bits  jit_x1_st_add(int ra, int rb, int imm8)
 454{
 455	extern  tilegx_bundle_bits __unalign_jit_x1_st_add;
 456	return (GX_INSN_BSWAP(__unalign_jit_x1_st_add) &
 457		(~create_SrcA_X1(-1)) &
 458		GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
 459		create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
 460}
 461
 462__JIT_CODE("__unalign_jit_x1_ld:  {crc32_8 r1, r0, r0; ld  r0, r0}");
 463static tilegx_bundle_bits  jit_x1_ld(int rd, int ra)
 464{
 465	extern  tilegx_bundle_bits __unalign_jit_x1_ld;
 466	return (GX_INSN_BSWAP(__unalign_jit_x1_ld) & GX_INSN_X1_MASK) |
 467		create_Dest_X1(rd) | create_SrcA_X1(ra);
 468}
 469
 470__JIT_CODE("__unalign_jit_x1_ld_add:  {ld_add  r1, r0, 0}");
 471static tilegx_bundle_bits  jit_x1_ld_add(int rd, int ra, int imm8)
 472{
 473	extern  tilegx_bundle_bits __unalign_jit_x1_ld_add;
 474	return (GX_INSN_BSWAP(__unalign_jit_x1_ld_add) &
 475		(~create_Dest_X1(-1)) &
 476		GX_INSN_X1_MASK) | create_Dest_X1(rd) |
 477		create_SrcA_X1(ra) | create_Imm8_X1(imm8);
 478}
 479
 480__JIT_CODE("__unalign_jit_x0_bfexts:  {bfexts r0, r0, 0, 0}");
 481static tilegx_bundle_bits  jit_x0_bfexts(int rd, int ra, int bfs, int bfe)
 482{
 483	extern  tilegx_bundle_bits __unalign_jit_x0_bfexts;
 484	return (GX_INSN_BSWAP(__unalign_jit_x0_bfexts) &
 485		GX_INSN_X0_MASK) |
 486		create_Dest_X0(rd) | create_SrcA_X0(ra) |
 487		create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
 488}
 489
 490__JIT_CODE("__unalign_jit_x0_bfextu:  {bfextu r0, r0, 0, 0}");
 491static tilegx_bundle_bits  jit_x0_bfextu(int rd, int ra, int bfs, int bfe)
 492{
 493	extern  tilegx_bundle_bits __unalign_jit_x0_bfextu;
 494	return (GX_INSN_BSWAP(__unalign_jit_x0_bfextu) &
 495		GX_INSN_X0_MASK) |
 496		create_Dest_X0(rd) | create_SrcA_X0(ra) |
 497		create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
 498}
 499
 500__JIT_CODE("__unalign_jit_x1_addi:  {bfextu r1, r1, 0, 0; addi r0, r0, 0}");
 501static tilegx_bundle_bits  jit_x1_addi(int rd, int ra, int imm8)
 502{
 503	extern  tilegx_bundle_bits __unalign_jit_x1_addi;
 504	return (GX_INSN_BSWAP(__unalign_jit_x1_addi) & GX_INSN_X1_MASK) |
 505		create_Dest_X1(rd) | create_SrcA_X1(ra) |
 506		create_Imm8_X1(imm8);
 507}
 508
 509__JIT_CODE("__unalign_jit_x0_shrui:  {shrui r0, r0, 0; iret}");
 510static tilegx_bundle_bits  jit_x0_shrui(int rd, int ra, int imm6)
 511{
 512	extern  tilegx_bundle_bits __unalign_jit_x0_shrui;
 513	return (GX_INSN_BSWAP(__unalign_jit_x0_shrui) &
 514		GX_INSN_X0_MASK) |
 515		create_Dest_X0(rd) | create_SrcA_X0(ra) |
 516		create_ShAmt_X0(imm6);
 517}
 518
 519__JIT_CODE("__unalign_jit_x0_rotli:  {rotli r0, r0, 0; iret}");
 520static tilegx_bundle_bits  jit_x0_rotli(int rd, int ra, int imm6)
 521{
 522	extern  tilegx_bundle_bits __unalign_jit_x0_rotli;
 523	return (GX_INSN_BSWAP(__unalign_jit_x0_rotli) &
 524		GX_INSN_X0_MASK) |
 525		create_Dest_X0(rd) | create_SrcA_X0(ra) |
 526		create_ShAmt_X0(imm6);
 527}
 528
 529__JIT_CODE("__unalign_jit_x1_bnezt:  {bnezt r0, __unalign_jit_x1_bnezt}");
 530static tilegx_bundle_bits  jit_x1_bnezt(int ra, int broff)
 531{
 532	extern  tilegx_bundle_bits __unalign_jit_x1_bnezt;
 533	return (GX_INSN_BSWAP(__unalign_jit_x1_bnezt) &
 534		GX_INSN_X1_MASK) |
 535		create_SrcA_X1(ra) | create_BrOff_X1(broff);
 536}
 537
 538#undef __JIT_CODE
 539
 540/*
 541 * This function generates the unaligned fixup JIT.
 542 *
 543 * We first find the unaligned load/store instruction's destination and
 544 * source registers (ra, rb and rd), plus 3 scratch registers, by calling
 545 * find_regs(). The scratch clobbers must not alias any register used in
 546 * the fault bundle. We then analyze the fault bundle to determine whether
 547 * it is a load or a store, its operand width, and any branch or address
 548 * increment. Finally, the generated JIT is copied to the user-space JIT area.
 549 */
 550
 551static
 552void jit_bundle_gen(struct pt_regs *regs, tilegx_bundle_bits bundle,
 553		    int align_ctl)
 554{
 555	struct thread_info *info = current_thread_info();
 556	struct unaligned_jit_fragment frag;
 557	struct unaligned_jit_fragment *jit_code_area;
 558	tilegx_bundle_bits bundle_2 = 0;
 559	/* If bundle_2_enable == false, bundle_2 is an fnop/nop operation. */
 560	bool     bundle_2_enable = true;
 561	uint64_t ra = -1, rb = -1, rd = -1, clob1 = -1, clob2 = -1, clob3 = -1;
 562	/*
 563	 * Indicates whether the unaligned access
 564	 * instruction's registers collide with
 565	 * others in the same bundle.
 566	 */
 567	bool     alias = false;
 568	bool     load_n_store = true;
 569	bool     load_store_signed = false;
 570	unsigned int  load_store_size = 8;
 571	bool     y1_br = false;  /* True for a branch in the same bundle at Y1. */
 572	int      y1_br_reg = 0;
 573	/* True for a link operation, i.e. jalr or lnk at Y1. */
 574	bool     y1_lr = false;
 575	int      y1_lr_reg = 0;
 576	bool     x1_add = false; /* True for a load/store ADD instruction at X1. */
 577	int      x1_add_imm8 = 0;
 578	bool     unexpected = false;
 579	int      n = 0, k;
 580
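    	/*
    	 * Per-thread user page holding an array of 128-byte JIT fragments;
    	 * the slot for this fault is chosen later from bits of regs->pc.
    	 */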
 581	jit_code_area =
 582		(struct unaligned_jit_fragment *)(info->unalign_jit_base);
 583
 584	memset((void *)&frag, 0, sizeof(frag));
 585
 586	/* 0: X mode; otherwise: Y mode. */
 587	if (bundle & TILEGX_BUNDLE_MODE_MASK) {
 588		unsigned int mod, opcode;
 589
 590		if (get_Opcode_Y1(bundle) == RRR_1_OPCODE_Y1 &&
 591		    get_RRROpcodeExtension_Y1(bundle) ==
 592		    UNARY_RRR_1_OPCODE_Y1) {
 593
 594			opcode = get_UnaryOpcodeExtension_Y1(bundle);
 595
 596			/*
 597			 * Test for the "jalr", "jalrp", "jr" and "jrp" instructions at the Y1
 598			 * pipeline.
 599			 */
 600			switch (opcode) {
 601			case JALR_UNARY_OPCODE_Y1:
 602			case JALRP_UNARY_OPCODE_Y1:
 603				y1_lr = true;
 604				y1_lr_reg = 55; /* Link register. */
 605				/* FALLTHROUGH */
 606			case JR_UNARY_OPCODE_Y1:
 607			case JRP_UNARY_OPCODE_Y1:
 608				y1_br = true;
 609				y1_br_reg = get_SrcA_Y1(bundle);
 610				break;
 611			case LNK_UNARY_OPCODE_Y1:
 612				/* "lnk" at Y1 pipeline. */
 613				y1_lr = true;
 614				y1_lr_reg = get_Dest_Y1(bundle);
 615				break;
 616			}
 617		}
 618
 619		opcode = get_Opcode_Y2(bundle);
 620		mod = get_Mode(bundle);
 621
 622		/*
 623		 * bundle_2 is the bundle after turning Y2 into a dummy
 624		 * operation: ld zero, sp
 625		 */
 626		bundle_2 = (bundle & (~GX_INSN_Y2_MASK)) | jit_y2_dummy();
 627
 628		/* Make Y1 an fnop if Y1 is a branch or lnk operation. */
 629		if (y1_br || y1_lr) {
 630			bundle_2 &= ~(GX_INSN_Y1_MASK);
 631			bundle_2 |= jit_y1_fnop();
 632		}
 633
 634		if (is_y0_y1_nop(bundle_2))
 635			bundle_2_enable = false;
 636
 637		if (mod == MODE_OPCODE_YC2) {
 638			/* Store. */
 639			load_n_store = false;
 640			load_store_size = 1 << opcode;
 641			load_store_signed = false;
 642			find_regs(bundle, 0, &ra, &rb, &clob1, &clob2,
 643				  &clob3, &alias);
 644			if (load_store_size > 8)
 645				unexpected = true;
 646		} else {
 647			/* Load. */
 648			load_n_store = true;
 649			if (mod == MODE_OPCODE_YB2) {
 650				switch (opcode) {
 651				case LD_OPCODE_Y2:
 652					load_store_signed = false;
 653					load_store_size = 8;
 654					break;
 655				case LD4S_OPCODE_Y2:
 656					load_store_signed = true;
 657					load_store_size = 4;
 658					break;
 659				case LD4U_OPCODE_Y2:
 660					load_store_signed = false;
 661					load_store_size = 4;
 662					break;
 663				default:
 664					unexpected = true;
 665				}
 666			} else if (mod == MODE_OPCODE_YA2) {
 667				if (opcode == LD2S_OPCODE_Y2) {
 668					load_store_signed = true;
 669					load_store_size = 2;
 670				} else if (opcode == LD2U_OPCODE_Y2) {
 671					load_store_signed = false;
 672					load_store_size = 2;
 673				} else
 674					unexpected = true;
 675			} else
 676				unexpected = true;
 677			find_regs(bundle, &rd, &ra, &rb, &clob1, &clob2,
 678				  &clob3, &alias);
 679		}
 680	} else {
 681		unsigned int opcode;
 682
 683		/* bundle_2 is the bundle after making X1 an "fnop". */
 684		bundle_2 = (bundle & (~GX_INSN_X1_MASK)) | jit_x1_fnop();
 685
 686		if (is_x0_x1_nop(bundle_2))
 687			bundle_2_enable = false;
 688
 689		if (get_Opcode_X1(bundle) == RRR_0_OPCODE_X1) {
 690			opcode = get_UnaryOpcodeExtension_X1(bundle);
 691
 692			if (get_RRROpcodeExtension_X1(bundle) ==
 693			    UNARY_RRR_0_OPCODE_X1) {
 694				load_n_store = true;
 695				find_regs(bundle, &rd, &ra, &rb, &clob1,
 696					  &clob2, &clob3, &alias);
 697
 698				switch (opcode) {
 699				case LD_UNARY_OPCODE_X1:
 700					load_store_signed = false;
 701					load_store_size = 8;
 702					break;
 703				case LD4S_UNARY_OPCODE_X1:
 704					load_store_signed = true;
 705					/* FALLTHROUGH */
 706				case LD4U_UNARY_OPCODE_X1:
 707					load_store_size = 4;
 708					break;
 709
 710				case LD2S_UNARY_OPCODE_X1:
 711					load_store_signed = true;
 712					/* FALLTHROUGH */
 713				case LD2U_UNARY_OPCODE_X1:
 714					load_store_size = 2;
 715					break;
 716				default:
 717					unexpected = true;
 718				}
 719			} else {
 720				load_n_store = false;
 721				load_store_signed = false;
 722				find_regs(bundle, 0, &ra, &rb,
 723					  &clob1, &clob2, &clob3,
 724					  &alias);
 725
 726				opcode = get_RRROpcodeExtension_X1(bundle);
 727				switch (opcode)	{
 728				case ST_RRR_0_OPCODE_X1:
 729					load_store_size = 8;
 730					break;
 731				case ST4_RRR_0_OPCODE_X1:
 732					load_store_size = 4;
 733					break;
 734				case ST2_RRR_0_OPCODE_X1:
 735					load_store_size = 2;
 736					break;
 737				default:
 738					unexpected = true;
 739				}
 740			}
 741		} else if (get_Opcode_X1(bundle) == IMM8_OPCODE_X1) {
 742			load_n_store = true;
 743			opcode = get_Imm8OpcodeExtension_X1(bundle);
 744			switch (opcode)	{
 745			case LD_ADD_IMM8_OPCODE_X1:
 746				load_store_size = 8;
 747				break;
 748
 749			case LD4S_ADD_IMM8_OPCODE_X1:
 750				load_store_signed = true;
 751				/* FALLTHROUGH */
 752			case LD4U_ADD_IMM8_OPCODE_X1:
 753				load_store_size = 4;
 754				break;
 755
 756			case LD2S_ADD_IMM8_OPCODE_X1:
 757				load_store_signed = true;
 758				/* FALLTHROUGH */
 759			case LD2U_ADD_IMM8_OPCODE_X1:
 760				load_store_size = 2;
 761				break;
 762
 763			case ST_ADD_IMM8_OPCODE_X1:
 764				load_n_store = false;
 765				load_store_size = 8;
 766				break;
 767			case ST4_ADD_IMM8_OPCODE_X1:
 768				load_n_store = false;
 769				load_store_size = 4;
 770				break;
 771			case ST2_ADD_IMM8_OPCODE_X1:
 772				load_n_store = false;
 773				load_store_size = 2;
 774				break;
 775			default:
 776				unexpected = true;
 777			}
 778
 779			if (!unexpected) {
 780				x1_add = true;
 781				if (load_n_store)
 782					x1_add_imm8 = get_Imm8_X1(bundle);
 783				else
 784					x1_add_imm8 = get_Dest_Imm8_X1(bundle);
 785			}
 786
 787			find_regs(bundle, load_n_store ? (&rd) : NULL,
 788				  &ra, &rb, &clob1, &clob2, &clob3, &alias);
 789		} else
 790			unexpected = true;
 791	}
 792
 793	/*
 794	 * Sanity-check the register numbers extracted from the fault bundle.
 795	 */
 796	if (check_regs(rd, ra, rb, clob1, clob2, clob3) == true)
 797		unexpected = true;
 798
 799	/* Warn if register ra holds an aligned address. */
 800	if (!unexpected)
 801		WARN_ON(!((load_store_size - 1) & (regs->regs[ra])));
 802
 803
 804	/*
 805	 * If the fault came from kernel space, we only need to take care of the
 806	 * unaligned "get_user/put_user" macros defined in "uaccess.h".
 807	 * Basically, we will handle a bundle like this:
 808	 * {ld/2u/4s rd, ra; movei rx, 0} or {st/2/4 ra, rb; movei rx, 0}
 809	 * (refer to "arch/tile/include/asm/uaccess.h" for details).
 810	 * For either a load or a store, a byte-wise operation is performed by
 811	 * calling get_user() or put_user(). If the macro returns a non-zero
 812	 * value, that value is written to rx; otherwise rx is set to zero.
 813	 * Finally, make pc point to the next bundle and return.
 814	 */
 815
 816	if (EX1_PL(regs->ex1) != USER_PL) {
 817
 818		unsigned long rx = 0;
 819		unsigned long x = 0, ret = 0;
 820
 821		if (y1_br || y1_lr || x1_add ||
 822		    (load_store_signed !=
 823		     (load_n_store && load_store_size == 4))) {
 824			/* Branch, link, load/store add or wrong sign-ext: not handled here. */
 825			unexpected = true;
 826		} else if (!unexpected) {
 827			if (bundle & TILEGX_BUNDLE_MODE_MASK) {
 828				/*
 829				 * Fault bundle is Y mode.
 830				 * Check if Y1 and Y0 are of the form
 831				 * { movei rx, 0; nop/fnop }; if so,
 832				 * find rx.
 833				 */
 834
 835				if ((get_Opcode_Y1(bundle) == ADDI_OPCODE_Y1)
 836				    && (get_SrcA_Y1(bundle) == TREG_ZERO) &&
 837				    (get_Imm8_Y1(bundle) == 0) &&
 838				    is_bundle_y0_nop(bundle)) {
 839					rx = get_Dest_Y1(bundle);
 840				} else if ((get_Opcode_Y0(bundle) ==
 841					    ADDI_OPCODE_Y0) &&
 842					   (get_SrcA_Y0(bundle) == TREG_ZERO) &&
 843					   (get_Imm8_Y0(bundle) == 0) &&
 844					   is_bundle_y1_nop(bundle)) {
 845					rx = get_Dest_Y0(bundle);
 846				} else {
 847					unexpected = true;
 848				}
 849			} else {
 850				/*
 851				 * Fault bundle is X mode.
 852				 * Check if X0 is 'movei rx, 0';
 853				 * if so, find rx.
 854				 */
 855
 856				if ((get_Opcode_X0(bundle) == IMM8_OPCODE_X0)
 857				    && (get_Imm8OpcodeExtension_X0(bundle) ==
 858					ADDI_IMM8_OPCODE_X0) &&
 859				    (get_SrcA_X0(bundle) == TREG_ZERO) &&
 860				    (get_Imm8_X0(bundle) == 0)) {
 861					rx = get_Dest_X0(bundle);
 862				} else {
 863					unexpected = true;
 864				}
 865			}
 866
 867			/* rx should be less than 56. */
 868			if (!unexpected && (rx >= 56))
 869				unexpected = true;
 870		}
 871
 872		if (!search_exception_tables(regs->pc)) {
 873			/* No fixup in the exception tables for the pc. */
 874			unexpected = true;
 875		}
 876
 877		if (unexpected) {
 878			/* Unexpected unalign kernel fault. */
 879			struct task_struct *tsk = validate_current();
 880
 881			bust_spinlocks(1);
 882
 883			show_regs(regs);
 884
 885			if (unlikely(tsk->pid < 2)) {
 886				panic("Kernel unalign fault running %s!",
 887				      tsk->pid ? "init" : "the idle task");
 888			}
 889#ifdef SUPPORT_DIE
 890			die("Oops", regs);
 891#endif
 892			bust_spinlocks(1);
 893
 894			do_group_exit(SIGKILL);
 895
 896		} else {
 897			unsigned long i, b = 0;
 898			unsigned char *ptr =
 899				(unsigned char *)regs->regs[ra];
 900			if (load_n_store) {
 901				/* handle get_user(x, ptr) */
 902				for (i = 0; i < load_store_size; i++) {
 903					ret = get_user(b, ptr++);
 904					if (!ret) {
 905						/* Success! update x. */
 906#ifdef __LITTLE_ENDIAN
 907						x |= (b << (8 * i));
 908#else
 909						x <<= 8;
 910						x |= b;
 911#endif /* __LITTLE_ENDIAN */
 912					} else {
 913						x = 0;
 914						break;
 915					}
 916				}
 917
 918				/* Sign-extend 4-byte loads. */
 919				if (load_store_size == 4)
 920					x = (long)(int)x;
 921
 922				/* Set register rd. */
 923				regs->regs[rd] = x;
 924
 925				/* Set register rx. */
 926				regs->regs[rx] = ret;
 927
 928				/* Bump pc. */
 929				regs->pc += 8;
 930
 931			} else {
 932				/* Handle put_user(x, ptr) */
 933				x = regs->regs[rb];
 934#ifdef __LITTLE_ENDIAN
 935				b = x;
 936#else
 937				/*
 938				 * Swap x so that it is stored from low
 939				 * to high memory, the same as in the
 940				 * little-endian case.
 941				 */
 942				switch (load_store_size) {
 943				case 8:
 944					b = swab64(x);
 945					break;
 946				case 4:
 947					b = swab32(x);
 948					break;
 949				case 2:
 950					b = swab16(x);
 951					break;
 952				}
 953#endif /* __LITTLE_ENDIAN */
 954				for (i = 0; i < load_store_size; i++) {
 955					ret = put_user(b, ptr++);
 956					if (ret)
 957						break;
 958					/* Success! shift 1 byte. */
 959					b >>= 8;
 960				}
 961				/* Set register rx. */
 962				regs->regs[rx] = ret;
 963
 964				/* Bump pc. */
 965				regs->pc += 8;
 966			}
 967		}
 968
 969		unaligned_fixup_count++;
 970
 971		if (unaligned_printk) {
 972			pr_info("%s/%d - Unalign fixup for kernel access to userspace %lx\n",
 973				current->comm, current->pid, regs->regs[ra]);
 974		}
 975
 976		/* Done! Return to the exception handler. */
 977		return;
 978	}
 979
 980	if ((align_ctl == 0) || unexpected) {
 981		siginfo_t info = {
 982			.si_signo = SIGBUS,
 983			.si_code = BUS_ADRALN,
 984			.si_addr = (unsigned char __user *)0
 985		};
 986		if (unaligned_printk)
 987			pr_info("Unalign bundle: unexp @%llx, %llx\n",
 988				(unsigned long long)regs->pc,
 989				(unsigned long long)bundle);
 990
 991		if (ra < 56) {
 992			unsigned long uaa = (unsigned long)regs->regs[ra];
 993			/* Set bus Address. */
 994			info.si_addr = (unsigned char __user *)uaa;
 995		}
 996
 997		unaligned_fixup_count++;
 998
 999		trace_unhandled_signal("unaligned fixup trap", regs,
1000				       (unsigned long)info.si_addr, SIGBUS);
1001		force_sig_info(info.si_signo, &info, current);
1002		return;
1003	}
1004
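    /*
     * Endian-dependent helpers for the generated JIT: the per-byte address
     * step used by the st1_add/ld sequences, and the bit-field ranges passed
     * to bfexts/bfextu when extending a 2- or 4-byte result.
     */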
1005#ifdef __LITTLE_ENDIAN
1006#define UA_FIXUP_ADDR_DELTA          1
1007#define UA_FIXUP_BFEXT_START(_B_)    0
1008#define UA_FIXUP_BFEXT_END(_B_)     (8 * (_B_) - 1)
1009#else /* __BIG_ENDIAN */
1010#define UA_FIXUP_ADDR_DELTA          -1
1011#define UA_FIXUP_BFEXT_START(_B_)   (64 - 8 * (_B_))
1012#define UA_FIXUP_BFEXT_END(_B_)      63
1013#endif /* __LITTLE_ENDIAN */
1014
1015
1016
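    	/*
    	 * Three code-generation paths follow: a fast path when ra != rb,
    	 * nothing else in the bundle aliases the access registers, and no
    	 * branch/link/add fixup is needed; then generic store and generic
    	 * load paths that spill scratch registers to the user stack.
    	 */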
1017	if ((ra != rb) && (rd != TREG_SP) && !alias &&
1018	    !y1_br && !y1_lr && !x1_add) {
1019		/*
1020		 * Simple case: ra != rb and no register alias found,
1021		 * and no branch or link. This will be the majority.
1022		 * We can do a little better for the simple case than the
1023		 * generic scheme below.
1024		 */
1025		if (!load_n_store) {
1026			/*
1027			 * Simple store: ra != rb, no need for scratch register.
1028			 * Just store and rotate right, one byte at a time.
1029			 */
1030#ifdef __BIG_ENDIAN
1031			frag.insn[n++] =
1032				jit_x0_addi(ra, ra, load_store_size - 1) |
1033				jit_x1_fnop();
1034#endif /* __BIG_ENDIAN */
1035			for (k = 0; k < load_store_size; k++) {
1036				/* Store a byte. */
1037				frag.insn[n++] =
1038					jit_x0_rotli(rb, rb, 56) |
1039					jit_x1_st1_add(ra, rb,
1040						       UA_FIXUP_ADDR_DELTA);
1041			}
1042#ifdef __BIG_ENDIAN
1043			frag.insn[n] = jit_x1_addi(ra, ra, 1);
1044#else
1045			frag.insn[n] = jit_x1_addi(ra, ra,
1046						   -1 * load_store_size);
1047#endif /* __BIG_ENDIAN */
1048
1049			if (load_store_size == 8) {
1050				frag.insn[n] |= jit_x0_fnop();
1051			} else if (load_store_size == 4) {
1052				frag.insn[n] |= jit_x0_rotli(rb, rb, 32);
1053			} else { /* = 2 */
1054				frag.insn[n] |= jit_x0_rotli(rb, rb, 16);
1055			}
1056			n++;
1057			if (bundle_2_enable)
1058				frag.insn[n++] = bundle_2;
1059			frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
1060		} else {
1061			if (rd == ra) {
1062				/* Use two clobber registers: clob1/2. */
1063				frag.insn[n++] =
1064					jit_x0_addi(TREG_SP, TREG_SP, -16) |
1065					jit_x1_fnop();
1066				frag.insn[n++] =
1067					jit_x0_addi(clob1, ra, 7) |
1068					jit_x1_st_add(TREG_SP, clob1, -8);
1069				frag.insn[n++] =
1070					jit_x0_addi(clob2, ra, 0) |
1071					jit_x1_st(TREG_SP, clob2);
1072				frag.insn[n++] =
1073					jit_x0_fnop() |
1074					jit_x1_ldna(rd, ra);
1075				frag.insn[n++] =
1076					jit_x0_fnop() |
1077					jit_x1_ldna(clob1, clob1);
1078				/*
1079				 * Note: we must make sure that rd is not
1080				 * sp. Recover clob1/2 from the stack.
1081				 */
1082				frag.insn[n++] =
1083					jit_x0_dblalign(rd, clob1, clob2) |
1084					jit_x1_ld_add(clob2, TREG_SP, 8);
1085				frag.insn[n++] =
1086					jit_x0_fnop() |
1087					jit_x1_ld_add(clob1, TREG_SP, 16);
1088			} else {
1089				/* Use one clobber register: clob1 only. */
1090				frag.insn[n++] =
1091					jit_x0_addi(TREG_SP, TREG_SP, -16) |
1092					jit_x1_fnop();
1093				frag.insn[n++] =
1094					jit_x0_addi(clob1, ra, 7) |
1095					jit_x1_st(TREG_SP, clob1);
1096				frag.insn[n++] =
1097					jit_x0_fnop() |
1098					jit_x1_ldna(rd, ra);
1099				frag.insn[n++] =
1100					jit_x0_fnop() |
1101					jit_x1_ldna(clob1, clob1);
1102				/*
1103				 * Note: we must make sure that rd is not
1104				 * sp. Recover clob1 from the stack.
1105				 */
1106				frag.insn[n++] =
1107					jit_x0_dblalign(rd, clob1, ra) |
1108					jit_x1_ld_add(clob1, TREG_SP, 16);
1109			}
1110
1111			if (bundle_2_enable)
1112				frag.insn[n++] = bundle_2;
1113			/*
1114			 * For a non-8-byte load, extract the corresponding bytes
1115			 * and perform sign or zero extension.
1116			 */
1117			if (load_store_size == 4) {
1118				if (load_store_signed)
1119					frag.insn[n++] =
1120						jit_x0_bfexts(
1121							rd, rd,
1122							UA_FIXUP_BFEXT_START(4),
1123							UA_FIXUP_BFEXT_END(4)) |
1124						jit_x1_fnop();
1125				else
1126					frag.insn[n++] =
1127						jit_x0_bfextu(
1128							rd, rd,
1129							UA_FIXUP_BFEXT_START(4),
1130							UA_FIXUP_BFEXT_END(4)) |
1131						jit_x1_fnop();
1132			} else if (load_store_size == 2) {
1133				if (load_store_signed)
1134					frag.insn[n++] =
1135						jit_x0_bfexts(
1136							rd, rd,
1137							UA_FIXUP_BFEXT_START(2),
1138							UA_FIXUP_BFEXT_END(2)) |
1139						jit_x1_fnop();
1140				else
1141					frag.insn[n++] =
1142						jit_x0_bfextu(
1143							rd, rd,
1144							UA_FIXUP_BFEXT_START(2),
1145							UA_FIXUP_BFEXT_END(2)) |
1146						jit_x1_fnop();
1147			}
1148
1149			frag.insn[n++] =
1150				jit_x0_fnop()  |
1151				jit_x1_iret();
1152		}
1153	} else if (!load_n_store) {
1154
1155		/*
1156		 * Generic memory store cases: use 3 clobber registers.
1157		 *
1158		 * Allocate space for saving clob2, clob1 and clob3 on the user's
1159		 * stack. Register clob3 points to where clob2 is saved, followed
1160		 * by clob1 and clob3 from high to low memory.
1161		 */
1162		frag.insn[n++] =
1163			jit_x0_addi(TREG_SP, TREG_SP, -32)    |
1164			jit_x1_fnop();
1165		frag.insn[n++] =
1166			jit_x0_addi(clob3, TREG_SP, 16)  |
1167			jit_x1_st_add(TREG_SP, clob3, 8);
1168#ifdef __LITTLE_ENDIAN
1169		frag.insn[n++] =
1170			jit_x0_addi(clob1, ra, 0)   |
1171			jit_x1_st_add(TREG_SP, clob1, 8);
1172#else
1173		frag.insn[n++] =
1174			jit_x0_addi(clob1, ra, load_store_size - 1)   |
1175			jit_x1_st_add(TREG_SP, clob1, 8);
1176#endif
1177		if (load_store_size == 8) {
1178			/*
1179			 * We store one byte at a time, not for speed but for
1180			 * compact code. After each store the data source register
1181			 * rotates right by one byte, so it is unchanged after 8 stores.
1182			 */
1183			frag.insn[n++] =
1184				jit_x0_addi(clob2, TREG_ZERO, 7)     |
1185				jit_x1_st_add(TREG_SP, clob2, 16);
1186			frag.insn[n++] =
1187				jit_x0_rotli(rb, rb, 56)      |
1188				jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
1189			frag.insn[n++] =
1190				jit_x0_addi(clob2, clob2, -1) |
1191				jit_x1_bnezt(clob2, -1);
1192			frag.insn[n++] =
1193				jit_x0_fnop()                 |
1194				jit_x1_addi(clob2, y1_br_reg, 0);
1195		} else if (load_store_size == 4) {
1196			frag.insn[n++] =
1197				jit_x0_addi(clob2, TREG_ZERO, 3)     |
1198				jit_x1_st_add(TREG_SP, clob2, 16);
1199			frag.insn[n++] =
1200				jit_x0_rotli(rb, rb, 56)      |
1201				jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
1202			frag.insn[n++] =
1203				jit_x0_addi(clob2, clob2, -1) |
1204				jit_x1_bnezt(clob2, -1);
1205			/*
1206			 * Same as the 8-byte case, but we need to rotate by another
1207			 * 4 bytes to recover rb for the 4-byte store.
1208			 */
1209			frag.insn[n++] = jit_x0_rotli(rb, rb, 32)      |
1210				jit_x1_addi(clob2, y1_br_reg, 0);
1211		} else { /* =2 */
1212			frag.insn[n++] =
1213				jit_x0_addi(clob2, rb, 0)     |
1214				jit_x1_st_add(TREG_SP, clob2, 16);
1215			for (k = 0; k < 2; k++) {
1216				frag.insn[n++] =
1217					jit_x0_shrui(rb, rb, 8)  |
1218					jit_x1_st1_add(clob1, rb,
1219						       UA_FIXUP_ADDR_DELTA);
1220			}
1221			frag.insn[n++] =
1222				jit_x0_addi(rb, clob2, 0)       |
1223				jit_x1_addi(clob2, y1_br_reg, 0);
1224		}
1225
1226		if (bundle_2_enable)
1227			frag.insn[n++] = bundle_2;
1228
1229		if (y1_lr) {
1230			frag.insn[n++] =
1231				jit_x0_fnop()                    |
1232				jit_x1_mfspr(y1_lr_reg,
1233					     SPR_EX_CONTEXT_0_0);
1234		}
1235		if (y1_br) {
1236			frag.insn[n++] =
1237				jit_x0_fnop()                    |
1238				jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
1239					     clob2);
1240		}
1241		if (x1_add) {
1242			frag.insn[n++] =
1243				jit_x0_addi(ra, ra, x1_add_imm8) |
1244				jit_x1_ld_add(clob2, clob3, -8);
1245		} else {
1246			frag.insn[n++] =
1247				jit_x0_fnop()                    |
1248				jit_x1_ld_add(clob2, clob3, -8);
1249		}
1250		frag.insn[n++] =
1251			jit_x0_fnop()   |
1252			jit_x1_ld_add(clob1, clob3, -8);
1253		frag.insn[n++] = jit_x0_fnop()   | jit_x1_ld(clob3, clob3);
1254		frag.insn[n++] = jit_x0_fnop()   | jit_x1_iret();
1255
1256	} else {
1257		/*
1258		 * Generic memory load cases.
1259		 *
1260		 * Allocate space for saving clob1, clob2 and clob3 on the
1261		 * user's stack. Register clob3 points to where clob1 is saved,
1262		 * followed by clob2 and clob3 from high to low memory.
1263		 */
1264
1265		frag.insn[n++] =
1266			jit_x0_addi(TREG_SP, TREG_SP, -32) |
1267			jit_x1_fnop();
1268		frag.insn[n++] =
1269			jit_x0_addi(clob3, TREG_SP, 16) |
1270			jit_x1_st_add(TREG_SP, clob3, 8);
1271		frag.insn[n++] =
1272			jit_x0_addi(clob2, ra, 0) |
1273			jit_x1_st_add(TREG_SP, clob2, 8);
1274
1275		if (y1_br) {
1276			frag.insn[n++] =
1277				jit_x0_addi(clob1, y1_br_reg, 0) |
1278				jit_x1_st_add(TREG_SP, clob1, 16);
1279		} else {
1280			frag.insn[n++] =
1281				jit_x0_fnop() |
1282				jit_x1_st_add(TREG_SP, clob1, 16);
1283		}
1284
1285		if (bundle_2_enable)
1286			frag.insn[n++] = bundle_2;
1287
1288		if (y1_lr) {
1289			frag.insn[n++] =
1290				jit_x0_fnop()  |
1291				jit_x1_mfspr(y1_lr_reg,
1292					     SPR_EX_CONTEXT_0_0);
1293		}
1294
1295		if (y1_br) {
1296			frag.insn[n++] =
1297				jit_x0_fnop() |
1298				jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
1299					     clob1);
1300		}
1301
1302		frag.insn[n++] =
1303			jit_x0_addi(clob1, clob2, 7)      |
1304			jit_x1_ldna(rd, clob2);
1305		frag.insn[n++] =
1306			jit_x0_fnop()                     |
1307			jit_x1_ldna(clob1, clob1);
1308		frag.insn[n++] =
1309			jit_x0_dblalign(rd, clob1, clob2) |
1310			jit_x1_ld_add(clob1, clob3, -8);
1311		if (x1_add) {
1312			frag.insn[n++] =
1313				jit_x0_addi(ra, ra, x1_add_imm8) |
1314				jit_x1_ld_add(clob2, clob3, -8);
1315		} else {
1316			frag.insn[n++] =
1317				jit_x0_fnop()  |
1318				jit_x1_ld_add(clob2, clob3, -8);
1319		}
1320
1321		frag.insn[n++] =
1322			jit_x0_fnop() |
1323			jit_x1_ld(clob3, clob3);
1324
1325		if (load_store_size == 4) {
1326			if (load_store_signed)
1327				frag.insn[n++] =
1328					jit_x0_bfexts(
1329						rd, rd,
1330						UA_FIXUP_BFEXT_START(4),
1331						UA_FIXUP_BFEXT_END(4)) |
1332					jit_x1_fnop();
1333			else
1334				frag.insn[n++] =
1335					jit_x0_bfextu(
1336						rd, rd,
1337						UA_FIXUP_BFEXT_START(4),
1338						UA_FIXUP_BFEXT_END(4)) |
1339					jit_x1_fnop();
1340		} else if (load_store_size == 2) {
1341			if (load_store_signed)
1342				frag.insn[n++] =
1343					jit_x0_bfexts(
1344						rd, rd,
1345						UA_FIXUP_BFEXT_START(2),
1346						UA_FIXUP_BFEXT_END(2)) |
1347					jit_x1_fnop();
1348			else
1349				frag.insn[n++] =
1350					jit_x0_bfextu(
1351						rd, rd,
1352						UA_FIXUP_BFEXT_START(2),
1353						UA_FIXUP_BFEXT_END(2)) |
1354					jit_x1_fnop();
1355		}
1356
1357		frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
1358	}
1359
1360	/* Max JIT bundle count is 14. */
1361	WARN_ON(n > 14);
1362
1363	if (!unexpected) {
1364		int status = 0;
1365		int idx = (regs->pc >> 3) &
1366			((1ULL << (PAGE_SHIFT - UNALIGN_JIT_SHIFT)) - 1);
1367
1368		frag.pc = regs->pc;
1369		frag.bundle = bundle;
1370
1371		if (unaligned_printk) {
1372			pr_info("%s/%d, Unalign fixup: pc=%lx bundle=%lx %d %d %d %d %d %d %d %d\n",
1373				current->comm, current->pid,
1374				(unsigned long)frag.pc,
1375				(unsigned long)frag.bundle,
1376				(int)alias, (int)rd, (int)ra,
1377				(int)rb, (int)bundle_2_enable,
1378				(int)y1_lr, (int)y1_br, (int)x1_add);
1379
1380			for (k = 0; k < n; k += 2)
1381				pr_info("[%d] %016llx %016llx\n",
1382					k, (unsigned long long)frag.insn[k],
1383					(unsigned long long)frag.insn[k+1]);
1384		}
1385
1386		/* Swap the bundle byte order for big-endian systems. */
1387#ifdef __BIG_ENDIAN
1388		frag.bundle = GX_INSN_BSWAP(frag.bundle);
1389		for (k = 0; k < n; k++)
1390			frag.insn[k] = GX_INSN_BSWAP(frag.insn[k]);
1391#endif /* __BIG_ENDIAN */
1392
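    		/* Copy the generated fragment into this pc's slot of the user-space JIT area. */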
1393		status = copy_to_user((void __user *)&jit_code_area[idx],
1394				      &frag, sizeof(frag));
1395		if (status) {
1396			/* Failed to copy the JIT into userland; send SIGSEGV. */
1397			siginfo_t info = {
1398				.si_signo = SIGSEGV,
1399				.si_code = SEGV_MAPERR,
1400				.si_addr = (void __user *)&jit_code_area[idx]
1401			};
1402
1403			pr_warn("Unalign fixup: pid=%d %s jit_code_area=%llx\n",
1404				current->pid, current->comm,
1405				(unsigned long long)&jit_code_area[idx]);
1406
1407			trace_unhandled_signal("segfault in unalign fixup",
1408					       regs,
1409					       (unsigned long)info.si_addr,
1410					       SIGSEGV);
1411			force_sig_info(info.si_signo, &info, current);
1412			return;
1413		}
1414
1415
1416		/* Do a cheaper, not necessarily accurate, increment. */
1417		unaligned_fixup_count++;
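    		/* Make the newly written JIT code visible to instruction fetch. */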
1418		__flush_icache_range((unsigned long)&jit_code_area[idx],
1419				     (unsigned long)&jit_code_area[idx] +
1420				     sizeof(frag));
1421
1422		/* Set up SPR_EX_CONTEXT_0_0/1 for returning to the user program. */
1423		__insn_mtspr(SPR_EX_CONTEXT_0_0, regs->pc + 8);
1424		__insn_mtspr(SPR_EX_CONTEXT_0_1, PL_ICS_EX1(USER_PL, 0));
1425
1426		/* Point pc at the start of the new JIT. */
1427		regs->pc = (unsigned long)&jit_code_area[idx].insn[0];
1428		/* Set ICS in SPR_EX_CONTEXT_K_1. */
1429		regs->ex1 = PL_ICS_EX1(USER_PL, 1);
1430	}
1431}
1432
1433
1434/*
1435 * C function to generate the unaligned data JIT. Called from the unaligned
1436 * data interrupt handler.
1437 *
1438 * First check whether the unaligned fixup is disabled, the exception did not
1439 * come from user space, or the sp register points to an unaligned address;
1440 * if so, generate a SIGBUS. Then map a page into user space as the JIT area
1441 * if it is not mapped yet. Generate the JIT code by calling jit_bundle_gen().
1442 * After that, return to the exception handler.
1443 *
1444 * The exception handler will "iret" to the newly generated JIT code after
1445 * restoring the caller-saved registers. The JIT code will then perform
1446 * another "iret" to resume the user's program.
1447 */
1448
1449void do_unaligned(struct pt_regs *regs, int vecnum)
1450{
1451	tilegx_bundle_bits __user  *pc;
1452	tilegx_bundle_bits bundle;
1453	struct thread_info *info = current_thread_info();
1454	int align_ctl;
1455
1456	/* Check the per-process unaligned fixup (PR_UNALIGN) flags. */
1457	align_ctl = unaligned_fixup;
1458	switch (task_thread_info(current)->align_ctl) {
1459	case PR_UNALIGN_NOPRINT:
1460		align_ctl = 1;
1461		break;
1462	case PR_UNALIGN_SIGBUS:
1463		align_ctl = 0;
1464		break;
1465	}
1466
1467	/* Enable interrupts in order to access userland. */
1468	local_irq_enable();
1469
1470	/*
1471	 * If the fault came from kernel space, there are two choices:
1472	 * (a) unaligned_fixup < 1: we first apply the get/put_user exception
1473	 *     fixup to return -EFAULT. If there is no fixup, panic the kernel.
1474	 * (b) unaligned_fixup >= 1: we try to fix the unaligned access if it
1475	 *     was triggered by the get_user/put_user() macros. Panic the
1476	 *     kernel if it is not fixable.
1477	 */
1478
1479	if (EX1_PL(regs->ex1) != USER_PL) {
1480
1481		if (align_ctl < 1) {
1482			unaligned_fixup_count++;
1483			/* The exception came from the kernel; try to fix it up. */
1484			if (fixup_exception(regs)) {
1485				if (unaligned_printk)
1486					pr_info("Unalign fixup: %d %llx @%llx\n",
1487						(int)unaligned_fixup,
1488						(unsigned long long)regs->ex1,
1489						(unsigned long long)regs->pc);
1490			} else {
1491				/* Not fixable. Go panic. */
1492				panic("Unalign exception in Kernel. pc=%lx",
1493				      regs->pc);
1494			}
1495		} else {
1496			/*
1497			 * Try to fix the exception. If we can't, panic the
1498			 * kernel.
1499			 */
1500			bundle = GX_INSN_BSWAP(
1501				*((tilegx_bundle_bits *)(regs->pc)));
1502			jit_bundle_gen(regs, bundle, align_ctl);
1503		}
1504		return;
1505	}
1506
1507	/*
1508	 * The fault came from user space with ICS set, the stack is not
1509	 * aligned, or fixups are disabled; if so, trigger SIGBUS.
1510	 */
1511	if ((regs->sp & 0x7) || (regs->ex1) || (align_ctl < 0)) {
1512		siginfo_t info = {
1513			.si_signo = SIGBUS,
1514			.si_code = BUS_ADRALN,
1515			.si_addr = (unsigned char __user *)0
1516		};
1517
1518		if (unaligned_printk)
1519			pr_info("Unalign fixup: %d %llx @%llx\n",
1520				(int)unaligned_fixup,
1521				(unsigned long long)regs->ex1,
1522				(unsigned long long)regs->pc);
1523
1524		unaligned_fixup_count++;
1525
1526		trace_unhandled_signal("unaligned fixup trap", regs, 0, SIGBUS);
1527		force_sig_info(info.si_signo, &info, current);
1528		return;
1529	}
1530
1531
1532	/* Read the bundle that caused the exception. */
1533	pc = (tilegx_bundle_bits __user *)(regs->pc);
1534	if (get_user(bundle, pc) != 0) {
1535		/* Probably never get here, since pc is a valid user address. */
1536		siginfo_t info = {
1537			.si_signo = SIGSEGV,
1538			.si_code = SEGV_MAPERR,
1539			.si_addr = (void __user *)pc
1540		};
1541		pr_err("Couldn't read instruction at %p trying to step\n", pc);
1542		trace_unhandled_signal("segfault in unalign fixup", regs,
1543				       (unsigned long)info.si_addr, SIGSEGV);
1544		force_sig_info(info.si_signo, &info, current);
1545		return;
1546	}
1547
1548	if (!info->unalign_jit_base) {
1549		void __user *user_page;
1550
1551		/*
1552		 * Allocate a page in userland.
1553		 * For 64-bit processes we try to place the mapping far
1554		 * from anything else that might be going on (specifically
1555		 * 64 GB below the top of the user address space).  If it
1556		 * happens not to be possible to put it there, it's OK;
1557		 * the kernel will choose another location and we'll
1558		 * remember it for later.
1559		 */
1560		if (is_compat_task())
1561			user_page = NULL;
1562		else
1563			user_page = (void __user *)(TASK_SIZE - (1UL << 36)) +
1564				(current->pid << PAGE_SHIFT);
1565
1566		user_page = (void __user *) vm_mmap(NULL,
1567						    (unsigned long)user_page,
1568						    PAGE_SIZE,
1569						    PROT_EXEC | PROT_READ |
1570						    PROT_WRITE,
1571#ifdef CONFIG_HOMECACHE
1572						    MAP_CACHE_HOME_TASK |
1573#endif
1574						    MAP_PRIVATE |
1575						    MAP_ANONYMOUS,
1576						    0);
1577
1578		if (IS_ERR((void __force *)user_page)) {
1579			pr_err("Out of kernel pages trying do_mmap\n");
1580			return;
1581		}
1582
1583		/* Save the address in the thread_info struct */
1584		info->unalign_jit_base = user_page;
1585		if (unaligned_printk)
1586			pr_info("Unalign bundle: %d:%d, allocate page @%llx\n",
1587				raw_smp_processor_id(), current->pid,
1588				(unsigned long long)user_page);
1589	}
1590
1591	/* Generate the unaligned-access JIT. */
1592	jit_bundle_gen(regs, GX_INSN_BSWAP(bundle), align_ctl);
1593}
1594
1595#endif /* __tilegx__ */