gas/config/tc-i386.c

   1 /* tc-i386.c -- Assemble code for the Intel 80386
   2    Copyright (C) 1989-2022 Free Software Foundation, Inc.
   3
   4    This file is part of GAS, the GNU Assembler.
   5
   6    GAS is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3, or (at your option)
   9    any later version.
  10
  11    GAS is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with GAS; see the file COPYING.  If not, write to the Free
  18    Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
  19    02110-1301, USA.  */
  20
  21 /* Intel 80386 machine specific gas.
  22    Written by Eliot Dresselhaus ([email protected]).
  23    x86_64 support by Jan Hubicka ([email protected])
  24    VIA PadLock support by Michal Ludvig ([email protected])
  25    Bugs & suggestions are completely welcome.  This is free software.
  26    Please help us make it better.  */
  27
  28 #include "as.h"
  29 #include "safe-ctype.h"
  30 #include "subsegs.h"
  31 #include "dwarf2dbg.h"
  32 #include "dw2gencfi.h"
  33 #include "elf/x86-64.h"
  34 #include "opcodes/i386-init.h"
  35 #include <limits.h>
  36
  37 #ifndef INFER_ADDR_PREFIX
  38 #define INFER_ADDR_PREFIX 1
  39 #endif
  40
  41 #ifndef DEFAULT_ARCH
  42 #define DEFAULT_ARCH "i386"
  43 #endif
  44
  45 #ifndef INLINE
  46 #if __GNUC__ >= 2
  47 #define INLINE __inline__
  48 #else
  49 #define INLINE
  50 #endif
  51 #endif
  52
  53 /* Prefixes will be emitted in the order defined below.
  54    WAIT_PREFIX must be the first prefix since FWAIT is really is an
  55    instruction, and so must come before any prefixes.
  56    The preferred prefix order is SEG_PREFIX, ADDR_PREFIX, DATA_PREFIX,
  57    REP_PREFIX/HLE_PREFIX, LOCK_PREFIX.  */
  58 #define WAIT_PREFIX     0
  59 #define SEG_PREFIX      1
  60 #define ADDR_PREFIX     2
  61 #define DATA_PREFIX     3
  62 #define REP_PREFIX      4
  63 #define HLE_PREFIX      REP_PREFIX
  64 #define BND_PREFIX      REP_PREFIX
  65 #define LOCK_PREFIX     5
  66 #define REX_PREFIX      6       /* must come last.  */
  67 #define MAX_PREFIXES    7       /* max prefixes per opcode */
  68
  69 /* we define the syntax here (modulo base,index,scale syntax) */
  70 #define REGISTER_PREFIX '%'
  71 #define IMMEDIATE_PREFIX '$'
  72 #define ABSOLUTE_PREFIX '*'
  73
  74 /* these are the instruction mnemonic suffixes in AT&T syntax or
  75    memory operand size in Intel syntax.  */
  76 #define WORD_MNEM_SUFFIX  'w'
  77 #define BYTE_MNEM_SUFFIX  'b'
  78 #define SHORT_MNEM_SUFFIX 's'
  79 #define LONG_MNEM_SUFFIX  'l'
  80 #define QWORD_MNEM_SUFFIX  'q'
  81 /* Intel Syntax.  Use a non-ascii letter since since it never appears
  82    in instructions.  */
  83 #define LONG_DOUBLE_MNEM_SUFFIX '\1'
  84
  85 #define END_OF_INSN '\0'
  86
  87 /* This matches the C -> StaticRounding alias in the opcode table.  */
  88 #define commutative staticrounding
  89
  90 /*
  91   'templates' is for grouping together 'template' structures for opcodes
  92   of the same name.  This is only used for storing the insns in the grand
  93   ole hash table of insns.
  94   The templates themselves start at START and range up to (but not including)
  95   END.
  96   */
  97 typedef struct
  98 {
  99   const insn_template *start;
 100   const insn_template *end;
 101 }
 102 templates;
 103
 104 /* 386 operand encoding bytes:  see 386 book for details of this.  */
 105 typedef struct
 106 {
 107   unsigned int regmem;  /* codes register or memory operand */
 108   unsigned int reg;     /* codes register operand (or extended opcode) */
 109   unsigned int mode;    /* how to interpret regmem & reg */
 110 }
 111 modrm_byte;
 112
 113 /* x86-64 extension prefix.  */
 114 typedef int rex_byte;
 115
 116 /* 386 opcode byte to code indirect addressing.  */
 117 typedef struct
 118 {
 119   unsigned base;
 120   unsigned index;
 121   unsigned scale;
 122 }
 123 sib_byte;
 124
 125 /* x86 arch names, types and features */
 126 typedef struct
 127 {
 128   const char *name;             /* arch name */
 129   unsigned int len;             /* arch string length */
 130   enum processor_type type;     /* arch type */
 131   i386_cpu_flags flags;         /* cpu feature flags */
 132   unsigned int skip;            /* show_arch should skip this. */
 133 }
 134 arch_entry;
 135
 136 /* Used to turn off indicated flags.  */
 137 typedef struct
 138 {
 139   const char *name;             /* arch name */
 140   unsigned int len;             /* arch string length */
 141   i386_cpu_flags flags;         /* cpu feature flags */
 142 }
 143 noarch_entry;
 144
 145 static void update_code_flag (int, int);
 146 static void set_code_flag (int);
 147 static void set_16bit_gcc_code_flag (int);
 148 static void set_intel_syntax (int);
 149 static void set_intel_mnemonic (int);
 150 static void set_allow_index_reg (int);
 151 static void set_check (int);
 152 static void set_cpu_arch (int);
 153 #ifdef TE_PE
 154 static void pe_directive_secrel (int);
 155 static void pe_directive_secidx (int);
 156 #endif
 157 static void signed_cons (int);
 158 static char *output_invalid (int c);
 159 static int i386_finalize_immediate (segT, expressionS *, i386_operand_type,
 160                                     const char *);
 161 static int i386_finalize_displacement (segT, expressionS *, i386_operand_type,
 162                                        const char *);
 163 static int i386_att_operand (char *);
 164 static int i386_intel_operand (char *, int);
 165 static int i386_intel_simplify (expressionS *);
 166 static int i386_intel_parse_name (const char *, expressionS *);
 167 static const reg_entry *parse_register (char *, char **);
 168 static char *parse_insn (char *, char *);
 169 static char *parse_operands (char *, const char *);
 170 static void swap_operands (void);
 171 static void swap_2_operands (unsigned int, unsigned int);
 172 static enum flag_code i386_addressing_mode (void);
 173 static void optimize_imm (void);
 174 static void optimize_disp (void);
 175 static const insn_template *match_template (char);
 176 static int check_string (void);
 177 static int process_suffix (void);
 178 static int check_byte_reg (void);
 179 static int check_long_reg (void);
 180 static int check_qword_reg (void);
 181 static int check_word_reg (void);
 182 static int finalize_imm (void);
 183 static int process_operands (void);
 184 static const reg_entry *build_modrm_byte (void);
 185 static void output_insn (void);
 186 static void output_imm (fragS *, offsetT);
 187 static void output_disp (fragS *, offsetT);
 188 #ifndef I386COFF
 189 static void s_bss (int);
 190 #endif
 191 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
 192 static void handle_large_common (int small ATTRIBUTE_UNUSED);
 193
 194 /* GNU_PROPERTY_X86_ISA_1_USED.  */
 195 static unsigned int x86_isa_1_used;
 196 /* GNU_PROPERTY_X86_FEATURE_2_USED.  */
 197 static unsigned int x86_feature_2_used;
 198 /* Generate x86 used ISA and feature properties.  */
 199 static unsigned int x86_used_note = DEFAULT_X86_USED_NOTE;
 200 #endif
 201
 202 static const char *default_arch = DEFAULT_ARCH;
 203
 204 /* parse_register() returns this when a register alias cannot be used.  */
 205 static const reg_entry bad_reg = { "<bad>", OPERAND_TYPE_NONE, 0, 0,
 206                                    { Dw2Inval, Dw2Inval } };
 207
 208 static const reg_entry *reg_eax;
 209 static const reg_entry *reg_ds;
 210 static const reg_entry *reg_es;
 211 static const reg_entry *reg_ss;
 212 static const reg_entry *reg_st0;
 213 static const reg_entry *reg_k0;
 214
 215 /* VEX prefix.  */
 216 typedef struct
 217 {
 218   /* VEX prefix is either 2 byte or 3 byte.  EVEX is 4 byte.  */
 219   unsigned char bytes[4];
 220   unsigned int length;
 221   /* Destination or source register specifier.  */
 222   const reg_entry *register_specifier;
 223 } vex_prefix;
 224
 225 /* 'md_assemble ()' gathers together information and puts it into a
 226    i386_insn.  */
 227
 228 union i386_op
 229   {
 230     expressionS *disps;
 231     expressionS *imms;
 232     const reg_entry *regs;
 233   };
 234
 235 enum i386_error
 236   {
 237     operand_size_mismatch,
 238     operand_type_mismatch,
 239     register_type_mismatch,
 240     number_of_operands_mismatch,
 241     invalid_instruction_suffix,
 242     bad_imm4,
 243     unsupported_with_intel_mnemonic,
 244     unsupported_syntax,
 245     unsupported,
 246     invalid_sib_address,
 247     invalid_vsib_address,
 248     invalid_vector_register_set,
 249     invalid_tmm_register_set,
 250     invalid_dest_and_src_register_set,
 251     unsupported_vector_index_register,
 252     unsupported_broadcast,
 253     broadcast_needed,
 254     unsupported_masking,
 255     mask_not_on_destination,
 256     no_default_mask,
 257     unsupported_rc_sae,
 258     rc_sae_operand_not_last_imm,
 259     invalid_register_operand,
 260   };
 261
 262 struct _i386_insn
 263   {
 264     /* TM holds the template for the insn were currently assembling.  */
 265     insn_template tm;
 266
 267     /* SUFFIX holds the instruction size suffix for byte, word, dword
 268        or qword, if given.  */
 269     char suffix;
 270
 271     /* OPCODE_LENGTH holds the number of base opcode bytes.  */
 272     unsigned char opcode_length;
 273
 274     /* OPERANDS gives the number of given operands.  */
 275     unsigned int operands;
 276
 277     /* REG_OPERANDS, DISP_OPERANDS, MEM_OPERANDS, IMM_OPERANDS give the number
 278        of given register, displacement, memory operands and immediate
 279        operands.  */
 280     unsigned int reg_operands, disp_operands, mem_operands, imm_operands;
 281
 282     /* TYPES [i] is the type (see above #defines) which tells us how to
 283        use OP[i] for the corresponding operand.  */
 284     i386_operand_type types[MAX_OPERANDS];
 285
 286     /* Displacement expression, immediate expression, or register for each
 287        operand.  */
 288     union i386_op op[MAX_OPERANDS];
 289
 290     /* Flags for operands.  */
 291     unsigned int flags[MAX_OPERANDS];
 292 #define Operand_PCrel 1
 293 #define Operand_Mem   2
 294
 295     /* Relocation type for operand */
 296     enum bfd_reloc_code_real reloc[MAX_OPERANDS];
 297
 298     /* BASE_REG, INDEX_REG, and LOG2_SCALE_FACTOR are used to encode
 299        the base index byte below.  */
 300     const reg_entry *base_reg;
 301     const reg_entry *index_reg;
 302     unsigned int log2_scale_factor;
 303
 304     /* SEG gives the seg_entries of this insn.  They are zero unless
 305        explicit segment overrides are given.  */
 306     const reg_entry *seg[2];
 307
 308     /* Copied first memory operand string, for re-checking.  */
 309     char *memop1_string;
 310
 311     /* PREFIX holds all the given prefix opcodes (usually null).
 312        PREFIXES is the number of prefix opcodes.  */
 313     unsigned int prefixes;
 314     unsigned char prefix[MAX_PREFIXES];
 315
 316     /* Register is in low 3 bits of opcode.  */
 317     bool short_form;
 318
 319     /* The operand to a branch insn indicates an absolute branch.  */
 320     bool jumpabsolute;
 321
 322     /* Extended states.  */
 323     enum
 324       {
 325         /* Use MMX state.  */
 326         xstate_mmx = 1 << 0,
 327         /* Use XMM state.  */
 328         xstate_xmm = 1 << 1,
 329         /* Use YMM state.  */
 330         xstate_ymm = 1 << 2 | xstate_xmm,
 331         /* Use ZMM state.  */
 332         xstate_zmm = 1 << 3 | xstate_ymm,
 333         /* Use TMM state.  */
 334         xstate_tmm = 1 << 4,
 335         /* Use MASK state.  */
 336         xstate_mask = 1 << 5
 337       } xstate;
 338
 339     /* Has GOTPC or TLS relocation.  */
 340     bool has_gotpc_tls_reloc;
 341
 342     /* RM and SIB are the modrm byte and the sib byte where the
 343        addressing modes of this insn are encoded.  */
 344     modrm_byte rm;
 345     rex_byte rex;
 346     rex_byte vrex;
 347     sib_byte sib;
 348     vex_prefix vex;
 349
 350     /* Masking attributes.
 351
 352        The struct describes masking, applied to OPERAND in the instruction.
 353        REG is a pointer to the corresponding mask register.  ZEROING tells
 354        whether merging or zeroing mask is used.  */
 355     struct Mask_Operation
 356     {
 357       const reg_entry *reg;
 358       unsigned int zeroing;
 359       /* The operand where this operation is associated.  */
 360       unsigned int operand;
 361     } mask;
 362
 363     /* Rounding control and SAE attributes.  */
 364     struct RC_Operation
 365     {
 366       enum rc_type
 367         {
 368           rc_none = -1,
 369           rne,
 370           rd,
 371           ru,
 372           rz,
 373           saeonly
 374         } type;
 375
 376       unsigned int operand;
 377     } rounding;
 378
 379     /* Broadcasting attributes.
 380
 381        The struct describes broadcasting, applied to OPERAND.  TYPE is
 382        expresses the broadcast factor.  */
 383     struct Broadcast_Operation
 384     {
 385       /* Type of broadcast: {1to2}, {1to4}, {1to8}, {1to16} or {1to32}.  */
 386       unsigned int type;
 387
 388       /* Index of broadcasted operand.  */
 389       unsigned int operand;
 390
 391       /* Number of bytes to broadcast.  */
 392       unsigned int bytes;
 393     } broadcast;
 394
 395     /* Compressed disp8*N attribute.  */
 396     unsigned int memshift;
 397
 398     /* Prefer load or store in encoding.  */
 399     enum
 400       {
 401         dir_encoding_default = 0,
 402         dir_encoding_load,
 403         dir_encoding_store,
 404         dir_encoding_swap
 405       } dir_encoding;
 406
 407     /* Prefer 8bit, 16bit, 32bit displacement in encoding.  */
 408     enum
 409       {
 410         disp_encoding_default = 0,
 411         disp_encoding_8bit,
 412         disp_encoding_16bit,
 413         disp_encoding_32bit
 414       } disp_encoding;
 415
 416     /* Prefer the REX byte in encoding.  */
 417     bool rex_encoding;
 418
 419     /* Disable instruction size optimization.  */
 420     bool no_optimize;
 421
 422     /* How to encode vector instructions.  */
 423     enum
 424       {
 425         vex_encoding_default = 0,
 426         vex_encoding_vex,
 427         vex_encoding_vex3,
 428         vex_encoding_evex,
 429         vex_encoding_error
 430       } vec_encoding;
 431
 432     /* REP prefix.  */
 433     const char *rep_prefix;
 434
 435     /* HLE prefix.  */
 436     const char *hle_prefix;
 437
 438     /* Have BND prefix.  */
 439     const char *bnd_prefix;
 440
 441     /* Have NOTRACK prefix.  */
 442     const char *notrack_prefix;
 443
 444     /* Error message.  */
 445     enum i386_error error;
 446   };
 447
 448 typedef struct _i386_insn i386_insn;
 449
 450 /* Link RC type with corresponding string, that'll be looked for in
 451    asm.  */
 452 struct RC_name
 453 {
 454   enum rc_type type;
 455   const char *name;
 456   unsigned int len;
 457 };
 458
 459 static const struct RC_name RC_NamesTable[] =
 460 {
 461   {  rne, STRING_COMMA_LEN ("rn-sae") },
 462   {  rd,  STRING_COMMA_LEN ("rd-sae") },
 463   {  ru,  STRING_COMMA_LEN ("ru-sae") },
 464   {  rz,  STRING_COMMA_LEN ("rz-sae") },
 465   {  saeonly,  STRING_COMMA_LEN ("sae") },
 466 };
 467
 468 /* List of chars besides those in app.c:symbol_chars that can start an
 469    operand.  Used to prevent the scrubber eating vital white-space.  */
 470 const char extra_symbol_chars[] = "*%-([{}"
 471 #ifdef LEX_AT
 472         "@"
 473 #endif
 474 #ifdef LEX_QM
 475         "?"
 476 #endif
 477         ;
 478
 479 #if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF))     \
 480      && !defined (TE_GNU)                               \
 481      && !defined (TE_LINUX)                             \
 482      && !defined (TE_Haiku)                             \
 483      && !defined (TE_FreeBSD)                           \
 484      && !defined (TE_DragonFly)                         \
 485      && !defined (TE_NetBSD))
 486 /* This array holds the chars that always start a comment.  If the
 487    pre-processor is disabled, these aren't very useful.  The option
 488    --divide will remove '/' from this list.  */
 489 const char *i386_comment_chars = "#/";
 490 #define SVR4_COMMENT_CHARS 1
 491 #define PREFIX_SEPARATOR '\\'
 492
 493 #else
 494 const char *i386_comment_chars = "#";
 495 #define PREFIX_SEPARATOR '/'
 496 #endif
 497
 498 /* This array holds the chars that only start a comment at the beginning of
 499    a line.  If the line seems to have the form '# 123 filename'
 500    .line and .file directives will appear in the pre-processed output.
 501    Note that input_file.c hand checks for '#' at the beginning of the
 502    first line of the input file.  This is because the compiler outputs
 503    #NO_APP at the beginning of its output.
 504    Also note that comments started like this one will always work if
 505    '/' isn't otherwise defined.  */
 506 const char line_comment_chars[] = "#/";
 507
 508 const char line_separator_chars[] = ";";
 509
 510 /* Chars that can be used to separate mant from exp in floating point
 511    nums.  */
 512 const char EXP_CHARS[] = "eE";
 513
 514 /* Chars that mean this number is a floating point constant
 515    As in 0f12.456
 516    or    0d1.2345e12.  */
 517 const char FLT_CHARS[] = "fFdDxXhHbB";
 518
 519 /* Tables for lexical analysis.  */
 520 static char mnemonic_chars[256];
 521 static char register_chars[256];
 522 static char operand_chars[256];
 523 static char identifier_chars[256];
 524
 525 /* Lexical macros.  */
 526 #define is_mnemonic_char(x) (mnemonic_chars[(unsigned char) x])
 527 #define is_operand_char(x) (operand_chars[(unsigned char) x])
 528 #define is_register_char(x) (register_chars[(unsigned char) x])
 529 #define is_space_char(x) ((x) == ' ')
 530 #define is_identifier_char(x) (identifier_chars[(unsigned char) x])
 531
 532 /* All non-digit non-letter characters that may occur in an operand.  */
 533 static char operand_special_chars[] = "%$-+(,)*._~/<>|&^!:[@]";
 534
 535 /* md_assemble() always leaves the strings it's passed unaltered.  To
 536    effect this we maintain a stack of saved characters that we've smashed
 537    with '\0's (indicating end of strings for various sub-fields of the
 538    assembler instruction).  */
 539 static char save_stack[32];
 540 static char *save_stack_p;
 541 #define END_STRING_AND_SAVE(s) \
 542         do { *save_stack_p++ = *(s); *(s) = '\0'; } while (0)
 543 #define RESTORE_END_STRING(s) \
 544         do { *(s) = *--save_stack_p; } while (0)
 545
 546 /* The instruction we're assembling.  */
 547 static i386_insn i;
 548
 549 /* Possible templates for current insn.  */
 550 static const templates *current_templates;
 551
 552 /* Per instruction expressionS buffers: max displacements & immediates.  */
 553 static expressionS disp_expressions[MAX_MEMORY_OPERANDS];
 554 static expressionS im_expressions[MAX_IMMEDIATE_OPERANDS];
 555
 556 /* Current operand we are working on.  */
 557 static int this_operand = -1;
 558
 559 /* We support four different modes.  FLAG_CODE variable is used to distinguish
 560    these.  */
 561
 562 enum flag_code {
 563         CODE_32BIT,
 564         CODE_16BIT,
 565         CODE_64BIT };
 566
 567 static enum flag_code flag_code;
 568 static unsigned int object_64bit;
 569 static unsigned int disallow_64bit_reloc;
 570 static int use_rela_relocations = 0;
 571 /* __tls_get_addr/___tls_get_addr symbol for TLS.  */
 572 static const char *tls_get_addr;
 573
 574 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
 575      || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
 576      || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
 577
 578 /* The ELF ABI to use.  */
 579 enum x86_elf_abi
 580 {
 581   I386_ABI,
 582   X86_64_ABI,
 583   X86_64_X32_ABI
 584 };
 585
 586 static enum x86_elf_abi x86_elf_abi = I386_ABI;
 587 #endif
 588
 589 #if defined (TE_PE) || defined (TE_PEP)
 590 /* Use big object file format.  */
 591 static int use_big_obj = 0;
 592 #endif
 593
 594 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
 595 /* 1 if generating code for a shared library.  */
 596 static int shared = 0;
 597 #endif
 598
 599 /* 1 for intel syntax,
 600    0 if att syntax.  */
 601 static int intel_syntax = 0;
 602
 603 static enum x86_64_isa
 604 {
 605   amd64 = 1,    /* AMD64 ISA.  */
 606   intel64       /* Intel64 ISA.  */
 607 } isa64;
 608
 609 /* 1 for intel mnemonic,
 610    0 if att mnemonic.  */
 611 static int intel_mnemonic = !SYSV386_COMPAT;
 612
 613 /* 1 if pseudo registers are permitted.  */
 614 static int allow_pseudo_reg = 0;
 615
 616 /* 1 if register prefix % not required.  */
 617 static int allow_naked_reg = 0;
 618
 619 /* 1 if the assembler should add BND prefix for all control-transferring
 620    instructions supporting it, even if this prefix wasn't specified
 621    explicitly.  */
 622 static int add_bnd_prefix = 0;
 623
 624 /* 1 if pseudo index register, eiz/riz, is allowed .  */
 625 static int allow_index_reg = 0;
 626
 627 /* 1 if the assembler should ignore LOCK prefix, even if it was
 628    specified explicitly.  */
 629 static int omit_lock_prefix = 0;
 630
 631 /* 1 if the assembler should encode lfence, mfence, and sfence as
 632    "lock addl $0, (%{re}sp)".  */
 633 static int avoid_fence = 0;
 634
 635 /* 1 if lfence should be inserted after every load.  */
 636 static int lfence_after_load = 0;
 637
 638 /* Non-zero if lfence should be inserted before indirect branch.  */
 639 static enum lfence_before_indirect_branch_kind
 640   {
 641     lfence_branch_none = 0,
 642     lfence_branch_register,
 643     lfence_branch_memory,
 644     lfence_branch_all
 645   }
 646 lfence_before_indirect_branch;
 647
 648 /* Non-zero if lfence should be inserted before ret.  */
 649 static enum lfence_before_ret_kind
 650   {
 651     lfence_before_ret_none = 0,
 652     lfence_before_ret_not,
 653     lfence_before_ret_or,
 654     lfence_before_ret_shl
 655   }
 656 lfence_before_ret;
 657
 658 /* Types of previous instruction is .byte or prefix.  */
 659 static struct
 660   {
 661     segT seg;
 662     const char *file;
 663     const char *name;
 664     unsigned int line;
 665     enum last_insn_kind
 666       {
 667         last_insn_other = 0,
 668         last_insn_directive,
 669         last_insn_prefix
 670       } kind;
 671   } last_insn;
 672
 673 /* 1 if the assembler should generate relax relocations.  */
 674
 675 static int generate_relax_relocations
 676   = DEFAULT_GENERATE_X86_RELAX_RELOCATIONS;
 677
 678 static enum check_kind
 679   {
 680     check_none = 0,
 681     check_warning,
 682     check_error
 683   }
 684 sse_check, operand_check = check_warning;
 685
 686 /* Non-zero if branches should be aligned within power of 2 boundary.  */
 687 static int align_branch_power = 0;
 688
 689 /* Types of branches to align.  */
 690 enum align_branch_kind
 691   {
 692     align_branch_none = 0,
 693     align_branch_jcc = 1,
 694     align_branch_fused = 2,
 695     align_branch_jmp = 3,
 696     align_branch_call = 4,
 697     align_branch_indirect = 5,
 698     align_branch_ret = 6
 699   };
 700
 701 /* Type bits of branches to align.  */
 702 enum align_branch_bit
 703   {
 704     align_branch_jcc_bit = 1 << align_branch_jcc,
 705     align_branch_fused_bit = 1 << align_branch_fused,
 706     align_branch_jmp_bit = 1 << align_branch_jmp,
 707     align_branch_call_bit = 1 << align_branch_call,
 708     align_branch_indirect_bit = 1 << align_branch_indirect,
 709     align_branch_ret_bit = 1 << align_branch_ret
 710   };
 711
 712 static unsigned int align_branch = (align_branch_jcc_bit
 713                                     | align_branch_fused_bit
 714                                     | align_branch_jmp_bit);
 715
 716 /* Types of condition jump used by macro-fusion.  */
 717 enum mf_jcc_kind
 718   {
 719     mf_jcc_jo = 0,  /* base opcode 0x70  */
 720     mf_jcc_jc,      /* base opcode 0x72  */
 721     mf_jcc_je,      /* base opcode 0x74  */
 722     mf_jcc_jna,     /* base opcode 0x76  */
 723     mf_jcc_js,      /* base opcode 0x78  */
 724     mf_jcc_jp,      /* base opcode 0x7a  */
 725     mf_jcc_jl,      /* base opcode 0x7c  */
 726     mf_jcc_jle,     /* base opcode 0x7e  */
 727   };
 728
 729 /* Types of compare flag-modifying insntructions used by macro-fusion.  */
 730 enum mf_cmp_kind
 731   {
 732     mf_cmp_test_and,  /* test/cmp */
 733     mf_cmp_alu_cmp,  /* add/sub/cmp */
 734     mf_cmp_incdec  /* inc/dec */
 735   };
 736
 737 /* The maximum padding size for fused jcc.  CMP like instruction can
 738    be 9 bytes and jcc can be 6 bytes.  Leave room just in case for
 739    prefixes.   */
 740 #define MAX_FUSED_JCC_PADDING_SIZE 20
 741
 742 /* The maximum number of prefixes added for an instruction.  */
 743 static unsigned int align_branch_prefix_size = 5;
 744
 745 /* Optimization:
 746    1. Clear the REX_W bit with register operand if possible.
 747    2. Above plus use 128bit vector instruction to clear the full vector
 748       register.
 749  */
 750 static int optimize = 0;
 751
 752 /* Optimization:
 753    1. Clear the REX_W bit with register operand if possible.
 754    2. Above plus use 128bit vector instruction to clear the full vector
 755       register.
 756    3. Above plus optimize "test{q,l,w} $imm8,%r{64,32,16}" to
 757       "testb $imm7,%r8".
 758  */
 759 static int optimize_for_space = 0;
 760
 761 /* Register prefix used for error message.  */
 762 static const char *register_prefix = "%";
 763
 764 /* Used in 16 bit gcc mode to add an l suffix to call, ret, enter,
 765    leave, push, and pop instructions so that gcc has the same stack
 766    frame as in 32 bit mode.  */
 767 static char stackop_size = '\0';
 768
 769 /* Non-zero to optimize code alignment.  */
 770 int optimize_align_code = 1;
 771
 772 /* Non-zero to quieten some warnings.  */
 773 static int quiet_warnings = 0;
 774
 775 /* CPU name.  */
 776 static const char *cpu_arch_name = NULL;
 777 static char *cpu_sub_arch_name = NULL;
 778
 779 /* CPU feature flags.  */
 780 static i386_cpu_flags cpu_arch_flags = CPU_UNKNOWN_FLAGS;
 781
 782 /* If we have selected a cpu we are generating instructions for.  */
 783 static int cpu_arch_tune_set = 0;
 784
 785 /* Cpu we are generating instructions for.  */
 786 enum processor_type cpu_arch_tune = PROCESSOR_UNKNOWN;
 787
 788 /* CPU feature flags of cpu we are generating instructions for.  */
 789 static i386_cpu_flags cpu_arch_tune_flags;
 790
 791 /* CPU instruction set architecture used.  */
 792 enum processor_type cpu_arch_isa = PROCESSOR_UNKNOWN;
 793
 794 /* CPU feature flags of instruction set architecture used.  */
 795 i386_cpu_flags cpu_arch_isa_flags;
 796
 797 /* If set, conditional jumps are not automatically promoted to handle
 798    larger than a byte offset.  */
 799 static unsigned int no_cond_jump_promotion = 0;
 800
 801 /* Encode SSE instructions with VEX prefix.  */
 802 static unsigned int sse2avx;
 803
 804 /* Encode aligned vector move as unaligned vector move.  */
 805 static unsigned int use_unaligned_vector_move;
 806
 807 /* Encode scalar AVX instructions with specific vector length.  */
 808 static enum
 809   {
 810     vex128 = 0,
 811     vex256
 812   } avxscalar;
 813
 814 /* Encode VEX WIG instructions with specific vex.w.  */
 815 static enum
 816   {
 817     vexw0 = 0,
 818     vexw1
 819   } vexwig;
 820
 821 /* Encode scalar EVEX LIG instructions with specific vector length.  */
 822 static enum
 823   {
 824     evexl128 = 0,
 825     evexl256,
 826     evexl512
 827   } evexlig;
 828
 829 /* Encode EVEX WIG instructions with specific evex.w.  */
 830 static enum
 831   {
 832     evexw0 = 0,
 833     evexw1
 834   } evexwig;
 835
 836 /* Value to encode in EVEX RC bits, for SAE-only instructions.  */
 837 static enum rc_type evexrcig = rne;
 838
 839 /* Pre-defined "_GLOBAL_OFFSET_TABLE_".  */
 840 static symbolS *GOT_symbol;
 841
 842 /* The dwarf2 return column, adjusted for 32 or 64 bit.  */
 843 unsigned int x86_dwarf2_return_column;
 844
 845 /* The dwarf2 data alignment, adjusted for 32 or 64 bit.  */
 846 int x86_cie_data_alignment;
 847
 848 /* Interface to relax_segment.
 849    There are 3 major relax states for 386 jump insns because the
 850    different types of jumps add different sizes to frags when we're
 851    figuring out what sort of jump to choose to reach a given label.
 852
 853    BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING are used to align
 854    branches which are handled by md_estimate_size_before_relax() and
 855    i386_generic_table_relax_frag().  */
 856
 857 /* Types.  */
 858 #define UNCOND_JUMP 0
 859 #define COND_JUMP 1
 860 #define COND_JUMP86 2
 861 #define BRANCH_PADDING 3
 862 #define BRANCH_PREFIX 4
 863 #define FUSED_JCC_PADDING 5
 864
 865 /* Sizes.  */
 866 #define CODE16  1
 867 #define SMALL   0
 868 #define SMALL16 (SMALL | CODE16)
 869 #define BIG     2
 870 #define BIG16   (BIG | CODE16)
 871
 872 #ifndef INLINE
 873 #ifdef __GNUC__
 874 #define INLINE __inline__
 875 #else
 876 #define INLINE
 877 #endif
 878 #endif
 879
 880 #define ENCODE_RELAX_STATE(type, size) \
 881   ((relax_substateT) (((type) << 2) | (size)))
 882 #define TYPE_FROM_RELAX_STATE(s) \
 883   ((s) >> 2)
 884 #define DISP_SIZE_FROM_RELAX_STATE(s) \
 885     ((((s) & 3) == BIG ? 4 : (((s) & 3) == BIG16 ? 2 : 1)))
 886
 887 /* This table is used by relax_frag to promote short jumps to long
 888    ones where necessary.  SMALL (short) jumps may be promoted to BIG
 889    (32 bit long) ones, and SMALL16 jumps to BIG16 (16 bit long).  We
 890    don't allow a short jump in a 32 bit code segment to be promoted to
 891    a 16 bit offset jump because it's slower (requires data size
 892    prefix), and doesn't work, unless the destination is in the bottom
 893    64k of the code segment (The top 16 bits of eip are zeroed).  */
 894
 895 const relax_typeS md_relax_table[] =
 896 {
 897   /* The fields are:
 898      1) most positive reach of this state,
 899      2) most negative reach of this state,
 900      3) how many bytes this mode will have in the variable part of the frag
 901      4) which index into the table to try if we can't fit into this one.  */
 902
 903   /* UNCOND_JUMP states.  */
 904   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG)},
 905   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16)},
 906   /* dword jmp adds 4 bytes to frag:
 907      0 extra opcode bytes, 4 displacement bytes.  */
 908   {0, 0, 4, 0},
 909   /* word jmp adds 2 byte2 to frag:
 910      0 extra opcode bytes, 2 displacement bytes.  */
 911   {0, 0, 2, 0},
 912
 913   /* COND_JUMP states.  */
 914   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG)},
 915   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG16)},
 916   /* dword conditionals adds 5 bytes to frag:
 917      1 extra opcode byte, 4 displacement bytes.  */
 918   {0, 0, 5, 0},
 919   /* word conditionals add 3 bytes to frag:
 920      1 extra opcode byte, 2 displacement bytes.  */
 921   {0, 0, 3, 0},
 922
 923   /* COND_JUMP86 states.  */
 924   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG)},
 925   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG16)},
 926   /* dword conditionals adds 5 bytes to frag:
 927      1 extra opcode byte, 4 displacement bytes.  */
 928   {0, 0, 5, 0},
 929   /* word conditionals add 4 bytes to frag:
 930      1 displacement byte and a 3 byte long branch insn.  */
 931   {0, 0, 4, 0}
 932 };
 933
 934 static const arch_entry cpu_arch[] =
 935 {
 936   /* Do not replace the first two entries - i386_target_format()
 937      relies on them being there in this order.  */
 938   { STRING_COMMA_LEN ("generic32"), PROCESSOR_GENERIC32,
 939     CPU_GENERIC32_FLAGS, 0 },
 940   { STRING_COMMA_LEN ("generic64"), PROCESSOR_GENERIC64,
 941     CPU_GENERIC64_FLAGS, 0 },
 942   { STRING_COMMA_LEN ("i8086"), PROCESSOR_UNKNOWN,
 943     CPU_NONE_FLAGS, 0 },
 944   { STRING_COMMA_LEN ("i186"), PROCESSOR_UNKNOWN,
 945     CPU_I186_FLAGS, 0 },
 946   { STRING_COMMA_LEN ("i286"), PROCESSOR_UNKNOWN,
 947     CPU_I286_FLAGS, 0 },
 948   { STRING_COMMA_LEN ("i386"), PROCESSOR_I386,
 949     CPU_I386_FLAGS, 0 },
 950   { STRING_COMMA_LEN ("i486"), PROCESSOR_I486,
 951     CPU_I486_FLAGS, 0 },
 952   { STRING_COMMA_LEN ("i586"), PROCESSOR_PENTIUM,
 953     CPU_I586_FLAGS, 0 },
 954   { STRING_COMMA_LEN ("i686"), PROCESSOR_PENTIUMPRO,
 955     CPU_I686_FLAGS, 0 },
 956   { STRING_COMMA_LEN ("pentium"), PROCESSOR_PENTIUM,
 957     CPU_I586_FLAGS, 0 },
 958   { STRING_COMMA_LEN ("pentiumpro"), PROCESSOR_PENTIUMPRO,
 959     CPU_PENTIUMPRO_FLAGS, 0 },
 960   { STRING_COMMA_LEN ("pentiumii"), PROCESSOR_PENTIUMPRO,
 961     CPU_P2_FLAGS, 0 },
 962   { STRING_COMMA_LEN ("pentiumiii"),PROCESSOR_PENTIUMPRO,
 963     CPU_P3_FLAGS, 0 },
 964   { STRING_COMMA_LEN ("pentium4"), PROCESSOR_PENTIUM4,
 965     CPU_P4_FLAGS, 0 },
 966   { STRING_COMMA_LEN ("prescott"), PROCESSOR_NOCONA,
 967     CPU_CORE_FLAGS, 0 },
 968   { STRING_COMMA_LEN ("nocona"), PROCESSOR_NOCONA,
 969     CPU_NOCONA_FLAGS, 0 },
 970   { STRING_COMMA_LEN ("yonah"), PROCESSOR_CORE,
 971     CPU_CORE_FLAGS, 1 },
 972   { STRING_COMMA_LEN ("core"), PROCESSOR_CORE,
 973     CPU_CORE_FLAGS, 0 },
 974   { STRING_COMMA_LEN ("merom"), PROCESSOR_CORE2,
 975     CPU_CORE2_FLAGS, 1 },
 976   { STRING_COMMA_LEN ("core2"), PROCESSOR_CORE2,
 977     CPU_CORE2_FLAGS, 0 },
 978   { STRING_COMMA_LEN ("corei7"), PROCESSOR_COREI7,
 979     CPU_COREI7_FLAGS, 0 },
 980   { STRING_COMMA_LEN ("iamcu"), PROCESSOR_IAMCU,
 981     CPU_IAMCU_FLAGS, 0 },
 982   { STRING_COMMA_LEN ("k6"), PROCESSOR_K6,
 983     CPU_K6_FLAGS, 0 },
 984   { STRING_COMMA_LEN ("k6_2"), PROCESSOR_K6,
 985     CPU_K6_2_FLAGS, 0 },
 986   { STRING_COMMA_LEN ("athlon"), PROCESSOR_ATHLON,
 987     CPU_ATHLON_FLAGS, 0 },
 988   { STRING_COMMA_LEN ("sledgehammer"), PROCESSOR_K8,
 989     CPU_K8_FLAGS, 1 },
 990   { STRING_COMMA_LEN ("opteron"), PROCESSOR_K8,
 991     CPU_K8_FLAGS, 0 },
 992   { STRING_COMMA_LEN ("k8"), PROCESSOR_K8,
 993     CPU_K8_FLAGS, 0 },
 994   { STRING_COMMA_LEN ("amdfam10"), PROCESSOR_AMDFAM10,
 995     CPU_AMDFAM10_FLAGS, 0 },
 996   { STRING_COMMA_LEN ("bdver1"), PROCESSOR_BD,
 997     CPU_BDVER1_FLAGS, 0 },
 998   { STRING_COMMA_LEN ("bdver2"), PROCESSOR_BD,
 999     CPU_BDVER2_FLAGS, 0 },
1000   { STRING_COMMA_LEN ("bdver3"), PROCESSOR_BD,
1001     CPU_BDVER3_FLAGS, 0 },
1002   { STRING_COMMA_LEN ("bdver4"), PROCESSOR_BD,
1003     CPU_BDVER4_FLAGS, 0 },
1004   { STRING_COMMA_LEN ("znver1"), PROCESSOR_ZNVER,
1005     CPU_ZNVER1_FLAGS, 0 },
1006   { STRING_COMMA_LEN ("znver2"), PROCESSOR_ZNVER,
1007     CPU_ZNVER2_FLAGS, 0 },
1008   { STRING_COMMA_LEN ("znver3"), PROCESSOR_ZNVER,
1009     CPU_ZNVER3_FLAGS, 0 },
1010   { STRING_COMMA_LEN ("btver1"), PROCESSOR_BT,
1011     CPU_BTVER1_FLAGS, 0 },
1012   { STRING_COMMA_LEN ("btver2"), PROCESSOR_BT,
1013     CPU_BTVER2_FLAGS, 0 },
1014   { STRING_COMMA_LEN (".8087"), PROCESSOR_UNKNOWN,
1015     CPU_8087_FLAGS, 0 },
1016   { STRING_COMMA_LEN (".287"), PROCESSOR_UNKNOWN,
1017     CPU_287_FLAGS, 0 },
1018   { STRING_COMMA_LEN (".387"), PROCESSOR_UNKNOWN,
1019     CPU_387_FLAGS, 0 },
1020   { STRING_COMMA_LEN (".687"), PROCESSOR_UNKNOWN,
1021     CPU_687_FLAGS, 0 },
1022   { STRING_COMMA_LEN (".cmov"), PROCESSOR_UNKNOWN,
1023     CPU_CMOV_FLAGS, 0 },
1024   { STRING_COMMA_LEN (".fxsr"), PROCESSOR_UNKNOWN,
1025     CPU_FXSR_FLAGS, 0 },
1026   { STRING_COMMA_LEN (".mmx"), PROCESSOR_UNKNOWN,
1027     CPU_MMX_FLAGS, 0 },
1028   { STRING_COMMA_LEN (".sse"), PROCESSOR_UNKNOWN,
1029     CPU_SSE_FLAGS, 0 },
1030   { STRING_COMMA_LEN (".sse2"), PROCESSOR_UNKNOWN,
1031     CPU_SSE2_FLAGS, 0 },
1032   { STRING_COMMA_LEN (".sse3"), PROCESSOR_UNKNOWN,
1033     CPU_SSE3_FLAGS, 0 },
1034   { STRING_COMMA_LEN (".sse4a"), PROCESSOR_UNKNOWN,
1035     CPU_SSE4A_FLAGS, 0 },
1036   { STRING_COMMA_LEN (".ssse3"), PROCESSOR_UNKNOWN,
1037     CPU_SSSE3_FLAGS, 0 },
1038   { STRING_COMMA_LEN (".sse4.1"), PROCESSOR_UNKNOWN,
1039     CPU_SSE4_1_FLAGS, 0 },
1040   { STRING_COMMA_LEN (".sse4.2"), PROCESSOR_UNKNOWN,
1041     CPU_SSE4_2_FLAGS, 0 },
1042   { STRING_COMMA_LEN (".sse4"), PROCESSOR_UNKNOWN,
1043     CPU_SSE4_2_FLAGS, 0 },
1044   { STRING_COMMA_LEN (".avx"), PROCESSOR_UNKNOWN,
1045     CPU_AVX_FLAGS, 0 },
1046   { STRING_COMMA_LEN (".avx2"), PROCESSOR_UNKNOWN,
1047     CPU_AVX2_FLAGS, 0 },
1048   { STRING_COMMA_LEN (".avx512f"), PROCESSOR_UNKNOWN,
1049     CPU_AVX512F_FLAGS, 0 },
1050   { STRING_COMMA_LEN (".avx512cd"), PROCESSOR_UNKNOWN,
1051     CPU_AVX512CD_FLAGS, 0 },
1052   { STRING_COMMA_LEN (".avx512er"), PROCESSOR_UNKNOWN,
1053     CPU_AVX512ER_FLAGS, 0 },
1054   { STRING_COMMA_LEN (".avx512pf"), PROCESSOR_UNKNOWN,
1055     CPU_AVX512PF_FLAGS, 0 },
1056   { STRING_COMMA_LEN (".avx512dq"), PROCESSOR_UNKNOWN,
1057     CPU_AVX512DQ_FLAGS, 0 },
1058   { STRING_COMMA_LEN (".avx512bw"), PROCESSOR_UNKNOWN,
1059     CPU_AVX512BW_FLAGS, 0 },
1060   { STRING_COMMA_LEN (".avx512vl"), PROCESSOR_UNKNOWN,
1061     CPU_AVX512VL_FLAGS, 0 },
1062   { STRING_COMMA_LEN (".vmx"), PROCESSOR_UNKNOWN,
1063     CPU_VMX_FLAGS, 0 },
1064   { STRING_COMMA_LEN (".vmfunc"), PROCESSOR_UNKNOWN,
1065     CPU_VMFUNC_FLAGS, 0 },
1066   { STRING_COMMA_LEN (".smx"), PROCESSOR_UNKNOWN,
1067     CPU_SMX_FLAGS, 0 },
1068   { STRING_COMMA_LEN (".xsave"), PROCESSOR_UNKNOWN,
1069     CPU_XSAVE_FLAGS, 0 },
1070   { STRING_COMMA_LEN (".xsaveopt"), PROCESSOR_UNKNOWN,
1071     CPU_XSAVEOPT_FLAGS, 0 },
1072   { STRING_COMMA_LEN (".xsavec"), PROCESSOR_UNKNOWN,
1073     CPU_XSAVEC_FLAGS, 0 },
1074   { STRING_COMMA_LEN (".xsaves"), PROCESSOR_UNKNOWN,
1075     CPU_XSAVES_FLAGS, 0 },
1076   { STRING_COMMA_LEN (".aes"), PROCESSOR_UNKNOWN,
1077     CPU_AES_FLAGS, 0 },
1078   { STRING_COMMA_LEN (".pclmul"), PROCESSOR_UNKNOWN,
1079     CPU_PCLMUL_FLAGS, 0 },
1080   { STRING_COMMA_LEN (".clmul"), PROCESSOR_UNKNOWN,
1081     CPU_PCLMUL_FLAGS, 1 },
1082   { STRING_COMMA_LEN (".fsgsbase"), PROCESSOR_UNKNOWN,
1083     CPU_FSGSBASE_FLAGS, 0 },
1084   { STRING_COMMA_LEN (".rdrnd"), PROCESSOR_UNKNOWN,
1085     CPU_RDRND_FLAGS, 0 },
1086   { STRING_COMMA_LEN (".f16c"), PROCESSOR_UNKNOWN,
1087     CPU_F16C_FLAGS, 0 },
1088   { STRING_COMMA_LEN (".bmi2"), PROCESSOR_UNKNOWN,
1089     CPU_BMI2_FLAGS, 0 },
1090   { STRING_COMMA_LEN (".fma"), PROCESSOR_UNKNOWN,
1091     CPU_FMA_FLAGS, 0 },
1092   { STRING_COMMA_LEN (".fma4"), PROCESSOR_UNKNOWN,
1093     CPU_FMA4_FLAGS, 0 },
1094   { STRING_COMMA_LEN (".xop"), PROCESSOR_UNKNOWN,
1095     CPU_XOP_FLAGS, 0 },
1096   { STRING_COMMA_LEN (".lwp"), PROCESSOR_UNKNOWN,
1097     CPU_LWP_FLAGS, 0 },
1098   { STRING_COMMA_LEN (".movbe"), PROCESSOR_UNKNOWN,
1099     CPU_MOVBE_FLAGS, 0 },
1100   { STRING_COMMA_LEN (".cx16"), PROCESSOR_UNKNOWN,
1101     CPU_CX16_FLAGS, 0 },
1102   { STRING_COMMA_LEN (".ept"), PROCESSOR_UNKNOWN,
1103     CPU_EPT_FLAGS, 0 },
1104   { STRING_COMMA_LEN (".lzcnt"), PROCESSOR_UNKNOWN,
1105     CPU_LZCNT_FLAGS, 0 },
1106   { STRING_COMMA_LEN (".popcnt"), PROCESSOR_UNKNOWN,
1107     CPU_POPCNT_FLAGS, 0 },
1108   { STRING_COMMA_LEN (".hle"), PROCESSOR_UNKNOWN,
1109     CPU_HLE_FLAGS, 0 },
1110   { STRING_COMMA_LEN (".rtm"), PROCESSOR_UNKNOWN,
1111     CPU_RTM_FLAGS, 0 },
1112   { STRING_COMMA_LEN (".invpcid"), PROCESSOR_UNKNOWN,
1113     CPU_INVPCID_FLAGS, 0 },
1114   { STRING_COMMA_LEN (".clflush"), PROCESSOR_UNKNOWN,
1115     CPU_CLFLUSH_FLAGS, 0 },
1116   { STRING_COMMA_LEN (".nop"), PROCESSOR_UNKNOWN,
1117     CPU_NOP_FLAGS, 0 },
1118   { STRING_COMMA_LEN (".syscall"), PROCESSOR_UNKNOWN,
1119     CPU_SYSCALL_FLAGS, 0 },
1120   { STRING_COMMA_LEN (".rdtscp"), PROCESSOR_UNKNOWN,
1121     CPU_RDTSCP_FLAGS, 0 },
1122   { STRING_COMMA_LEN (".3dnow"), PROCESSOR_UNKNOWN,
1123     CPU_3DNOW_FLAGS, 0 },
1124   { STRING_COMMA_LEN (".3dnowa"), PROCESSOR_UNKNOWN,
1125     CPU_3DNOWA_FLAGS, 0 },
1126   { STRING_COMMA_LEN (".padlock"), PROCESSOR_UNKNOWN,
1127     CPU_PADLOCK_FLAGS, 0 },
1128   { STRING_COMMA_LEN (".pacifica"), PROCESSOR_UNKNOWN,
1129     CPU_SVME_FLAGS, 1 },
1130   { STRING_COMMA_LEN (".svme"), PROCESSOR_UNKNOWN,
1131     CPU_SVME_FLAGS, 0 },
1132   { STRING_COMMA_LEN (".sse4a"), PROCESSOR_UNKNOWN,
1133     CPU_SSE4A_FLAGS, 0 },
1134   { STRING_COMMA_LEN (".abm"), PROCESSOR_UNKNOWN,
1135     CPU_ABM_FLAGS, 0 },
1136   { STRING_COMMA_LEN (".bmi"), PROCESSOR_UNKNOWN,
1137     CPU_BMI_FLAGS, 0 },
1138   { STRING_COMMA_LEN (".tbm"), PROCESSOR_UNKNOWN,
1139     CPU_TBM_FLAGS, 0 },
1140   { STRING_COMMA_LEN (".adx"), PROCESSOR_UNKNOWN,
1141     CPU_ADX_FLAGS, 0 },
1142   { STRING_COMMA_LEN (".rdseed"), PROCESSOR_UNKNOWN,
1143     CPU_RDSEED_FLAGS, 0 },
1144   { STRING_COMMA_LEN (".prfchw"), PROCESSOR_UNKNOWN,
1145     CPU_PRFCHW_FLAGS, 0 },
1146   { STRING_COMMA_LEN (".smap"), PROCESSOR_UNKNOWN,
1147     CPU_SMAP_FLAGS, 0 },
1148   { STRING_COMMA_LEN (".mpx"), PROCESSOR_UNKNOWN,
1149     CPU_MPX_FLAGS, 0 },
1150   { STRING_COMMA_LEN (".sha"), PROCESSOR_UNKNOWN,
1151     CPU_SHA_FLAGS, 0 },
1152   { STRING_COMMA_LEN (".clflushopt"), PROCESSOR_UNKNOWN,
1153     CPU_CLFLUSHOPT_FLAGS, 0 },
1154   { STRING_COMMA_LEN (".prefetchwt1"), PROCESSOR_UNKNOWN,
1155     CPU_PREFETCHWT1_FLAGS, 0 },
1156   { STRING_COMMA_LEN (".se1"), PROCESSOR_UNKNOWN,
1157     CPU_SE1_FLAGS, 0 },
1158   { STRING_COMMA_LEN (".clwb"), PROCESSOR_UNKNOWN,
1159     CPU_CLWB_FLAGS, 0 },
1160   { STRING_COMMA_LEN (".avx512ifma"), PROCESSOR_UNKNOWN,
1161     CPU_AVX512IFMA_FLAGS, 0 },
1162   { STRING_COMMA_LEN (".avx512vbmi"), PROCESSOR_UNKNOWN,
1163     CPU_AVX512VBMI_FLAGS, 0 },
1164   { STRING_COMMA_LEN (".avx512_4fmaps"), PROCESSOR_UNKNOWN,
1165     CPU_AVX512_4FMAPS_FLAGS, 0 },
1166   { STRING_COMMA_LEN (".avx512_4vnniw"), PROCESSOR_UNKNOWN,
1167     CPU_AVX512_4VNNIW_FLAGS, 0 },
1168   { STRING_COMMA_LEN (".avx512_vpopcntdq"), PROCESSOR_UNKNOWN,
1169     CPU_AVX512_VPOPCNTDQ_FLAGS, 0 },
1170   { STRING_COMMA_LEN (".avx512_vbmi2"), PROCESSOR_UNKNOWN,
1171     CPU_AVX512_VBMI2_FLAGS, 0 },
1172   { STRING_COMMA_LEN (".avx512_vnni"), PROCESSOR_UNKNOWN,
1173     CPU_AVX512_VNNI_FLAGS, 0 },
1174   { STRING_COMMA_LEN (".avx512_bitalg"), PROCESSOR_UNKNOWN,
1175     CPU_AVX512_BITALG_FLAGS, 0 },
1176   { STRING_COMMA_LEN (".avx_vnni"), PROCESSOR_UNKNOWN,
1177     CPU_AVX_VNNI_FLAGS, 0 },
1178   { STRING_COMMA_LEN (".clzero"), PROCESSOR_UNKNOWN,
1179     CPU_CLZERO_FLAGS, 0 },
1180   { STRING_COMMA_LEN (".mwaitx"), PROCESSOR_UNKNOWN,
1181     CPU_MWAITX_FLAGS, 0 },
1182   { STRING_COMMA_LEN (".ospke"), PROCESSOR_UNKNOWN,
1183     CPU_OSPKE_FLAGS, 0 },
1184   { STRING_COMMA_LEN (".rdpid"), PROCESSOR_UNKNOWN,
1185     CPU_RDPID_FLAGS, 0 },
1186   { STRING_COMMA_LEN (".ptwrite"), PROCESSOR_UNKNOWN,
1187     CPU_PTWRITE_FLAGS, 0 },
1188   { STRING_COMMA_LEN (".ibt"), PROCESSOR_UNKNOWN,
1189     CPU_IBT_FLAGS, 0 },
1190   { STRING_COMMA_LEN (".shstk"), PROCESSOR_UNKNOWN,
1191     CPU_SHSTK_FLAGS, 0 },
1192   { STRING_COMMA_LEN (".gfni"), PROCESSOR_UNKNOWN,
1193     CPU_GFNI_FLAGS, 0 },
1194   { STRING_COMMA_LEN (".vaes"), PROCESSOR_UNKNOWN,
1195     CPU_VAES_FLAGS, 0 },
1196   { STRING_COMMA_LEN (".vpclmulqdq"), PROCESSOR_UNKNOWN,
1197     CPU_VPCLMULQDQ_FLAGS, 0 },
1198   { STRING_COMMA_LEN (".wbnoinvd"), PROCESSOR_UNKNOWN,
1199     CPU_WBNOINVD_FLAGS, 0 },
1200   { STRING_COMMA_LEN (".pconfig"), PROCESSOR_UNKNOWN,
1201     CPU_PCONFIG_FLAGS, 0 },
1202   { STRING_COMMA_LEN (".waitpkg"), PROCESSOR_UNKNOWN,
1203     CPU_WAITPKG_FLAGS, 0 },
1204   { STRING_COMMA_LEN (".cldemote"), PROCESSOR_UNKNOWN,
1205     CPU_CLDEMOTE_FLAGS, 0 },
1206   { STRING_COMMA_LEN (".amx_int8"), PROCESSOR_UNKNOWN,
1207     CPU_AMX_INT8_FLAGS, 0 },
1208   { STRING_COMMA_LEN (".amx_bf16"), PROCESSOR_UNKNOWN,
1209     CPU_AMX_BF16_FLAGS, 0 },
1210   { STRING_COMMA_LEN (".amx_tile"), PROCESSOR_UNKNOWN,
1211     CPU_AMX_TILE_FLAGS, 0 },
1212   { STRING_COMMA_LEN (".movdiri"), PROCESSOR_UNKNOWN,
1213     CPU_MOVDIRI_FLAGS, 0 },
1214   { STRING_COMMA_LEN (".movdir64b"), PROCESSOR_UNKNOWN,
1215     CPU_MOVDIR64B_FLAGS, 0 },
1216   { STRING_COMMA_LEN (".avx512_bf16"), PROCESSOR_UNKNOWN,
1217     CPU_AVX512_BF16_FLAGS, 0 },
1218   { STRING_COMMA_LEN (".avx512_vp2intersect"), PROCESSOR_UNKNOWN,
1219     CPU_AVX512_VP2INTERSECT_FLAGS, 0 },
1220   { STRING_COMMA_LEN (".tdx"), PROCESSOR_UNKNOWN,
1221     CPU_TDX_FLAGS, 0 },
1222   { STRING_COMMA_LEN (".enqcmd"), PROCESSOR_UNKNOWN,
1223     CPU_ENQCMD_FLAGS, 0 },
1224   { STRING_COMMA_LEN (".serialize"), PROCESSOR_UNKNOWN,
1225     CPU_SERIALIZE_FLAGS, 0 },
1226   { STRING_COMMA_LEN (".rdpru"), PROCESSOR_UNKNOWN,
1227     CPU_RDPRU_FLAGS, 0 },
1228   { STRING_COMMA_LEN (".mcommit"), PROCESSOR_UNKNOWN,
1229     CPU_MCOMMIT_FLAGS, 0 },
1230   { STRING_COMMA_LEN (".sev_es"), PROCESSOR_UNKNOWN,
1231     CPU_SEV_ES_FLAGS, 0 },
1232   { STRING_COMMA_LEN (".tsxldtrk"), PROCESSOR_UNKNOWN,
1233     CPU_TSXLDTRK_FLAGS, 0 },
1234   { STRING_COMMA_LEN (".kl"), PROCESSOR_UNKNOWN,
1235     CPU_KL_FLAGS, 0 },
1236   { STRING_COMMA_LEN (".widekl"), PROCESSOR_UNKNOWN,
1237     CPU_WIDEKL_FLAGS, 0 },
1238   { STRING_COMMA_LEN (".uintr"), PROCESSOR_UNKNOWN,
1239     CPU_UINTR_FLAGS, 0 },
1240   { STRING_COMMA_LEN (".hreset"), PROCESSOR_UNKNOWN,
1241     CPU_HRESET_FLAGS, 0 },
1242   { STRING_COMMA_LEN (".avx512_fp16"), PROCESSOR_UNKNOWN,
1243     CPU_AVX512_FP16_FLAGS, 0 },
1244 };
1245
1246 static const noarch_entry cpu_noarch[] =
1247 {
1248   { STRING_COMMA_LEN ("no87"),  CPU_ANY_X87_FLAGS },
1249   { STRING_COMMA_LEN ("no287"),  CPU_ANY_287_FLAGS },
1250   { STRING_COMMA_LEN ("no387"),  CPU_ANY_387_FLAGS },
1251   { STRING_COMMA_LEN ("no687"),  CPU_ANY_687_FLAGS },
1252   { STRING_COMMA_LEN ("nocmov"),  CPU_ANY_CMOV_FLAGS },
1253   { STRING_COMMA_LEN ("nofxsr"),  CPU_ANY_FXSR_FLAGS },
1254   { STRING_COMMA_LEN ("nommx"),  CPU_ANY_MMX_FLAGS },
1255   { STRING_COMMA_LEN ("nosse"),  CPU_ANY_SSE_FLAGS },
1256   { STRING_COMMA_LEN ("nosse2"),  CPU_ANY_SSE2_FLAGS },
1257   { STRING_COMMA_LEN ("nosse3"),  CPU_ANY_SSE3_FLAGS },
1258   { STRING_COMMA_LEN ("nosse4a"),  CPU_ANY_SSE4A_FLAGS },
1259   { STRING_COMMA_LEN ("nossse3"),  CPU_ANY_SSSE3_FLAGS },
1260   { STRING_COMMA_LEN ("nosse4.1"),  CPU_ANY_SSE4_1_FLAGS },
1261   { STRING_COMMA_LEN ("nosse4.2"),  CPU_ANY_SSE4_2_FLAGS },
1262   { STRING_COMMA_LEN ("nosse4"),  CPU_ANY_SSE4_1_FLAGS },
1263   { STRING_COMMA_LEN ("noavx"),  CPU_ANY_AVX_FLAGS },
1264   { STRING_COMMA_LEN ("noavx2"),  CPU_ANY_AVX2_FLAGS },
1265   { STRING_COMMA_LEN ("noavx512f"), CPU_ANY_AVX512F_FLAGS },
1266   { STRING_COMMA_LEN ("noavx512cd"), CPU_ANY_AVX512CD_FLAGS },
1267   { STRING_COMMA_LEN ("noavx512er"), CPU_ANY_AVX512ER_FLAGS },
1268   { STRING_COMMA_LEN ("noavx512pf"), CPU_ANY_AVX512PF_FLAGS },
1269   { STRING_COMMA_LEN ("noavx512dq"), CPU_ANY_AVX512DQ_FLAGS },
1270   { STRING_COMMA_LEN ("noavx512bw"), CPU_ANY_AVX512BW_FLAGS },
1271   { STRING_COMMA_LEN ("noavx512vl"), CPU_ANY_AVX512VL_FLAGS },
1272   { STRING_COMMA_LEN ("noavx512ifma"), CPU_ANY_AVX512IFMA_FLAGS },
1273   { STRING_COMMA_LEN ("noavx512vbmi"), CPU_ANY_AVX512VBMI_FLAGS },
1274   { STRING_COMMA_LEN ("noavx512_4fmaps"), CPU_ANY_AVX512_4FMAPS_FLAGS },
1275   { STRING_COMMA_LEN ("noavx512_4vnniw"), CPU_ANY_AVX512_4VNNIW_FLAGS },
1276   { STRING_COMMA_LEN ("noavx512_vpopcntdq"), CPU_ANY_AVX512_VPOPCNTDQ_FLAGS },
1277   { STRING_COMMA_LEN ("noavx512_vbmi2"), CPU_ANY_AVX512_VBMI2_FLAGS },
1278   { STRING_COMMA_LEN ("noavx512_vnni"), CPU_ANY_AVX512_VNNI_FLAGS },
1279   { STRING_COMMA_LEN ("noavx512_bitalg"), CPU_ANY_AVX512_BITALG_FLAGS },
1280   { STRING_COMMA_LEN ("noavx_vnni"), CPU_ANY_AVX_VNNI_FLAGS },
1281   { STRING_COMMA_LEN ("noibt"), CPU_ANY_IBT_FLAGS },
1282   { STRING_COMMA_LEN ("noshstk"), CPU_ANY_SHSTK_FLAGS },
1283   { STRING_COMMA_LEN ("noamx_int8"), CPU_ANY_AMX_INT8_FLAGS },
1284   { STRING_COMMA_LEN ("noamx_bf16"), CPU_ANY_AMX_BF16_FLAGS },
1285   { STRING_COMMA_LEN ("noamx_tile"), CPU_ANY_AMX_TILE_FLAGS },
1286   { STRING_COMMA_LEN ("nomovdiri"), CPU_ANY_MOVDIRI_FLAGS },
1287   { STRING_COMMA_LEN ("nomovdir64b"), CPU_ANY_MOVDIR64B_FLAGS },
1288   { STRING_COMMA_LEN ("noavx512_bf16"), CPU_ANY_AVX512_BF16_FLAGS },
1289   { STRING_COMMA_LEN ("noavx512_vp2intersect"),
1290     CPU_ANY_AVX512_VP2INTERSECT_FLAGS },
1291   { STRING_COMMA_LEN ("notdx"), CPU_ANY_TDX_FLAGS },
1292   { STRING_COMMA_LEN ("noenqcmd"), CPU_ANY_ENQCMD_FLAGS },
1293   { STRING_COMMA_LEN ("noserialize"), CPU_ANY_SERIALIZE_FLAGS },
1294   { STRING_COMMA_LEN ("notsxldtrk"), CPU_ANY_TSXLDTRK_FLAGS },
1295   { STRING_COMMA_LEN ("nokl"), CPU_ANY_KL_FLAGS },
1296   { STRING_COMMA_LEN ("nowidekl"), CPU_ANY_WIDEKL_FLAGS },
1297   { STRING_COMMA_LEN ("nouintr"), CPU_ANY_UINTR_FLAGS },
1298   { STRING_COMMA_LEN ("nohreset"), CPU_ANY_HRESET_FLAGS },
1299   { STRING_COMMA_LEN ("noavx512_fp16"), CPU_ANY_AVX512_FP16_FLAGS },
1300 };
1301
1302 #ifdef I386COFF
1303 /* Like s_lcomm_internal in gas/read.c but the alignment string
1304    is allowed to be optional.  */
1305
1306 static symbolS *
1307 pe_lcomm_internal (int needs_align, symbolS *symbolP, addressT size)
1308 {
1309   addressT align = 0;
1310
1311   SKIP_WHITESPACE ();
1312
1313   if (needs_align
1314       && *input_line_pointer == ',')
1315     {
1316       align = parse_align (needs_align - 1);
1317
1318       if (align == (addressT) -1)
1319         return NULL;
1320     }
1321   else
1322     {
1323       if (size >= 8)
1324         align = 3;
1325       else if (size >= 4)
1326         align = 2;
1327       else if (size >= 2)
1328         align = 1;
1329       else
1330         align = 0;
1331     }
1332
1333   bss_alloc (symbolP, size, align);
1334   return symbolP;
1335 }
1336
1337 static void
1338 pe_lcomm (int needs_align)
1339 {
1340   s_comm_internal (needs_align * 2, pe_lcomm_internal);
1341 }
1342 #endif
1343
1344 const pseudo_typeS md_pseudo_table[] =
1345 {
1346 #if !defined(OBJ_AOUT) && !defined(USE_ALIGN_PTWO)
1347   {"align", s_align_bytes, 0},
1348 #else
1349   {"align", s_align_ptwo, 0},
1350 #endif
1351   {"arch", set_cpu_arch, 0},
1352 #ifndef I386COFF
1353   {"bss", s_bss, 0},
1354 #else
1355   {"lcomm", pe_lcomm, 1},
1356 #endif
1357   {"ffloat", float_cons, 'f'},
1358   {"dfloat", float_cons, 'd'},
1359   {"tfloat", float_cons, 'x'},
1360   {"hfloat", float_cons, 'h'},
1361   {"bfloat16", float_cons, 'b'},
1362   {"value", cons, 2},
1363   {"slong", signed_cons, 4},
1364   {"noopt", s_ignore, 0},
1365   {"optim", s_ignore, 0},
1366   {"code16gcc", set_16bit_gcc_code_flag, CODE_16BIT},
1367   {"code16", set_code_flag, CODE_16BIT},
1368   {"code32", set_code_flag, CODE_32BIT},
1369 #ifdef BFD64
1370   {"code64", set_code_flag, CODE_64BIT},
1371 #endif
1372   {"intel_syntax", set_intel_syntax, 1},
1373   {"att_syntax", set_intel_syntax, 0},
1374   {"intel_mnemonic", set_intel_mnemonic, 1},
1375   {"att_mnemonic", set_intel_mnemonic, 0},
1376   {"allow_index_reg", set_allow_index_reg, 1},
1377   {"disallow_index_reg", set_allow_index_reg, 0},
1378   {"sse_check", set_check, 0},
1379   {"operand_check", set_check, 1},
1380 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
1381   {"largecomm", handle_large_common, 0},
1382 #else
1383   {"file", dwarf2_directive_file, 0},
1384   {"loc", dwarf2_directive_loc, 0},
1385   {"loc_mark_labels", dwarf2_directive_loc_mark_labels, 0},
1386 #endif
1387 #ifdef TE_PE
1388   {"secrel32", pe_directive_secrel, 0},
1389   {"secidx", pe_directive_secidx, 0},
1390 #endif
1391   {0, 0, 0}
1392 };
1393
1394 /* For interface with expression ().  */
1395 extern char *input_line_pointer;
1396
1397 /* Hash table for instruction mnemonic lookup.  */
1398 static htab_t op_hash;
1399
1400 /* Hash table for register lookup.  */
1401 static htab_t reg_hash;
1402 \f
1403   /* Various efficient no-op patterns for aligning code labels.
1404      Note: Don't try to assemble the instructions in the comments.
1405      0L and 0w are not legal.  */
1406 static const unsigned char f32_1[] =
1407   {0x90};                               /* nop                  */
1408 static const unsigned char f32_2[] =
1409   {0x66,0x90};                          /* xchg %ax,%ax         */
1410 static const unsigned char f32_3[] =
1411   {0x8d,0x76,0x00};                     /* leal 0(%esi),%esi    */
1412 static const unsigned char f32_4[] =
1413   {0x8d,0x74,0x26,0x00};                /* leal 0(%esi,1),%esi  */
1414 static const unsigned char f32_6[] =
1415   {0x8d,0xb6,0x00,0x00,0x00,0x00};      /* leal 0L(%esi),%esi   */
1416 static const unsigned char f32_7[] =
1417   {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
1418 static const unsigned char f16_3[] =
1419   {0x8d,0x74,0x00};                     /* lea 0(%si),%si       */
1420 static const unsigned char f16_4[] =
1421   {0x8d,0xb4,0x00,0x00};                /* lea 0W(%si),%si      */
1422 static const unsigned char jump_disp8[] =
1423   {0xeb};                               /* jmp disp8           */
1424 static const unsigned char jump32_disp32[] =
1425   {0xe9};                               /* jmp disp32          */
1426 static const unsigned char jump16_disp32[] =
1427   {0x66,0xe9};                          /* jmp disp32          */
1428 /* 32-bit NOPs patterns.  */
1429 static const unsigned char *const f32_patt[] = {
1430   f32_1, f32_2, f32_3, f32_4, NULL, f32_6, f32_7
1431 };
1432 /* 16-bit NOPs patterns.  */
1433 static const unsigned char *const f16_patt[] = {
1434   f32_1, f32_2, f16_3, f16_4
1435 };
1436 /* nopl (%[re]ax) */
1437 static const unsigned char alt_3[] =
1438   {0x0f,0x1f,0x00};
1439 /* nopl 0(%[re]ax) */
1440 static const unsigned char alt_4[] =
1441   {0x0f,0x1f,0x40,0x00};
1442 /* nopl 0(%[re]ax,%[re]ax,1) */
1443 static const unsigned char alt_5[] =
1444   {0x0f,0x1f,0x44,0x00,0x00};
1445 /* nopw 0(%[re]ax,%[re]ax,1) */
1446 static const unsigned char alt_6[] =
1447   {0x66,0x0f,0x1f,0x44,0x00,0x00};
1448 /* nopl 0L(%[re]ax) */
1449 static const unsigned char alt_7[] =
1450   {0x0f,0x1f,0x80,0x00,0x00,0x00,0x00};
1451 /* nopl 0L(%[re]ax,%[re]ax,1) */
1452 static const unsigned char alt_8[] =
1453   {0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1454 /* nopw 0L(%[re]ax,%[re]ax,1) */
1455 static const unsigned char alt_9[] =
1456   {0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1457 /* nopw %cs:0L(%[re]ax,%[re]ax,1) */
1458 static const unsigned char alt_10[] =
1459   {0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1460 /* data16 nopw %cs:0L(%eax,%eax,1) */
1461 static const unsigned char alt_11[] =
1462   {0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1463 /* 32-bit and 64-bit NOPs patterns.  */
1464 static const unsigned char *const alt_patt[] = {
1465   f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
1466   alt_9, alt_10, alt_11
1467 };
1468
1469 /* Genenerate COUNT bytes of NOPs to WHERE from PATT with the maximum
1470    size of a single NOP instruction MAX_SINGLE_NOP_SIZE.  */
1471
1472 static void
1473 i386_output_nops (char *where, const unsigned char *const *patt,
1474                   int count, int max_single_nop_size)
1475
1476 {
1477   /* Place the longer NOP first.  */
1478   int last;
1479   int offset;
1480   const unsigned char *nops;
1481
1482   if (max_single_nop_size < 1)
1483     {
1484       as_fatal (_("i386_output_nops called to generate nops of at most %d bytes!"),
1485                 max_single_nop_size);
1486       return;
1487     }
1488
1489   nops = patt[max_single_nop_size - 1];
1490
1491   /* Use the smaller one if the requsted one isn't available.  */
1492   if (nops == NULL)
1493     {
1494       max_single_nop_size--;
1495       nops = patt[max_single_nop_size - 1];
1496     }
1497
1498   last = count % max_single_nop_size;
1499
1500   count -= last;
1501   for (offset = 0; offset < count; offset += max_single_nop_size)
1502     memcpy (where + offset, nops, max_single_nop_size);
1503
1504   if (last)
1505     {
1506       nops = patt[last - 1];
1507       if (nops == NULL)
1508         {
1509           /* Use the smaller one plus one-byte NOP if the needed one
1510              isn't available.  */
1511           last--;
1512           nops = patt[last - 1];
1513           memcpy (where + offset, nops, last);
1514           where[offset + last] = *patt[0];
1515         }
1516       else
1517         memcpy (where + offset, nops, last);
1518     }
1519 }
1520
1521 static INLINE int
1522 fits_in_imm7 (offsetT num)
1523 {
1524   return (num & 0x7f) == num;
1525 }
1526
1527 static INLINE int
1528 fits_in_imm31 (offsetT num)
1529 {
1530   return (num & 0x7fffffff) == num;
1531 }
1532
1533 /* Genenerate COUNT bytes of NOPs to WHERE with the maximum size of a
1534    single NOP instruction LIMIT.  */
1535
1536 void
1537 i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit)
1538 {
1539   const unsigned char *const *patt = NULL;
1540   int max_single_nop_size;
1541   /* Maximum number of NOPs before switching to jump over NOPs.  */
1542   int max_number_of_nops;
1543
1544   switch (fragP->fr_type)
1545     {
1546     case rs_fill_nop:
1547     case rs_align_code:
1548       break;
1549     case rs_machine_dependent:
1550       /* Allow NOP padding for jumps and calls.  */
1551       if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
1552           || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
1553         break;
1554       /* Fall through.  */
1555     default:
1556       return;
1557     }
1558
1559   /* We need to decide which NOP sequence to use for 32bit and
1560      64bit. When -mtune= is used:
1561
1562      1. For PROCESSOR_I386, PROCESSOR_I486, PROCESSOR_PENTIUM and
1563      PROCESSOR_GENERIC32, f32_patt will be used.
1564      2. For the rest, alt_patt will be used.
1565
1566      When -mtune= isn't used, alt_patt will be used if
1567      cpu_arch_isa_flags has CpuNop.  Otherwise, f32_patt will
1568      be used.
1569
1570      When -march= or .arch is used, we can't use anything beyond
1571      cpu_arch_isa_flags.   */
1572
1573   if (flag_code == CODE_16BIT)
1574     {
1575       patt = f16_patt;
1576       max_single_nop_size = sizeof (f16_patt) / sizeof (f16_patt[0]);
1577       /* Limit number of NOPs to 2 in 16-bit mode.  */
1578       max_number_of_nops = 2;
1579     }
1580   else
1581     {
1582       if (fragP->tc_frag_data.isa == PROCESSOR_UNKNOWN)
1583         {
1584           /* PROCESSOR_UNKNOWN means that all ISAs may be used.  */
1585           switch (cpu_arch_tune)
1586             {
1587             case PROCESSOR_UNKNOWN:
1588               /* We use cpu_arch_isa_flags to check if we SHOULD
1589                  optimize with nops.  */
1590               if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1591                 patt = alt_patt;
1592               else
1593                 patt = f32_patt;
1594               break;
1595             case PROCESSOR_PENTIUM4:
1596             case PROCESSOR_NOCONA:
1597             case PROCESSOR_CORE:
1598             case PROCESSOR_CORE2:
1599             case PROCESSOR_COREI7:
1600             case PROCESSOR_GENERIC64:
1601             case PROCESSOR_K6:
1602             case PROCESSOR_ATHLON:
1603             case PROCESSOR_K8:
1604             case PROCESSOR_AMDFAM10:
1605             case PROCESSOR_BD:
1606             case PROCESSOR_ZNVER:
1607             case PROCESSOR_BT:
1608               patt = alt_patt;
1609               break;
1610             case PROCESSOR_I386:
1611             case PROCESSOR_I486:
1612             case PROCESSOR_PENTIUM:
1613             case PROCESSOR_PENTIUMPRO:
1614             case PROCESSOR_IAMCU:
1615             case PROCESSOR_GENERIC32:
1616               patt = f32_patt;
1617               break;
1618             }
1619         }
1620       else
1621         {
1622           switch (fragP->tc_frag_data.tune)
1623             {
1624             case PROCESSOR_UNKNOWN:
1625               /* When cpu_arch_isa is set, cpu_arch_tune shouldn't be
1626                  PROCESSOR_UNKNOWN.  */
1627               abort ();
1628               break;
1629
1630             case PROCESSOR_I386:
1631             case PROCESSOR_I486:
1632             case PROCESSOR_PENTIUM:
1633             case PROCESSOR_IAMCU:
1634             case PROCESSOR_K6:
1635             case PROCESSOR_ATHLON:
1636             case PROCESSOR_K8:
1637             case PROCESSOR_AMDFAM10:
1638             case PROCESSOR_BD:
1639             case PROCESSOR_ZNVER:
1640             case PROCESSOR_BT:
1641             case PROCESSOR_GENERIC32:
1642               /* We use cpu_arch_isa_flags to check if we CAN optimize
1643                  with nops.  */
1644               if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1645                 patt = alt_patt;
1646               else
1647                 patt = f32_patt;
1648               break;
1649             case PROCESSOR_PENTIUMPRO:
1650             case PROCESSOR_PENTIUM4:
1651             case PROCESSOR_NOCONA:
1652             case PROCESSOR_CORE:
1653             case PROCESSOR_CORE2:
1654             case PROCESSOR_COREI7:
1655               if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1656                 patt = alt_patt;
1657               else
1658                 patt = f32_patt;
1659               break;
1660             case PROCESSOR_GENERIC64:
1661               patt = alt_patt;
1662               break;
1663             }
1664         }
1665
1666       if (patt == f32_patt)
1667         {
1668           max_single_nop_size = sizeof (f32_patt) / sizeof (f32_patt[0]);
1669           /* Limit number of NOPs to 2 for older processors.  */
1670           max_number_of_nops = 2;
1671         }
1672       else
1673         {
1674           max_single_nop_size = sizeof (alt_patt) / sizeof (alt_patt[0]);
1675           /* Limit number of NOPs to 7 for newer processors.  */
1676           max_number_of_nops = 7;
1677         }
1678     }
1679
1680   if (limit == 0)
1681     limit = max_single_nop_size;
1682
1683   if (fragP->fr_type == rs_fill_nop)
1684     {
1685       /* Output NOPs for .nop directive.  */
1686       if (limit > max_single_nop_size)
1687         {
1688           as_bad_where (fragP->fr_file, fragP->fr_line,
1689                         _("invalid single nop size: %d "
1690                           "(expect within [0, %d])"),
1691                         limit, max_single_nop_size);
1692           return;
1693         }
1694     }
1695   else if (fragP->fr_type != rs_machine_dependent)
1696     fragP->fr_var = count;
1697
1698   if ((count / max_single_nop_size) > max_number_of_nops)
1699     {
1700       /* Generate jump over NOPs.  */
1701       offsetT disp = count - 2;
1702       if (fits_in_imm7 (disp))
1703         {
1704           /* Use "jmp disp8" if possible.  */
1705           count = disp;
1706           where[0] = jump_disp8[0];
1707           where[1] = count;
1708           where += 2;
1709         }
1710       else
1711         {
1712           unsigned int size_of_jump;
1713
1714           if (flag_code == CODE_16BIT)
1715             {
1716               where[0] = jump16_disp32[0];
1717               where[1] = jump16_disp32[1];
1718               size_of_jump = 2;
1719             }
1720           else
1721             {
1722               where[0] = jump32_disp32[0];
1723               size_of_jump = 1;
1724             }
1725
1726           count -= size_of_jump + 4;
1727           if (!fits_in_imm31 (count))
1728             {
1729               as_bad_where (fragP->fr_file, fragP->fr_line,
1730                             _("jump over nop padding out of range"));
1731               return;
1732             }
1733
1734           md_number_to_chars (where + size_of_jump, count, 4);
1735           where += size_of_jump + 4;
1736         }
1737     }
1738
1739   /* Generate multiple NOPs.  */
1740   i386_output_nops (where, patt, count, limit);
1741 }
1742
1743 static INLINE int
1744 operand_type_all_zero (const union i386_operand_type *x)
1745 {
1746   switch (ARRAY_SIZE(x->array))
1747     {
1748     case 3:
1749       if (x->array[2])
1750         return 0;
1751       /* Fall through.  */
1752     case 2:
1753       if (x->array[1])
1754         return 0;
1755       /* Fall through.  */
1756     case 1:
1757       return !x->array[0];
1758     default:
1759       abort ();
1760     }
1761 }
1762
1763 static INLINE void
1764 operand_type_set (union i386_operand_type *x, unsigned int v)
1765 {
1766   switch (ARRAY_SIZE(x->array))
1767     {
1768     case 3:
1769       x->array[2] = v;
1770       /* Fall through.  */
1771     case 2:
1772       x->array[1] = v;
1773       /* Fall through.  */
1774     case 1:
1775       x->array[0] = v;
1776       /* Fall through.  */
1777       break;
1778     default:
1779       abort ();
1780     }
1781
1782   x->bitfield.class = ClassNone;
1783   x->bitfield.instance = InstanceNone;
1784 }
1785
1786 static INLINE int
1787 operand_type_equal (const union i386_operand_type *x,
1788                     const union i386_operand_type *y)
1789 {
1790   switch (ARRAY_SIZE(x->array))
1791     {
1792     case 3:
1793       if (x->array[2] != y->array[2])
1794         return 0;
1795       /* Fall through.  */
1796     case 2:
1797       if (x->array[1] != y->array[1])
1798         return 0;
1799       /* Fall through.  */
1800     case 1:
1801       return x->array[0] == y->array[0];
1802       break;
1803     default:
1804       abort ();
1805     }
1806 }
1807
1808 static INLINE int
1809 cpu_flags_all_zero (const union i386_cpu_flags *x)
1810 {
1811   switch (ARRAY_SIZE(x->array))
1812     {
1813     case 4:
1814       if (x->array[3])
1815         return 0;
1816       /* Fall through.  */
1817     case 3:
1818       if (x->array[2])
1819         return 0;
1820       /* Fall through.  */
1821     case 2:
1822       if (x->array[1])
1823         return 0;
1824       /* Fall through.  */
1825     case 1:
1826       return !x->array[0];
1827     default:
1828       abort ();
1829     }
1830 }
1831
1832 static INLINE int
1833 cpu_flags_equal (const union i386_cpu_flags *x,
1834                  const union i386_cpu_flags *y)
1835 {
1836   switch (ARRAY_SIZE(x->array))
1837     {
1838     case 4:
1839       if (x->array[3] != y->array[3])
1840         return 0;
1841       /* Fall through.  */
1842     case 3:
1843       if (x->array[2] != y->array[2])
1844         return 0;
1845       /* Fall through.  */
1846     case 2:
1847       if (x->array[1] != y->array[1])
1848         return 0;
1849       /* Fall through.  */
1850     case 1:
1851       return x->array[0] == y->array[0];
1852       break;
1853     default:
1854       abort ();
1855     }
1856 }
1857
1858 static INLINE int
1859 cpu_flags_check_cpu64 (i386_cpu_flags f)
1860 {
1861   return !((flag_code == CODE_64BIT && f.bitfield.cpuno64)
1862            || (flag_code != CODE_64BIT && f.bitfield.cpu64));
1863 }
1864
1865 static INLINE i386_cpu_flags
1866 cpu_flags_and (i386_cpu_flags x, i386_cpu_flags y)
1867 {
1868   switch (ARRAY_SIZE (x.array))
1869     {
1870     case 4:
1871       x.array [3] &= y.array [3];
1872       /* Fall through.  */
1873     case 3:
1874       x.array [2] &= y.array [2];
1875       /* Fall through.  */
1876     case 2:
1877       x.array [1] &= y.array [1];
1878       /* Fall through.  */
1879     case 1:
1880       x.array [0] &= y.array [0];
1881       break;
1882     default:
1883       abort ();
1884     }
1885   return x;
1886 }
1887
1888 static INLINE i386_cpu_flags
1889 cpu_flags_or (i386_cpu_flags x, i386_cpu_flags y)
1890 {
1891   switch (ARRAY_SIZE (x.array))
1892     {
1893     case 4:
1894       x.array [3] |= y.array [3];
1895       /* Fall through.  */
1896     case 3:
1897       x.array [2] |= y.array [2];
1898       /* Fall through.  */
1899     case 2:
1900       x.array [1] |= y.array [1];
1901       /* Fall through.  */
1902     case 1:
1903       x.array [0] |= y.array [0];
1904       break;
1905     default:
1906       abort ();
1907     }
1908   return x;
1909 }
1910
1911 static INLINE i386_cpu_flags
1912 cpu_flags_and_not (i386_cpu_flags x, i386_cpu_flags y)
1913 {
1914   switch (ARRAY_SIZE (x.array))
1915     {
1916     case 4:
1917       x.array [3] &= ~y.array [3];
1918       /* Fall through.  */
1919     case 3:
1920       x.array [2] &= ~y.array [2];
1921       /* Fall through.  */
1922     case 2:
1923       x.array [1] &= ~y.array [1];
1924       /* Fall through.  */
1925     case 1:
1926       x.array [0] &= ~y.array [0];
1927       break;
1928     default:
1929       abort ();
1930     }
1931   return x;
1932 }
1933
1934 static const i386_cpu_flags avx512 = CPU_ANY_AVX512F_FLAGS;
1935
1936 #define CPU_FLAGS_ARCH_MATCH            0x1
1937 #define CPU_FLAGS_64BIT_MATCH           0x2
1938
1939 #define CPU_FLAGS_PERFECT_MATCH \
1940   (CPU_FLAGS_ARCH_MATCH | CPU_FLAGS_64BIT_MATCH)
1941
1942 /* Return CPU flags match bits. */
1943
1944 static int
1945 cpu_flags_match (const insn_template *t)
1946 {
1947   i386_cpu_flags x = t->cpu_flags;
1948   int match = cpu_flags_check_cpu64 (x) ? CPU_FLAGS_64BIT_MATCH : 0;
1949
1950   x.bitfield.cpu64 = 0;
1951   x.bitfield.cpuno64 = 0;
1952
1953   if (cpu_flags_all_zero (&x))
1954     {
1955       /* This instruction is available on all archs.  */
1956       match |= CPU_FLAGS_ARCH_MATCH;
1957     }
1958   else
1959     {
1960       /* This instruction is available only on some archs.  */
1961       i386_cpu_flags cpu = cpu_arch_flags;
1962
1963       /* AVX512VL is no standalone feature - match it and then strip it.  */
1964       if (x.bitfield.cpuavx512vl && !cpu.bitfield.cpuavx512vl)
1965         return match;
1966       x.bitfield.cpuavx512vl = 0;
1967
1968       /* AVX and AVX2 present at the same time express an operand size
1969          dependency - strip AVX2 for the purposes here.  The operand size
1970          dependent check occurs in check_vecOperands().  */
1971       if (x.bitfield.cpuavx && x.bitfield.cpuavx2)
1972         x.bitfield.cpuavx2 = 0;
1973
1974       cpu = cpu_flags_and (x, cpu);
1975       if (!cpu_flags_all_zero (&cpu))
1976         {
1977           if (x.bitfield.cpuavx)
1978             {
1979               /* We need to check a few extra flags with AVX.  */
1980               if (cpu.bitfield.cpuavx
1981                   && (!t->opcode_modifier.sse2avx
1982                       || (sse2avx && !i.prefix[DATA_PREFIX]))
1983                   && (!x.bitfield.cpuaes || cpu.bitfield.cpuaes)
1984                   && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1985                   && (!x.bitfield.cpupclmul || cpu.bitfield.cpupclmul))
1986                 match |= CPU_FLAGS_ARCH_MATCH;
1987             }
1988           else if (x.bitfield.cpuavx512f)
1989             {
1990               /* We need to check a few extra flags with AVX512F.  */
1991               if (cpu.bitfield.cpuavx512f
1992                   && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1993                   && (!x.bitfield.cpuvaes || cpu.bitfield.cpuvaes)
1994                   && (!x.bitfield.cpuvpclmulqdq || cpu.bitfield.cpuvpclmulqdq))
1995                 match |= CPU_FLAGS_ARCH_MATCH;
1996             }
1997           else
1998             match |= CPU_FLAGS_ARCH_MATCH;
1999         }
2000     }
2001   return match;
2002 }
2003
2004 static INLINE i386_operand_type
2005 operand_type_and (i386_operand_type x, i386_operand_type y)
2006 {
2007   if (x.bitfield.class != y.bitfield.class)
2008     x.bitfield.class = ClassNone;
2009   if (x.bitfield.instance != y.bitfield.instance)
2010     x.bitfield.instance = InstanceNone;
2011
2012   switch (ARRAY_SIZE (x.array))
2013     {
2014     case 3:
2015       x.array [2] &= y.array [2];
2016       /* Fall through.  */
2017     case 2:
2018       x.array [1] &= y.array [1];
2019       /* Fall through.  */
2020     case 1:
2021       x.array [0] &= y.array [0];
2022       break;
2023     default:
2024       abort ();
2025     }
2026   return x;
2027 }
2028
2029 static INLINE i386_operand_type
2030 operand_type_and_not (i386_operand_type x, i386_operand_type y)
2031 {
2032   gas_assert (y.bitfield.class == ClassNone);
2033   gas_assert (y.bitfield.instance == InstanceNone);
2034
2035   switch (ARRAY_SIZE (x.array))
2036     {
2037     case 3:
2038       x.array [2] &= ~y.array [2];
2039       /* Fall through.  */
2040     case 2:
2041       x.array [1] &= ~y.array [1];
2042       /* Fall through.  */
2043     case 1:
2044       x.array [0] &= ~y.array [0];
2045       break;
2046     default:
2047       abort ();
2048     }
2049   return x;
2050 }
2051
2052 static INLINE i386_operand_type
2053 operand_type_or (i386_operand_type x, i386_operand_type y)
2054 {
2055   gas_assert (x.bitfield.class == ClassNone ||
2056               y.bitfield.class == ClassNone ||
2057               x.bitfield.class == y.bitfield.class);
2058   gas_assert (x.bitfield.instance == InstanceNone ||
2059               y.bitfield.instance == InstanceNone ||
2060               x.bitfield.instance == y.bitfield.instance);
2061
2062   switch (ARRAY_SIZE (x.array))
2063     {
2064     case 3:
2065       x.array [2] |= y.array [2];
2066       /* Fall through.  */
2067     case 2:
2068       x.array [1] |= y.array [1];
2069       /* Fall through.  */
2070     case 1:
2071       x.array [0] |= y.array [0];
2072       break;
2073     default:
2074       abort ();
2075     }
2076   return x;
2077 }
2078
2079 static INLINE i386_operand_type
2080 operand_type_xor (i386_operand_type x, i386_operand_type y)
2081 {
2082   gas_assert (y.bitfield.class == ClassNone);
2083   gas_assert (y.bitfield.instance == InstanceNone);
2084
2085   switch (ARRAY_SIZE (x.array))
2086     {
2087     case 3:
2088       x.array [2] ^= y.array [2];
2089       /* Fall through.  */
2090     case 2:
2091       x.array [1] ^= y.array [1];
2092       /* Fall through.  */
2093     case 1:
2094       x.array [0] ^= y.array [0];
2095       break;
2096     default:
2097       abort ();
2098     }
2099   return x;
2100 }
2101
2102 static const i386_operand_type disp16_32 = OPERAND_TYPE_DISP16_32;
2103 static const i386_operand_type anydisp = OPERAND_TYPE_ANYDISP;
2104 static const i386_operand_type anyimm = OPERAND_TYPE_ANYIMM;
2105 static const i386_operand_type regxmm = OPERAND_TYPE_REGXMM;
2106 static const i386_operand_type regmask = OPERAND_TYPE_REGMASK;
2107 static const i386_operand_type imm8 = OPERAND_TYPE_IMM8;
2108 static const i386_operand_type imm8s = OPERAND_TYPE_IMM8S;
2109 static const i386_operand_type imm16 = OPERAND_TYPE_IMM16;
2110 static const i386_operand_type imm32 = OPERAND_TYPE_IMM32;
2111 static const i386_operand_type imm32s = OPERAND_TYPE_IMM32S;
2112 static const i386_operand_type imm64 = OPERAND_TYPE_IMM64;
2113 static const i386_operand_type imm16_32 = OPERAND_TYPE_IMM16_32;
2114 static const i386_operand_type imm16_32s = OPERAND_TYPE_IMM16_32S;
2115 static const i386_operand_type imm16_32_32s = OPERAND_TYPE_IMM16_32_32S;
2116
2117 enum operand_type
2118 {
2119   reg,
2120   imm,
2121   disp,
2122   anymem
2123 };
2124
2125 static INLINE int
2126 operand_type_check (i386_operand_type t, enum operand_type c)
2127 {
2128   switch (c)
2129     {
2130     case reg:
2131       return t.bitfield.class == Reg;
2132
2133     case imm:
2134       return (t.bitfield.imm8
2135               || t.bitfield.imm8s
2136               || t.bitfield.imm16
2137               || t.bitfield.imm32
2138               || t.bitfield.imm32s
2139               || t.bitfield.imm64);
2140
2141     case disp:
2142       return (t.bitfield.disp8
2143               || t.bitfield.disp16
2144               || t.bitfield.disp32
2145               || t.bitfield.disp32s
2146               || t.bitfield.disp64);
2147
2148     case anymem:
2149       return (t.bitfield.disp8
2150               || t.bitfield.disp16
2151               || t.bitfield.disp32
2152               || t.bitfield.disp32s
2153               || t.bitfield.disp64
2154               || t.bitfield.baseindex);
2155
2156     default:
2157       abort ();
2158     }
2159
2160   return 0;
2161 }
2162
2163 /* Return 1 if there is no conflict in 8bit/16bit/32bit/64bit/80bit size
2164    between operand GIVEN and opeand WANTED for instruction template T.  */
2165
2166 static INLINE int
2167 match_operand_size (const insn_template *t, unsigned int wanted,
2168                     unsigned int given)
2169 {
2170   return !((i.types[given].bitfield.byte
2171             && !t->operand_types[wanted].bitfield.byte)
2172            || (i.types[given].bitfield.word
2173                && !t->operand_types[wanted].bitfield.word)
2174            || (i.types[given].bitfield.dword
2175                && !t->operand_types[wanted].bitfield.dword)
2176            || (i.types[given].bitfield.qword
2177                && !t->operand_types[wanted].bitfield.qword)
2178            || (i.types[given].bitfield.tbyte
2179                && !t->operand_types[wanted].bitfield.tbyte));
2180 }
2181
2182 /* Return 1 if there is no conflict in SIMD register between operand
2183    GIVEN and opeand WANTED for instruction template T.  */
2184
2185 static INLINE int
2186 match_simd_size (const insn_template *t, unsigned int wanted,
2187                  unsigned int given)
2188 {
2189   return !((i.types[given].bitfield.xmmword
2190             && !t->operand_types[wanted].bitfield.xmmword)
2191            || (i.types[given].bitfield.ymmword
2192                && !t->operand_types[wanted].bitfield.ymmword)
2193            || (i.types[given].bitfield.zmmword
2194                && !t->operand_types[wanted].bitfield.zmmword)
2195            || (i.types[given].bitfield.tmmword
2196                && !t->operand_types[wanted].bitfield.tmmword));
2197 }
2198
2199 /* Return 1 if there is no conflict in any size between operand GIVEN
2200    and opeand WANTED for instruction template T.  */
2201
2202 static INLINE int
2203 match_mem_size (const insn_template *t, unsigned int wanted,
2204                 unsigned int given)
2205 {
2206   return (match_operand_size (t, wanted, given)
2207           && !((i.types[given].bitfield.unspecified
2208                 && !i.broadcast.type
2209                 && !t->operand_types[wanted].bitfield.unspecified)
2210                || (i.types[given].bitfield.fword
2211                    && !t->operand_types[wanted].bitfield.fword)
2212                /* For scalar opcode templates to allow register and memory
2213                   operands at the same time, some special casing is needed
2214                   here.  Also for v{,p}broadcast*, {,v}pmov{s,z}*, and
2215                   down-conversion vpmov*.  */
2216                || ((t->operand_types[wanted].bitfield.class == RegSIMD
2217                     && t->operand_types[wanted].bitfield.byte
2218                        + t->operand_types[wanted].bitfield.word
2219                        + t->operand_types[wanted].bitfield.dword
2220                        + t->operand_types[wanted].bitfield.qword
2221                        > !!t->opcode_modifier.broadcast)
2222                    ? (i.types[given].bitfield.xmmword
2223                       || i.types[given].bitfield.ymmword
2224                       || i.types[given].bitfield.zmmword)
2225                    : !match_simd_size(t, wanted, given))));
2226 }
2227
2228 /* Return value has MATCH_STRAIGHT set if there is no size conflict on any
2229    operands for instruction template T, and it has MATCH_REVERSE set if there
2230    is no size conflict on any operands for the template with operands reversed
2231    (and the template allows for reversing in the first place).  */
2232
2233 #define MATCH_STRAIGHT 1
2234 #define MATCH_REVERSE  2
2235
2236 static INLINE unsigned int
2237 operand_size_match (const insn_template *t)
2238 {
2239   unsigned int j, match = MATCH_STRAIGHT;
2240
2241   /* Don't check non-absolute jump instructions.  */
2242   if (t->opcode_modifier.jump
2243       && t->opcode_modifier.jump != JUMP_ABSOLUTE)
2244     return match;
2245
2246   /* Check memory and accumulator operand size.  */
2247   for (j = 0; j < i.operands; j++)
2248     {
2249       if (i.types[j].bitfield.class != Reg
2250           && i.types[j].bitfield.class != RegSIMD
2251           && t->opcode_modifier.anysize)
2252         continue;
2253
2254       if (t->operand_types[j].bitfield.class == Reg
2255           && !match_operand_size (t, j, j))
2256         {
2257           match = 0;
2258           break;
2259         }
2260
2261       if (t->operand_types[j].bitfield.class == RegSIMD
2262           && !match_simd_size (t, j, j))
2263         {
2264           match = 0;
2265           break;
2266         }
2267
2268       if (t->operand_types[j].bitfield.instance == Accum
2269           && (!match_operand_size (t, j, j) || !match_simd_size (t, j, j)))
2270         {
2271           match = 0;
2272           break;
2273         }
2274
2275       if ((i.flags[j] & Operand_Mem) && !match_mem_size (t, j, j))
2276         {
2277           match = 0;
2278           break;
2279         }
2280     }
2281
2282   if (!t->opcode_modifier.d)
2283     {
2284     mismatch:
2285       if (!match)
2286         i.error = operand_size_mismatch;
2287       return match;
2288     }
2289
2290   /* Check reverse.  */
2291   gas_assert (i.operands >= 2 && i.operands <= 3);
2292
2293   for (j = 0; j < i.operands; j++)
2294     {
2295       unsigned int given = i.operands - j - 1;
2296
2297       if (t->operand_types[j].bitfield.class == Reg
2298           && !match_operand_size (t, j, given))
2299         goto mismatch;
2300
2301       if (t->operand_types[j].bitfield.class == RegSIMD
2302           && !match_simd_size (t, j, given))
2303         goto mismatch;
2304
2305       if (t->operand_types[j].bitfield.instance == Accum
2306           && (!match_operand_size (t, j, given)
2307               || !match_simd_size (t, j, given)))
2308         goto mismatch;
2309
2310       if ((i.flags[given] & Operand_Mem) && !match_mem_size (t, j, given))
2311         goto mismatch;
2312     }
2313
2314   return match | MATCH_REVERSE;
2315 }
2316
2317 static INLINE int
2318 operand_type_match (i386_operand_type overlap,
2319                     i386_operand_type given)
2320 {
2321   i386_operand_type temp = overlap;
2322
2323   temp.bitfield.unspecified = 0;
2324   temp.bitfield.byte = 0;
2325   temp.bitfield.word = 0;
2326   temp.bitfield.dword = 0;
2327   temp.bitfield.fword = 0;
2328   temp.bitfield.qword = 0;
2329   temp.bitfield.tbyte = 0;
2330   temp.bitfield.xmmword = 0;
2331   temp.bitfield.ymmword = 0;
2332   temp.bitfield.zmmword = 0;
2333   temp.bitfield.tmmword = 0;
2334   if (operand_type_all_zero (&temp))
2335     goto mismatch;
2336
2337   if (given.bitfield.baseindex == overlap.bitfield.baseindex)
2338     return 1;
2339
2340  mismatch:
2341   i.error = operand_type_mismatch;
2342   return 0;
2343 }
2344
2345 /* If given types g0 and g1 are registers they must be of the same type
2346    unless the expected operand type register overlap is null.
2347    Some Intel syntax memory operand size checking also happens here.  */
2348
2349 static INLINE int
2350 operand_type_register_match (i386_operand_type g0,
2351                              i386_operand_type t0,
2352                              i386_operand_type g1,
2353                              i386_operand_type t1)
2354 {
2355   if (g0.bitfield.class != Reg
2356       && g0.bitfield.class != RegSIMD
2357       && (!operand_type_check (g0, anymem)
2358           || g0.bitfield.unspecified
2359           || (t0.bitfield.class != Reg
2360               && t0.bitfield.class != RegSIMD)))
2361     return 1;
2362
2363   if (g1.bitfield.class != Reg
2364       && g1.bitfield.class != RegSIMD
2365       && (!operand_type_check (g1, anymem)
2366           || g1.bitfield.unspecified
2367           || (t1.bitfield.class != Reg
2368               && t1.bitfield.class != RegSIMD)))
2369     return 1;
2370
2371   if (g0.bitfield.byte == g1.bitfield.byte
2372       && g0.bitfield.word == g1.bitfield.word
2373       && g0.bitfield.dword == g1.bitfield.dword
2374       && g0.bitfield.qword == g1.bitfield.qword
2375       && g0.bitfield.xmmword == g1.bitfield.xmmword
2376       && g0.bitfield.ymmword == g1.bitfield.ymmword
2377       && g0.bitfield.zmmword == g1.bitfield.zmmword)
2378     return 1;
2379
2380   /* If expectations overlap in no more than a single size, all is fine. */
2381   g0 = operand_type_and (t0, t1);
2382   if (g0.bitfield.byte
2383       + g0.bitfield.word
2384       + g0.bitfield.dword
2385       + g0.bitfield.qword
2386       + g0.bitfield.xmmword
2387       + g0.bitfield.ymmword
2388       + g0.bitfield.zmmword <= 1)
2389     return 1;
2390
2391   i.error = register_type_mismatch;
2392
2393   return 0;
2394 }
2395
2396 static INLINE unsigned int
2397 register_number (const reg_entry *r)
2398 {
2399   unsigned int nr = r->reg_num;
2400
2401   if (r->reg_flags & RegRex)
2402     nr += 8;
2403
2404   if (r->reg_flags & RegVRex)
2405     nr += 16;
2406
2407   return nr;
2408 }
2409
2410 static INLINE unsigned int
2411 mode_from_disp_size (i386_operand_type t)
2412 {
2413   if (t.bitfield.disp8)
2414     return 1;
2415   else if (t.bitfield.disp16
2416            || t.bitfield.disp32
2417            || t.bitfield.disp32s)
2418     return 2;
2419   else
2420     return 0;
2421 }
2422
2423 static INLINE int
2424 fits_in_signed_byte (addressT num)
2425 {
2426   return num + 0x80 <= 0xff;
2427 }
2428
2429 static INLINE int
2430 fits_in_unsigned_byte (addressT num)
2431 {
2432   return num <= 0xff;
2433 }
2434
2435 static INLINE int
2436 fits_in_unsigned_word (addressT num)
2437 {
2438   return num <= 0xffff;
2439 }
2440
2441 static INLINE int
2442 fits_in_signed_word (addressT num)
2443 {
2444   return num + 0x8000 <= 0xffff;
2445 }
2446
2447 static INLINE int
2448 fits_in_signed_long (addressT num ATTRIBUTE_UNUSED)
2449 {
2450 #ifndef BFD64
2451   return 1;
2452 #else
2453   return num + 0x80000000 <= 0xffffffff;
2454 #endif
2455 }                               /* fits_in_signed_long() */
2456
2457 static INLINE int
2458 fits_in_unsigned_long (addressT num ATTRIBUTE_UNUSED)
2459 {
2460 #ifndef BFD64
2461   return 1;
2462 #else
2463   return num <= 0xffffffff;
2464 #endif
2465 }                               /* fits_in_unsigned_long() */
2466
2467 static INLINE valueT extend_to_32bit_address (addressT num)
2468 {
2469 #ifdef BFD64
2470   if (fits_in_unsigned_long(num))
2471     return (num ^ ((addressT) 1 << 31)) - ((addressT) 1 << 31);
2472
2473   if (!fits_in_signed_long (num))
2474     return num & 0xffffffff;
2475 #endif
2476
2477   return num;
2478 }
2479
2480 static INLINE int
2481 fits_in_disp8 (offsetT num)
2482 {
2483   int shift = i.memshift;
2484   unsigned int mask;
2485
2486   if (shift == -1)
2487     abort ();
2488
2489   mask = (1 << shift) - 1;
2490
2491   /* Return 0 if NUM isn't properly aligned.  */
2492   if ((num & mask))
2493     return 0;
2494
2495   /* Check if NUM will fit in 8bit after shift.  */
2496   return fits_in_signed_byte (num >> shift);
2497 }
2498
2499 static INLINE int
2500 fits_in_imm4 (offsetT num)
2501 {
2502   return (num & 0xf) == num;
2503 }
2504
2505 static i386_operand_type
2506 smallest_imm_type (offsetT num)
2507 {
2508   i386_operand_type t;
2509
2510   operand_type_set (&t, 0);
2511   t.bitfield.imm64 = 1;
2512
2513   if (cpu_arch_tune != PROCESSOR_I486 && num == 1)
2514     {
2515       /* This code is disabled on the 486 because all the Imm1 forms
2516          in the opcode table are slower on the i486.  They're the
2517          versions with the implicitly specified single-position
2518          displacement, which has another syntax if you really want to
2519          use that form.  */
2520       t.bitfield.imm1 = 1;
2521       t.bitfield.imm8 = 1;
2522       t.bitfield.imm8s = 1;
2523       t.bitfield.imm16 = 1;
2524       t.bitfield.imm32 = 1;
2525       t.bitfield.imm32s = 1;
2526     }
2527   else if (fits_in_signed_byte (num))
2528     {
2529       t.bitfield.imm8 = 1;
2530       t.bitfield.imm8s = 1;
2531       t.bitfield.imm16 = 1;
2532       t.bitfield.imm32 = 1;
2533       t.bitfield.imm32s = 1;
2534     }
2535   else if (fits_in_unsigned_byte (num))
2536     {
2537       t.bitfield.imm8 = 1;
2538       t.bitfield.imm16 = 1;
2539       t.bitfield.imm32 = 1;
2540       t.bitfield.imm32s = 1;
2541     }
2542   else if (fits_in_signed_word (num) || fits_in_unsigned_word (num))
2543     {
2544       t.bitfield.imm16 = 1;
2545       t.bitfield.imm32 = 1;
2546       t.bitfield.imm32s = 1;
2547     }
2548   else if (fits_in_signed_long (num))
2549     {
2550       t.bitfield.imm32 = 1;
2551       t.bitfield.imm32s = 1;
2552     }
2553   else if (fits_in_unsigned_long (num))
2554     t.bitfield.imm32 = 1;
2555
2556   return t;
2557 }
2558
2559 static offsetT
2560 offset_in_range (offsetT val, int size)
2561 {
2562   addressT mask;
2563
2564   switch (size)
2565     {
2566     case 1: mask = ((addressT) 1 <<  8) - 1; break;
2567     case 2: mask = ((addressT) 1 << 16) - 1; break;
2568 #ifdef BFD64
2569     case 4: mask = ((addressT) 1 << 32) - 1; break;
2570 #endif
2571     case sizeof (val): return val;
2572     default: abort ();
2573     }
2574
2575   if ((val & ~mask) != 0 && (-val & ~mask) != 0)
2576     {
2577       char val_buf[128];
2578       char masked_buf[128];
2579
2580       /* Coded this way in order to ease translation.  */
2581       sprintf_vma (val_buf, val);
2582       sprintf_vma (masked_buf, val & mask);
2583       as_warn (_("0x%s shortened to 0x%s"), val_buf, masked_buf);
2584     }
2585
2586   return val & mask;
2587 }
2588
2589 enum PREFIX_GROUP
2590 {
2591   PREFIX_EXIST = 0,
2592   PREFIX_LOCK,
2593   PREFIX_REP,
2594   PREFIX_DS,
2595   PREFIX_OTHER
2596 };
2597
2598 /* Returns
2599    a. PREFIX_EXIST if attempting to add a prefix where one from the
2600    same class already exists.
2601    b. PREFIX_LOCK if lock prefix is added.
2602    c. PREFIX_REP if rep/repne prefix is added.
2603    d. PREFIX_DS if ds prefix is added.
2604    e. PREFIX_OTHER if other prefix is added.
2605  */
2606
2607 static enum PREFIX_GROUP
2608 add_prefix (unsigned int prefix)
2609 {
2610   enum PREFIX_GROUP ret = PREFIX_OTHER;
2611   unsigned int q;
2612
2613   if (prefix >= REX_OPCODE && prefix < REX_OPCODE + 16
2614       && flag_code == CODE_64BIT)
2615     {
2616       if ((i.prefix[REX_PREFIX] & prefix & REX_W)
2617           || (i.prefix[REX_PREFIX] & prefix & REX_R)
2618           || (i.prefix[REX_PREFIX] & prefix & REX_X)
2619           || (i.prefix[REX_PREFIX] & prefix & REX_B))
2620         ret = PREFIX_EXIST;
2621       q = REX_PREFIX;
2622     }
2623   else
2624     {
2625       switch (prefix)
2626         {
2627         default:
2628           abort ();
2629
2630         case DS_PREFIX_OPCODE:
2631           ret = PREFIX_DS;
2632           /* Fall through.  */
2633         case CS_PREFIX_OPCODE:
2634         case ES_PREFIX_OPCODE:
2635         case FS_PREFIX_OPCODE:
2636         case GS_PREFIX_OPCODE:
2637         case SS_PREFIX_OPCODE:
2638           q = SEG_PREFIX;
2639           break;
2640
2641         case REPNE_PREFIX_OPCODE:
2642         case REPE_PREFIX_OPCODE:
2643           q = REP_PREFIX;
2644           ret = PREFIX_REP;
2645           break;
2646
2647         case LOCK_PREFIX_OPCODE:
2648           q = LOCK_PREFIX;
2649           ret = PREFIX_LOCK;
2650           break;
2651
2652         case FWAIT_OPCODE:
2653           q = WAIT_PREFIX;
2654           break;
2655
2656         case ADDR_PREFIX_OPCODE:
2657           q = ADDR_PREFIX;
2658           break;
2659
2660         case DATA_PREFIX_OPCODE:
2661           q = DATA_PREFIX;
2662           break;
2663         }
2664       if (i.prefix[q] != 0)
2665         ret = PREFIX_EXIST;
2666     }
2667
2668   if (ret)
2669     {
2670       if (!i.prefix[q])
2671         ++i.prefixes;
2672       i.prefix[q] |= prefix;
2673     }
2674   else
2675     as_bad (_("same type of prefix used twice"));
2676
2677   return ret;
2678 }
2679
2680 static void
2681 update_code_flag (int value, int check)
2682 {
2683   PRINTF_LIKE ((*as_error));
2684
2685   flag_code = (enum flag_code) value;
2686   if (flag_code == CODE_64BIT)
2687     {
2688       cpu_arch_flags.bitfield.cpu64 = 1;
2689       cpu_arch_flags.bitfield.cpuno64 = 0;
2690     }
2691   else
2692     {
2693       cpu_arch_flags.bitfield.cpu64 = 0;
2694       cpu_arch_flags.bitfield.cpuno64 = 1;
2695     }
2696   if (value == CODE_64BIT && !cpu_arch_flags.bitfield.cpulm )
2697     {
2698       if (check)
2699         as_error = as_fatal;
2700       else
2701         as_error = as_bad;
2702       (*as_error) (_("64bit mode not supported on `%s'."),
2703                    cpu_arch_name ? cpu_arch_name : default_arch);
2704     }
2705   if (value == CODE_32BIT && !cpu_arch_flags.bitfield.cpui386)
2706     {
2707       if (check)
2708         as_error = as_fatal;
2709       else
2710         as_error = as_bad;
2711       (*as_error) (_("32bit mode not supported on `%s'."),
2712                    cpu_arch_name ? cpu_arch_name : default_arch);
2713     }
2714   stackop_size = '\0';
2715 }
2716
2717 static void
2718 set_code_flag (int value)
2719 {
2720   update_code_flag (value, 0);
2721 }
2722
2723 static void
2724 set_16bit_gcc_code_flag (int new_code_flag)
2725 {
2726   flag_code = (enum flag_code) new_code_flag;
2727   if (flag_code != CODE_16BIT)
2728     abort ();
2729   cpu_arch_flags.bitfield.cpu64 = 0;
2730   cpu_arch_flags.bitfield.cpuno64 = 1;
2731   stackop_size = LONG_MNEM_SUFFIX;
2732 }
2733
2734 static void
2735 set_intel_syntax (int syntax_flag)
2736 {
2737   /* Find out if register prefixing is specified.  */
2738   int ask_naked_reg = 0;
2739
2740   SKIP_WHITESPACE ();
2741   if (!is_end_of_line[(unsigned char) *input_line_pointer])
2742     {
2743       char *string;
2744       int e = get_symbol_name (&string);
2745
2746       if (strcmp (string, "prefix") == 0)
2747         ask_naked_reg = 1;
2748       else if (strcmp (string, "noprefix") == 0)
2749         ask_naked_reg = -1;
2750       else
2751         as_bad (_("bad argument to syntax directive."));
2752       (void) restore_line_pointer (e);
2753     }
2754   demand_empty_rest_of_line ();
2755
2756   intel_syntax = syntax_flag;
2757
2758   if (ask_naked_reg == 0)
2759     allow_naked_reg = (intel_syntax
2760                        && (bfd_get_symbol_leading_char (stdoutput) != '\0'));
2761   else
2762     allow_naked_reg = (ask_naked_reg < 0);
2763
2764   expr_set_rank (O_full_ptr, syntax_flag ? 10 : 0);
2765
2766   identifier_chars['%'] = intel_syntax && allow_naked_reg ? '%' : 0;
2767   identifier_chars['$'] = intel_syntax ? '$' : 0;
2768   register_prefix = allow_naked_reg ? "" : "%";
2769 }
2770
2771 static void
2772 set_intel_mnemonic (int mnemonic_flag)
2773 {
2774   intel_mnemonic = mnemonic_flag;
2775 }
2776
2777 static void
2778 set_allow_index_reg (int flag)
2779 {
2780   allow_index_reg = flag;
2781 }
2782
2783 static void
2784 set_check (int what)
2785 {
2786   enum check_kind *kind;
2787   const char *str;
2788
2789   if (what)
2790     {
2791       kind = &operand_check;
2792       str = "operand";
2793     }
2794   else
2795     {
2796       kind = &sse_check;
2797       str = "sse";
2798     }
2799
2800   SKIP_WHITESPACE ();
2801
2802   if (!is_end_of_line[(unsigned char) *input_line_pointer])
2803     {
2804       char *string;
2805       int e = get_symbol_name (&string);
2806
2807       if (strcmp (string, "none") == 0)
2808         *kind = check_none;
2809       else if (strcmp (string, "warning") == 0)
2810         *kind = check_warning;
2811       else if (strcmp (string, "error") == 0)
2812         *kind = check_error;
2813       else
2814         as_bad (_("bad argument to %s_check directive."), str);
2815       (void) restore_line_pointer (e);
2816     }
2817   else
2818     as_bad (_("missing argument for %s_check directive"), str);
2819
2820   demand_empty_rest_of_line ();
2821 }
2822
2823 static void
2824 check_cpu_arch_compatible (const char *name ATTRIBUTE_UNUSED,
2825                            i386_cpu_flags new_flag ATTRIBUTE_UNUSED)
2826 {
2827 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
2828   static const char *arch;
2829
2830   /* Intel MCU is only supported on ELF.  */
2831   if (!IS_ELF)
2832     return;
2833
2834   if (!arch)
2835     {
2836       /* Use cpu_arch_name if it is set in md_parse_option.  Otherwise
2837          use default_arch.  */
2838       arch = cpu_arch_name;
2839       if (!arch)
2840         arch = default_arch;
2841     }
2842
2843   /* If we are targeting Intel MCU, we must enable it.  */
2844   if ((get_elf_backend_data (stdoutput)->elf_machine_code == EM_IAMCU)
2845       == new_flag.bitfield.cpuiamcu)
2846     return;
2847
2848   as_bad (_("`%s' is not supported on `%s'"), name, arch);
2849 #endif
2850 }
2851
2852 static void
2853 set_cpu_arch (int dummy ATTRIBUTE_UNUSED)
2854 {
2855   SKIP_WHITESPACE ();
2856
2857   if (!is_end_of_line[(unsigned char) *input_line_pointer])
2858     {
2859       char *string;
2860       int e = get_symbol_name (&string);
2861       unsigned int j;
2862       i386_cpu_flags flags;
2863
2864       for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
2865         {
2866           if (strcmp (string, cpu_arch[j].name) == 0)
2867             {
2868               if (*string != '.')
2869                 {
2870                   check_cpu_arch_compatible (string, cpu_arch[j].flags);
2871
2872                   cpu_arch_name = cpu_arch[j].name;
2873                   cpu_sub_arch_name = NULL;
2874                   cpu_arch_flags = cpu_arch[j].flags;
2875                   if (flag_code == CODE_64BIT)
2876                     {
2877                       cpu_arch_flags.bitfield.cpu64 = 1;
2878                       cpu_arch_flags.bitfield.cpuno64 = 0;
2879                     }
2880                   else
2881                     {
2882                       cpu_arch_flags.bitfield.cpu64 = 0;
2883                       cpu_arch_flags.bitfield.cpuno64 = 1;
2884                     }
2885                   cpu_arch_isa = cpu_arch[j].type;
2886                   cpu_arch_isa_flags = cpu_arch[j].flags;
2887                   if (!cpu_arch_tune_set)
2888                     {
2889                       cpu_arch_tune = cpu_arch_isa;
2890                       cpu_arch_tune_flags = cpu_arch_isa_flags;
2891                     }
2892                   break;
2893                 }
2894
2895               flags = cpu_flags_or (cpu_arch_flags,
2896                                     cpu_arch[j].flags);
2897
2898               if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2899                 {
2900                   if (cpu_sub_arch_name)
2901                     {
2902                       char *name = cpu_sub_arch_name;
2903                       cpu_sub_arch_name = concat (name,
2904                                                   cpu_arch[j].name,
2905                                                   (const char *) NULL);
2906                       free (name);
2907                     }
2908                   else
2909                     cpu_sub_arch_name = xstrdup (cpu_arch[j].name);
2910                   cpu_arch_flags = flags;
2911                   cpu_arch_isa_flags = flags;
2912                 }
2913               else
2914                 cpu_arch_isa_flags
2915                   = cpu_flags_or (cpu_arch_isa_flags,
2916                                   cpu_arch[j].flags);
2917               (void) restore_line_pointer (e);
2918               demand_empty_rest_of_line ();
2919               return;
2920             }
2921         }
2922
2923       if (*string == '.' && j >= ARRAY_SIZE (cpu_arch))
2924         {
2925           /* Disable an ISA extension.  */
2926           for (j = 0; j < ARRAY_SIZE (cpu_noarch); j++)
2927             if (strcmp (string + 1, cpu_noarch [j].name) == 0)
2928               {
2929                 flags = cpu_flags_and_not (cpu_arch_flags,
2930                                            cpu_noarch[j].flags);
2931                 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2932                   {
2933                     if (cpu_sub_arch_name)
2934                       {
2935                         char *name = cpu_sub_arch_name;
2936                         cpu_sub_arch_name = concat (name, string,
2937                                                     (const char *) NULL);
2938                         free (name);
2939                       }
2940                     else
2941                       cpu_sub_arch_name = xstrdup (string);
2942                     cpu_arch_flags = flags;
2943                     cpu_arch_isa_flags = flags;
2944                   }
2945                 (void) restore_line_pointer (e);
2946                 demand_empty_rest_of_line ();
2947                 return;
2948               }
2949
2950           j = ARRAY_SIZE (cpu_arch);
2951         }
2952
2953       if (j >= ARRAY_SIZE (cpu_arch))
2954         as_bad (_("no such architecture: `%s'"), string);
2955
2956       *input_line_pointer = e;
2957     }
2958   else
2959     as_bad (_("missing cpu architecture"));
2960
2961   no_cond_jump_promotion = 0;
2962   if (*input_line_pointer == ','
2963       && !is_end_of_line[(unsigned char) input_line_pointer[1]])
2964     {
2965       char *string;
2966       char e;
2967
2968       ++input_line_pointer;
2969       e = get_symbol_name (&string);
2970
2971       if (strcmp (string, "nojumps") == 0)
2972         no_cond_jump_promotion = 1;
2973       else if (strcmp (string, "jumps") == 0)
2974         ;
2975       else
2976         as_bad (_("no such architecture modifier: `%s'"), string);
2977
2978       (void) restore_line_pointer (e);
2979     }
2980
2981   demand_empty_rest_of_line ();
2982 }
2983
2984 enum bfd_architecture
2985 i386_arch (void)
2986 {
2987   if (cpu_arch_isa == PROCESSOR_IAMCU)
2988     {
2989       if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2990           || flag_code == CODE_64BIT)
2991         as_fatal (_("Intel MCU is 32bit ELF only"));
2992       return bfd_arch_iamcu;
2993     }
2994   else
2995     return bfd_arch_i386;
2996 }
2997
2998 unsigned long
2999 i386_mach (void)
3000 {
3001   if (startswith (default_arch, "x86_64"))
3002     {
3003       if (default_arch[6] == '\0')
3004         return bfd_mach_x86_64;
3005       else
3006         return bfd_mach_x64_32;
3007     }
3008   else if (!strcmp (default_arch, "i386")
3009            || !strcmp (default_arch, "iamcu"))
3010     {
3011       if (cpu_arch_isa == PROCESSOR_IAMCU)
3012         {
3013           if (OUTPUT_FLAVOR != bfd_target_elf_flavour)
3014             as_fatal (_("Intel MCU is 32bit ELF only"));
3015           return bfd_mach_i386_iamcu;
3016         }
3017       else
3018         return bfd_mach_i386_i386;
3019     }
3020   else
3021     as_fatal (_("unknown architecture"));
3022 }
3023 \f
3024 void
3025 md_begin (void)
3026 {
3027   /* Support pseudo prefixes like {disp32}.  */
3028   lex_type ['{'] = LEX_BEGIN_NAME;
3029
3030   /* Initialize op_hash hash table.  */
3031   op_hash = str_htab_create ();
3032
3033   {
3034     const insn_template *optab;
3035     templates *core_optab;
3036
3037     /* Setup for loop.  */
3038     optab = i386_optab;
3039     core_optab = XNEW (templates);
3040     core_optab->start = optab;
3041
3042     while (1)
3043       {
3044         ++optab;
3045         if (optab->name == NULL
3046             || strcmp (optab->name, (optab - 1)->name) != 0)
3047           {
3048             /* different name --> ship out current template list;
3049                add to hash table; & begin anew.  */
3050             core_optab->end = optab;
3051             if (str_hash_insert (op_hash, (optab - 1)->name, core_optab, 0))
3052               as_fatal (_("duplicate %s"), (optab - 1)->name);
3053
3054             if (optab->name == NULL)
3055               break;
3056             core_optab = XNEW (templates);
3057             core_optab->start = optab;
3058           }
3059       }
3060   }
3061
3062   /* Initialize reg_hash hash table.  */
3063   reg_hash = str_htab_create ();
3064   {
3065     const reg_entry *regtab;
3066     unsigned int regtab_size = i386_regtab_size;
3067
3068     for (regtab = i386_regtab; regtab_size--; regtab++)
3069       {
3070         switch (regtab->reg_type.bitfield.class)
3071           {
3072           case Reg:
3073             if (regtab->reg_type.bitfield.dword)
3074               {
3075                 if (regtab->reg_type.bitfield.instance == Accum)
3076                   reg_eax = regtab;
3077               }
3078             else if (regtab->reg_type.bitfield.tbyte)
3079               {
3080                 /* There's no point inserting st(<N>) in the hash table, as
3081                    parentheses aren't included in register_chars[] anyway.  */
3082                 if (regtab->reg_type.bitfield.instance != Accum)
3083                   continue;
3084                 reg_st0 = regtab;
3085               }
3086             break;
3087
3088           case SReg:
3089             switch (regtab->reg_num)
3090               {
3091               case 0: reg_es = regtab; break;
3092               case 2: reg_ss = regtab; break;
3093               case 3: reg_ds = regtab; break;
3094               }
3095             break;
3096
3097           case RegMask:
3098             if (!regtab->reg_num)
3099               reg_k0 = regtab;
3100             break;
3101           }
3102
3103         if (str_hash_insert (reg_hash, regtab->reg_name, regtab, 0) != NULL)
3104           as_fatal (_("duplicate %s"), regtab->reg_name);
3105       }
3106   }
3107
3108   /* Fill in lexical tables:  mnemonic_chars, operand_chars.  */
3109   {
3110     int c;
3111     char *p;
3112
3113     for (c = 0; c < 256; c++)
3114       {
3115         if (ISDIGIT (c) || ISLOWER (c))
3116           {
3117             mnemonic_chars[c] = c;
3118             register_chars[c] = c;
3119             operand_chars[c] = c;
3120           }
3121         else if (ISUPPER (c))
3122           {
3123             mnemonic_chars[c] = TOLOWER (c);
3124             register_chars[c] = mnemonic_chars[c];
3125             operand_chars[c] = c;
3126           }
3127         else if (c == '{' || c == '}')
3128           {
3129             mnemonic_chars[c] = c;
3130             operand_chars[c] = c;
3131           }
3132 #ifdef SVR4_COMMENT_CHARS
3133         else if (c == '\\' && strchr (i386_comment_chars, '/'))
3134           operand_chars[c] = c;
3135 #endif
3136
3137         if (ISALPHA (c) || ISDIGIT (c))
3138           identifier_chars[c] = c;
3139         else if (c >= 128)
3140           {
3141             identifier_chars[c] = c;
3142             operand_chars[c] = c;
3143           }
3144       }
3145
3146 #ifdef LEX_AT
3147     identifier_chars['@'] = '@';
3148 #endif
3149 #ifdef LEX_QM
3150     identifier_chars['?'] = '?';
3151     operand_chars['?'] = '?';
3152 #endif
3153     mnemonic_chars['_'] = '_';
3154     mnemonic_chars['-'] = '-';
3155     mnemonic_chars['.'] = '.';
3156     identifier_chars['_'] = '_';
3157     identifier_chars['.'] = '.';
3158
3159     for (p = operand_special_chars; *p != '\0'; p++)
3160       operand_chars[(unsigned char) *p] = *p;
3161   }
3162
3163   if (flag_code == CODE_64BIT)
3164     {
3165 #if defined (OBJ_COFF) && defined (TE_PE)
3166       x86_dwarf2_return_column = (OUTPUT_FLAVOR == bfd_target_coff_flavour
3167                                   ? 32 : 16);
3168 #else
3169       x86_dwarf2_return_column = 16;
3170 #endif
3171       x86_cie_data_alignment = -8;
3172     }
3173   else
3174     {
3175       x86_dwarf2_return_column = 8;
3176       x86_cie_data_alignment = -4;
3177     }
3178
3179   /* NB: FUSED_JCC_PADDING frag must have sufficient room so that it
3180      can be turned into BRANCH_PREFIX frag.  */
3181   if (align_branch_prefix_size > MAX_FUSED_JCC_PADDING_SIZE)
3182     abort ();
3183 }
3184
3185 void
3186 i386_print_statistics (FILE *file)
3187 {
3188   htab_print_statistics (file, "i386 opcode", op_hash);
3189   htab_print_statistics (file, "i386 register", reg_hash);
3190 }
3191 \f
3192 #ifdef DEBUG386
3193
3194 /* Debugging routines for md_assemble.  */
3195 static void pte (insn_template *);
3196 static void pt (i386_operand_type);
3197 static void pe (expressionS *);
3198 static void ps (symbolS *);
3199
3200 static void
3201 pi (const char *line, i386_insn *x)
3202 {
3203   unsigned int j;
3204
3205   fprintf (stdout, "%s: template ", line);
3206   pte (&x->tm);
3207   fprintf (stdout, "  address: base %s  index %s  scale %x\n",
3208            x->base_reg ? x->base_reg->reg_name : "none",
3209            x->index_reg ? x->index_reg->reg_name : "none",
3210            x->log2_scale_factor);
3211   fprintf (stdout, "  modrm:  mode %x  reg %x  reg/mem %x\n",
3212            x->rm.mode, x->rm.reg, x->rm.regmem);
3213   fprintf (stdout, "  sib:  base %x  index %x  scale %x\n",
3214            x->sib.base, x->sib.index, x->sib.scale);
3215   fprintf (stdout, "  rex: 64bit %x  extX %x  extY %x  extZ %x\n",
3216            (x->rex & REX_W) != 0,
3217            (x->rex & REX_R) != 0,
3218            (x->rex & REX_X) != 0,
3219            (x->rex & REX_B) != 0);
3220   for (j = 0; j < x->operands; j++)
3221     {
3222       fprintf (stdout, "    #%d:  ", j + 1);
3223       pt (x->types[j]);
3224       fprintf (stdout, "\n");
3225       if (x->types[j].bitfield.class == Reg
3226           || x->types[j].bitfield.class == RegMMX
3227           || x->types[j].bitfield.class == RegSIMD
3228           || x->types[j].bitfield.class == RegMask
3229           || x->types[j].bitfield.class == SReg
3230           || x->types[j].bitfield.class == RegCR
3231           || x->types[j].bitfield.class == RegDR
3232           || x->types[j].bitfield.class == RegTR
3233           || x->types[j].bitfield.class == RegBND)
3234         fprintf (stdout, "%s\n", x->op[j].regs->reg_name);
3235       if (operand_type_check (x->types[j], imm))
3236         pe (x->op[j].imms);
3237       if (operand_type_check (x->types[j], disp))
3238         pe (x->op[j].disps);
3239     }
3240 }
3241
3242 static void
3243 pte (insn_template *t)
3244 {
3245   static const unsigned char opc_pfx[] = { 0, 0x66, 0xf3, 0xf2 };
3246   static const char *const opc_spc[] = {
3247     NULL, "0f", "0f38", "0f3a", NULL, "evexmap5", "evexmap6", NULL,
3248     "XOP08", "XOP09", "XOP0A",
3249   };
3250   unsigned int j;
3251
3252   fprintf (stdout, " %d operands ", t->operands);
3253   if (opc_pfx[t->opcode_modifier.opcodeprefix])
3254     fprintf (stdout, "pfx %x ", opc_pfx[t->opcode_modifier.opcodeprefix]);
3255   if (opc_spc[t->opcode_modifier.opcodespace])
3256     fprintf (stdout, "space %s ", opc_spc[t->opcode_modifier.opcodespace]);
3257   fprintf (stdout, "opcode %x ", t->base_opcode);
3258   if (t->extension_opcode != None)
3259     fprintf (stdout, "ext %x ", t->extension_opcode);
3260   if (t->opcode_modifier.d)
3261     fprintf (stdout, "D");
3262   if (t->opcode_modifier.w)
3263     fprintf (stdout, "W");
3264   fprintf (stdout, "\n");
3265   for (j = 0; j < t->operands; j++)
3266     {
3267       fprintf (stdout, "    #%d type ", j + 1);
3268       pt (t->operand_types[j]);
3269       fprintf (stdout, "\n");
3270     }
3271 }
3272
3273 static void
3274 pe (expressionS *e)
3275 {
3276   fprintf (stdout, "    operation     %d\n", e->X_op);
3277   fprintf (stdout, "    add_number    %" BFD_VMA_FMT "d (%" BFD_VMA_FMT "x)\n",
3278            e->X_add_number, e->X_add_number);
3279   if (e->X_add_symbol)
3280     {
3281       fprintf (stdout, "    add_symbol    ");
3282       ps (e->X_add_symbol);
3283       fprintf (stdout, "\n");
3284     }
3285   if (e->X_op_symbol)
3286     {
3287       fprintf (stdout, "    op_symbol    ");
3288       ps (e->X_op_symbol);
3289       fprintf (stdout, "\n");
3290     }
3291 }
3292
3293 static void
3294 ps (symbolS *s)
3295 {
3296   fprintf (stdout, "%s type %s%s",
3297            S_GET_NAME (s),
3298            S_IS_EXTERNAL (s) ? "EXTERNAL " : "",
3299            segment_name (S_GET_SEGMENT (s)));
3300 }
3301
3302 static struct type_name
3303   {
3304     i386_operand_type mask;
3305     const char *name;
3306   }
3307 const type_names[] =
3308 {
3309   { OPERAND_TYPE_REG8, "r8" },
3310   { OPERAND_TYPE_REG16, "r16" },
3311   { OPERAND_TYPE_REG32, "r32" },
3312   { OPERAND_TYPE_REG64, "r64" },
3313   { OPERAND_TYPE_ACC8, "acc8" },
3314   { OPERAND_TYPE_ACC16, "acc16" },
3315   { OPERAND_TYPE_ACC32, "acc32" },
3316   { OPERAND_TYPE_ACC64, "acc64" },
3317   { OPERAND_TYPE_IMM8, "i8" },
3318   { OPERAND_TYPE_IMM8, "i8s" },
3319   { OPERAND_TYPE_IMM16, "i16" },
3320   { OPERAND_TYPE_IMM32, "i32" },
3321   { OPERAND_TYPE_IMM32S, "i32s" },
3322   { OPERAND_TYPE_IMM64, "i64" },
3323   { OPERAND_TYPE_IMM1, "i1" },
3324   { OPERAND_TYPE_BASEINDEX, "BaseIndex" },
3325   { OPERAND_TYPE_DISP8, "d8" },
3326   { OPERAND_TYPE_DISP16, "d16" },
3327   { OPERAND_TYPE_DISP32, "d32" },
3328   { OPERAND_TYPE_DISP32S, "d32s" },
3329   { OPERAND_TYPE_DISP64, "d64" },
3330   { OPERAND_TYPE_INOUTPORTREG, "InOutPortReg" },
3331   { OPERAND_TYPE_SHIFTCOUNT, "ShiftCount" },
3332   { OPERAND_TYPE_CONTROL, "control reg" },
3333   { OPERAND_TYPE_TEST, "test reg" },
3334   { OPERAND_TYPE_DEBUG, "debug reg" },
3335   { OPERAND_TYPE_FLOATREG, "FReg" },
3336   { OPERAND_TYPE_FLOATACC, "FAcc" },
3337   { OPERAND_TYPE_SREG, "SReg" },
3338   { OPERAND_TYPE_REGMMX, "rMMX" },
3339   { OPERAND_TYPE_REGXMM, "rXMM" },
3340   { OPERAND_TYPE_REGYMM, "rYMM" },
3341   { OPERAND_TYPE_REGZMM, "rZMM" },
3342   { OPERAND_TYPE_REGTMM, "rTMM" },
3343   { OPERAND_TYPE_REGMASK, "Mask reg" },
3344 };
3345
3346 static void
3347 pt (i386_operand_type t)
3348 {
3349   unsigned int j;
3350   i386_operand_type a;
3351
3352   for (j = 0; j < ARRAY_SIZE (type_names); j++)
3353     {
3354       a = operand_type_and (t, type_names[j].mask);
3355       if (operand_type_equal (&a, &type_names[j].mask))
3356         fprintf (stdout, "%s, ",  type_names[j].name);
3357     }
3358   fflush (stdout);
3359 }
3360
3361 #endif /* DEBUG386 */
3362 \f
3363 static bfd_reloc_code_real_type
3364 reloc (unsigned int size,
3365        int pcrel,
3366        int sign,
3367        bfd_reloc_code_real_type other)
3368 {
3369   if (other != NO_RELOC)
3370     {
3371       reloc_howto_type *rel;
3372
3373       if (size == 8)
3374         switch (other)
3375           {
3376           case BFD_RELOC_X86_64_GOT32:
3377             return BFD_RELOC_X86_64_GOT64;
3378             break;
3379           case BFD_RELOC_X86_64_GOTPLT64:
3380             return BFD_RELOC_X86_64_GOTPLT64;
3381             break;
3382           case BFD_RELOC_X86_64_PLTOFF64:
3383             return BFD_RELOC_X86_64_PLTOFF64;
3384             break;
3385           case BFD_RELOC_X86_64_GOTPC32:
3386             other = BFD_RELOC_X86_64_GOTPC64;
3387             break;
3388           case BFD_RELOC_X86_64_GOTPCREL:
3389             other = BFD_RELOC_X86_64_GOTPCREL64;
3390             break;
3391           case BFD_RELOC_X86_64_TPOFF32:
3392             other = BFD_RELOC_X86_64_TPOFF64;
3393             break;
3394           case BFD_RELOC_X86_64_DTPOFF32:
3395             other = BFD_RELOC_X86_64_DTPOFF64;
3396             break;
3397           default:
3398             break;
3399           }
3400
3401 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3402       if (other == BFD_RELOC_SIZE32)
3403         {
3404           if (size == 8)
3405             other = BFD_RELOC_SIZE64;
3406           if (pcrel)
3407             {
3408               as_bad (_("there are no pc-relative size relocations"));
3409               return NO_RELOC;
3410             }
3411         }
3412 #endif
3413
3414       /* Sign-checking 4-byte relocations in 16-/32-bit code is pointless.  */
3415       if (size == 4 && (flag_code != CODE_64BIT || disallow_64bit_reloc))
3416         sign = -1;
3417
3418       rel = bfd_reloc_type_lookup (stdoutput, other);
3419       if (!rel)
3420         as_bad (_("unknown relocation (%u)"), other);
3421       else if (size != bfd_get_reloc_size (rel))
3422         as_bad (_("%u-byte relocation cannot be applied to %u-byte field"),
3423                 bfd_get_reloc_size (rel),
3424                 size);
3425       else if (pcrel && !rel->pc_relative)
3426         as_bad (_("non-pc-relative relocation for pc-relative field"));
3427       else if ((rel->complain_on_overflow == complain_overflow_signed
3428                 && !sign)
3429                || (rel->complain_on_overflow == complain_overflow_unsigned
3430                    && sign > 0))
3431         as_bad (_("relocated field and relocation type differ in signedness"));
3432       else
3433         return other;
3434       return NO_RELOC;
3435     }
3436
3437   if (pcrel)
3438     {
3439       if (!sign)
3440         as_bad (_("there are no unsigned pc-relative relocations"));
3441       switch (size)
3442         {
3443         case 1: return BFD_RELOC_8_PCREL;
3444         case 2: return BFD_RELOC_16_PCREL;
3445         case 4: return BFD_RELOC_32_PCREL;
3446         case 8: return BFD_RELOC_64_PCREL;
3447         }
3448       as_bad (_("cannot do %u byte pc-relative relocation"), size);
3449     }
3450   else
3451     {
3452       if (sign > 0)
3453         switch (size)
3454           {
3455           case 4: return BFD_RELOC_X86_64_32S;
3456           }
3457       else
3458         switch (size)
3459           {
3460           case 1: return BFD_RELOC_8;
3461           case 2: return BFD_RELOC_16;
3462           case 4: return BFD_RELOC_32;
3463           case 8: return BFD_RELOC_64;
3464           }
3465       as_bad (_("cannot do %s %u byte relocation"),
3466               sign > 0 ? "signed" : "unsigned", size);
3467     }
3468
3469   return NO_RELOC;
3470 }
3471
3472 /* Here we decide which fixups can be adjusted to make them relative to
3473    the beginning of the section instead of the symbol.  Basically we need
3474    to make sure that the dynamic relocations are done correctly, so in
3475    some cases we force the original symbol to be used.  */
3476
3477 int
3478 tc_i386_fix_adjustable (fixS *fixP ATTRIBUTE_UNUSED)
3479 {
3480 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3481   if (!IS_ELF)
3482     return 1;
3483
3484   /* Don't adjust pc-relative references to merge sections in 64-bit
3485      mode.  */
3486   if (use_rela_relocations
3487       && (S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_MERGE) != 0
3488       && fixP->fx_pcrel)
3489     return 0;
3490
3491   /* The x86_64 GOTPCREL are represented as 32bit PCrel relocations
3492      and changed later by validate_fix.  */
3493   if (GOT_symbol && fixP->fx_subsy == GOT_symbol
3494       && fixP->fx_r_type == BFD_RELOC_32_PCREL)
3495     return 0;
3496
3497   /* Adjust_reloc_syms doesn't know about the GOT.  Need to keep symbol
3498      for size relocations.  */
3499   if (fixP->fx_r_type == BFD_RELOC_SIZE32
3500       || fixP->fx_r_type == BFD_RELOC_SIZE64
3501       || fixP->fx_r_type == BFD_RELOC_386_GOTOFF
3502       || fixP->fx_r_type == BFD_RELOC_386_GOT32
3503       || fixP->fx_r_type == BFD_RELOC_386_GOT32X
3504       || fixP->fx_r_type == BFD_RELOC_386_TLS_GD
3505       || fixP->fx_r_type == BFD_RELOC_386_TLS_LDM
3506       || fixP->fx_r_type == BFD_RELOC_386_TLS_LDO_32
3507       || fixP->fx_r_type == BFD_RELOC_386_TLS_IE_32
3508       || fixP->fx_r_type == BFD_RELOC_386_TLS_IE
3509       || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTIE
3510       || fixP->fx_r_type == BFD_RELOC_386_TLS_LE_32
3511       || fixP->fx_r_type == BFD_RELOC_386_TLS_LE
3512       || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTDESC
3513       || fixP->fx_r_type == BFD_RELOC_386_TLS_DESC_CALL
3514       || fixP->fx_r_type == BFD_RELOC_X86_64_GOT32
3515       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCREL
3516       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCRELX
3517       || fixP->fx_r_type == BFD_RELOC_X86_64_REX_GOTPCRELX
3518       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSGD
3519       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSLD
3520       || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF32
3521       || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF64
3522       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTTPOFF
3523       || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF32
3524       || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF64
3525       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTOFF64
3526       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPC32_TLSDESC
3527       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSDESC_CALL
3528       || fixP->fx_r_type == BFD_RELOC_VTABLE_INHERIT
3529       || fixP->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
3530     return 0;
3531 #endif
3532   return 1;
3533 }
3534
3535 static INLINE bool
3536 want_disp32 (const insn_template *t)
3537 {
3538   return flag_code != CODE_64BIT
3539          || i.prefix[ADDR_PREFIX]
3540          || (t->base_opcode == 0x8d
3541              && t->opcode_modifier.opcodespace == SPACE_BASE
3542              && (!i.types[1].bitfield.qword
3543                 || t->opcode_modifier.size == SIZE32));
3544 }
3545
3546 static int
3547 intel_float_operand (const char *mnemonic)
3548 {
3549   /* Note that the value returned is meaningful only for opcodes with (memory)
3550      operands, hence the code here is free to improperly handle opcodes that
3551      have no operands (for better performance and smaller code). */
3552
3553   if (mnemonic[0] != 'f')
3554     return 0; /* non-math */
3555
3556   switch (mnemonic[1])
3557     {
3558     /* fclex, fdecstp, fdisi, femms, feni, fincstp, finit, fsetpm, and
3559        the fs segment override prefix not currently handled because no
3560        call path can make opcodes without operands get here */
3561     case 'i':
3562       return 2 /* integer op */;
3563     case 'l':
3564       if (mnemonic[2] == 'd' && (mnemonic[3] == 'c' || mnemonic[3] == 'e'))
3565         return 3; /* fldcw/fldenv */
3566       break;
3567     case 'n':
3568       if (mnemonic[2] != 'o' /* fnop */)
3569         return 3; /* non-waiting control op */
3570       break;
3571     case 'r':
3572       if (mnemonic[2] == 's')
3573         return 3; /* frstor/frstpm */
3574       break;
3575     case 's':
3576       if (mnemonic[2] == 'a')
3577         return 3; /* fsave */
3578       if (mnemonic[2] == 't')
3579         {
3580           switch (mnemonic[3])
3581             {
3582             case 'c': /* fstcw */
3583             case 'd': /* fstdw */
3584             case 'e': /* fstenv */
3585             case 's': /* fsts[gw] */
3586               return 3;
3587             }
3588         }
3589       break;
3590     case 'x':
3591       if (mnemonic[2] == 'r' || mnemonic[2] == 's')
3592         return 0; /* fxsave/fxrstor are not really math ops */
3593       break;
3594     }
3595
3596   return 1;
3597 }
3598
3599 static INLINE void
3600 install_template (const insn_template *t)
3601 {
3602   unsigned int l;
3603
3604   i.tm = *t;
3605
3606   /* Note that for pseudo prefixes this produces a length of 1. But for them
3607      the length isn't interesting at all.  */
3608   for (l = 1; l < 4; ++l)
3609     if (!(t->base_opcode >> (8 * l)))
3610       break;
3611
3612   i.opcode_length = l;
3613 }
3614
3615 /* Build the VEX prefix.  */
3616
3617 static void
3618 build_vex_prefix (const insn_template *t)
3619 {
3620   unsigned int register_specifier;
3621   unsigned int vector_length;
3622   unsigned int w;
3623
3624   /* Check register specifier.  */
3625   if (i.vex.register_specifier)
3626     {
3627       register_specifier =
3628         ~register_number (i.vex.register_specifier) & 0xf;
3629       gas_assert ((i.vex.register_specifier->reg_flags & RegVRex) == 0);
3630     }
3631   else
3632     register_specifier = 0xf;
3633
3634   /* Use 2-byte VEX prefix by swapping destination and source operand
3635      if there are more than 1 register operand.  */
3636   if (i.reg_operands > 1
3637       && i.vec_encoding != vex_encoding_vex3
3638       && i.dir_encoding == dir_encoding_default
3639       && i.operands == i.reg_operands
3640       && operand_type_equal (&i.types[0], &i.types[i.operands - 1])
3641       && i.tm.opcode_modifier.opcodespace == SPACE_0F
3642       && (i.tm.opcode_modifier.load || i.tm.opcode_modifier.d)
3643       && i.rex == REX_B)
3644     {
3645       unsigned int xchg = i.operands - 1;
3646       union i386_op temp_op;
3647       i386_operand_type temp_type;
3648
3649       temp_type = i.types[xchg];
3650       i.types[xchg] = i.types[0];
3651       i.types[0] = temp_type;
3652       temp_op = i.op[xchg];
3653       i.op[xchg] = i.op[0];
3654       i.op[0] = temp_op;
3655
3656       gas_assert (i.rm.mode == 3);
3657
3658       i.rex = REX_R;
3659       xchg = i.rm.regmem;
3660       i.rm.regmem = i.rm.reg;
3661       i.rm.reg = xchg;
3662
3663       if (i.tm.opcode_modifier.d)
3664         i.tm.base_opcode ^= (i.tm.base_opcode & 0xee) != 0x6e
3665                             ? Opcode_SIMD_FloatD : Opcode_SIMD_IntD;
3666       else /* Use the next insn.  */
3667         install_template (&t[1]);
3668     }
3669
3670   /* Use 2-byte VEX prefix by swapping commutative source operands if there
3671      are no memory operands and at least 3 register ones.  */
3672   if (i.reg_operands >= 3
3673       && i.vec_encoding != vex_encoding_vex3
3674       && i.reg_operands == i.operands - i.imm_operands
3675       && i.tm.opcode_modifier.vex
3676       && i.tm.opcode_modifier.commutative
3677       && (i.tm.opcode_modifier.sse2avx || optimize > 1)
3678       && i.rex == REX_B
3679       && i.vex.register_specifier
3680       && !(i.vex.register_specifier->reg_flags & RegRex))
3681     {
3682       unsigned int xchg = i.operands - i.reg_operands;
3683       union i386_op temp_op;
3684       i386_operand_type temp_type;
3685
3686       gas_assert (i.tm.opcode_modifier.opcodespace == SPACE_0F);
3687       gas_assert (!i.tm.opcode_modifier.sae);
3688       gas_assert (operand_type_equal (&i.types[i.operands - 2],
3689                                       &i.types[i.operands - 3]));
3690       gas_assert (i.rm.mode == 3);
3691
3692       temp_type = i.types[xchg];
3693       i.types[xchg] = i.types[xchg + 1];
3694       i.types[xchg + 1] = temp_type;
3695       temp_op = i.op[xchg];
3696       i.op[xchg] = i.op[xchg + 1];
3697       i.op[xchg + 1] = temp_op;
3698
3699       i.rex = 0;
3700       xchg = i.rm.regmem | 8;
3701       i.rm.regmem = ~register_specifier & 0xf;
3702       gas_assert (!(i.rm.regmem & 8));
3703       i.vex.register_specifier += xchg - i.rm.regmem;
3704       register_specifier = ~xchg & 0xf;
3705     }
3706
3707   if (i.tm.opcode_modifier.vex == VEXScalar)
3708     vector_length = avxscalar;
3709   else if (i.tm.opcode_modifier.vex == VEX256)
3710     vector_length = 1;
3711   else
3712     {
3713       unsigned int op;
3714
3715       /* Determine vector length from the last multi-length vector
3716          operand.  */
3717       vector_length = 0;
3718       for (op = t->operands; op--;)
3719         if (t->operand_types[op].bitfield.xmmword
3720             && t->operand_types[op].bitfield.ymmword
3721             && i.types[op].bitfield.ymmword)
3722           {
3723             vector_length = 1;
3724             break;
3725           }
3726     }
3727
3728   /* Check the REX.W bit and VEXW.  */
3729   if (i.tm.opcode_modifier.vexw == VEXWIG)
3730     w = (vexwig == vexw1 || (i.rex & REX_W)) ? 1 : 0;
3731   else if (i.tm.opcode_modifier.vexw)
3732     w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
3733   else
3734     w = (flag_code == CODE_64BIT ? i.rex & REX_W : vexwig == vexw1) ? 1 : 0;
3735
3736   /* Use 2-byte VEX prefix if possible.  */
3737   if (w == 0
3738       && i.vec_encoding != vex_encoding_vex3
3739       && i.tm.opcode_modifier.opcodespace == SPACE_0F
3740       && (i.rex & (REX_W | REX_X | REX_B)) == 0)
3741     {
3742       /* 2-byte VEX prefix.  */
3743       unsigned int r;
3744
3745       i.vex.length = 2;
3746       i.vex.bytes[0] = 0xc5;
3747
3748       /* Check the REX.R bit.  */
3749       r = (i.rex & REX_R) ? 0 : 1;
3750       i.vex.bytes[1] = (r << 7
3751                         | register_specifier << 3
3752                         | vector_length << 2
3753                         | i.tm.opcode_modifier.opcodeprefix);
3754     }
3755   else
3756     {
3757       /* 3-byte VEX prefix.  */
3758       i.vex.length = 3;
3759
3760       switch (i.tm.opcode_modifier.opcodespace)
3761         {
3762         case SPACE_0F:
3763         case SPACE_0F38:
3764         case SPACE_0F3A:
3765           i.vex.bytes[0] = 0xc4;
3766           break;
3767         case SPACE_XOP08:
3768         case SPACE_XOP09:
3769         case SPACE_XOP0A:
3770           i.vex.bytes[0] = 0x8f;
3771           break;
3772         default:
3773           abort ();
3774         }
3775
3776       /* The high 3 bits of the second VEX byte are 1's compliment
3777          of RXB bits from REX.  */
3778       i.vex.bytes[1] = (~i.rex & 0x7) << 5 | i.tm.opcode_modifier.opcodespace;
3779
3780       i.vex.bytes[2] = (w << 7
3781                         | register_specifier << 3
3782                         | vector_length << 2
3783                         | i.tm.opcode_modifier.opcodeprefix);
3784     }
3785 }
3786
3787 static INLINE bool
3788 is_evex_encoding (const insn_template *t)
3789 {
3790   return t->opcode_modifier.evex || t->opcode_modifier.disp8memshift
3791          || t->opcode_modifier.broadcast || t->opcode_modifier.masking
3792          || t->opcode_modifier.sae;
3793 }
3794
3795 static INLINE bool
3796 is_any_vex_encoding (const insn_template *t)
3797 {
3798   return t->opcode_modifier.vex || is_evex_encoding (t);
3799 }
3800
3801 /* Build the EVEX prefix.  */
3802
3803 static void
3804 build_evex_prefix (void)
3805 {
3806   unsigned int register_specifier, w;
3807   rex_byte vrex_used = 0;
3808
3809   /* Check register specifier.  */
3810   if (i.vex.register_specifier)
3811     {
3812       gas_assert ((i.vrex & REX_X) == 0);
3813
3814       register_specifier = i.vex.register_specifier->reg_num;
3815       if ((i.vex.register_specifier->reg_flags & RegRex))
3816         register_specifier += 8;
3817       /* The upper 16 registers are encoded in the fourth byte of the
3818          EVEX prefix.  */
3819       if (!(i.vex.register_specifier->reg_flags & RegVRex))
3820         i.vex.bytes[3] = 0x8;
3821       register_specifier = ~register_specifier & 0xf;
3822     }
3823   else
3824     {
3825       register_specifier = 0xf;
3826
3827       /* Encode upper 16 vector index register in the fourth byte of
3828          the EVEX prefix.  */
3829       if (!(i.vrex & REX_X))
3830         i.vex.bytes[3] = 0x8;
3831       else
3832         vrex_used |= REX_X;
3833     }
3834
3835   /* 4 byte EVEX prefix.  */
3836   i.vex.length = 4;
3837   i.vex.bytes[0] = 0x62;
3838
3839   /* The high 3 bits of the second EVEX byte are 1's compliment of RXB
3840      bits from REX.  */
3841   gas_assert (i.tm.opcode_modifier.opcodespace >= SPACE_0F);
3842   gas_assert (i.tm.opcode_modifier.opcodespace <= SPACE_EVEXMAP6);
3843   i.vex.bytes[1] = (~i.rex & 0x7) << 5 | i.tm.opcode_modifier.opcodespace;
3844
3845   /* The fifth bit of the second EVEX byte is 1's compliment of the
3846      REX_R bit in VREX.  */
3847   if (!(i.vrex & REX_R))
3848     i.vex.bytes[1] |= 0x10;
3849   else
3850     vrex_used |= REX_R;
3851
3852   if ((i.reg_operands + i.imm_operands) == i.operands)
3853     {
3854       /* When all operands are registers, the REX_X bit in REX is not
3855          used.  We reuse it to encode the upper 16 registers, which is
3856          indicated by the REX_B bit in VREX.  The REX_X bit is encoded
3857          as 1's compliment.  */
3858       if ((i.vrex & REX_B))
3859         {
3860           vrex_used |= REX_B;
3861           i.vex.bytes[1] &= ~0x40;
3862         }
3863     }
3864
3865   /* EVEX instructions shouldn't need the REX prefix.  */
3866   i.vrex &= ~vrex_used;
3867   gas_assert (i.vrex == 0);
3868
3869   /* Check the REX.W bit and VEXW.  */
3870   if (i.tm.opcode_modifier.vexw == VEXWIG)
3871     w = (evexwig == evexw1 || (i.rex & REX_W)) ? 1 : 0;
3872   else if (i.tm.opcode_modifier.vexw)
3873     w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
3874   else
3875     w = (flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1) ? 1 : 0;
3876
3877   /* The third byte of the EVEX prefix.  */
3878   i.vex.bytes[2] = ((w << 7)
3879                     | (register_specifier << 3)
3880                     | 4 /* Encode the U bit.  */
3881                     | i.tm.opcode_modifier.opcodeprefix);
3882
3883   /* The fourth byte of the EVEX prefix.  */
3884   /* The zeroing-masking bit.  */
3885   if (i.mask.reg && i.mask.zeroing)
3886     i.vex.bytes[3] |= 0x80;
3887
3888   /* Don't always set the broadcast bit if there is no RC.  */
3889   if (i.rounding.type == rc_none)
3890     {
3891       /* Encode the vector length.  */
3892       unsigned int vec_length;
3893
3894       if (!i.tm.opcode_modifier.evex
3895           || i.tm.opcode_modifier.evex == EVEXDYN)
3896         {
3897           unsigned int op;
3898
3899           /* Determine vector length from the last multi-length vector
3900              operand.  */
3901           for (op = i.operands; op--;)
3902             if (i.tm.operand_types[op].bitfield.xmmword
3903                 + i.tm.operand_types[op].bitfield.ymmword
3904                 + i.tm.operand_types[op].bitfield.zmmword > 1)
3905               {
3906                 if (i.types[op].bitfield.zmmword)
3907                   {
3908                     i.tm.opcode_modifier.evex = EVEX512;
3909                     break;
3910                   }
3911                 else if (i.types[op].bitfield.ymmword)
3912                   {
3913                     i.tm.opcode_modifier.evex = EVEX256;
3914                     break;
3915                   }
3916                 else if (i.types[op].bitfield.xmmword)
3917                   {
3918                     i.tm.opcode_modifier.evex = EVEX128;
3919                     break;
3920                   }
3921                 else if (i.broadcast.type && op == i.broadcast.operand)
3922                   {
3923                     switch (i.broadcast.bytes)
3924                       {
3925                         case 64:
3926                           i.tm.opcode_modifier.evex = EVEX512;
3927                           break;
3928                         case 32:
3929                           i.tm.opcode_modifier.evex = EVEX256;
3930                           break;
3931                         case 16:
3932                           i.tm.opcode_modifier.evex = EVEX128;
3933                           break;
3934                         default:
3935                           abort ();
3936                       }
3937                     break;
3938                   }
3939               }
3940
3941           if (op >= MAX_OPERANDS)
3942             abort ();
3943         }
3944
3945       switch (i.tm.opcode_modifier.evex)
3946         {
3947         case EVEXLIG: /* LL' is ignored */
3948           vec_length = evexlig << 5;
3949           break;
3950         case EVEX128:
3951           vec_length = 0 << 5;
3952           break;
3953         case EVEX256:
3954           vec_length = 1 << 5;
3955           break;
3956         case EVEX512:
3957           vec_length = 2 << 5;
3958           break;
3959         default:
3960           abort ();
3961           break;
3962         }
3963       i.vex.bytes[3] |= vec_length;
3964       /* Encode the broadcast bit.  */
3965       if (i.broadcast.type)
3966         i.vex.bytes[3] |= 0x10;
3967     }
3968   else if (i.rounding.type != saeonly)
3969     i.vex.bytes[3] |= 0x10 | (i.rounding.type << 5);
3970   else
3971     i.vex.bytes[3] |= 0x10 | (evexrcig << 5);
3972
3973   if (i.mask.reg)
3974     i.vex.bytes[3] |= i.mask.reg->reg_num;
3975 }
3976
3977 static void
3978 process_immext (void)
3979 {
3980   expressionS *exp;
3981
3982   /* These AMD 3DNow! and SSE2 instructions have an opcode suffix
3983      which is coded in the same place as an 8-bit immediate field
3984      would be.  Here we fake an 8-bit immediate operand from the
3985      opcode suffix stored in tm.extension_opcode.
3986
3987      AVX instructions also use this encoding, for some of
3988      3 argument instructions.  */
3989
3990   gas_assert (i.imm_operands <= 1
3991               && (i.operands <= 2
3992                   || (is_any_vex_encoding (&i.tm)
3993                       && i.operands <= 4)));
3994
3995   exp = &im_expressions[i.imm_operands++];
3996   i.op[i.operands].imms = exp;
3997   i.types[i.operands] = imm8;
3998   i.operands++;
3999   exp->X_op = O_constant;
4000   exp->X_add_number = i.tm.extension_opcode;
4001   i.tm.extension_opcode = None;
4002 }
4003
4004
4005 static int
4006 check_hle (void)
4007 {
4008   switch (i.tm.opcode_modifier.prefixok)
4009     {
4010     default:
4011       abort ();
4012     case PrefixLock:
4013     case PrefixNone:
4014     case PrefixNoTrack:
4015     case PrefixRep:
4016       as_bad (_("invalid instruction `%s' after `%s'"),
4017               i.tm.name, i.hle_prefix);
4018       return 0;
4019     case PrefixHLELock:
4020       if (i.prefix[LOCK_PREFIX])
4021         return 1;
4022       as_bad (_("missing `lock' with `%s'"), i.hle_prefix);
4023       return 0;
4024     case PrefixHLEAny:
4025       return 1;
4026     case PrefixHLERelease:
4027       if (i.prefix[HLE_PREFIX] != XRELEASE_PREFIX_OPCODE)
4028         {
4029           as_bad (_("instruction `%s' after `xacquire' not allowed"),
4030                   i.tm.name);
4031           return 0;
4032         }
4033       if (i.mem_operands == 0 || !(i.flags[i.operands - 1] & Operand_Mem))
4034         {
4035           as_bad (_("memory destination needed for instruction `%s'"
4036                     " after `xrelease'"), i.tm.name);
4037           return 0;
4038         }
4039       return 1;
4040     }
4041 }
4042
4043 /* Encode aligned vector move as unaligned vector move.  */
4044
4045 static void
4046 encode_with_unaligned_vector_move (void)
4047 {
4048   switch (i.tm.base_opcode)
4049     {
4050     case 0x28:  /* Load instructions.  */
4051     case 0x29:  /* Store instructions.  */
4052       /* movaps/movapd/vmovaps/vmovapd.  */
4053       if (i.tm.opcode_modifier.opcodespace == SPACE_0F
4054           && i.tm.opcode_modifier.opcodeprefix <= PREFIX_0X66)
4055         i.tm.base_opcode = 0x10 | (i.tm.base_opcode & 1);
4056       break;
4057     case 0x6f:  /* Load instructions.  */
4058     case 0x7f:  /* Store instructions.  */
4059       /* movdqa/vmovdqa/vmovdqa64/vmovdqa32. */
4060       if (i.tm.opcode_modifier.opcodespace == SPACE_0F
4061           && i.tm.opcode_modifier.opcodeprefix == PREFIX_0X66)
4062         i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
4063       break;
4064     default:
4065       break;
4066     }
4067 }
4068
4069 /* Try the shortest encoding by shortening operand size.  */
4070
4071 static void
4072 optimize_encoding (void)
4073 {
4074   unsigned int j;
4075
4076   if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
4077       && i.tm.base_opcode == 0x8d)
4078     {
4079       /* Optimize: -O:
4080            lea symbol, %rN    -> mov $symbol, %rN
4081            lea (%rM), %rN     -> mov %rM, %rN
4082            lea (,%rM,1), %rN  -> mov %rM, %rN
4083
4084            and in 32-bit mode for 16-bit addressing
4085
4086            lea (%rM), %rN     -> movzx %rM, %rN
4087
4088            and in 64-bit mode zap 32-bit addressing in favor of using a
4089            32-bit (or less) destination.
4090        */
4091       if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
4092         {
4093           if (!i.op[1].regs->reg_type.bitfield.word)
4094             i.tm.opcode_modifier.size = SIZE32;
4095           i.prefix[ADDR_PREFIX] = 0;
4096         }
4097
4098       if (!i.index_reg && !i.base_reg)
4099         {
4100           /* Handle:
4101                lea symbol, %rN    -> mov $symbol, %rN
4102            */
4103           if (flag_code == CODE_64BIT)
4104             {
4105               /* Don't transform a relocation to a 16-bit one.  */
4106               if (i.op[0].disps
4107                   && i.op[0].disps->X_op != O_constant
4108                   && i.op[1].regs->reg_type.bitfield.word)
4109                 return;
4110
4111               if (!i.op[1].regs->reg_type.bitfield.qword
4112                   || i.tm.opcode_modifier.size == SIZE32)
4113                 {
4114                   i.tm.base_opcode = 0xb8;
4115                   i.tm.opcode_modifier.modrm = 0;
4116                   if (!i.op[1].regs->reg_type.bitfield.word)
4117                     i.types[0].bitfield.imm32 = 1;
4118                   else
4119                     {
4120                       i.tm.opcode_modifier.size = SIZE16;
4121                       i.types[0].bitfield.imm16 = 1;
4122                     }
4123                 }
4124               else
4125                 {
4126                   /* Subject to further optimization below.  */
4127                   i.tm.base_opcode = 0xc7;
4128                   i.tm.extension_opcode = 0;
4129                   i.types[0].bitfield.imm32s = 1;
4130                   i.types[0].bitfield.baseindex = 0;
4131                 }
4132             }
4133           /* Outside of 64-bit mode address and operand sizes have to match if
4134              a relocation is involved, as otherwise we wouldn't (currently) or
4135              even couldn't express the relocation correctly.  */
4136           else if (i.op[0].disps
4137                    && i.op[0].disps->X_op != O_constant
4138                    && ((!i.prefix[ADDR_PREFIX])
4139                        != (flag_code == CODE_32BIT
4140                            ? i.op[1].regs->reg_type.bitfield.dword
4141                            : i.op[1].regs->reg_type.bitfield.word)))
4142             return;
4143           /* In 16-bit mode converting LEA with 16-bit addressing and a 32-bit
4144              destination is going to grow encoding size.  */
4145           else if (flag_code == CODE_16BIT
4146                    && (optimize <= 1 || optimize_for_space)
4147                    && !i.prefix[ADDR_PREFIX]
4148                    && i.op[1].regs->reg_type.bitfield.dword)
4149             return;
4150           else
4151             {
4152               i.tm.base_opcode = 0xb8;
4153               i.tm.opcode_modifier.modrm = 0;
4154               if (i.op[1].regs->reg_type.bitfield.dword)
4155                 i.types[0].bitfield.imm32 = 1;
4156               else
4157                 i.types[0].bitfield.imm16 = 1;
4158
4159               if (i.op[0].disps
4160                   && i.op[0].disps->X_op == O_constant
4161                   && i.op[1].regs->reg_type.bitfield.dword
4162                   /* NB: Add () to !i.prefix[ADDR_PREFIX] to silence
4163                      GCC 5. */
4164                   && (!i.prefix[ADDR_PREFIX]) != (flag_code == CODE_32BIT))
4165                 i.op[0].disps->X_add_number &= 0xffff;
4166             }
4167
4168           i.tm.operand_types[0] = i.types[0];
4169           i.imm_operands = 1;
4170           if (!i.op[0].imms)
4171             {
4172               i.op[0].imms = &im_expressions[0];
4173               i.op[0].imms->X_op = O_absent;
4174             }
4175         }
4176       else if (i.op[0].disps
4177                   && (i.op[0].disps->X_op != O_constant
4178                       || i.op[0].disps->X_add_number))
4179         return;
4180       else
4181         {
4182           /* Handle:
4183                lea (%rM), %rN     -> mov %rM, %rN
4184                lea (,%rM,1), %rN  -> mov %rM, %rN
4185                lea (%rM), %rN     -> movzx %rM, %rN
4186            */
4187           const reg_entry *addr_reg;
4188
4189           if (!i.index_reg && i.base_reg->reg_num != RegIP)
4190             addr_reg = i.base_reg;
4191           else if (!i.base_reg
4192                    && i.index_reg->reg_num != RegIZ
4193                    && !i.log2_scale_factor)
4194             addr_reg = i.index_reg;
4195           else
4196             return;
4197
4198           if (addr_reg->reg_type.bitfield.word
4199               && i.op[1].regs->reg_type.bitfield.dword)
4200             {
4201               if (flag_code != CODE_32BIT)
4202                 return;
4203               i.tm.opcode_modifier.opcodespace = SPACE_0F;
4204               i.tm.base_opcode = 0xb7;
4205             }
4206           else
4207             i.tm.base_opcode = 0x8b;
4208
4209           if (addr_reg->reg_type.bitfield.dword
4210               && i.op[1].regs->reg_type.bitfield.qword)
4211             i.tm.opcode_modifier.size = SIZE32;
4212
4213           i.op[0].regs = addr_reg;
4214           i.reg_operands = 2;
4215         }
4216
4217       i.mem_operands = 0;
4218       i.disp_operands = 0;
4219       i.prefix[ADDR_PREFIX] = 0;
4220       i.prefix[SEG_PREFIX] = 0;
4221       i.seg[0] = NULL;
4222     }
4223
4224   if (optimize_for_space
4225       && i.tm.opcode_modifier.opcodespace == SPACE_BASE
4226       && i.reg_operands == 1
4227       && i.imm_operands == 1
4228       && !i.types[1].bitfield.byte
4229       && i.op[0].imms->X_op == O_constant
4230       && fits_in_imm7 (i.op[0].imms->X_add_number)
4231       && (i.tm.base_opcode == 0xa8
4232           || (i.tm.base_opcode == 0xf6
4233               && i.tm.extension_opcode == 0x0)))
4234     {
4235       /* Optimize: -Os:
4236            test $imm7, %r64/%r32/%r16  -> test $imm7, %r8
4237        */
4238       unsigned int base_regnum = i.op[1].regs->reg_num;
4239       if (flag_code == CODE_64BIT || base_regnum < 4)
4240         {
4241           i.types[1].bitfield.byte = 1;
4242           /* Ignore the suffix.  */
4243           i.suffix = 0;
4244           /* Convert to byte registers.  */
4245           if (i.types[1].bitfield.word)
4246             j = 16;
4247           else if (i.types[1].bitfield.dword)
4248             j = 32;
4249           else
4250             j = 48;
4251           if (!(i.op[1].regs->reg_flags & RegRex) && base_regnum < 4)
4252             j += 8;
4253           i.op[1].regs -= j;
4254         }
4255     }
4256   else if (flag_code == CODE_64BIT
4257            && i.tm.opcode_modifier.opcodespace == SPACE_BASE
4258            && ((i.types[1].bitfield.qword
4259                 && i.reg_operands == 1
4260                 && i.imm_operands == 1
4261                 && i.op[0].imms->X_op == O_constant
4262                 && ((i.tm.base_opcode == 0xb8
4263                      && i.tm.extension_opcode == None
4264                      && fits_in_unsigned_long (i.op[0].imms->X_add_number))
4265                     || (fits_in_imm31 (i.op[0].imms->X_add_number)
4266                         && ((i.tm.base_opcode == 0x24
4267                              || i.tm.base_opcode == 0xa8)
4268                             || (i.tm.base_opcode == 0x80
4269                                 && i.tm.extension_opcode == 0x4)
4270                             || ((i.tm.base_opcode == 0xf6
4271                                  || (i.tm.base_opcode | 1) == 0xc7)
4272                                 && i.tm.extension_opcode == 0x0)))
4273                     || (fits_in_imm7 (i.op[0].imms->X_add_number)
4274                         && i.tm.base_opcode == 0x83
4275                         && i.tm.extension_opcode == 0x4)))
4276                || (i.types[0].bitfield.qword
4277                    && ((i.reg_operands == 2
4278                         && i.op[0].regs == i.op[1].regs
4279                         && (i.tm.base_opcode == 0x30
4280                             || i.tm.base_opcode == 0x28))
4281                        || (i.reg_operands == 1
4282                            && i.operands == 1
4283                            && i.tm.base_opcode == 0x30)))))
4284     {
4285       /* Optimize: -O:
4286            andq $imm31, %r64   -> andl $imm31, %r32
4287            andq $imm7, %r64    -> andl $imm7, %r32
4288            testq $imm31, %r64  -> testl $imm31, %r32
4289            xorq %r64, %r64     -> xorl %r32, %r32
4290            subq %r64, %r64     -> subl %r32, %r32
4291            movq $imm31, %r64   -> movl $imm31, %r32
4292            movq $imm32, %r64   -> movl $imm32, %r32
4293         */
4294       i.tm.opcode_modifier.norex64 = 1;
4295       if (i.tm.base_opcode == 0xb8 || (i.tm.base_opcode | 1) == 0xc7)
4296         {
4297           /* Handle
4298                movq $imm31, %r64   -> movl $imm31, %r32
4299                movq $imm32, %r64   -> movl $imm32, %r32
4300            */
4301           i.tm.operand_types[0].bitfield.imm32 = 1;
4302           i.tm.operand_types[0].bitfield.imm32s = 0;
4303           i.tm.operand_types[0].bitfield.imm64 = 0;
4304           i.types[0].bitfield.imm32 = 1;
4305           i.types[0].bitfield.imm32s = 0;
4306           i.types[0].bitfield.imm64 = 0;
4307           i.types[1].bitfield.dword = 1;
4308           i.types[1].bitfield.qword = 0;
4309           if ((i.tm.base_opcode | 1) == 0xc7)
4310             {
4311               /* Handle
4312                    movq $imm31, %r64   -> movl $imm31, %r32
4313                */
4314               i.tm.base_opcode = 0xb8;
4315               i.tm.extension_opcode = None;
4316               i.tm.opcode_modifier.w = 0;
4317               i.tm.opcode_modifier.modrm = 0;
4318             }
4319         }
4320     }
4321   else if (optimize > 1
4322            && !optimize_for_space
4323            && i.tm.opcode_modifier.opcodespace == SPACE_BASE
4324            && i.reg_operands == 2
4325            && i.op[0].regs == i.op[1].regs
4326            && ((i.tm.base_opcode & ~(Opcode_D | 1)) == 0x8
4327                || (i.tm.base_opcode & ~(Opcode_D | 1)) == 0x20)
4328            && (flag_code != CODE_64BIT || !i.types[0].bitfield.dword))
4329     {
4330       /* Optimize: -O2:
4331            andb %rN, %rN  -> testb %rN, %rN
4332            andw %rN, %rN  -> testw %rN, %rN
4333            andq %rN, %rN  -> testq %rN, %rN
4334            orb %rN, %rN   -> testb %rN, %rN
4335            orw %rN, %rN   -> testw %rN, %rN
4336            orq %rN, %rN   -> testq %rN, %rN
4337
4338            and outside of 64-bit mode
4339
4340            andl %rN, %rN  -> testl %rN, %rN
4341            orl %rN, %rN   -> testl %rN, %rN
4342        */
4343       i.tm.base_opcode = 0x84 | (i.tm.base_opcode & 1);
4344     }
4345   else if (i.reg_operands == 3
4346            && i.op[0].regs == i.op[1].regs
4347            && !i.types[2].bitfield.xmmword
4348            && (i.tm.opcode_modifier.vex
4349                || ((!i.mask.reg || i.mask.zeroing)
4350                    && i.rounding.type == rc_none
4351                    && is_evex_encoding (&i.tm)
4352                    && (i.vec_encoding != vex_encoding_evex
4353                        || cpu_arch_isa_flags.bitfield.cpuavx512vl
4354                        || i.tm.cpu_flags.bitfield.cpuavx512vl
4355                        || (i.tm.operand_types[2].bitfield.zmmword
4356                            && i.types[2].bitfield.ymmword))))
4357            && ((i.tm.base_opcode == 0x55
4358                 || i.tm.base_opcode == 0x57
4359                 || i.tm.base_opcode == 0xdf
4360                 || i.tm.base_opcode == 0xef
4361                 || i.tm.base_opcode == 0xf8
4362                 || i.tm.base_opcode == 0xf9
4363                 || i.tm.base_opcode == 0xfa
4364                 || i.tm.base_opcode == 0xfb
4365                 || i.tm.base_opcode == 0x42
4366                 || i.tm.base_opcode == 0x47)
4367                && i.tm.extension_opcode == None))
4368     {
4369       /* Optimize: -O1:
4370            VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd,
4371            vpsubq and vpsubw:
4372              EVEX VOP %zmmM, %zmmM, %zmmN
4373                -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4374                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4375              EVEX VOP %ymmM, %ymmM, %ymmN
4376                -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4377                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4378              VEX VOP %ymmM, %ymmM, %ymmN
4379                -> VEX VOP %xmmM, %xmmM, %xmmN
4380            VOP, one of vpandn and vpxor:
4381              VEX VOP %ymmM, %ymmM, %ymmN
4382                -> VEX VOP %xmmM, %xmmM, %xmmN
4383            VOP, one of vpandnd and vpandnq:
4384              EVEX VOP %zmmM, %zmmM, %zmmN
4385                -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4386                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4387              EVEX VOP %ymmM, %ymmM, %ymmN
4388                -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4389                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4390            VOP, one of vpxord and vpxorq:
4391              EVEX VOP %zmmM, %zmmM, %zmmN
4392                -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4393                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4394              EVEX VOP %ymmM, %ymmM, %ymmN
4395                -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4396                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4397            VOP, one of kxord and kxorq:
4398              VEX VOP %kM, %kM, %kN
4399                -> VEX kxorw %kM, %kM, %kN
4400            VOP, one of kandnd and kandnq:
4401              VEX VOP %kM, %kM, %kN
4402                -> VEX kandnw %kM, %kM, %kN
4403        */
4404       if (is_evex_encoding (&i.tm))
4405         {
4406           if (i.vec_encoding != vex_encoding_evex)
4407             {
4408               i.tm.opcode_modifier.vex = VEX128;
4409               i.tm.opcode_modifier.vexw = VEXW0;
4410               i.tm.opcode_modifier.evex = 0;
4411             }
4412           else if (optimize > 1)
4413             i.tm.opcode_modifier.evex = EVEX128;
4414           else
4415             return;
4416         }
4417       else if (i.tm.operand_types[0].bitfield.class == RegMask)
4418         {
4419           i.tm.opcode_modifier.opcodeprefix = PREFIX_NONE;
4420           i.tm.opcode_modifier.vexw = VEXW0;
4421         }
4422       else
4423         i.tm.opcode_modifier.vex = VEX128;
4424
4425       if (i.tm.opcode_modifier.vex)
4426         for (j = 0; j < 3; j++)
4427           {
4428             i.types[j].bitfield.xmmword = 1;
4429             i.types[j].bitfield.ymmword = 0;
4430           }
4431     }
4432   else if (i.vec_encoding != vex_encoding_evex
4433            && !i.types[0].bitfield.zmmword
4434            && !i.types[1].bitfield.zmmword
4435            && !i.mask.reg
4436            && !i.broadcast.type
4437            && is_evex_encoding (&i.tm)
4438            && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
4439                || (i.tm.base_opcode & ~4) == 0xdb
4440                || (i.tm.base_opcode & ~4) == 0xeb)
4441            && i.tm.extension_opcode == None)
4442     {
4443       /* Optimize: -O1:
4444            VOP, one of vmovdqa32, vmovdqa64, vmovdqu8, vmovdqu16,
4445            vmovdqu32 and vmovdqu64:
4446              EVEX VOP %xmmM, %xmmN
4447                -> VEX vmovdqa|vmovdqu %xmmM, %xmmN (M and N < 16)
4448              EVEX VOP %ymmM, %ymmN
4449                -> VEX vmovdqa|vmovdqu %ymmM, %ymmN (M and N < 16)
4450              EVEX VOP %xmmM, mem
4451                -> VEX vmovdqa|vmovdqu %xmmM, mem (M < 16)
4452              EVEX VOP %ymmM, mem
4453                -> VEX vmovdqa|vmovdqu %ymmM, mem (M < 16)
4454              EVEX VOP mem, %xmmN
4455                -> VEX mvmovdqa|vmovdquem, %xmmN (N < 16)
4456              EVEX VOP mem, %ymmN
4457                -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16)
4458            VOP, one of vpand, vpandn, vpor, vpxor:
4459              EVEX VOP{d,q} %xmmL, %xmmM, %xmmN
4460                -> VEX VOP %xmmL, %xmmM, %xmmN (L, M, and N < 16)
4461              EVEX VOP{d,q} %ymmL, %ymmM, %ymmN
4462                -> VEX VOP %ymmL, %ymmM, %ymmN (L, M, and N < 16)
4463              EVEX VOP{d,q} mem, %xmmM, %xmmN
4464                -> VEX VOP mem, %xmmM, %xmmN (M and N < 16)
4465              EVEX VOP{d,q} mem, %ymmM, %ymmN
4466                -> VEX VOP mem, %ymmM, %ymmN (M and N < 16)
4467        */
4468       for (j = 0; j < i.operands; j++)
4469         if (operand_type_check (i.types[j], disp)
4470             && i.op[j].disps->X_op == O_constant)
4471           {
4472             /* Since the VEX prefix has 2 or 3 bytes, the EVEX prefix
4473                has 4 bytes, EVEX Disp8 has 1 byte and VEX Disp32 has 4
4474                bytes, we choose EVEX Disp8 over VEX Disp32.  */
4475             int evex_disp8, vex_disp8;
4476             unsigned int memshift = i.memshift;
4477             offsetT n = i.op[j].disps->X_add_number;
4478
4479             evex_disp8 = fits_in_disp8 (n);
4480             i.memshift = 0;
4481             vex_disp8 = fits_in_disp8 (n);
4482             if (evex_disp8 != vex_disp8)
4483               {
4484                 i.memshift = memshift;
4485                 return;
4486               }
4487
4488             i.types[j].bitfield.disp8 = vex_disp8;
4489             break;
4490           }
4491       if ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
4492           && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2)
4493         i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
4494       i.tm.opcode_modifier.vex
4495         = i.types[0].bitfield.ymmword ? VEX256 : VEX128;
4496       i.tm.opcode_modifier.vexw = VEXW0;
4497       /* VPAND, VPOR, and VPXOR are commutative.  */
4498       if (i.reg_operands == 3 && i.tm.base_opcode != 0xdf)
4499         i.tm.opcode_modifier.commutative = 1;
4500       i.tm.opcode_modifier.evex = 0;
4501       i.tm.opcode_modifier.masking = 0;
4502       i.tm.opcode_modifier.broadcast = 0;
4503       i.tm.opcode_modifier.disp8memshift = 0;
4504       i.memshift = 0;
4505       if (j < i.operands)
4506         i.types[j].bitfield.disp8
4507           = fits_in_disp8 (i.op[j].disps->X_add_number);
4508     }
4509 }
4510
4511 /* Return non-zero for load instruction.  */
4512
4513 static int
4514 load_insn_p (void)
4515 {
4516   unsigned int dest;
4517   int any_vex_p = is_any_vex_encoding (&i.tm);
4518   unsigned int base_opcode = i.tm.base_opcode | 1;
4519
4520   if (!any_vex_p)
4521     {
4522       /* Anysize insns: lea, invlpg, clflush, prefetchnta, prefetcht0,
4523          prefetcht1, prefetcht2, prefetchtw, bndmk, bndcl, bndcu, bndcn,
4524          bndstx, bndldx, prefetchwt1, clflushopt, clwb, cldemote.  */
4525       if (i.tm.opcode_modifier.anysize)
4526         return 0;
4527
4528       /* pop.   */
4529       if (strcmp (i.tm.name, "pop") == 0)
4530         return 1;
4531     }
4532
4533   if (i.tm.opcode_modifier.opcodespace == SPACE_BASE)
4534     {
4535       /* popf, popa.   */
4536       if (i.tm.base_opcode == 0x9d
4537           || i.tm.base_opcode == 0x61)
4538         return 1;
4539
4540       /* movs, cmps, lods, scas.  */
4541       if ((i.tm.base_opcode | 0xb) == 0xaf)
4542         return 1;
4543
4544       /* outs, xlatb.  */
4545       if (base_opcode == 0x6f
4546           || i.tm.base_opcode == 0xd7)
4547         return 1;
4548       /* NB: For AMD-specific insns with implicit memory operands,
4549          they're intentionally not covered.  */
4550     }
4551
4552   /* No memory operand.  */
4553   if (!i.mem_operands)
4554     return 0;
4555
4556   if (any_vex_p)
4557     {
4558       /* vldmxcsr.  */
4559       if (i.tm.base_opcode == 0xae
4560           && i.tm.opcode_modifier.vex
4561           && i.tm.opcode_modifier.opcodespace == SPACE_0F
4562           && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
4563           && i.tm.extension_opcode == 2)
4564         return 1;
4565     }
4566   else if (i.tm.opcode_modifier.opcodespace == SPACE_BASE)
4567     {
4568       /* test, not, neg, mul, imul, div, idiv.  */
4569       if ((i.tm.base_opcode == 0xf6 || i.tm.base_opcode == 0xf7)
4570           && i.tm.extension_opcode != 1)
4571         return 1;
4572
4573       /* inc, dec.  */
4574       if (base_opcode == 0xff && i.tm.extension_opcode <= 1)
4575         return 1;
4576
4577       /* add, or, adc, sbb, and, sub, xor, cmp.  */
4578       if (i.tm.base_opcode >= 0x80 && i.tm.base_opcode <= 0x83)
4579         return 1;
4580
4581       /* rol, ror, rcl, rcr, shl/sal, shr, sar. */
4582       if ((base_opcode == 0xc1
4583            || (i.tm.base_opcode >= 0xd0 && i.tm.base_opcode <= 0xd3))
4584           && i.tm.extension_opcode != 6)
4585         return 1;
4586
4587       /* Check for x87 instructions.  */
4588       if (base_opcode >= 0xd8 && base_opcode <= 0xdf)
4589         {
4590           /* Skip fst, fstp, fstenv, fstcw.  */
4591           if (i.tm.base_opcode == 0xd9
4592               && (i.tm.extension_opcode == 2
4593                   || i.tm.extension_opcode == 3
4594                   || i.tm.extension_opcode == 6
4595                   || i.tm.extension_opcode == 7))
4596             return 0;
4597
4598           /* Skip fisttp, fist, fistp, fstp.  */
4599           if (i.tm.base_opcode == 0xdb
4600               && (i.tm.extension_opcode == 1
4601                   || i.tm.extension_opcode == 2
4602                   || i.tm.extension_opcode == 3
4603                   || i.tm.extension_opcode == 7))
4604             return 0;
4605
4606           /* Skip fisttp, fst, fstp, fsave, fstsw.  */
4607           if (i.tm.base_opcode == 0xdd
4608               && (i.tm.extension_opcode == 1
4609                   || i.tm.extension_opcode == 2
4610                   || i.tm.extension_opcode == 3
4611                   || i.tm.extension_opcode == 6
4612                   || i.tm.extension_opcode == 7))
4613             return 0;
4614
4615           /* Skip fisttp, fist, fistp, fbstp, fistp.  */
4616           if (i.tm.base_opcode == 0xdf
4617               && (i.tm.extension_opcode == 1
4618                   || i.tm.extension_opcode == 2
4619                   || i.tm.extension_opcode == 3
4620                   || i.tm.extension_opcode == 6
4621                   || i.tm.extension_opcode == 7))
4622             return 0;
4623
4624           return 1;
4625         }
4626     }
4627   else if (i.tm.opcode_modifier.opcodespace == SPACE_0F)
4628     {
4629       /* bt, bts, btr, btc.  */
4630       if (i.tm.base_opcode == 0xba
4631           && (i.tm.extension_opcode >= 4 && i.tm.extension_opcode <= 7))
4632         return 1;
4633
4634       /* cmpxchg8b, cmpxchg16b, xrstors, vmptrld.  */
4635       if (i.tm.base_opcode == 0xc7
4636           && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
4637           && (i.tm.extension_opcode == 1 || i.tm.extension_opcode == 3
4638               || i.tm.extension_opcode == 6))
4639         return 1;
4640
4641       /* fxrstor, ldmxcsr, xrstor.  */
4642       if (i.tm.base_opcode == 0xae
4643           && (i.tm.extension_opcode == 1
4644               || i.tm.extension_opcode == 2
4645               || i.tm.extension_opcode == 5))
4646         return 1;
4647
4648       /* lgdt, lidt, lmsw.  */
4649       if (i.tm.base_opcode == 0x01
4650           && (i.tm.extension_opcode == 2
4651               || i.tm.extension_opcode == 3
4652               || i.tm.extension_opcode == 6))
4653         return 1;
4654     }
4655
4656   dest = i.operands - 1;
4657
4658   /* Check fake imm8 operand and 3 source operands.  */
4659   if ((i.tm.opcode_modifier.immext
4660        || i.tm.opcode_modifier.vexsources == VEX3SOURCES)
4661       && i.types[dest].bitfield.imm8)
4662     dest--;
4663
4664   /* add, or, adc, sbb, and, sub, xor, cmp, test, xchg.  */
4665   if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
4666       && (base_opcode == 0x1
4667           || base_opcode == 0x9
4668           || base_opcode == 0x11
4669           || base_opcode == 0x19
4670           || base_opcode == 0x21
4671           || base_opcode == 0x29
4672           || base_opcode == 0x31
4673           || base_opcode == 0x39
4674           || (base_opcode | 2) == 0x87))
4675     return 1;
4676
4677   /* xadd.  */
4678   if (i.tm.opcode_modifier.opcodespace == SPACE_0F
4679       && base_opcode == 0xc1)
4680     return 1;
4681
4682   /* Check for load instruction.  */
4683   return (i.types[dest].bitfield.class != ClassNone
4684           || i.types[dest].bitfield.instance == Accum);
4685 }
4686
4687 /* Output lfence, 0xfaee8, after instruction.  */
4688
4689 static void
4690 insert_lfence_after (void)
4691 {
4692   if (lfence_after_load && load_insn_p ())
4693     {
4694       /* There are also two REP string instructions that require
4695          special treatment. Specifically, the compare string (CMPS)
4696          and scan string (SCAS) instructions set EFLAGS in a manner
4697          that depends on the data being compared/scanned. When used
4698          with a REP prefix, the number of iterations may therefore
4699          vary depending on this data. If the data is a program secret
4700          chosen by the adversary using an LVI method,
4701          then this data-dependent behavior may leak some aspect
4702          of the secret.  */
4703       if (((i.tm.base_opcode | 0x1) == 0xa7
4704            || (i.tm.base_opcode | 0x1) == 0xaf)
4705           && i.prefix[REP_PREFIX])
4706         {
4707             as_warn (_("`%s` changes flags which would affect control flow behavior"),
4708                      i.tm.name);
4709         }
4710       char *p = frag_more (3);
4711       *p++ = 0xf;
4712       *p++ = 0xae;
4713       *p = 0xe8;
4714     }
4715 }
4716
4717 /* Output lfence, 0xfaee8, before instruction.  */
4718
4719 static void
4720 insert_lfence_before (void)
4721 {
4722   char *p;
4723
4724   if (i.tm.opcode_modifier.opcodespace != SPACE_BASE)
4725     return;
4726
4727   if (i.tm.base_opcode == 0xff
4728       && (i.tm.extension_opcode == 2 || i.tm.extension_opcode == 4))
4729     {
4730       /* Insert lfence before indirect branch if needed.  */
4731
4732       if (lfence_before_indirect_branch == lfence_branch_none)
4733         return;
4734
4735       if (i.operands != 1)
4736         abort ();
4737
4738       if (i.reg_operands == 1)
4739         {
4740           /* Indirect branch via register.  Don't insert lfence with
4741              -mlfence-after-load=yes.  */
4742           if (lfence_after_load
4743               || lfence_before_indirect_branch == lfence_branch_memory)
4744             return;
4745         }
4746       else if (i.mem_operands == 1
4747                && lfence_before_indirect_branch != lfence_branch_register)
4748         {
4749           as_warn (_("indirect `%s` with memory operand should be avoided"),
4750                    i.tm.name);
4751           return;
4752         }
4753       else
4754         return;
4755
4756       if (last_insn.kind != last_insn_other
4757           && last_insn.seg == now_seg)
4758         {
4759           as_warn_where (last_insn.file, last_insn.line,
4760                          _("`%s` skips -mlfence-before-indirect-branch on `%s`"),
4761                          last_insn.name, i.tm.name);
4762           return;
4763         }
4764
4765       p = frag_more (3);
4766       *p++ = 0xf;
4767       *p++ = 0xae;
4768       *p = 0xe8;
4769       return;
4770     }
4771
4772   /* Output or/not/shl and lfence before near ret.  */
4773   if (lfence_before_ret != lfence_before_ret_none
4774       && (i.tm.base_opcode == 0xc2
4775           || i.tm.base_opcode == 0xc3))
4776     {
4777       if (last_insn.kind != last_insn_other
4778           && last_insn.seg == now_seg)
4779         {
4780           as_warn_where (last_insn.file, last_insn.line,
4781                          _("`%s` skips -mlfence-before-ret on `%s`"),
4782                          last_insn.name, i.tm.name);
4783           return;
4784         }
4785
4786       /* Near ret ingore operand size override under CPU64.  */
4787       char prefix = flag_code == CODE_64BIT
4788                     ? 0x48
4789                     : i.prefix[DATA_PREFIX] ? 0x66 : 0x0;
4790
4791       if (lfence_before_ret == lfence_before_ret_not)
4792         {
4793           /* not: 0xf71424, may add prefix
4794              for operand size override or 64-bit code.  */
4795           p = frag_more ((prefix ? 2 : 0) + 6 + 3);
4796           if (prefix)
4797             *p++ = prefix;
4798           *p++ = 0xf7;
4799           *p++ = 0x14;
4800           *p++ = 0x24;
4801           if (prefix)
4802             *p++ = prefix;
4803           *p++ = 0xf7;
4804           *p++ = 0x14;
4805           *p++ = 0x24;
4806         }
4807       else
4808         {
4809           p = frag_more ((prefix ? 1 : 0) + 4 + 3);
4810           if (prefix)
4811             *p++ = prefix;
4812           if (lfence_before_ret == lfence_before_ret_or)
4813             {
4814               /* or: 0x830c2400, may add prefix
4815                  for operand size override or 64-bit code.  */
4816               *p++ = 0x83;
4817               *p++ = 0x0c;
4818             }
4819           else
4820             {
4821               /* shl: 0xc1242400, may add prefix
4822                  for operand size override or 64-bit code.  */
4823               *p++ = 0xc1;
4824               *p++ = 0x24;
4825             }
4826
4827           *p++ = 0x24;
4828           *p++ = 0x0;
4829         }
4830
4831       *p++ = 0xf;
4832       *p++ = 0xae;
4833       *p = 0xe8;
4834     }
4835 }
4836
4837 /* This is the guts of the machine-dependent assembler.  LINE points to a
4838    machine dependent instruction.  This function is supposed to emit
4839    the frags/bytes it assembles to.  */
4840
4841 void
4842 md_assemble (char *line)
4843 {
4844   unsigned int j;
4845   char mnemonic[MAX_MNEM_SIZE], mnem_suffix;
4846   const insn_template *t;
4847
4848   /* Initialize globals.  */
4849   memset (&i, '\0', sizeof (i));
4850   i.rounding.type = rc_none;
4851   for (j = 0; j < MAX_OPERANDS; j++)
4852     i.reloc[j] = NO_RELOC;
4853   memset (disp_expressions, '\0', sizeof (disp_expressions));
4854   memset (im_expressions, '\0', sizeof (im_expressions));
4855   save_stack_p = save_stack;
4856
4857   /* First parse an instruction mnemonic & call i386_operand for the operands.
4858      We assume that the scrubber has arranged it so that line[0] is the valid
4859      start of a (possibly prefixed) mnemonic.  */
4860
4861   line = parse_insn (line, mnemonic);
4862   if (line == NULL)
4863     return;
4864   mnem_suffix = i.suffix;
4865
4866   line = parse_operands (line, mnemonic);
4867   this_operand = -1;
4868   xfree (i.memop1_string);
4869   i.memop1_string = NULL;
4870   if (line == NULL)
4871     return;
4872
4873   /* Now we've parsed the mnemonic into a set of templates, and have the
4874      operands at hand.  */
4875
4876   /* All Intel opcodes have reversed operands except for "bound", "enter",
4877      "invlpg*", "monitor*", "mwait*", "tpause", "umwait", "pvalidate",
4878      "rmpadjust", and "rmpupdate".  We also don't reverse intersegment "jmp"
4879      and "call" instructions with 2 immediate operands so that the immediate
4880      segment precedes the offset consistently in Intel and AT&T modes.  */
4881   if (intel_syntax
4882       && i.operands > 1
4883       && (strcmp (mnemonic, "bound") != 0)
4884       && (strncmp (mnemonic, "invlpg", 6) != 0)
4885       && !startswith (mnemonic, "monitor")
4886       && !startswith (mnemonic, "mwait")
4887       && (strcmp (mnemonic, "pvalidate") != 0)
4888       && !startswith (mnemonic, "rmp")
4889       && (strcmp (mnemonic, "tpause") != 0)
4890       && (strcmp (mnemonic, "umwait") != 0)
4891       && !(operand_type_check (i.types[0], imm)
4892            && operand_type_check (i.types[1], imm)))
4893     swap_operands ();
4894
4895   /* The order of the immediates should be reversed
4896      for 2 immediates extrq and insertq instructions */
4897   if (i.imm_operands == 2
4898       && (strcmp (mnemonic, "extrq") == 0
4899           || strcmp (mnemonic, "insertq") == 0))
4900       swap_2_operands (0, 1);
4901
4902   if (i.imm_operands)
4903     optimize_imm ();
4904
4905   if (i.disp_operands && !want_disp32 (current_templates->start))
4906     {
4907       for (j = 0; j < i.operands; ++j)
4908         {
4909           const expressionS *exp = i.op[j].disps;
4910
4911           if (!operand_type_check (i.types[j], disp))
4912             continue;
4913
4914           if (exp->X_op != O_constant)
4915             continue;
4916
4917           /* Since displacement is signed extended to 64bit, don't allow
4918              disp32 and turn off disp32s if they are out of range.  */
4919           i.types[j].bitfield.disp32 = 0;
4920           if (fits_in_signed_long (exp->X_add_number))
4921             continue;
4922
4923           i.types[j].bitfield.disp32s = 0;
4924           if (i.types[j].bitfield.baseindex)
4925             {
4926               char number_buf[128];
4927
4928               /* Coded this way in order to allow for ease of translation.  */
4929               sprintf_vma (number_buf, exp->X_add_number);
4930               as_bad (_("0x%s out of range of signed 32bit displacement"),
4931                       number_buf);
4932               return;
4933             }
4934         }
4935     }
4936
4937   /* Don't optimize displacement for movabs since it only takes 64bit
4938      displacement.  */
4939   if (i.disp_operands
4940       && i.disp_encoding != disp_encoding_32bit
4941       && (flag_code != CODE_64BIT
4942           || strcmp (mnemonic, "movabs") != 0))
4943     optimize_disp ();
4944
4945   /* Next, we find a template that matches the given insn,
4946      making sure the overlap of the given operands types is consistent
4947      with the template operand types.  */
4948
4949   if (!(t = match_template (mnem_suffix)))
4950     return;
4951
4952   if (sse_check != check_none
4953       /* The opcode space check isn't strictly needed; it's there only to
4954          bypass the logic below when easily possible.  */
4955       && t->opcode_modifier.opcodespace >= SPACE_0F
4956       && t->opcode_modifier.opcodespace <= SPACE_0F3A
4957       && !i.tm.cpu_flags.bitfield.cpusse4a
4958       && !is_any_vex_encoding (t))
4959     {
4960       bool simd = false;
4961
4962       for (j = 0; j < t->operands; ++j)
4963         {
4964           if (t->operand_types[j].bitfield.class == RegMMX)
4965             break;
4966           if (t->operand_types[j].bitfield.class == RegSIMD)
4967             simd = true;
4968         }
4969
4970       if (j >= t->operands && simd)
4971         (sse_check == check_warning
4972          ? as_warn
4973          : as_bad) (_("SSE instruction `%s' is used"), i.tm.name);
4974     }
4975
4976   if (i.tm.opcode_modifier.fwait)
4977     if (!add_prefix (FWAIT_OPCODE))
4978       return;
4979
4980   /* Check if REP prefix is OK.  */
4981   if (i.rep_prefix && i.tm.opcode_modifier.prefixok != PrefixRep)
4982     {
4983       as_bad (_("invalid instruction `%s' after `%s'"),
4984                 i.tm.name, i.rep_prefix);
4985       return;
4986     }
4987
4988   /* Check for lock without a lockable instruction.  Destination operand
4989      must be memory unless it is xchg (0x86).  */
4990   if (i.prefix[LOCK_PREFIX]
4991       && (i.tm.opcode_modifier.prefixok < PrefixLock
4992           || i.mem_operands == 0
4993           || (i.tm.base_opcode != 0x86
4994               && !(i.flags[i.operands - 1] & Operand_Mem))))
4995     {
4996       as_bad (_("expecting lockable instruction after `lock'"));
4997       return;
4998     }
4999
5000   /* Check for data size prefix on VEX/XOP/EVEX encoded and SIMD insns.  */
5001   if (i.prefix[DATA_PREFIX]
5002       && (is_any_vex_encoding (&i.tm)
5003           || i.tm.operand_types[i.imm_operands].bitfield.class >= RegMMX
5004           || i.tm.operand_types[i.imm_operands + 1].bitfield.class >= RegMMX))
5005     {
5006       as_bad (_("data size prefix invalid with `%s'"), i.tm.name);
5007       return;
5008     }
5009
5010   /* Check if HLE prefix is OK.  */
5011   if (i.hle_prefix && !check_hle ())
5012     return;
5013
5014   /* Check BND prefix.  */
5015   if (i.bnd_prefix && !i.tm.opcode_modifier.bndprefixok)
5016     as_bad (_("expecting valid branch instruction after `bnd'"));
5017
5018   /* Check NOTRACK prefix.  */
5019   if (i.notrack_prefix && i.tm.opcode_modifier.prefixok != PrefixNoTrack)
5020     as_bad (_("expecting indirect branch instruction after `notrack'"));
5021
5022   if (i.tm.cpu_flags.bitfield.cpumpx)
5023     {
5024       if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
5025         as_bad (_("32-bit address isn't allowed in 64-bit MPX instructions."));
5026       else if (flag_code != CODE_16BIT
5027                ? i.prefix[ADDR_PREFIX]
5028                : i.mem_operands && !i.prefix[ADDR_PREFIX])
5029         as_bad (_("16-bit address isn't allowed in MPX instructions"));
5030     }
5031
5032   /* Insert BND prefix.  */
5033   if (add_bnd_prefix && i.tm.opcode_modifier.bndprefixok)
5034     {
5035       if (!i.prefix[BND_PREFIX])
5036         add_prefix (BND_PREFIX_OPCODE);
5037       else if (i.prefix[BND_PREFIX] != BND_PREFIX_OPCODE)
5038         {
5039           as_warn (_("replacing `rep'/`repe' prefix by `bnd'"));
5040           i.prefix[BND_PREFIX] = BND_PREFIX_OPCODE;
5041         }
5042     }
5043
5044   /* Check string instruction segment overrides.  */
5045   if (i.tm.opcode_modifier.isstring >= IS_STRING_ES_OP0)
5046     {
5047       gas_assert (i.mem_operands);
5048       if (!check_string ())
5049         return;
5050       i.disp_operands = 0;
5051     }
5052
5053   if (optimize && !i.no_optimize && i.tm.opcode_modifier.optimize)
5054     optimize_encoding ();
5055
5056   if (use_unaligned_vector_move)
5057     encode_with_unaligned_vector_move ();
5058
5059   if (!process_suffix ())
5060     return;
5061
5062   /* Update operand types and check extended states.  */
5063   for (j = 0; j < i.operands; j++)
5064     {
5065       i.types[j] = operand_type_and (i.types[j], i.tm.operand_types[j]);
5066       switch (i.tm.operand_types[j].bitfield.class)
5067         {
5068         default:
5069           break;
5070         case RegMMX:
5071           i.xstate |= xstate_mmx;
5072           break;
5073         case RegMask:
5074           i.xstate |= xstate_mask;
5075           break;
5076         case RegSIMD:
5077           if (i.tm.operand_types[j].bitfield.tmmword)
5078             i.xstate |= xstate_tmm;
5079           else if (i.tm.operand_types[j].bitfield.zmmword)
5080             i.xstate |= xstate_zmm;
5081           else if (i.tm.operand_types[j].bitfield.ymmword)
5082             i.xstate |= xstate_ymm;
5083           else if (i.tm.operand_types[j].bitfield.xmmword)
5084             i.xstate |= xstate_xmm;
5085           break;
5086         }
5087     }
5088
5089   /* Make still unresolved immediate matches conform to size of immediate
5090      given in i.suffix.  */
5091   if (!finalize_imm ())
5092     return;
5093
5094   if (i.types[0].bitfield.imm1)
5095     i.imm_operands = 0; /* kludge for shift insns.  */
5096
5097   /* We only need to check those implicit registers for instructions
5098      with 3 operands or less.  */
5099   if (i.operands <= 3)
5100     for (j = 0; j < i.operands; j++)
5101       if (i.types[j].bitfield.instance != InstanceNone
5102           && !i.types[j].bitfield.xmmword)
5103         i.reg_operands--;
5104
5105   /* For insns with operands there are more diddles to do to the opcode.  */
5106   if (i.operands)
5107     {
5108       if (!process_operands ())
5109         return;
5110     }
5111   else if (!quiet_warnings && i.tm.opcode_modifier.ugh)
5112     {
5113       /* UnixWare fsub no args is alias for fsubp, fadd -> faddp, etc.  */
5114       as_warn (_("translating to `%sp'"), i.tm.name);
5115     }
5116
5117   if (is_any_vex_encoding (&i.tm))
5118     {
5119       if (!cpu_arch_flags.bitfield.cpui286)
5120         {
5121           as_bad (_("instruction `%s' isn't supported outside of protected mode."),
5122                   i.tm.name);
5123           return;
5124         }
5125
5126       /* Check for explicit REX prefix.  */
5127       if (i.prefix[REX_PREFIX] || i.rex_encoding)
5128         {
5129           as_bad (_("REX prefix invalid with `%s'"), i.tm.name);
5130           return;
5131         }
5132
5133       if (i.tm.opcode_modifier.vex)
5134         build_vex_prefix (t);
5135       else
5136         build_evex_prefix ();
5137
5138       /* The individual REX.RXBW bits got consumed.  */
5139       i.rex &= REX_OPCODE;
5140     }
5141
5142   /* Handle conversion of 'int $3' --> special int3 insn.  XOP or FMA4
5143      instructions may define INT_OPCODE as well, so avoid this corner
5144      case for those instructions that use MODRM.  */
5145   if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
5146       && i.tm.base_opcode == INT_OPCODE
5147       && !i.tm.opcode_modifier.modrm
5148       && i.op[0].imms->X_add_number == 3)
5149     {
5150       i.tm.base_opcode = INT3_OPCODE;
5151       i.imm_operands = 0;
5152     }
5153
5154   if ((i.tm.opcode_modifier.jump == JUMP
5155        || i.tm.opcode_modifier.jump == JUMP_BYTE
5156        || i.tm.opcode_modifier.jump == JUMP_DWORD)
5157       && i.op[0].disps->X_op == O_constant)
5158     {
5159       /* Convert "jmp constant" (and "call constant") to a jump (call) to
5160          the absolute address given by the constant.  Since ix86 jumps and
5161          calls are pc relative, we need to generate a reloc.  */
5162       i.op[0].disps->X_add_symbol = &abs_symbol;
5163       i.op[0].disps->X_op = O_symbol;
5164     }
5165
5166   /* For 8 bit registers we need an empty rex prefix.  Also if the
5167      instruction already has a prefix, we need to convert old
5168      registers to new ones.  */
5169
5170   if ((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte
5171        && (i.op[0].regs->reg_flags & RegRex64) != 0)
5172       || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte
5173           && (i.op[1].regs->reg_flags & RegRex64) != 0)
5174       || (((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte)
5175            || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte))
5176           && i.rex != 0))
5177     {
5178       int x;
5179
5180       i.rex |= REX_OPCODE;
5181       for (x = 0; x < 2; x++)
5182         {
5183           /* Look for 8 bit operand that uses old registers.  */
5184           if (i.types[x].bitfield.class == Reg && i.types[x].bitfield.byte
5185               && (i.op[x].regs->reg_flags & RegRex64) == 0)
5186             {
5187               gas_assert (!(i.op[x].regs->reg_flags & RegRex));
5188               /* In case it is "hi" register, give up.  */
5189               if (i.op[x].regs->reg_num > 3)
5190                 as_bad (_("can't encode register '%s%s' in an "
5191                           "instruction requiring REX prefix."),
5192                         register_prefix, i.op[x].regs->reg_name);
5193
5194               /* Otherwise it is equivalent to the extended register.
5195                  Since the encoding doesn't change this is merely
5196                  cosmetic cleanup for debug output.  */
5197
5198               i.op[x].regs = i.op[x].regs + 8;
5199             }
5200         }
5201     }
5202
5203   if (i.rex == 0 && i.rex_encoding)
5204     {
5205       /* Check if we can add a REX_OPCODE byte.  Look for 8 bit operand
5206          that uses legacy register.  If it is "hi" register, don't add
5207          the REX_OPCODE byte.  */
5208       int x;
5209       for (x = 0; x < 2; x++)
5210         if (i.types[x].bitfield.class == Reg
5211             && i.types[x].bitfield.byte
5212             && (i.op[x].regs->reg_flags & RegRex64) == 0
5213             && i.op[x].regs->reg_num > 3)
5214           {
5215             gas_assert (!(i.op[x].regs->reg_flags & RegRex));
5216             i.rex_encoding = false;
5217             break;
5218           }
5219
5220       if (i.rex_encoding)
5221         i.rex = REX_OPCODE;
5222     }
5223
5224   if (i.rex != 0)
5225     add_prefix (REX_OPCODE | i.rex);
5226
5227   insert_lfence_before ();
5228
5229   /* We are ready to output the insn.  */
5230   output_insn ();
5231
5232   insert_lfence_after ();
5233
5234   last_insn.seg = now_seg;
5235
5236   if (i.tm.opcode_modifier.isprefix)
5237     {
5238       last_insn.kind = last_insn_prefix;
5239       last_insn.name = i.tm.name;
5240       last_insn.file = as_where (&last_insn.line);
5241     }
5242   else
5243     last_insn.kind = last_insn_other;
5244 }
5245
5246 static char *
5247 parse_insn (char *line, char *mnemonic)
5248 {
5249   char *l = line;
5250   char *token_start = l;
5251   char *mnem_p;
5252   int supported;
5253   const insn_template *t;
5254   char *dot_p = NULL;
5255
5256   while (1)
5257     {
5258       mnem_p = mnemonic;
5259       while ((*mnem_p = mnemonic_chars[(unsigned char) *l]) != 0)
5260         {
5261           if (*mnem_p == '.')
5262             dot_p = mnem_p;
5263           mnem_p++;
5264           if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
5265             {
5266               as_bad (_("no such instruction: `%s'"), token_start);
5267               return NULL;
5268             }
5269           l++;
5270         }
5271       if (!is_space_char (*l)
5272           && *l != END_OF_INSN
5273           && (intel_syntax
5274               || (*l != PREFIX_SEPARATOR
5275                   && *l != ',')))
5276         {
5277           as_bad (_("invalid character %s in mnemonic"),
5278                   output_invalid (*l));
5279           return NULL;
5280         }
5281       if (token_start == l)
5282         {
5283           if (!intel_syntax && *l == PREFIX_SEPARATOR)
5284             as_bad (_("expecting prefix; got nothing"));
5285           else
5286             as_bad (_("expecting mnemonic; got nothing"));
5287           return NULL;
5288         }
5289
5290       /* Look up instruction (or prefix) via hash table.  */
5291       current_templates = (const templates *) str_hash_find (op_hash, mnemonic);
5292
5293       if (*l != END_OF_INSN
5294           && (!is_space_char (*l) || l[1] != END_OF_INSN)
5295           && current_templates
5296           && current_templates->start->opcode_modifier.isprefix)
5297         {
5298           if (!cpu_flags_check_cpu64 (current_templates->start->cpu_flags))
5299             {
5300               as_bad ((flag_code != CODE_64BIT
5301                        ? _("`%s' is only supported in 64-bit mode")
5302                        : _("`%s' is not supported in 64-bit mode")),
5303                       current_templates->start->name);
5304               return NULL;
5305             }
5306           /* If we are in 16-bit mode, do not allow addr16 or data16.
5307              Similarly, in 32-bit mode, do not allow addr32 or data32.  */
5308           if ((current_templates->start->opcode_modifier.size == SIZE16
5309                || current_templates->start->opcode_modifier.size == SIZE32)
5310               && flag_code != CODE_64BIT
5311               && ((current_templates->start->opcode_modifier.size == SIZE32)
5312                   ^ (flag_code == CODE_16BIT)))
5313             {
5314               as_bad (_("redundant %s prefix"),
5315                       current_templates->start->name);
5316               return NULL;
5317             }
5318
5319           if (current_templates->start->base_opcode == PSEUDO_PREFIX)
5320             {
5321               /* Handle pseudo prefixes.  */
5322               switch (current_templates->start->extension_opcode)
5323                 {
5324                 case Prefix_Disp8:
5325                   /* {disp8} */
5326                   i.disp_encoding = disp_encoding_8bit;
5327                   break;
5328                 case Prefix_Disp16:
5329                   /* {disp16} */
5330                   i.disp_encoding = disp_encoding_16bit;
5331                   break;
5332                 case Prefix_Disp32:
5333                   /* {disp32} */
5334                   i.disp_encoding = disp_encoding_32bit;
5335                   break;
5336                 case Prefix_Load:
5337                   /* {load} */
5338                   i.dir_encoding = dir_encoding_load;
5339                   break;
5340                 case Prefix_Store:
5341                   /* {store} */
5342                   i.dir_encoding = dir_encoding_store;
5343                   break;
5344                 case Prefix_VEX:
5345                   /* {vex} */
5346                   i.vec_encoding = vex_encoding_vex;
5347                   break;
5348                 case Prefix_VEX3:
5349                   /* {vex3} */
5350                   i.vec_encoding = vex_encoding_vex3;
5351                   break;
5352                 case Prefix_EVEX:
5353                   /* {evex} */
5354                   i.vec_encoding = vex_encoding_evex;
5355                   break;
5356                 case Prefix_REX:
5357                   /* {rex} */
5358                   i.rex_encoding = true;
5359                   break;
5360                 case Prefix_NoOptimize:
5361                   /* {nooptimize} */
5362                   i.no_optimize = true;
5363                   break;
5364                 default:
5365                   abort ();
5366                 }
5367             }
5368           else
5369             {
5370               /* Add prefix, checking for repeated prefixes.  */
5371               switch (add_prefix (current_templates->start->base_opcode))
5372                 {
5373                 case PREFIX_EXIST:
5374                   return NULL;
5375                 case PREFIX_DS:
5376                   if (current_templates->start->cpu_flags.bitfield.cpuibt)
5377                     i.notrack_prefix = current_templates->start->name;
5378                   break;
5379                 case PREFIX_REP:
5380                   if (current_templates->start->cpu_flags.bitfield.cpuhle)
5381                     i.hle_prefix = current_templates->start->name;
5382                   else if (current_templates->start->cpu_flags.bitfield.cpumpx)
5383                     i.bnd_prefix = current_templates->start->name;
5384                   else
5385                     i.rep_prefix = current_templates->start->name;
5386                   break;
5387                 default:
5388                   break;
5389                 }
5390             }
5391           /* Skip past PREFIX_SEPARATOR and reset token_start.  */
5392           token_start = ++l;
5393         }
5394       else
5395         break;
5396     }
5397
5398   if (!current_templates)
5399     {
5400       /* Deprecated functionality (new code should use pseudo-prefixes instead):
5401          Check if we should swap operand or force 32bit displacement in
5402          encoding.  */
5403       if (mnem_p - 2 == dot_p && dot_p[1] == 's')
5404         i.dir_encoding = dir_encoding_swap;
5405       else if (mnem_p - 3 == dot_p
5406                && dot_p[1] == 'd'
5407                && dot_p[2] == '8')
5408         i.disp_encoding = disp_encoding_8bit;
5409       else if (mnem_p - 4 == dot_p
5410                && dot_p[1] == 'd'
5411                && dot_p[2] == '3'
5412                && dot_p[3] == '2')
5413         i.disp_encoding = disp_encoding_32bit;
5414       else
5415         goto check_suffix;
5416       mnem_p = dot_p;
5417       *dot_p = '\0';
5418       current_templates = (const templates *) str_hash_find (op_hash, mnemonic);
5419     }
5420
5421   if (!current_templates)
5422     {
5423     check_suffix:
5424       if (mnem_p > mnemonic)
5425         {
5426           /* See if we can get a match by trimming off a suffix.  */
5427           switch (mnem_p[-1])
5428             {
5429             case WORD_MNEM_SUFFIX:
5430               if (intel_syntax && (intel_float_operand (mnemonic) & 2))
5431                 i.suffix = SHORT_MNEM_SUFFIX;
5432               else
5433                 /* Fall through.  */
5434               case BYTE_MNEM_SUFFIX:
5435               case QWORD_MNEM_SUFFIX:
5436                 i.suffix = mnem_p[-1];
5437               mnem_p[-1] = '\0';
5438               current_templates
5439                 = (const templates *) str_hash_find (op_hash, mnemonic);
5440               break;
5441             case SHORT_MNEM_SUFFIX:
5442             case LONG_MNEM_SUFFIX:
5443               if (!intel_syntax)
5444                 {
5445                   i.suffix = mnem_p[-1];
5446                   mnem_p[-1] = '\0';
5447                   current_templates
5448                     = (const templates *) str_hash_find (op_hash, mnemonic);
5449                 }
5450               break;
5451
5452               /* Intel Syntax.  */
5453             case 'd':
5454               if (intel_syntax)
5455                 {
5456                   if (intel_float_operand (mnemonic) == 1)
5457                     i.suffix = SHORT_MNEM_SUFFIX;
5458                   else
5459                     i.suffix = LONG_MNEM_SUFFIX;
5460                   mnem_p[-1] = '\0';
5461                   current_templates
5462                     = (const templates *) str_hash_find (op_hash, mnemonic);
5463                 }
5464               break;
5465             }
5466         }
5467
5468       if (!current_templates)
5469         {
5470           as_bad (_("no such instruction: `%s'"), token_start);
5471           return NULL;
5472         }
5473     }
5474
5475   if (current_templates->start->opcode_modifier.jump == JUMP
5476       || current_templates->start->opcode_modifier.jump == JUMP_BYTE)
5477     {
5478       /* Check for a branch hint.  We allow ",pt" and ",pn" for
5479          predict taken and predict not taken respectively.
5480          I'm not sure that branch hints actually do anything on loop
5481          and jcxz insns (JumpByte) for current Pentium4 chips.  They
5482          may work in the future and it doesn't hurt to accept them
5483          now.  */
5484       if (l[0] == ',' && l[1] == 'p')
5485         {
5486           if (l[2] == 't')
5487             {
5488               if (!add_prefix (DS_PREFIX_OPCODE))
5489                 return NULL;
5490               l += 3;
5491             }
5492           else if (l[2] == 'n')
5493             {
5494               if (!add_prefix (CS_PREFIX_OPCODE))
5495                 return NULL;
5496               l += 3;
5497             }
5498         }
5499     }
5500   /* Any other comma loses.  */
5501   if (*l == ',')
5502     {
5503       as_bad (_("invalid character %s in mnemonic"),
5504               output_invalid (*l));
5505       return NULL;
5506     }
5507
5508   /* Check if instruction is supported on specified architecture.  */
5509   supported = 0;
5510   for (t = current_templates->start; t < current_templates->end; ++t)
5511     {
5512       supported |= cpu_flags_match (t);
5513       if (supported == CPU_FLAGS_PERFECT_MATCH)
5514         {
5515           if (!cpu_arch_flags.bitfield.cpui386 && (flag_code != CODE_16BIT))
5516             as_warn (_("use .code16 to ensure correct addressing mode"));
5517
5518           return l;
5519         }
5520     }
5521
5522   if (!(supported & CPU_FLAGS_64BIT_MATCH))
5523     as_bad (flag_code == CODE_64BIT
5524             ? _("`%s' is not supported in 64-bit mode")
5525             : _("`%s' is only supported in 64-bit mode"),
5526             current_templates->start->name);
5527   else
5528     as_bad (_("`%s' is not supported on `%s%s'"),
5529             current_templates->start->name,
5530             cpu_arch_name ? cpu_arch_name : default_arch,
5531             cpu_sub_arch_name ? cpu_sub_arch_name : "");
5532
5533   return NULL;
5534 }
5535
5536 static char *
5537 parse_operands (char *l, const char *mnemonic)
5538 {
5539   char *token_start;
5540
5541   /* 1 if operand is pending after ','.  */
5542   unsigned int expecting_operand = 0;
5543
5544   while (*l != END_OF_INSN)
5545     {
5546       /* Non-zero if operand parens not balanced.  */
5547       unsigned int paren_not_balanced = 0;
5548       /* True if inside double quotes.  */
5549       bool in_quotes = false;
5550
5551       /* Skip optional white space before operand.  */
5552       if (is_space_char (*l))
5553         ++l;
5554       if (!is_operand_char (*l) && *l != END_OF_INSN && *l != '"')
5555         {
5556           as_bad (_("invalid character %s before operand %d"),
5557                   output_invalid (*l),
5558                   i.operands + 1);
5559           return NULL;
5560         }
5561       token_start = l;  /* After white space.  */
5562       while (in_quotes || paren_not_balanced || *l != ',')
5563         {
5564           if (*l == END_OF_INSN)
5565             {
5566               if (in_quotes)
5567                 {
5568                   as_bad (_("unbalanced double quotes in operand %d."),
5569                           i.operands + 1);
5570                   return NULL;
5571                 }
5572               if (paren_not_balanced)
5573                 {
5574                   know (!intel_syntax);
5575                   as_bad (_("unbalanced parenthesis in operand %d."),
5576                           i.operands + 1);
5577                   return NULL;
5578                 }
5579               else
5580                 break;  /* we are done */
5581             }
5582           else if (*l == '\\' && l[1] == '"')
5583             ++l;
5584           else if (*l == '"')
5585             in_quotes = !in_quotes;
5586           else if (!in_quotes && !is_operand_char (*l) && !is_space_char (*l))
5587             {
5588               as_bad (_("invalid character %s in operand %d"),
5589                       output_invalid (*l),
5590                       i.operands + 1);
5591               return NULL;
5592             }
5593           if (!intel_syntax && !in_quotes)
5594             {
5595               if (*l == '(')
5596                 ++paren_not_balanced;
5597               if (*l == ')')
5598                 --paren_not_balanced;
5599             }
5600           l++;
5601         }
5602       if (l != token_start)
5603         {                       /* Yes, we've read in another operand.  */
5604           unsigned int operand_ok;
5605           this_operand = i.operands++;
5606           if (i.operands > MAX_OPERANDS)
5607             {
5608               as_bad (_("spurious operands; (%d operands/instruction max)"),
5609                       MAX_OPERANDS);
5610               return NULL;
5611             }
5612           i.types[this_operand].bitfield.unspecified = 1;
5613           /* Now parse operand adding info to 'i' as we go along.  */
5614           END_STRING_AND_SAVE (l);
5615
5616           if (i.mem_operands > 1)
5617             {
5618               as_bad (_("too many memory references for `%s'"),
5619                       mnemonic);
5620               return 0;
5621             }
5622
5623           if (intel_syntax)
5624             operand_ok =
5625               i386_intel_operand (token_start,
5626                                   intel_float_operand (mnemonic));
5627           else
5628             operand_ok = i386_att_operand (token_start);
5629
5630           RESTORE_END_STRING (l);
5631           if (!operand_ok)
5632             return NULL;
5633         }
5634       else
5635         {
5636           if (expecting_operand)
5637             {
5638             expecting_operand_after_comma:
5639               as_bad (_("expecting operand after ','; got nothing"));
5640               return NULL;
5641             }
5642           if (*l == ',')
5643             {
5644               as_bad (_("expecting operand before ','; got nothing"));
5645               return NULL;
5646             }
5647         }
5648
5649       /* Now *l must be either ',' or END_OF_INSN.  */
5650       if (*l == ',')
5651         {
5652           if (*++l == END_OF_INSN)
5653             {
5654               /* Just skip it, if it's \n complain.  */
5655               goto expecting_operand_after_comma;
5656             }
5657           expecting_operand = 1;
5658         }
5659     }
5660   return l;
5661 }
5662
5663 static void
5664 swap_2_operands (unsigned int xchg1, unsigned int xchg2)
5665 {
5666   union i386_op temp_op;
5667   i386_operand_type temp_type;
5668   unsigned int temp_flags;
5669   enum bfd_reloc_code_real temp_reloc;
5670
5671   temp_type = i.types[xchg2];
5672   i.types[xchg2] = i.types[xchg1];
5673   i.types[xchg1] = temp_type;
5674
5675   temp_flags = i.flags[xchg2];
5676   i.flags[xchg2] = i.flags[xchg1];
5677   i.flags[xchg1] = temp_flags;
5678
5679   temp_op = i.op[xchg2];
5680   i.op[xchg2] = i.op[xchg1];
5681   i.op[xchg1] = temp_op;
5682
5683   temp_reloc = i.reloc[xchg2];
5684   i.reloc[xchg2] = i.reloc[xchg1];
5685   i.reloc[xchg1] = temp_reloc;
5686
5687   if (i.mask.reg)
5688     {
5689       if (i.mask.operand == xchg1)
5690         i.mask.operand = xchg2;
5691       else if (i.mask.operand == xchg2)
5692         i.mask.operand = xchg1;
5693     }
5694   if (i.broadcast.type)
5695     {
5696       if (i.broadcast.operand == xchg1)
5697         i.broadcast.operand = xchg2;
5698       else if (i.broadcast.operand == xchg2)
5699         i.broadcast.operand = xchg1;
5700     }
5701   if (i.rounding.type != rc_none)
5702     {
5703       if (i.rounding.operand == xchg1)
5704         i.rounding.operand = xchg2;
5705       else if (i.rounding.operand == xchg2)
5706         i.rounding.operand = xchg1;
5707     }
5708 }
5709
5710 static void
5711 swap_operands (void)
5712 {
5713   switch (i.operands)
5714     {
5715     case 5:
5716     case 4:
5717       swap_2_operands (1, i.operands - 2);
5718       /* Fall through.  */
5719     case 3:
5720     case 2:
5721       swap_2_operands (0, i.operands - 1);
5722       break;
5723     default:
5724       abort ();
5725     }
5726
5727   if (i.mem_operands == 2)
5728     {
5729       const reg_entry *temp_seg;
5730       temp_seg = i.seg[0];
5731       i.seg[0] = i.seg[1];
5732       i.seg[1] = temp_seg;
5733     }
5734 }
5735
5736 /* Try to ensure constant immediates are represented in the smallest
5737    opcode possible.  */
5738 static void
5739 optimize_imm (void)
5740 {
5741   char guess_suffix = 0;
5742   int op;
5743
5744   if (i.suffix)
5745     guess_suffix = i.suffix;
5746   else if (i.reg_operands)
5747     {
5748       /* Figure out a suffix from the last register operand specified.
5749          We can't do this properly yet, i.e. excluding special register
5750          instances, but the following works for instructions with
5751          immediates.  In any case, we can't set i.suffix yet.  */
5752       for (op = i.operands; --op >= 0;)
5753         if (i.types[op].bitfield.class != Reg)
5754           continue;
5755         else if (i.types[op].bitfield.byte)
5756           {
5757             guess_suffix = BYTE_MNEM_SUFFIX;
5758             break;
5759           }
5760         else if (i.types[op].bitfield.word)
5761           {
5762             guess_suffix = WORD_MNEM_SUFFIX;
5763             break;
5764           }
5765         else if (i.types[op].bitfield.dword)
5766           {
5767             guess_suffix = LONG_MNEM_SUFFIX;
5768             break;
5769           }
5770         else if (i.types[op].bitfield.qword)
5771           {
5772             guess_suffix = QWORD_MNEM_SUFFIX;
5773             break;
5774           }
5775     }
5776   else if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
5777     guess_suffix = WORD_MNEM_SUFFIX;
5778
5779   for (op = i.operands; --op >= 0;)
5780     if (operand_type_check (i.types[op], imm))
5781       {
5782         switch (i.op[op].imms->X_op)
5783           {
5784           case O_constant:
5785             /* If a suffix is given, this operand may be shortened.  */
5786             switch (guess_suffix)
5787               {
5788               case LONG_MNEM_SUFFIX:
5789                 i.types[op].bitfield.imm32 = 1;
5790                 i.types[op].bitfield.imm64 = 1;
5791                 break;
5792               case WORD_MNEM_SUFFIX:
5793                 i.types[op].bitfield.imm16 = 1;
5794                 i.types[op].bitfield.imm32 = 1;
5795                 i.types[op].bitfield.imm32s = 1;
5796                 i.types[op].bitfield.imm64 = 1;
5797                 break;
5798               case BYTE_MNEM_SUFFIX:
5799                 i.types[op].bitfield.imm8 = 1;
5800                 i.types[op].bitfield.imm8s = 1;
5801                 i.types[op].bitfield.imm16 = 1;
5802                 i.types[op].bitfield.imm32 = 1;
5803                 i.types[op].bitfield.imm32s = 1;
5804                 i.types[op].bitfield.imm64 = 1;
5805                 break;
5806               }
5807
5808             /* If this operand is at most 16 bits, convert it
5809                to a signed 16 bit number before trying to see
5810                whether it will fit in an even smaller size.
5811                This allows a 16-bit operand such as $0xffe0 to
5812                be recognised as within Imm8S range.  */
5813             if ((i.types[op].bitfield.imm16)
5814                 && fits_in_unsigned_word (i.op[op].imms->X_add_number))
5815               {
5816                 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
5817                                                 ^ 0x8000) - 0x8000);
5818               }
5819 #ifdef BFD64
5820             /* Store 32-bit immediate in 64-bit for 64-bit BFD.  */
5821             if ((i.types[op].bitfield.imm32)
5822                 && fits_in_unsigned_long (i.op[op].imms->X_add_number))
5823               {
5824                 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
5825                                                 ^ ((offsetT) 1 << 31))
5826                                                - ((offsetT) 1 << 31));
5827               }
5828 #endif
5829             i.types[op]
5830               = operand_type_or (i.types[op],
5831                                  smallest_imm_type (i.op[op].imms->X_add_number));
5832
5833             /* We must avoid matching of Imm32 templates when 64bit
5834                only immediate is available.  */
5835             if (guess_suffix == QWORD_MNEM_SUFFIX)
5836               i.types[op].bitfield.imm32 = 0;
5837             break;
5838
5839           case O_absent:
5840           case O_register:
5841             abort ();
5842
5843             /* Symbols and expressions.  */
5844           default:
5845             /* Convert symbolic operand to proper sizes for matching, but don't
5846                prevent matching a set of insns that only supports sizes other
5847                than those matching the insn suffix.  */
5848             {
5849               i386_operand_type mask, allowed;
5850               const insn_template *t = current_templates->start;
5851
5852               operand_type_set (&mask, 0);
5853               allowed = t->operand_types[op];
5854
5855               while (++t < current_templates->end)
5856                 {
5857                   allowed = operand_type_and (allowed, anyimm);
5858                   allowed = operand_type_or (allowed, t->operand_types[op]);
5859                 }
5860               switch (guess_suffix)
5861                 {
5862                 case QWORD_MNEM_SUFFIX:
5863                   mask.bitfield.imm64 = 1;
5864                   mask.bitfield.imm32s = 1;
5865                   break;
5866                 case LONG_MNEM_SUFFIX:
5867                   mask.bitfield.imm32 = 1;
5868                   break;
5869                 case WORD_MNEM_SUFFIX:
5870                   mask.bitfield.imm16 = 1;
5871                   break;
5872                 case BYTE_MNEM_SUFFIX:
5873                   mask.bitfield.imm8 = 1;
5874                   break;
5875                 default:
5876                   break;
5877                 }
5878               allowed = operand_type_and (mask, allowed);
5879               if (!operand_type_all_zero (&allowed))
5880                 i.types[op] = operand_type_and (i.types[op], mask);
5881             }
5882             break;
5883           }
5884       }
5885 }
5886
5887 /* Try to use the smallest displacement type too.  */
5888 static void
5889 optimize_disp (void)
5890 {
5891   int op;
5892
5893   for (op = i.operands; --op >= 0;)
5894     if (operand_type_check (i.types[op], disp))
5895       {
5896         if (i.op[op].disps->X_op == O_constant)
5897           {
5898             offsetT op_disp = i.op[op].disps->X_add_number;
5899
5900             if (!op_disp && i.types[op].bitfield.baseindex)
5901               {
5902                 i.types[op] = operand_type_and_not (i.types[op], anydisp);
5903                 i.op[op].disps = NULL;
5904                 i.disp_operands--;
5905                 continue;
5906               }
5907
5908             if (i.types[op].bitfield.disp16
5909                 && fits_in_unsigned_word (op_disp))
5910               {
5911                 /* If this operand is at most 16 bits, convert
5912                    to a signed 16 bit number and don't use 64bit
5913                    displacement.  */
5914                 op_disp = ((op_disp ^ 0x8000) - 0x8000);
5915                 i.types[op].bitfield.disp64 = 0;
5916               }
5917
5918 #ifdef BFD64
5919             /* Optimize 64-bit displacement to 32-bit for 64-bit BFD.  */
5920             if ((i.types[op].bitfield.disp32
5921                  || (flag_code == CODE_64BIT
5922                      && want_disp32 (current_templates->start)))
5923                 && fits_in_unsigned_long (op_disp))
5924               {
5925                 /* If this operand is at most 32 bits, convert
5926                    to a signed 32 bit number and don't use 64bit
5927                    displacement.  */
5928                 op_disp = (op_disp ^ ((offsetT) 1 << 31)) - ((addressT) 1 << 31);
5929                 i.types[op].bitfield.disp64 = 0;
5930                 i.types[op].bitfield.disp32 = 1;
5931               }
5932
5933             if (flag_code == CODE_64BIT && fits_in_signed_long (op_disp))
5934               {
5935                 i.types[op].bitfield.disp64 = 0;
5936                 i.types[op].bitfield.disp32s = 1;
5937               }
5938 #endif
5939             if ((i.types[op].bitfield.disp32
5940                  || i.types[op].bitfield.disp32s
5941                  || i.types[op].bitfield.disp16)
5942                 && fits_in_disp8 (op_disp))
5943               i.types[op].bitfield.disp8 = 1;
5944
5945             i.op[op].disps->X_add_number = op_disp;
5946           }
5947         else if (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
5948                  || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL)
5949           {
5950             fix_new_exp (frag_now, frag_more (0) - frag_now->fr_literal, 0,
5951                          i.op[op].disps, 0, i.reloc[op]);
5952             i.types[op] = operand_type_and_not (i.types[op], anydisp);
5953           }
5954         else
5955           /* We only support 64bit displacement on constants.  */
5956           i.types[op].bitfield.disp64 = 0;
5957       }
5958 }
5959
5960 /* Return 1 if there is a match in broadcast bytes between operand
5961    GIVEN and instruction template T.   */
5962
5963 static INLINE int
5964 match_broadcast_size (const insn_template *t, unsigned int given)
5965 {
5966   return ((t->opcode_modifier.broadcast == BYTE_BROADCAST
5967            && i.types[given].bitfield.byte)
5968           || (t->opcode_modifier.broadcast == WORD_BROADCAST
5969               && i.types[given].bitfield.word)
5970           || (t->opcode_modifier.broadcast == DWORD_BROADCAST
5971               && i.types[given].bitfield.dword)
5972           || (t->opcode_modifier.broadcast == QWORD_BROADCAST
5973               && i.types[given].bitfield.qword));
5974 }
5975
5976 /* Check if operands are valid for the instruction.  */
5977
5978 static int
5979 check_VecOperands (const insn_template *t)
5980 {
5981   unsigned int op;
5982   i386_cpu_flags cpu;
5983
5984   /* Templates allowing for ZMMword as well as YMMword and/or XMMword for
5985      any one operand are implicity requiring AVX512VL support if the actual
5986      operand size is YMMword or XMMword.  Since this function runs after
5987      template matching, there's no need to check for YMMword/XMMword in
5988      the template.  */
5989   cpu = cpu_flags_and (t->cpu_flags, avx512);
5990   if (!cpu_flags_all_zero (&cpu)
5991       && !t->cpu_flags.bitfield.cpuavx512vl
5992       && !cpu_arch_flags.bitfield.cpuavx512vl)
5993     {
5994       for (op = 0; op < t->operands; ++op)
5995         {
5996           if (t->operand_types[op].bitfield.zmmword
5997               && (i.types[op].bitfield.ymmword
5998                   || i.types[op].bitfield.xmmword))
5999             {
6000               i.error = unsupported;
6001               return 1;
6002             }
6003         }
6004     }
6005
6006   /* Somewhat similarly, templates specifying both AVX and AVX2 are
6007      requiring AVX2 support if the actual operand size is YMMword.  */
6008   if (t->cpu_flags.bitfield.cpuavx
6009       && t->cpu_flags.bitfield.cpuavx2
6010       && !cpu_arch_flags.bitfield.cpuavx2)
6011     {
6012       for (op = 0; op < t->operands; ++op)
6013         {
6014           if (t->operand_types[op].bitfield.xmmword
6015               && i.types[op].bitfield.ymmword)
6016             {
6017               i.error = unsupported;
6018               return 1;
6019             }
6020         }
6021     }
6022
6023   /* Without VSIB byte, we can't have a vector register for index.  */
6024   if (!t->opcode_modifier.sib
6025       && i.index_reg
6026       && (i.index_reg->reg_type.bitfield.xmmword
6027           || i.index_reg->reg_type.bitfield.ymmword
6028           || i.index_reg->reg_type.bitfield.zmmword))
6029     {
6030       i.error = unsupported_vector_index_register;
6031       return 1;
6032     }
6033
6034   /* Check if default mask is allowed.  */
6035   if (t->opcode_modifier.nodefmask
6036       && (!i.mask.reg || i.mask.reg->reg_num == 0))
6037     {
6038       i.error = no_default_mask;
6039       return 1;
6040     }
6041
6042   /* For VSIB byte, we need a vector register for index, and all vector
6043      registers must be distinct.  */
6044   if (t->opcode_modifier.sib && t->opcode_modifier.sib != SIBMEM)
6045     {
6046       if (!i.index_reg
6047           || !((t->opcode_modifier.sib == VECSIB128
6048                 && i.index_reg->reg_type.bitfield.xmmword)
6049                || (t->opcode_modifier.sib == VECSIB256
6050                    && i.index_reg->reg_type.bitfield.ymmword)
6051                || (t->opcode_modifier.sib == VECSIB512
6052                    && i.index_reg->reg_type.bitfield.zmmword)))
6053       {
6054         i.error = invalid_vsib_address;
6055         return 1;
6056       }
6057
6058       gas_assert (i.reg_operands == 2 || i.mask.reg);
6059       if (i.reg_operands == 2 && !i.mask.reg)
6060         {
6061           gas_assert (i.types[0].bitfield.class == RegSIMD);
6062           gas_assert (i.types[0].bitfield.xmmword
6063                       || i.types[0].bitfield.ymmword);
6064           gas_assert (i.types[2].bitfield.class == RegSIMD);
6065           gas_assert (i.types[2].bitfield.xmmword
6066                       || i.types[2].bitfield.ymmword);
6067           if (operand_check == check_none)
6068             return 0;
6069           if (register_number (i.op[0].regs)
6070               != register_number (i.index_reg)
6071               && register_number (i.op[2].regs)
6072                  != register_number (i.index_reg)
6073               && register_number (i.op[0].regs)
6074                  != register_number (i.op[2].regs))
6075             return 0;
6076           if (operand_check == check_error)
6077             {
6078               i.error = invalid_vector_register_set;
6079               return 1;
6080             }
6081           as_warn (_("mask, index, and destination registers should be distinct"));
6082         }
6083       else if (i.reg_operands == 1 && i.mask.reg)
6084         {
6085           if (i.types[1].bitfield.class == RegSIMD
6086               && (i.types[1].bitfield.xmmword
6087                   || i.types[1].bitfield.ymmword
6088                   || i.types[1].bitfield.zmmword)
6089               && (register_number (i.op[1].regs)
6090                   == register_number (i.index_reg)))
6091             {
6092               if (operand_check == check_error)
6093                 {
6094                   i.error = invalid_vector_register_set;
6095                   return 1;
6096                 }
6097               if (operand_check != check_none)
6098                 as_warn (_("index and destination registers should be distinct"));
6099             }
6100         }
6101     }
6102
6103   /* For AMX instructions with 3 TMM register operands, all operands
6104       must be distinct.  */
6105   if (i.reg_operands == 3
6106       && t->operand_types[0].bitfield.tmmword
6107       && (i.op[0].regs == i.op[1].regs
6108           || i.op[0].regs == i.op[2].regs
6109           || i.op[1].regs == i.op[2].regs))
6110     {
6111       i.error = invalid_tmm_register_set;
6112       return 1;
6113     }
6114
6115   /* For some special instructions require that destination must be distinct
6116      from source registers.  */
6117   if (t->opcode_modifier.distinctdest)
6118     {
6119       unsigned int dest_reg = i.operands - 1;
6120
6121       know (i.operands >= 3);
6122
6123       /* #UD if dest_reg == src1_reg or dest_reg == src2_reg.  */
6124       if (i.op[dest_reg - 1].regs == i.op[dest_reg].regs
6125           || (i.reg_operands > 2
6126               && i.op[dest_reg - 2].regs == i.op[dest_reg].regs))
6127         {
6128           i.error = invalid_dest_and_src_register_set;
6129           return 1;
6130         }
6131     }
6132
6133   /* Check if broadcast is supported by the instruction and is applied
6134      to the memory operand.  */
6135   if (i.broadcast.type)
6136     {
6137       i386_operand_type type, overlap;
6138
6139       /* Check if specified broadcast is supported in this instruction,
6140          and its broadcast bytes match the memory operand.  */
6141       op = i.broadcast.operand;
6142       if (!t->opcode_modifier.broadcast
6143           || !(i.flags[op] & Operand_Mem)
6144           || (!i.types[op].bitfield.unspecified
6145               && !match_broadcast_size (t, op)))
6146         {
6147         bad_broadcast:
6148           i.error = unsupported_broadcast;
6149           return 1;
6150         }
6151
6152       i.broadcast.bytes = ((1 << (t->opcode_modifier.broadcast - 1))
6153                            * i.broadcast.type);
6154       operand_type_set (&type, 0);
6155       switch (i.broadcast.bytes)
6156         {
6157         case 2:
6158           type.bitfield.word = 1;
6159           break;
6160         case 4:
6161           type.bitfield.dword = 1;
6162           break;
6163         case 8:
6164           type.bitfield.qword = 1;
6165           break;
6166         case 16:
6167           type.bitfield.xmmword = 1;
6168           break;
6169         case 32:
6170           type.bitfield.ymmword = 1;
6171           break;
6172         case 64:
6173           type.bitfield.zmmword = 1;
6174           break;
6175         default:
6176           goto bad_broadcast;
6177         }
6178
6179       overlap = operand_type_and (type, t->operand_types[op]);
6180       if (t->operand_types[op].bitfield.class == RegSIMD
6181           && t->operand_types[op].bitfield.byte
6182              + t->operand_types[op].bitfield.word
6183              + t->operand_types[op].bitfield.dword
6184              + t->operand_types[op].bitfield.qword > 1)
6185         {
6186           overlap.bitfield.xmmword = 0;
6187           overlap.bitfield.ymmword = 0;
6188           overlap.bitfield.zmmword = 0;
6189         }
6190       if (operand_type_all_zero (&overlap))
6191           goto bad_broadcast;
6192
6193       if (t->opcode_modifier.checkregsize)
6194         {
6195           unsigned int j;
6196
6197           type.bitfield.baseindex = 1;
6198           for (j = 0; j < i.operands; ++j)
6199             {
6200               if (j != op
6201                   && !operand_type_register_match(i.types[j],
6202                                                   t->operand_types[j],
6203                                                   type,
6204                                                   t->operand_types[op]))
6205                 goto bad_broadcast;
6206             }
6207         }
6208     }
6209   /* If broadcast is supported in this instruction, we need to check if
6210      operand of one-element size isn't specified without broadcast.  */
6211   else if (t->opcode_modifier.broadcast && i.mem_operands)
6212     {
6213       /* Find memory operand.  */
6214       for (op = 0; op < i.operands; op++)
6215         if (i.flags[op] & Operand_Mem)
6216           break;
6217       gas_assert (op < i.operands);
6218       /* Check size of the memory operand.  */
6219       if (match_broadcast_size (t, op))
6220         {
6221           i.error = broadcast_needed;
6222           return 1;
6223         }
6224     }
6225   else
6226     op = MAX_OPERANDS - 1; /* Avoid uninitialized variable warning.  */
6227
6228   /* Check if requested masking is supported.  */
6229   if (i.mask.reg)
6230     {
6231       switch (t->opcode_modifier.masking)
6232         {
6233         case BOTH_MASKING:
6234           break;
6235         case MERGING_MASKING:
6236           if (i.mask.zeroing)
6237             {
6238         case 0:
6239               i.error = unsupported_masking;
6240               return 1;
6241             }
6242           break;
6243         case DYNAMIC_MASKING:
6244           /* Memory destinations allow only merging masking.  */
6245           if (i.mask.zeroing && i.mem_operands)
6246             {
6247               /* Find memory operand.  */
6248               for (op = 0; op < i.operands; op++)
6249                 if (i.flags[op] & Operand_Mem)
6250                   break;
6251               gas_assert (op < i.operands);
6252               if (op == i.operands - 1)
6253                 {
6254                   i.error = unsupported_masking;
6255                   return 1;
6256                 }
6257             }
6258           break;
6259         default:
6260           abort ();
6261         }
6262     }
6263
6264   /* Check if masking is applied to dest operand.  */
6265   if (i.mask.reg && (i.mask.operand != i.operands - 1))
6266     {
6267       i.error = mask_not_on_destination;
6268       return 1;
6269     }
6270
6271   /* Check RC/SAE.  */
6272   if (i.rounding.type != rc_none)
6273     {
6274       if (!t->opcode_modifier.sae
6275           || (i.rounding.type != saeonly && !t->opcode_modifier.staticrounding))
6276         {
6277           i.error = unsupported_rc_sae;
6278           return 1;
6279         }
6280       /* If the instruction has several immediate operands and one of
6281          them is rounding, the rounding operand should be the last
6282          immediate operand.  */
6283       if (i.imm_operands > 1
6284           && i.rounding.operand != i.imm_operands - 1)
6285         {
6286           i.error = rc_sae_operand_not_last_imm;
6287           return 1;
6288         }
6289     }
6290
6291   /* Check the special Imm4 cases; must be the first operand.  */
6292   if (t->cpu_flags.bitfield.cpuxop && t->operands == 5)
6293     {
6294       if (i.op[0].imms->X_op != O_constant
6295           || !fits_in_imm4 (i.op[0].imms->X_add_number))
6296         {
6297           i.error = bad_imm4;
6298           return 1;
6299         }
6300
6301       /* Turn off Imm<N> so that update_imm won't complain.  */
6302       operand_type_set (&i.types[0], 0);
6303     }
6304
6305   /* Check vector Disp8 operand.  */
6306   if (t->opcode_modifier.disp8memshift
6307       && i.disp_encoding != disp_encoding_32bit)
6308     {
6309       if (i.broadcast.type)
6310         i.memshift = t->opcode_modifier.broadcast - 1;
6311       else if (t->opcode_modifier.disp8memshift != DISP8_SHIFT_VL)
6312         i.memshift = t->opcode_modifier.disp8memshift;
6313       else
6314         {
6315           const i386_operand_type *type = NULL, *fallback = NULL;
6316
6317           i.memshift = 0;
6318           for (op = 0; op < i.operands; op++)
6319             if (i.flags[op] & Operand_Mem)
6320               {
6321                 if (t->opcode_modifier.evex == EVEXLIG)
6322                   i.memshift = 2 + (i.suffix == QWORD_MNEM_SUFFIX);
6323                 else if (t->operand_types[op].bitfield.xmmword
6324                          + t->operand_types[op].bitfield.ymmword
6325                          + t->operand_types[op].bitfield.zmmword <= 1)
6326                   type = &t->operand_types[op];
6327                 else if (!i.types[op].bitfield.unspecified)
6328                   type = &i.types[op];
6329                 else /* Ambiguities get resolved elsewhere.  */
6330                   fallback = &t->operand_types[op];
6331               }
6332             else if (i.types[op].bitfield.class == RegSIMD
6333                      && t->opcode_modifier.evex != EVEXLIG)
6334               {
6335                 if (i.types[op].bitfield.zmmword)
6336                   i.memshift = 6;
6337                 else if (i.types[op].bitfield.ymmword && i.memshift < 5)
6338                   i.memshift = 5;
6339                 else if (i.types[op].bitfield.xmmword && i.memshift < 4)
6340                   i.memshift = 4;
6341               }
6342
6343           if (!type && !i.memshift)
6344             type = fallback;
6345           if (type)
6346             {
6347               if (type->bitfield.zmmword)
6348                 i.memshift = 6;
6349               else if (type->bitfield.ymmword)
6350                 i.memshift = 5;
6351               else if (type->bitfield.xmmword)
6352                 i.memshift = 4;
6353             }
6354
6355           /* For the check in fits_in_disp8().  */
6356           if (i.memshift == 0)
6357             i.memshift = -1;
6358         }
6359
6360       for (op = 0; op < i.operands; op++)
6361         if (operand_type_check (i.types[op], disp)
6362             && i.op[op].disps->X_op == O_constant)
6363           {
6364             if (fits_in_disp8 (i.op[op].disps->X_add_number))
6365               {
6366                 i.types[op].bitfield.disp8 = 1;
6367                 return 0;
6368               }
6369             i.types[op].bitfield.disp8 = 0;
6370           }
6371     }
6372
6373   i.memshift = 0;
6374
6375   return 0;
6376 }
6377
6378 /* Check if encoding requirements are met by the instruction.  */
6379
6380 static int
6381 VEX_check_encoding (const insn_template *t)
6382 {
6383   if (i.vec_encoding == vex_encoding_error)
6384     {
6385       i.error = unsupported;
6386       return 1;
6387     }
6388
6389   if (i.vec_encoding == vex_encoding_evex)
6390     {
6391       /* This instruction must be encoded with EVEX prefix.  */
6392       if (!is_evex_encoding (t))
6393         {
6394           i.error = unsupported;
6395           return 1;
6396         }
6397       return 0;
6398     }
6399
6400   if (!t->opcode_modifier.vex)
6401     {
6402       /* This instruction template doesn't have VEX prefix.  */
6403       if (i.vec_encoding != vex_encoding_default)
6404         {
6405           i.error = unsupported;
6406           return 1;
6407         }
6408       return 0;
6409     }
6410
6411   return 0;
6412 }
6413
6414 static const insn_template *
6415 match_template (char mnem_suffix)
6416 {
6417   /* Points to template once we've found it.  */
6418   const insn_template *t;
6419   i386_operand_type overlap0, overlap1, overlap2, overlap3;
6420   i386_operand_type overlap4;
6421   unsigned int found_reverse_match;
6422   i386_opcode_modifier suffix_check;
6423   i386_operand_type operand_types [MAX_OPERANDS];
6424   int addr_prefix_disp;
6425   unsigned int j, size_match, check_register;
6426   enum i386_error specific_error = 0;
6427
6428 #if MAX_OPERANDS != 5
6429 # error "MAX_OPERANDS must be 5."
6430 #endif
6431
6432   found_reverse_match = 0;
6433   addr_prefix_disp = -1;
6434
6435   /* Prepare for mnemonic suffix check.  */
6436   memset (&suffix_check, 0, sizeof (suffix_check));
6437   switch (mnem_suffix)
6438     {
6439     case BYTE_MNEM_SUFFIX:
6440       suffix_check.no_bsuf = 1;
6441       break;
6442     case WORD_MNEM_SUFFIX:
6443       suffix_check.no_wsuf = 1;
6444       break;
6445     case SHORT_MNEM_SUFFIX:
6446       suffix_check.no_ssuf = 1;
6447       break;
6448     case LONG_MNEM_SUFFIX:
6449       suffix_check.no_lsuf = 1;
6450       break;
6451     case QWORD_MNEM_SUFFIX:
6452       suffix_check.no_qsuf = 1;
6453       break;
6454     default:
6455       /* NB: In Intel syntax, normally we can check for memory operand
6456          size when there is no mnemonic suffix.  But jmp and call have
6457          2 different encodings with Dword memory operand size, one with
6458          No_ldSuf and the other without.  i.suffix is set to
6459          LONG_DOUBLE_MNEM_SUFFIX to skip the one with No_ldSuf.  */
6460       if (i.suffix == LONG_DOUBLE_MNEM_SUFFIX)
6461         suffix_check.no_ldsuf = 1;
6462     }
6463
6464   /* Must have right number of operands.  */
6465   i.error = number_of_operands_mismatch;
6466
6467   for (t = current_templates->start; t < current_templates->end; t++)
6468     {
6469       addr_prefix_disp = -1;
6470       found_reverse_match = 0;
6471
6472       if (i.operands != t->operands)
6473         continue;
6474
6475       /* Check processor support.  */
6476       i.error = unsupported;
6477       if (cpu_flags_match (t) != CPU_FLAGS_PERFECT_MATCH)
6478         continue;
6479
6480       /* Check Pseudo Prefix.  */
6481       i.error = unsupported;
6482       if (t->opcode_modifier.pseudovexprefix
6483           && !(i.vec_encoding == vex_encoding_vex
6484               || i.vec_encoding == vex_encoding_vex3))
6485         continue;
6486
6487       /* Check AT&T mnemonic.   */
6488       i.error = unsupported_with_intel_mnemonic;
6489       if (intel_mnemonic && t->opcode_modifier.attmnemonic)
6490         continue;
6491
6492       /* Check AT&T/Intel syntax.  */
6493       i.error = unsupported_syntax;
6494       if ((intel_syntax && t->opcode_modifier.attsyntax)
6495           || (!intel_syntax && t->opcode_modifier.intelsyntax))
6496         continue;
6497
6498       /* Check Intel64/AMD64 ISA.   */
6499       switch (isa64)
6500         {
6501         default:
6502           /* Default: Don't accept Intel64.  */
6503           if (t->opcode_modifier.isa64 == INTEL64)
6504             continue;
6505           break;
6506         case amd64:
6507           /* -mamd64: Don't accept Intel64 and Intel64 only.  */
6508           if (t->opcode_modifier.isa64 >= INTEL64)
6509             continue;
6510           break;
6511         case intel64:
6512           /* -mintel64: Don't accept AMD64.  */
6513           if (t->opcode_modifier.isa64 == AMD64 && flag_code == CODE_64BIT)
6514             continue;
6515           break;
6516         }
6517
6518       /* Check the suffix.  */
6519       i.error = invalid_instruction_suffix;
6520       if ((t->opcode_modifier.no_bsuf && suffix_check.no_bsuf)
6521           || (t->opcode_modifier.no_wsuf && suffix_check.no_wsuf)
6522           || (t->opcode_modifier.no_lsuf && suffix_check.no_lsuf)
6523           || (t->opcode_modifier.no_ssuf && suffix_check.no_ssuf)
6524           || (t->opcode_modifier.no_qsuf && suffix_check.no_qsuf)
6525           || (t->opcode_modifier.no_ldsuf && suffix_check.no_ldsuf))
6526         continue;
6527
6528       size_match = operand_size_match (t);
6529       if (!size_match)
6530         continue;
6531
6532       /* This is intentionally not
6533
6534          if (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE))
6535
6536          as the case of a missing * on the operand is accepted (perhaps with
6537          a warning, issued further down).  */
6538       if (i.jumpabsolute && t->opcode_modifier.jump != JUMP_ABSOLUTE)
6539         {
6540           i.error = operand_type_mismatch;
6541           continue;
6542         }
6543
6544       for (j = 0; j < MAX_OPERANDS; j++)
6545         operand_types[j] = t->operand_types[j];
6546
6547       /* In general, don't allow
6548          - 64-bit operands outside of 64-bit mode,
6549          - 32-bit operands on pre-386.  */
6550       j = i.imm_operands + (t->operands > i.imm_operands + 1);
6551       if (((i.suffix == QWORD_MNEM_SUFFIX
6552             && flag_code != CODE_64BIT
6553             && !(t->opcode_modifier.opcodespace == SPACE_0F
6554                  && t->base_opcode == 0xc7
6555                  && t->opcode_modifier.opcodeprefix == PREFIX_NONE
6556                  && t->extension_opcode == 1) /* cmpxchg8b */)
6557            || (i.suffix == LONG_MNEM_SUFFIX
6558                && !cpu_arch_flags.bitfield.cpui386))
6559           && (intel_syntax
6560               ? (t->opcode_modifier.mnemonicsize != IGNORESIZE
6561                  && !intel_float_operand (t->name))
6562               : intel_float_operand (t->name) != 2)
6563           && (t->operands == i.imm_operands
6564               || (operand_types[i.imm_operands].bitfield.class != RegMMX
6565                && operand_types[i.imm_operands].bitfield.class != RegSIMD
6566                && operand_types[i.imm_operands].bitfield.class != RegMask)
6567               || (operand_types[j].bitfield.class != RegMMX
6568                   && operand_types[j].bitfield.class != RegSIMD
6569                   && operand_types[j].bitfield.class != RegMask))
6570           && !t->opcode_modifier.sib)
6571         continue;
6572
6573       /* Do not verify operands when there are none.  */
6574       if (!t->operands)
6575         {
6576           if (VEX_check_encoding (t))
6577             {
6578               specific_error = i.error;
6579               continue;
6580             }
6581
6582           /* We've found a match; break out of loop.  */
6583           break;
6584         }
6585
6586       if (!t->opcode_modifier.jump
6587           || t->opcode_modifier.jump == JUMP_ABSOLUTE)
6588         {
6589           /* There should be only one Disp operand.  */
6590           for (j = 0; j < MAX_OPERANDS; j++)
6591             if (operand_type_check (operand_types[j], disp))
6592               break;
6593           if (j < MAX_OPERANDS)
6594             {
6595               bool override = (i.prefix[ADDR_PREFIX] != 0);
6596
6597               addr_prefix_disp = j;
6598
6599               /* Address size prefix will turn Disp64/Disp32S/Disp32/Disp16
6600                  operand into Disp32/Disp32/Disp16/Disp32 operand.  */
6601               switch (flag_code)
6602                 {
6603                 case CODE_16BIT:
6604                   override = !override;
6605                   /* Fall through.  */
6606                 case CODE_32BIT:
6607                   if (operand_types[j].bitfield.disp32
6608                       && operand_types[j].bitfield.disp16)
6609                     {
6610                       operand_types[j].bitfield.disp16 = override;
6611                       operand_types[j].bitfield.disp32 = !override;
6612                     }
6613                   operand_types[j].bitfield.disp32s = 0;
6614                   operand_types[j].bitfield.disp64 = 0;
6615                   break;
6616
6617                 case CODE_64BIT:
6618                   if (operand_types[j].bitfield.disp32s
6619                       || operand_types[j].bitfield.disp64)
6620                     {
6621                       operand_types[j].bitfield.disp64 &= !override;
6622                       operand_types[j].bitfield.disp32s &= !override;
6623                       operand_types[j].bitfield.disp32 = override;
6624                     }
6625                   operand_types[j].bitfield.disp16 = 0;
6626                   break;
6627                 }
6628             }
6629         }
6630
6631       switch (i.reloc[0])
6632         {
6633         case BFD_RELOC_386_GOT32:
6634           /* Force 0x8b encoding for "mov foo@GOT, %eax".  */
6635           if (t->base_opcode == 0xa0
6636               && t->opcode_modifier.opcodespace == SPACE_BASE)
6637             continue;
6638           break;
6639         case BFD_RELOC_386_TLS_GOTIE:
6640         case BFD_RELOC_386_TLS_LE_32:
6641         case BFD_RELOC_X86_64_GOTTPOFF:
6642         case BFD_RELOC_X86_64_TLSLD:
6643           /* Don't allow KMOV in TLS code sequences.  */
6644           if (t->opcode_modifier.vex)
6645             continue;
6646           break;
6647         default:
6648           break;
6649         }
6650
6651       /* We check register size if needed.  */
6652       if (t->opcode_modifier.checkregsize)
6653         {
6654           check_register = (1 << t->operands) - 1;
6655           if (i.broadcast.type)
6656             check_register &= ~(1 << i.broadcast.operand);
6657         }
6658       else
6659         check_register = 0;
6660
6661       overlap0 = operand_type_and (i.types[0], operand_types[0]);
6662       switch (t->operands)
6663         {
6664         case 1:
6665           if (!operand_type_match (overlap0, i.types[0]))
6666             continue;
6667           break;
6668         case 2:
6669           /* xchg %eax, %eax is a special case. It is an alias for nop
6670              only in 32bit mode and we can use opcode 0x90.  In 64bit
6671              mode, we can't use 0x90 for xchg %eax, %eax since it should
6672              zero-extend %eax to %rax.  */
6673           if (flag_code == CODE_64BIT
6674               && t->base_opcode == 0x90
6675               && t->opcode_modifier.opcodespace == SPACE_BASE
6676               && i.types[0].bitfield.instance == Accum
6677               && i.types[0].bitfield.dword
6678               && i.types[1].bitfield.instance == Accum
6679               && i.types[1].bitfield.dword)
6680             continue;
6681           /* xrelease mov %eax, <disp> is another special case. It must not
6682              match the accumulator-only encoding of mov.  */
6683           if (flag_code != CODE_64BIT
6684               && i.hle_prefix
6685               && t->base_opcode == 0xa0
6686               && t->opcode_modifier.opcodespace == SPACE_BASE
6687               && i.types[0].bitfield.instance == Accum
6688               && (i.flags[1] & Operand_Mem))
6689             continue;
6690           /* Fall through.  */
6691
6692         case 3:
6693           if (!(size_match & MATCH_STRAIGHT))
6694             goto check_reverse;
6695           /* Reverse direction of operands if swapping is possible in the first
6696              place (operands need to be symmetric) and
6697              - the load form is requested, and the template is a store form,
6698              - the store form is requested, and the template is a load form,
6699              - the non-default (swapped) form is requested.  */
6700           overlap1 = operand_type_and (operand_types[0], operand_types[1]);
6701           if (t->opcode_modifier.d && i.reg_operands == i.operands
6702               && !operand_type_all_zero (&overlap1))
6703             switch (i.dir_encoding)
6704               {
6705               case dir_encoding_load:
6706                 if (operand_type_check (operand_types[i.operands - 1], anymem)
6707                     || t->opcode_modifier.regmem)
6708                   goto check_reverse;
6709                 break;
6710
6711               case dir_encoding_store:
6712                 if (!operand_type_check (operand_types[i.operands - 1], anymem)
6713                     && !t->opcode_modifier.regmem)
6714                   goto check_reverse;
6715                 break;
6716
6717               case dir_encoding_swap:
6718                 goto check_reverse;
6719
6720               case dir_encoding_default:
6721                 break;
6722               }
6723           /* If we want store form, we skip the current load.  */
6724           if ((i.dir_encoding == dir_encoding_store
6725                || i.dir_encoding == dir_encoding_swap)
6726               && i.mem_operands == 0
6727               && t->opcode_modifier.load)
6728             continue;
6729           /* Fall through.  */
6730         case 4:
6731         case 5:
6732           overlap1 = operand_type_and (i.types[1], operand_types[1]);
6733           if (!operand_type_match (overlap0, i.types[0])
6734               || !operand_type_match (overlap1, i.types[1])
6735               || ((check_register & 3) == 3
6736                   && !operand_type_register_match (i.types[0],
6737                                                    operand_types[0],
6738                                                    i.types[1],
6739                                                    operand_types[1])))
6740             {
6741               /* Check if other direction is valid ...  */
6742               if (!t->opcode_modifier.d)
6743                 continue;
6744
6745             check_reverse:
6746               if (!(size_match & MATCH_REVERSE))
6747                 continue;
6748               /* Try reversing direction of operands.  */
6749               overlap0 = operand_type_and (i.types[0], operand_types[i.operands - 1]);
6750               overlap1 = operand_type_and (i.types[i.operands - 1], operand_types[0]);
6751               if (!operand_type_match (overlap0, i.types[0])
6752                   || !operand_type_match (overlap1, i.types[i.operands - 1])
6753                   || (check_register
6754                       && !operand_type_register_match (i.types[0],
6755                                                        operand_types[i.operands - 1],
6756                                                        i.types[i.operands - 1],
6757                                                        operand_types[0])))
6758                 {
6759                   /* Does not match either direction.  */
6760                   continue;
6761                 }
6762               /* found_reverse_match holds which of D or FloatR
6763                  we've found.  */
6764               if (!t->opcode_modifier.d)
6765                 found_reverse_match = 0;
6766               else if (operand_types[0].bitfield.tbyte)
6767                 found_reverse_match = Opcode_FloatD;
6768               else if (operand_types[0].bitfield.xmmword
6769                        || operand_types[i.operands - 1].bitfield.xmmword
6770                        || operand_types[0].bitfield.class == RegMMX
6771                        || operand_types[i.operands - 1].bitfield.class == RegMMX
6772                        || is_any_vex_encoding(t))
6773                 found_reverse_match = (t->base_opcode & 0xee) != 0x6e
6774                                       ? Opcode_SIMD_FloatD : Opcode_SIMD_IntD;
6775               else
6776                 found_reverse_match = Opcode_D;
6777               if (t->opcode_modifier.floatr)
6778                 found_reverse_match |= Opcode_FloatR;
6779             }
6780           else
6781             {
6782               /* Found a forward 2 operand match here.  */
6783               switch (t->operands)
6784                 {
6785                 case 5:
6786                   overlap4 = operand_type_and (i.types[4],
6787                                                operand_types[4]);
6788                   /* Fall through.  */
6789                 case 4:
6790                   overlap3 = operand_type_and (i.types[3],
6791                                                operand_types[3]);
6792                   /* Fall through.  */
6793                 case 3:
6794                   overlap2 = operand_type_and (i.types[2],
6795                                                operand_types[2]);
6796                   break;
6797                 }
6798
6799               switch (t->operands)
6800                 {
6801                 case 5:
6802                   if (!operand_type_match (overlap4, i.types[4])
6803                       || !operand_type_register_match (i.types[3],
6804                                                        operand_types[3],
6805                                                        i.types[4],
6806                                                        operand_types[4]))
6807                     continue;
6808                   /* Fall through.  */
6809                 case 4:
6810                   if (!operand_type_match (overlap3, i.types[3])
6811                       || ((check_register & 0xa) == 0xa
6812                           && !operand_type_register_match (i.types[1],
6813                                                             operand_types[1],
6814                                                             i.types[3],
6815                                                             operand_types[3]))
6816                       || ((check_register & 0xc) == 0xc
6817                           && !operand_type_register_match (i.types[2],
6818                                                             operand_types[2],
6819                                                             i.types[3],
6820                                                             operand_types[3])))
6821                     continue;
6822                   /* Fall through.  */
6823                 case 3:
6824                   /* Here we make use of the fact that there are no
6825                      reverse match 3 operand instructions.  */
6826                   if (!operand_type_match (overlap2, i.types[2])
6827                       || ((check_register & 5) == 5
6828                           && !operand_type_register_match (i.types[0],
6829                                                             operand_types[0],
6830                                                             i.types[2],
6831                                                             operand_types[2]))
6832                       || ((check_register & 6) == 6
6833                           && !operand_type_register_match (i.types[1],
6834                                                             operand_types[1],
6835                                                             i.types[2],
6836                                                             operand_types[2])))
6837                     continue;
6838                   break;
6839                 }
6840             }
6841           /* Found either forward/reverse 2, 3 or 4 operand match here:
6842              slip through to break.  */
6843         }
6844
6845       /* Check if vector operands are valid.  */
6846       if (check_VecOperands (t))
6847         {
6848           specific_error = i.error;
6849           continue;
6850         }
6851
6852       /* Check if VEX/EVEX encoding requirements can be satisfied.  */
6853       if (VEX_check_encoding (t))
6854         {
6855           specific_error = i.error;
6856           continue;
6857         }
6858
6859       /* We've found a match; break out of loop.  */
6860       break;
6861     }
6862
6863   if (t == current_templates->end)
6864     {
6865       /* We found no match.  */
6866       const char *err_msg;
6867       switch (specific_error ? specific_error : i.error)
6868         {
6869         default:
6870           abort ();
6871         case operand_size_mismatch:
6872           err_msg = _("operand size mismatch");
6873           break;
6874         case operand_type_mismatch:
6875           err_msg = _("operand type mismatch");
6876           break;
6877         case register_type_mismatch:
6878           err_msg = _("register type mismatch");
6879           break;
6880         case number_of_operands_mismatch:
6881           err_msg = _("number of operands mismatch");
6882           break;
6883         case invalid_instruction_suffix:
6884           err_msg = _("invalid instruction suffix");
6885           break;
6886         case bad_imm4:
6887           err_msg = _("constant doesn't fit in 4 bits");
6888           break;
6889         case unsupported_with_intel_mnemonic:
6890           err_msg = _("unsupported with Intel mnemonic");
6891           break;
6892         case unsupported_syntax:
6893           err_msg = _("unsupported syntax");
6894           break;
6895         case unsupported:
6896           as_bad (_("unsupported instruction `%s'"),
6897                   current_templates->start->name);
6898           return NULL;
6899         case invalid_sib_address:
6900           err_msg = _("invalid SIB address");
6901           break;
6902         case invalid_vsib_address:
6903           err_msg = _("invalid VSIB address");
6904           break;
6905         case invalid_vector_register_set:
6906           err_msg = _("mask, index, and destination registers must be distinct");
6907           break;
6908         case invalid_tmm_register_set:
6909           err_msg = _("all tmm registers must be distinct");
6910           break;
6911         case invalid_dest_and_src_register_set:
6912           err_msg = _("destination and source registers must be distinct");
6913           break;
6914         case unsupported_vector_index_register:
6915           err_msg = _("unsupported vector index register");
6916           break;
6917         case unsupported_broadcast:
6918           err_msg = _("unsupported broadcast");
6919           break;
6920         case broadcast_needed:
6921           err_msg = _("broadcast is needed for operand of such type");
6922           break;
6923         case unsupported_masking:
6924           err_msg = _("unsupported masking");
6925           break;
6926         case mask_not_on_destination:
6927           err_msg = _("mask not on destination operand");
6928           break;
6929         case no_default_mask:
6930           err_msg = _("default mask isn't allowed");
6931           break;
6932         case unsupported_rc_sae:
6933           err_msg = _("unsupported static rounding/sae");
6934           break;
6935         case rc_sae_operand_not_last_imm:
6936           if (intel_syntax)
6937             err_msg = _("RC/SAE operand must precede immediate operands");
6938           else
6939             err_msg = _("RC/SAE operand must follow immediate operands");
6940           break;
6941         case invalid_register_operand:
6942           err_msg = _("invalid register operand");
6943           break;
6944         }
6945       as_bad (_("%s for `%s'"), err_msg,
6946               current_templates->start->name);
6947       return NULL;
6948     }
6949
6950   if (!quiet_warnings)
6951     {
6952       if (!intel_syntax
6953           && (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE)))
6954         as_warn (_("indirect %s without `*'"), t->name);
6955
6956       if (t->opcode_modifier.isprefix
6957           && t->opcode_modifier.mnemonicsize == IGNORESIZE)
6958         {
6959           /* Warn them that a data or address size prefix doesn't
6960              affect assembly of the next line of code.  */
6961           as_warn (_("stand-alone `%s' prefix"), t->name);
6962         }
6963     }
6964
6965   /* Copy the template we found.  */
6966   install_template (t);
6967
6968   if (addr_prefix_disp != -1)
6969     i.tm.operand_types[addr_prefix_disp]
6970       = operand_types[addr_prefix_disp];
6971
6972   if (found_reverse_match)
6973     {
6974       /* If we found a reverse match we must alter the opcode direction
6975          bit and clear/flip the regmem modifier one.  found_reverse_match
6976          holds bits to change (different for int & float insns).  */
6977
6978       i.tm.base_opcode ^= found_reverse_match;
6979
6980       i.tm.operand_types[0] = operand_types[i.operands - 1];
6981       i.tm.operand_types[i.operands - 1] = operand_types[0];
6982
6983       /* Certain SIMD insns have their load forms specified in the opcode
6984          table, and hence we need to _set_ RegMem instead of clearing it.
6985          We need to avoid setting the bit though on insns like KMOVW.  */
6986       i.tm.opcode_modifier.regmem
6987         = i.tm.opcode_modifier.modrm && i.tm.opcode_modifier.d
6988           && i.tm.operands > 2U - i.tm.opcode_modifier.sse2avx
6989           && !i.tm.opcode_modifier.regmem;
6990     }
6991
6992   return t;
6993 }
6994
6995 static int
6996 check_string (void)
6997 {
6998   unsigned int es_op = i.tm.opcode_modifier.isstring - IS_STRING_ES_OP0;
6999   unsigned int op = i.tm.operand_types[0].bitfield.baseindex ? es_op : 0;
7000
7001   if (i.seg[op] != NULL && i.seg[op] != reg_es)
7002     {
7003       as_bad (_("`%s' operand %u must use `%ses' segment"),
7004               i.tm.name,
7005               intel_syntax ? i.tm.operands - es_op : es_op + 1,
7006               register_prefix);
7007       return 0;
7008     }
7009
7010   /* There's only ever one segment override allowed per instruction.
7011      This instruction possibly has a legal segment override on the
7012      second operand, so copy the segment to where non-string
7013      instructions store it, allowing common code.  */
7014   i.seg[op] = i.seg[1];
7015
7016   return 1;
7017 }
7018
7019 static int
7020 process_suffix (void)
7021 {
7022   bool is_crc32 = false, is_movx = false;
7023
7024   /* If matched instruction specifies an explicit instruction mnemonic
7025      suffix, use it.  */
7026   if (i.tm.opcode_modifier.size == SIZE16)
7027     i.suffix = WORD_MNEM_SUFFIX;
7028   else if (i.tm.opcode_modifier.size == SIZE32)
7029     i.suffix = LONG_MNEM_SUFFIX;
7030   else if (i.tm.opcode_modifier.size == SIZE64)
7031     i.suffix = QWORD_MNEM_SUFFIX;
7032   else if (i.reg_operands
7033            && (i.operands > 1 || i.types[0].bitfield.class == Reg)
7034            && !i.tm.opcode_modifier.addrprefixopreg)
7035     {
7036       unsigned int numop = i.operands;
7037
7038       /* MOVSX/MOVZX */
7039       is_movx = (i.tm.opcode_modifier.opcodespace == SPACE_0F
7040                  && (i.tm.base_opcode | 8) == 0xbe)
7041                 || (i.tm.opcode_modifier.opcodespace == SPACE_BASE
7042                     && i.tm.base_opcode == 0x63
7043                     && i.tm.cpu_flags.bitfield.cpu64);
7044
7045       /* CRC32 */
7046       is_crc32 = (i.tm.base_opcode == 0xf0
7047                   && i.tm.opcode_modifier.opcodespace == SPACE_0F38
7048                   && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2);
7049
7050       /* movsx/movzx want only their source operand considered here, for the
7051          ambiguity checking below.  The suffix will be replaced afterwards
7052          to represent the destination (register).  */
7053       if (is_movx && (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63))
7054         --i.operands;
7055
7056       /* crc32 needs REX.W set regardless of suffix / source operand size.  */
7057       if (is_crc32 && i.tm.operand_types[1].bitfield.qword)
7058         i.rex |= REX_W;
7059
7060       /* If there's no instruction mnemonic suffix we try to invent one
7061          based on GPR operands.  */
7062       if (!i.suffix)
7063         {
7064           /* We take i.suffix from the last register operand specified,
7065              Destination register type is more significant than source
7066              register type.  crc32 in SSE4.2 prefers source register
7067              type. */
7068           unsigned int op = is_crc32 ? 1 : i.operands;
7069
7070           while (op--)
7071             if (i.tm.operand_types[op].bitfield.instance == InstanceNone
7072                 || i.tm.operand_types[op].bitfield.instance == Accum)
7073               {
7074                 if (i.types[op].bitfield.class != Reg)
7075                   continue;
7076                 if (i.types[op].bitfield.byte)
7077                   i.suffix = BYTE_MNEM_SUFFIX;
7078                 else if (i.types[op].bitfield.word)
7079                   i.suffix = WORD_MNEM_SUFFIX;
7080                 else if (i.types[op].bitfield.dword)
7081                   i.suffix = LONG_MNEM_SUFFIX;
7082                 else if (i.types[op].bitfield.qword)
7083                   i.suffix = QWORD_MNEM_SUFFIX;
7084                 else
7085                   continue;
7086                 break;
7087               }
7088
7089           /* As an exception, movsx/movzx silently default to a byte source
7090              in AT&T mode.  */
7091           if (is_movx && i.tm.opcode_modifier.w && !i.suffix && !intel_syntax)
7092             i.suffix = BYTE_MNEM_SUFFIX;
7093         }
7094       else if (i.suffix == BYTE_MNEM_SUFFIX)
7095         {
7096           if (intel_syntax
7097               && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
7098               && i.tm.opcode_modifier.no_bsuf)
7099             i.suffix = 0;
7100           else if (!check_byte_reg ())
7101             return 0;
7102         }
7103       else if (i.suffix == LONG_MNEM_SUFFIX)
7104         {
7105           if (intel_syntax
7106               && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
7107               && i.tm.opcode_modifier.no_lsuf
7108               && !i.tm.opcode_modifier.todword
7109               && !i.tm.opcode_modifier.toqword)
7110             i.suffix = 0;
7111           else if (!check_long_reg ())
7112             return 0;
7113         }
7114       else if (i.suffix == QWORD_MNEM_SUFFIX)
7115         {
7116           if (intel_syntax
7117               && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
7118               && i.tm.opcode_modifier.no_qsuf
7119               && !i.tm.opcode_modifier.todword
7120               && !i.tm.opcode_modifier.toqword)
7121             i.suffix = 0;
7122           else if (!check_qword_reg ())
7123             return 0;
7124         }
7125       else if (i.suffix == WORD_MNEM_SUFFIX)
7126         {
7127           if (intel_syntax
7128               && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
7129               && i.tm.opcode_modifier.no_wsuf)
7130             i.suffix = 0;
7131           else if (!check_word_reg ())
7132             return 0;
7133         }
7134       else if (intel_syntax
7135                && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
7136         /* Do nothing if the instruction is going to ignore the prefix.  */
7137         ;
7138       else
7139         abort ();
7140
7141       /* Undo the movsx/movzx change done above.  */
7142       i.operands = numop;
7143     }
7144   else if (i.tm.opcode_modifier.mnemonicsize == DEFAULTSIZE
7145            && !i.suffix)
7146     {
7147       i.suffix = stackop_size;
7148       if (stackop_size == LONG_MNEM_SUFFIX)
7149         {
7150           /* stackop_size is set to LONG_MNEM_SUFFIX for the
7151              .code16gcc directive to support 16-bit mode with
7152              32-bit address.  For IRET without a suffix, generate
7153              16-bit IRET (opcode 0xcf) to return from an interrupt
7154              handler.  */
7155           if (i.tm.base_opcode == 0xcf)
7156             {
7157               i.suffix = WORD_MNEM_SUFFIX;
7158               as_warn (_("generating 16-bit `iret' for .code16gcc directive"));
7159             }
7160           /* Warn about changed behavior for segment register push/pop.  */
7161           else if ((i.tm.base_opcode | 1) == 0x07)
7162             as_warn (_("generating 32-bit `%s', unlike earlier gas versions"),
7163                      i.tm.name);
7164         }
7165     }
7166   else if (!i.suffix
7167            && (i.tm.opcode_modifier.jump == JUMP_ABSOLUTE
7168                || i.tm.opcode_modifier.jump == JUMP_BYTE
7169                || i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT
7170                || (i.tm.opcode_modifier.opcodespace == SPACE_0F
7171                    && i.tm.base_opcode == 0x01 /* [ls][gi]dt */
7172                    && i.tm.extension_opcode <= 3)))
7173     {
7174       switch (flag_code)
7175         {
7176         case CODE_64BIT:
7177           if (!i.tm.opcode_modifier.no_qsuf)
7178             {
7179               if (i.tm.opcode_modifier.jump == JUMP_BYTE
7180                   || i.tm.opcode_modifier.no_lsuf)
7181                 i.suffix = QWORD_MNEM_SUFFIX;
7182               break;
7183             }
7184           /* Fall through.  */
7185         case CODE_32BIT:
7186           if (!i.tm.opcode_modifier.no_lsuf)
7187             i.suffix = LONG_MNEM_SUFFIX;
7188           break;
7189         case CODE_16BIT:
7190           if (!i.tm.opcode_modifier.no_wsuf)
7191             i.suffix = WORD_MNEM_SUFFIX;
7192           break;
7193         }
7194     }
7195
7196   if (!i.suffix
7197       && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
7198           /* Also cover lret/retf/iret in 64-bit mode.  */
7199           || (flag_code == CODE_64BIT
7200               && !i.tm.opcode_modifier.no_lsuf
7201               && !i.tm.opcode_modifier.no_qsuf))
7202       && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
7203       /* Explicit sizing prefixes are assumed to disambiguate insns.  */
7204       && !i.prefix[DATA_PREFIX] && !(i.prefix[REX_PREFIX] & REX_W)
7205       /* Accept FLDENV et al without suffix.  */
7206       && (i.tm.opcode_modifier.no_ssuf || i.tm.opcode_modifier.floatmf))
7207     {
7208       unsigned int suffixes, evex = 0;
7209
7210       suffixes = !i.tm.opcode_modifier.no_bsuf;
7211       if (!i.tm.opcode_modifier.no_wsuf)
7212         suffixes |= 1 << 1;
7213       if (!i.tm.opcode_modifier.no_lsuf)
7214         suffixes |= 1 << 2;
7215       if (!i.tm.opcode_modifier.no_ldsuf)
7216         suffixes |= 1 << 3;
7217       if (!i.tm.opcode_modifier.no_ssuf)
7218         suffixes |= 1 << 4;
7219       if (flag_code == CODE_64BIT && !i.tm.opcode_modifier.no_qsuf)
7220         suffixes |= 1 << 5;
7221
7222       /* For [XYZ]MMWORD operands inspect operand sizes.  While generally
7223          also suitable for AT&T syntax mode, it was requested that this be
7224          restricted to just Intel syntax.  */
7225       if (intel_syntax && is_any_vex_encoding (&i.tm) && !i.broadcast.type)
7226         {
7227           unsigned int op;
7228
7229           for (op = 0; op < i.tm.operands; ++op)
7230             {
7231               if (is_evex_encoding (&i.tm)
7232                   && !cpu_arch_flags.bitfield.cpuavx512vl)
7233                 {
7234                   if (i.tm.operand_types[op].bitfield.ymmword)
7235                     i.tm.operand_types[op].bitfield.xmmword = 0;
7236                   if (i.tm.operand_types[op].bitfield.zmmword)
7237                     i.tm.operand_types[op].bitfield.ymmword = 0;
7238                   if (!i.tm.opcode_modifier.evex
7239                       || i.tm.opcode_modifier.evex == EVEXDYN)
7240                     i.tm.opcode_modifier.evex = EVEX512;
7241                 }
7242
7243               if (i.tm.operand_types[op].bitfield.xmmword
7244                   + i.tm.operand_types[op].bitfield.ymmword
7245                   + i.tm.operand_types[op].bitfield.zmmword < 2)
7246                 continue;
7247
7248               /* Any properly sized operand disambiguates the insn.  */
7249               if (i.types[op].bitfield.xmmword
7250                   || i.types[op].bitfield.ymmword
7251                   || i.types[op].bitfield.zmmword)
7252                 {
7253                   suffixes &= ~(7 << 6);
7254                   evex = 0;
7255                   break;
7256                 }
7257
7258               if ((i.flags[op] & Operand_Mem)
7259                   && i.tm.operand_types[op].bitfield.unspecified)
7260                 {
7261                   if (i.tm.operand_types[op].bitfield.xmmword)
7262                     suffixes |= 1 << 6;
7263                   if (i.tm.operand_types[op].bitfield.ymmword)
7264                     suffixes |= 1 << 7;
7265                   if (i.tm.operand_types[op].bitfield.zmmword)
7266                     suffixes |= 1 << 8;
7267                   if (is_evex_encoding (&i.tm))
7268                     evex = EVEX512;
7269                 }
7270             }
7271         }
7272
7273       /* Are multiple suffixes / operand sizes allowed?  */
7274       if (suffixes & (suffixes - 1))
7275         {
7276           if (intel_syntax
7277               && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
7278                   || operand_check == check_error))
7279             {
7280               as_bad (_("ambiguous operand size for `%s'"), i.tm.name);
7281               return 0;
7282             }
7283           if (operand_check == check_error)
7284             {
7285               as_bad (_("no instruction mnemonic suffix given and "
7286                         "no register operands; can't size `%s'"), i.tm.name);
7287               return 0;
7288             }
7289           if (operand_check == check_warning)
7290             as_warn (_("%s; using default for `%s'"),
7291                        intel_syntax
7292                        ? _("ambiguous operand size")
7293                        : _("no instruction mnemonic suffix given and "
7294                            "no register operands"),
7295                        i.tm.name);
7296
7297           if (i.tm.opcode_modifier.floatmf)
7298             i.suffix = SHORT_MNEM_SUFFIX;
7299           else if (is_movx)
7300             /* handled below */;
7301           else if (evex)
7302             i.tm.opcode_modifier.evex = evex;
7303           else if (flag_code == CODE_16BIT)
7304             i.suffix = WORD_MNEM_SUFFIX;
7305           else if (!i.tm.opcode_modifier.no_lsuf)
7306             i.suffix = LONG_MNEM_SUFFIX;
7307           else
7308             i.suffix = QWORD_MNEM_SUFFIX;
7309         }
7310     }
7311
7312   if (is_movx)
7313     {
7314       /* In Intel syntax, movsx/movzx must have a "suffix" (checked above).
7315          In AT&T syntax, if there is no suffix (warned about above), the default
7316          will be byte extension.  */
7317       if (i.tm.opcode_modifier.w && i.suffix && i.suffix != BYTE_MNEM_SUFFIX)
7318         i.tm.base_opcode |= 1;
7319
7320       /* For further processing, the suffix should represent the destination
7321          (register).  This is already the case when one was used with
7322          mov[sz][bw]*, but we need to replace it for mov[sz]x, or if there was
7323          no suffix to begin with.  */
7324       if (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63 || !i.suffix)
7325         {
7326           if (i.types[1].bitfield.word)
7327             i.suffix = WORD_MNEM_SUFFIX;
7328           else if (i.types[1].bitfield.qword)
7329             i.suffix = QWORD_MNEM_SUFFIX;
7330           else
7331             i.suffix = LONG_MNEM_SUFFIX;
7332
7333           i.tm.opcode_modifier.w = 0;
7334         }
7335     }
7336
7337   if (!i.tm.opcode_modifier.modrm && i.reg_operands && i.tm.operands < 3)
7338     i.short_form = (i.tm.operand_types[0].bitfield.class == Reg)
7339                    != (i.tm.operand_types[1].bitfield.class == Reg);
7340
7341   /* Change the opcode based on the operand size given by i.suffix.  */
7342   switch (i.suffix)
7343     {
7344     /* Size floating point instruction.  */
7345     case LONG_MNEM_SUFFIX:
7346       if (i.tm.opcode_modifier.floatmf)
7347         {
7348           i.tm.base_opcode ^= 4;
7349           break;
7350         }
7351     /* fall through */
7352     case WORD_MNEM_SUFFIX:
7353     case QWORD_MNEM_SUFFIX:
7354       /* It's not a byte, select word/dword operation.  */
7355       if (i.tm.opcode_modifier.w)
7356         {
7357           if (i.short_form)
7358             i.tm.base_opcode |= 8;
7359           else
7360             i.tm.base_opcode |= 1;
7361         }
7362     /* fall through */
7363     case SHORT_MNEM_SUFFIX:
7364       /* Now select between word & dword operations via the operand
7365          size prefix, except for instructions that will ignore this
7366          prefix anyway.  */
7367       if (i.suffix != QWORD_MNEM_SUFFIX
7368           && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
7369           && !i.tm.opcode_modifier.floatmf
7370           && !is_any_vex_encoding (&i.tm)
7371           && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
7372               || (flag_code == CODE_64BIT
7373                   && i.tm.opcode_modifier.jump == JUMP_BYTE)))
7374         {
7375           unsigned int prefix = DATA_PREFIX_OPCODE;
7376
7377           if (i.tm.opcode_modifier.jump == JUMP_BYTE) /* jcxz, loop */
7378             prefix = ADDR_PREFIX_OPCODE;
7379
7380           if (!add_prefix (prefix))
7381             return 0;
7382         }
7383
7384       /* Set mode64 for an operand.  */
7385       if (i.suffix == QWORD_MNEM_SUFFIX
7386           && flag_code == CODE_64BIT
7387           && !i.tm.opcode_modifier.norex64
7388           && !i.tm.opcode_modifier.vexw
7389           /* Special case for xchg %rax,%rax.  It is NOP and doesn't
7390              need rex64. */
7391           && ! (i.operands == 2
7392                 && i.tm.base_opcode == 0x90
7393                 && i.tm.extension_opcode == None
7394                 && i.types[0].bitfield.instance == Accum
7395                 && i.types[0].bitfield.qword
7396                 && i.types[1].bitfield.instance == Accum
7397                 && i.types[1].bitfield.qword))
7398         i.rex |= REX_W;
7399
7400       break;
7401
7402     case 0:
7403       /* Select word/dword/qword operation with explicit data sizing prefix
7404          when there are no suitable register operands.  */
7405       if (i.tm.opcode_modifier.w
7406           && (i.prefix[DATA_PREFIX] || (i.prefix[REX_PREFIX] & REX_W))
7407           && (!i.reg_operands
7408               || (i.reg_operands == 1
7409                       /* ShiftCount */
7410                   && (i.tm.operand_types[0].bitfield.instance == RegC
7411                       /* InOutPortReg */
7412                       || i.tm.operand_types[0].bitfield.instance == RegD
7413                       || i.tm.operand_types[1].bitfield.instance == RegD
7414                       /* CRC32 */
7415                       || is_crc32))))
7416         i.tm.base_opcode |= 1;
7417       break;
7418     }
7419
7420   if (i.tm.opcode_modifier.addrprefixopreg)
7421     {
7422       gas_assert (!i.suffix);
7423       gas_assert (i.reg_operands);
7424
7425       if (i.tm.operand_types[0].bitfield.instance == Accum
7426           || i.operands == 1)
7427         {
7428           /* The address size override prefix changes the size of the
7429              first operand.  */
7430           if (flag_code == CODE_64BIT
7431               && i.op[0].regs->reg_type.bitfield.word)
7432             {
7433               as_bad (_("16-bit addressing unavailable for `%s'"),
7434                       i.tm.name);
7435               return 0;
7436             }
7437
7438           if ((flag_code == CODE_32BIT
7439                ? i.op[0].regs->reg_type.bitfield.word
7440                : i.op[0].regs->reg_type.bitfield.dword)
7441               && !add_prefix (ADDR_PREFIX_OPCODE))
7442             return 0;
7443         }
7444       else
7445         {
7446           /* Check invalid register operand when the address size override
7447              prefix changes the size of register operands.  */
7448           unsigned int op;
7449           enum { need_word, need_dword, need_qword } need;
7450
7451           /* Check the register operand for the address size prefix if
7452              the memory operand has no real registers, like symbol, DISP
7453              or bogus (x32-only) symbol(%rip) when symbol(%eip) is meant.  */
7454           if (i.mem_operands == 1
7455               && i.reg_operands == 1
7456               && i.operands == 2
7457               && i.types[1].bitfield.class == Reg
7458               && (flag_code == CODE_32BIT
7459                   ? i.op[1].regs->reg_type.bitfield.word
7460                   : i.op[1].regs->reg_type.bitfield.dword)
7461               && ((i.base_reg == NULL && i.index_reg == NULL)
7462 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
7463                   || (x86_elf_abi == X86_64_X32_ABI
7464                       && i.base_reg
7465                       && i.base_reg->reg_num == RegIP
7466                       && i.base_reg->reg_type.bitfield.qword))
7467 #else
7468                   || 0)
7469 #endif
7470               && !add_prefix (ADDR_PREFIX_OPCODE))
7471             return 0;
7472
7473           if (flag_code == CODE_32BIT)
7474             need = i.prefix[ADDR_PREFIX] ? need_word : need_dword;
7475           else if (i.prefix[ADDR_PREFIX])
7476             need = need_dword;
7477           else
7478             need = flag_code == CODE_64BIT ? need_qword : need_word;
7479
7480           for (op = 0; op < i.operands; op++)
7481             {
7482               if (i.types[op].bitfield.class != Reg)
7483                 continue;
7484
7485               switch (need)
7486                 {
7487                 case need_word:
7488                   if (i.op[op].regs->reg_type.bitfield.word)
7489                     continue;
7490                   break;
7491                 case need_dword:
7492                   if (i.op[op].regs->reg_type.bitfield.dword)
7493                     continue;
7494                   break;
7495                 case need_qword:
7496                   if (i.op[op].regs->reg_type.bitfield.qword)
7497                     continue;
7498                   break;
7499                 }
7500
7501               as_bad (_("invalid register operand size for `%s'"),
7502                       i.tm.name);
7503               return 0;
7504             }
7505         }
7506     }
7507
7508   return 1;
7509 }
7510
7511 static int
7512 check_byte_reg (void)
7513 {
7514   int op;
7515
7516   for (op = i.operands; --op >= 0;)
7517     {
7518       /* Skip non-register operands. */
7519       if (i.types[op].bitfield.class != Reg)
7520         continue;
7521
7522       /* If this is an eight bit register, it's OK.  If it's the 16 or
7523          32 bit version of an eight bit register, we will just use the
7524          low portion, and that's OK too.  */
7525       if (i.types[op].bitfield.byte)
7526         continue;
7527
7528       /* I/O port address operands are OK too.  */
7529       if (i.tm.operand_types[op].bitfield.instance == RegD
7530           && i.tm.operand_types[op].bitfield.word)
7531         continue;
7532
7533       /* crc32 only wants its source operand checked here.  */
7534       if (i.tm.base_opcode == 0xf0
7535           && i.tm.opcode_modifier.opcodespace == SPACE_0F38
7536           && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2
7537           && op != 0)
7538         continue;
7539
7540       /* Any other register is bad.  */
7541       as_bad (_("`%s%s' not allowed with `%s%c'"),
7542               register_prefix, i.op[op].regs->reg_name,
7543               i.tm.name, i.suffix);
7544       return 0;
7545     }
7546   return 1;
7547 }
7548
7549 static int
7550 check_long_reg (void)
7551 {
7552   int op;
7553
7554   for (op = i.operands; --op >= 0;)
7555     /* Skip non-register operands. */
7556     if (i.types[op].bitfield.class != Reg)
7557       continue;
7558     /* Reject eight bit registers, except where the template requires
7559        them. (eg. movzb)  */
7560     else if (i.types[op].bitfield.byte
7561              && (i.tm.operand_types[op].bitfield.class == Reg
7562                  || i.tm.operand_types[op].bitfield.instance == Accum)
7563              && (i.tm.operand_types[op].bitfield.word
7564                  || i.tm.operand_types[op].bitfield.dword))
7565       {
7566         as_bad (_("`%s%s' not allowed with `%s%c'"),
7567                 register_prefix,
7568                 i.op[op].regs->reg_name,
7569                 i.tm.name,
7570                 i.suffix);
7571         return 0;
7572       }
7573     /* Error if the e prefix on a general reg is missing.  */
7574     else if (i.types[op].bitfield.word
7575              && (i.tm.operand_types[op].bitfield.class == Reg
7576                  || i.tm.operand_types[op].bitfield.instance == Accum)
7577              && i.tm.operand_types[op].bitfield.dword)
7578       {
7579         as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7580                 register_prefix, i.op[op].regs->reg_name,
7581                 i.suffix);
7582         return 0;
7583       }
7584     /* Warn if the r prefix on a general reg is present.  */
7585     else if (i.types[op].bitfield.qword
7586              && (i.tm.operand_types[op].bitfield.class == Reg
7587                  || i.tm.operand_types[op].bitfield.instance == Accum)
7588              && i.tm.operand_types[op].bitfield.dword)
7589       {
7590         if (intel_syntax
7591             && i.tm.opcode_modifier.toqword
7592             && i.types[0].bitfield.class != RegSIMD)
7593           {
7594             /* Convert to QWORD.  We want REX byte. */
7595             i.suffix = QWORD_MNEM_SUFFIX;
7596           }
7597         else
7598           {
7599             as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7600                     register_prefix, i.op[op].regs->reg_name,
7601                     i.suffix);
7602             return 0;
7603           }
7604       }
7605   return 1;
7606 }
7607
7608 static int
7609 check_qword_reg (void)
7610 {
7611   int op;
7612
7613   for (op = i.operands; --op >= 0; )
7614     /* Skip non-register operands. */
7615     if (i.types[op].bitfield.class != Reg)
7616       continue;
7617     /* Reject eight bit registers, except where the template requires
7618        them. (eg. movzb)  */
7619     else if (i.types[op].bitfield.byte
7620              && (i.tm.operand_types[op].bitfield.class == Reg
7621                  || i.tm.operand_types[op].bitfield.instance == Accum)
7622              && (i.tm.operand_types[op].bitfield.word
7623                  || i.tm.operand_types[op].bitfield.dword))
7624       {
7625         as_bad (_("`%s%s' not allowed with `%s%c'"),
7626                 register_prefix,
7627                 i.op[op].regs->reg_name,
7628                 i.tm.name,
7629                 i.suffix);
7630         return 0;
7631       }
7632     /* Warn if the r prefix on a general reg is missing.  */
7633     else if ((i.types[op].bitfield.word
7634               || i.types[op].bitfield.dword)
7635              && (i.tm.operand_types[op].bitfield.class == Reg
7636                  || i.tm.operand_types[op].bitfield.instance == Accum)
7637              && i.tm.operand_types[op].bitfield.qword)
7638       {
7639         /* Prohibit these changes in the 64bit mode, since the
7640            lowering is more complicated.  */
7641         if (intel_syntax
7642             && i.tm.opcode_modifier.todword
7643             && i.types[0].bitfield.class != RegSIMD)
7644           {
7645             /* Convert to DWORD.  We don't want REX byte. */
7646             i.suffix = LONG_MNEM_SUFFIX;
7647           }
7648         else
7649           {
7650             as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7651                     register_prefix, i.op[op].regs->reg_name,
7652                     i.suffix);
7653             return 0;
7654           }
7655       }
7656   return 1;
7657 }
7658
7659 static int
7660 check_word_reg (void)
7661 {
7662   int op;
7663   for (op = i.operands; --op >= 0;)
7664     /* Skip non-register operands. */
7665     if (i.types[op].bitfield.class != Reg)
7666       continue;
7667     /* Reject eight bit registers, except where the template requires
7668        them. (eg. movzb)  */
7669     else if (i.types[op].bitfield.byte
7670              && (i.tm.operand_types[op].bitfield.class == Reg
7671                  || i.tm.operand_types[op].bitfield.instance == Accum)
7672              && (i.tm.operand_types[op].bitfield.word
7673                  || i.tm.operand_types[op].bitfield.dword))
7674       {
7675         as_bad (_("`%s%s' not allowed with `%s%c'"),
7676                 register_prefix,
7677                 i.op[op].regs->reg_name,
7678                 i.tm.name,
7679                 i.suffix);
7680         return 0;
7681       }
7682     /* Error if the e or r prefix on a general reg is present.  */
7683     else if ((i.types[op].bitfield.dword
7684                  || i.types[op].bitfield.qword)
7685              && (i.tm.operand_types[op].bitfield.class == Reg
7686                  || i.tm.operand_types[op].bitfield.instance == Accum)
7687              && i.tm.operand_types[op].bitfield.word)
7688       {
7689         as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7690                 register_prefix, i.op[op].regs->reg_name,
7691                 i.suffix);
7692         return 0;
7693       }
7694     /* For some instructions need encode as EVEX.W=1 without explicit VexW1. */
7695     else if (i.types[op].bitfield.qword
7696              && intel_syntax
7697              && i.tm.opcode_modifier.toqword)
7698       {
7699           /* Convert to QWORD.  We want EVEX.W byte. */
7700           i.suffix = QWORD_MNEM_SUFFIX;
7701       }
7702   return 1;
7703 }
7704
7705 static int
7706 update_imm (unsigned int j)
7707 {
7708   i386_operand_type overlap = i.types[j];
7709   if ((overlap.bitfield.imm8
7710        || overlap.bitfield.imm8s
7711        || overlap.bitfield.imm16
7712        || overlap.bitfield.imm32
7713        || overlap.bitfield.imm32s
7714        || overlap.bitfield.imm64)
7715       && !operand_type_equal (&overlap, &imm8)
7716       && !operand_type_equal (&overlap, &imm8s)
7717       && !operand_type_equal (&overlap, &imm16)
7718       && !operand_type_equal (&overlap, &imm32)
7719       && !operand_type_equal (&overlap, &imm32s)
7720       && !operand_type_equal (&overlap, &imm64))
7721     {
7722       if (i.suffix)
7723         {
7724           i386_operand_type temp;
7725
7726           operand_type_set (&temp, 0);
7727           if (i.suffix == BYTE_MNEM_SUFFIX)
7728             {
7729               temp.bitfield.imm8 = overlap.bitfield.imm8;
7730               temp.bitfield.imm8s = overlap.bitfield.imm8s;
7731             }
7732           else if (i.suffix == WORD_MNEM_SUFFIX)
7733             temp.bitfield.imm16 = overlap.bitfield.imm16;
7734           else if (i.suffix == QWORD_MNEM_SUFFIX)
7735             {
7736               temp.bitfield.imm64 = overlap.bitfield.imm64;
7737               temp.bitfield.imm32s = overlap.bitfield.imm32s;
7738             }
7739           else
7740             temp.bitfield.imm32 = overlap.bitfield.imm32;
7741           overlap = temp;
7742         }
7743       else if (operand_type_equal (&overlap, &imm16_32_32s)
7744                || operand_type_equal (&overlap, &imm16_32)
7745                || operand_type_equal (&overlap, &imm16_32s))
7746         {
7747           if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
7748             overlap = imm16;
7749           else
7750             overlap = imm32s;
7751         }
7752       else if (i.prefix[REX_PREFIX] & REX_W)
7753         overlap = operand_type_and (overlap, imm32s);
7754       else if (i.prefix[DATA_PREFIX])
7755         overlap = operand_type_and (overlap,
7756                                     flag_code != CODE_16BIT ? imm16 : imm32);
7757       if (!operand_type_equal (&overlap, &imm8)
7758           && !operand_type_equal (&overlap, &imm8s)
7759           && !operand_type_equal (&overlap, &imm16)
7760           && !operand_type_equal (&overlap, &imm32)
7761           && !operand_type_equal (&overlap, &imm32s)
7762           && !operand_type_equal (&overlap, &imm64))
7763         {
7764           as_bad (_("no instruction mnemonic suffix given; "
7765                     "can't determine immediate size"));
7766           return 0;
7767         }
7768     }
7769   i.types[j] = overlap;
7770
7771   return 1;
7772 }
7773
7774 static int
7775 finalize_imm (void)
7776 {
7777   unsigned int j, n;
7778
7779   /* Update the first 2 immediate operands.  */
7780   n = i.operands > 2 ? 2 : i.operands;
7781   if (n)
7782     {
7783       for (j = 0; j < n; j++)
7784         if (update_imm (j) == 0)
7785           return 0;
7786
7787       /* The 3rd operand can't be immediate operand.  */
7788       gas_assert (operand_type_check (i.types[2], imm) == 0);
7789     }
7790
7791   return 1;
7792 }
7793
7794 static int
7795 process_operands (void)
7796 {
7797   /* Default segment register this instruction will use for memory
7798      accesses.  0 means unknown.  This is only for optimizing out
7799      unnecessary segment overrides.  */
7800   const reg_entry *default_seg = NULL;
7801
7802   if (i.tm.opcode_modifier.sse2avx)
7803     {
7804       /* Legacy encoded insns allow explicit REX prefixes, so these prefixes
7805          need converting.  */
7806       i.rex |= i.prefix[REX_PREFIX] & (REX_W | REX_R | REX_X | REX_B);
7807       i.prefix[REX_PREFIX] = 0;
7808       i.rex_encoding = 0;
7809     }
7810   /* ImmExt should be processed after SSE2AVX.  */
7811   else if (i.tm.opcode_modifier.immext)
7812     process_immext ();
7813
7814   if (i.tm.opcode_modifier.sse2avx && i.tm.opcode_modifier.vexvvvv)
7815     {
7816       unsigned int dupl = i.operands;
7817       unsigned int dest = dupl - 1;
7818       unsigned int j;
7819
7820       /* The destination must be an xmm register.  */
7821       gas_assert (i.reg_operands
7822                   && MAX_OPERANDS > dupl
7823                   && operand_type_equal (&i.types[dest], &regxmm));
7824
7825       if (i.tm.operand_types[0].bitfield.instance == Accum
7826           && i.tm.operand_types[0].bitfield.xmmword)
7827         {
7828           if (i.tm.opcode_modifier.vexsources == VEX3SOURCES)
7829             {
7830               /* Keep xmm0 for instructions with VEX prefix and 3
7831                  sources.  */
7832               i.tm.operand_types[0].bitfield.instance = InstanceNone;
7833               i.tm.operand_types[0].bitfield.class = RegSIMD;
7834               goto duplicate;
7835             }
7836           else
7837             {
7838               /* We remove the first xmm0 and keep the number of
7839                  operands unchanged, which in fact duplicates the
7840                  destination.  */
7841               for (j = 1; j < i.operands; j++)
7842                 {
7843                   i.op[j - 1] = i.op[j];
7844                   i.types[j - 1] = i.types[j];
7845                   i.tm.operand_types[j - 1] = i.tm.operand_types[j];
7846                   i.flags[j - 1] = i.flags[j];
7847                 }
7848             }
7849         }
7850       else if (i.tm.opcode_modifier.implicit1stxmm0)
7851         {
7852           gas_assert ((MAX_OPERANDS - 1) > dupl
7853                       && (i.tm.opcode_modifier.vexsources
7854                           == VEX3SOURCES));
7855
7856           /* Add the implicit xmm0 for instructions with VEX prefix
7857              and 3 sources.  */
7858           for (j = i.operands; j > 0; j--)
7859             {
7860               i.op[j] = i.op[j - 1];
7861               i.types[j] = i.types[j - 1];
7862               i.tm.operand_types[j] = i.tm.operand_types[j - 1];
7863               i.flags[j] = i.flags[j - 1];
7864             }
7865           i.op[0].regs
7866             = (const reg_entry *) str_hash_find (reg_hash, "xmm0");
7867           i.types[0] = regxmm;
7868           i.tm.operand_types[0] = regxmm;
7869
7870           i.operands += 2;
7871           i.reg_operands += 2;
7872           i.tm.operands += 2;
7873
7874           dupl++;
7875           dest++;
7876           i.op[dupl] = i.op[dest];
7877           i.types[dupl] = i.types[dest];
7878           i.tm.operand_types[dupl] = i.tm.operand_types[dest];
7879           i.flags[dupl] = i.flags[dest];
7880         }
7881       else
7882         {
7883         duplicate:
7884           i.operands++;
7885           i.reg_operands++;
7886           i.tm.operands++;
7887
7888           i.op[dupl] = i.op[dest];
7889           i.types[dupl] = i.types[dest];
7890           i.tm.operand_types[dupl] = i.tm.operand_types[dest];
7891           i.flags[dupl] = i.flags[dest];
7892         }
7893
7894        if (i.tm.opcode_modifier.immext)
7895          process_immext ();
7896     }
7897   else if (i.tm.operand_types[0].bitfield.instance == Accum
7898            && i.tm.operand_types[0].bitfield.xmmword)
7899     {
7900       unsigned int j;
7901
7902       for (j = 1; j < i.operands; j++)
7903         {
7904           i.op[j - 1] = i.op[j];
7905           i.types[j - 1] = i.types[j];
7906
7907           /* We need to adjust fields in i.tm since they are used by
7908              build_modrm_byte.  */
7909           i.tm.operand_types [j - 1] = i.tm.operand_types [j];
7910
7911           i.flags[j - 1] = i.flags[j];
7912         }
7913
7914       i.operands--;
7915       i.reg_operands--;
7916       i.tm.operands--;
7917     }
7918   else if (i.tm.opcode_modifier.implicitquadgroup)
7919     {
7920       unsigned int regnum, first_reg_in_group, last_reg_in_group;
7921
7922       /* The second operand must be {x,y,z}mmN, where N is a multiple of 4. */
7923       gas_assert (i.operands >= 2 && i.types[1].bitfield.class == RegSIMD);
7924       regnum = register_number (i.op[1].regs);
7925       first_reg_in_group = regnum & ~3;
7926       last_reg_in_group = first_reg_in_group + 3;
7927       if (regnum != first_reg_in_group)
7928         as_warn (_("source register `%s%s' implicitly denotes"
7929                    " `%s%.3s%u' to `%s%.3s%u' source group in `%s'"),
7930                  register_prefix, i.op[1].regs->reg_name,
7931                  register_prefix, i.op[1].regs->reg_name, first_reg_in_group,
7932                  register_prefix, i.op[1].regs->reg_name, last_reg_in_group,
7933                  i.tm.name);
7934     }
7935   else if (i.tm.opcode_modifier.regkludge)
7936     {
7937       /* The imul $imm, %reg instruction is converted into
7938          imul $imm, %reg, %reg, and the clr %reg instruction
7939          is converted into xor %reg, %reg.  */
7940
7941       unsigned int first_reg_op;
7942
7943       if (operand_type_check (i.types[0], reg))
7944         first_reg_op = 0;
7945       else
7946         first_reg_op = 1;
7947       /* Pretend we saw the extra register operand.  */
7948       gas_assert (i.reg_operands == 1
7949                   && i.op[first_reg_op + 1].regs == 0);
7950       i.op[first_reg_op + 1].regs = i.op[first_reg_op].regs;
7951       i.types[first_reg_op + 1] = i.types[first_reg_op];
7952       i.operands++;
7953       i.reg_operands++;
7954     }
7955
7956   if (i.tm.opcode_modifier.modrm)
7957     {
7958       /* The opcode is completed (modulo i.tm.extension_opcode which
7959          must be put into the modrm byte).  Now, we make the modrm and
7960          index base bytes based on all the info we've collected.  */
7961
7962       default_seg = build_modrm_byte ();
7963     }
7964   else if (i.types[0].bitfield.class == SReg)
7965     {
7966       if (flag_code != CODE_64BIT
7967           ? i.tm.base_opcode == POP_SEG_SHORT
7968             && i.op[0].regs->reg_num == 1
7969           : (i.tm.base_opcode | 1) == (POP_SEG386_SHORT & 0xff)
7970             && i.op[0].regs->reg_num < 4)
7971         {
7972           as_bad (_("you can't `%s %s%s'"),
7973                   i.tm.name, register_prefix, i.op[0].regs->reg_name);
7974           return 0;
7975         }
7976       if (i.op[0].regs->reg_num > 3
7977           && i.tm.opcode_modifier.opcodespace == SPACE_BASE )
7978         {
7979           i.tm.base_opcode ^= (POP_SEG_SHORT ^ POP_SEG386_SHORT) & 0xff;
7980           i.tm.opcode_modifier.opcodespace = SPACE_0F;
7981         }
7982       i.tm.base_opcode |= (i.op[0].regs->reg_num << 3);
7983     }
7984   else if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
7985            && (i.tm.base_opcode & ~3) == MOV_AX_DISP32)
7986     {
7987       default_seg = reg_ds;
7988     }
7989   else if (i.tm.opcode_modifier.isstring)
7990     {
7991       /* For the string instructions that allow a segment override
7992          on one of their operands, the default segment is ds.  */
7993       default_seg = reg_ds;
7994     }
7995   else if (i.short_form)
7996     {
7997       /* The register or float register operand is in operand
7998          0 or 1.  */
7999       unsigned int op = i.tm.operand_types[0].bitfield.class != Reg;
8000
8001       /* Register goes in low 3 bits of opcode.  */
8002       i.tm.base_opcode |= i.op[op].regs->reg_num;
8003       if ((i.op[op].regs->reg_flags & RegRex) != 0)
8004         i.rex |= REX_B;
8005       if (!quiet_warnings && i.tm.opcode_modifier.ugh)
8006         {
8007           /* Warn about some common errors, but press on regardless.
8008              The first case can be generated by gcc (<= 2.8.1).  */
8009           if (i.operands == 2)
8010             {
8011               /* Reversed arguments on faddp, fsubp, etc.  */
8012               as_warn (_("translating to `%s %s%s,%s%s'"), i.tm.name,
8013                        register_prefix, i.op[!intel_syntax].regs->reg_name,
8014                        register_prefix, i.op[intel_syntax].regs->reg_name);
8015             }
8016           else
8017             {
8018               /* Extraneous `l' suffix on fp insn.  */
8019               as_warn (_("translating to `%s %s%s'"), i.tm.name,
8020                        register_prefix, i.op[0].regs->reg_name);
8021             }
8022         }
8023     }
8024
8025   if ((i.seg[0] || i.prefix[SEG_PREFIX])
8026       && i.tm.base_opcode == 0x8d /* lea */
8027       && i.tm.opcode_modifier.opcodespace == SPACE_BASE
8028       && !is_any_vex_encoding(&i.tm))
8029     {
8030       if (!quiet_warnings)
8031         as_warn (_("segment override on `%s' is ineffectual"), i.tm.name);
8032       if (optimize)
8033         {
8034           i.seg[0] = NULL;
8035           i.prefix[SEG_PREFIX] = 0;
8036         }
8037     }
8038
8039   /* If a segment was explicitly specified, and the specified segment
8040      is neither the default nor the one already recorded from a prefix,
8041      use an opcode prefix to select it.  If we never figured out what
8042      the default segment is, then default_seg will be zero at this
8043      point, and the specified segment prefix will always be used.  */
8044   if (i.seg[0]
8045       && i.seg[0] != default_seg
8046       && i386_seg_prefixes[i.seg[0]->reg_num] != i.prefix[SEG_PREFIX])
8047     {
8048       if (!add_prefix (i386_seg_prefixes[i.seg[0]->reg_num]))
8049         return 0;
8050     }
8051   return 1;
8052 }
8053
8054 static INLINE void set_rex_vrex (const reg_entry *r, unsigned int rex_bit,
8055                                  bool do_sse2avx)
8056 {
8057   if (r->reg_flags & RegRex)
8058     {
8059       if (i.rex & rex_bit)
8060         as_bad (_("same type of prefix used twice"));
8061       i.rex |= rex_bit;
8062     }
8063   else if (do_sse2avx && (i.rex & rex_bit) && i.vex.register_specifier)
8064     {
8065       gas_assert (i.vex.register_specifier == r);
8066       i.vex.register_specifier += 8;
8067     }
8068
8069   if (r->reg_flags & RegVRex)
8070     i.vrex |= rex_bit;
8071 }
8072
8073 static const reg_entry *
8074 build_modrm_byte (void)
8075 {
8076   const reg_entry *default_seg = NULL;
8077   unsigned int source, dest;
8078   int vex_3_sources;
8079
8080   vex_3_sources = i.tm.opcode_modifier.vexsources == VEX3SOURCES;
8081   if (vex_3_sources)
8082     {
8083       unsigned int nds, reg_slot;
8084       expressionS *exp;
8085
8086       dest = i.operands - 1;
8087       nds = dest - 1;
8088
8089       /* There are 2 kinds of instructions:
8090          1. 5 operands: 4 register operands or 3 register operands
8091          plus 1 memory operand plus one Imm4 operand, VexXDS, and
8092          VexW0 or VexW1.  The destination must be either XMM, YMM or
8093          ZMM register.
8094          2. 4 operands: 4 register operands or 3 register operands
8095          plus 1 memory operand, with VexXDS.  */
8096       gas_assert ((i.reg_operands == 4
8097                    || (i.reg_operands == 3 && i.mem_operands == 1))
8098                   && i.tm.opcode_modifier.vexvvvv == VEXXDS
8099                   && i.tm.opcode_modifier.vexw
8100                   && i.tm.operand_types[dest].bitfield.class == RegSIMD);
8101
8102       /* If VexW1 is set, the first non-immediate operand is the source and
8103          the second non-immediate one is encoded in the immediate operand.  */
8104       if (i.tm.opcode_modifier.vexw == VEXW1)
8105         {
8106           source = i.imm_operands;
8107           reg_slot = i.imm_operands + 1;
8108         }
8109       else
8110         {
8111           source = i.imm_operands + 1;
8112           reg_slot = i.imm_operands;
8113         }
8114
8115       if (i.imm_operands == 0)
8116         {
8117           /* When there is no immediate operand, generate an 8bit
8118              immediate operand to encode the first operand.  */
8119           exp = &im_expressions[i.imm_operands++];
8120           i.op[i.operands].imms = exp;
8121           i.types[i.operands] = imm8;
8122           i.operands++;
8123
8124           gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
8125           exp->X_op = O_constant;
8126           exp->X_add_number = register_number (i.op[reg_slot].regs) << 4;
8127           gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
8128         }
8129       else
8130         {
8131           gas_assert (i.imm_operands == 1);
8132           gas_assert (fits_in_imm4 (i.op[0].imms->X_add_number));
8133           gas_assert (!i.tm.opcode_modifier.immext);
8134
8135           /* Turn on Imm8 again so that output_imm will generate it.  */
8136           i.types[0].bitfield.imm8 = 1;
8137
8138           gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
8139           i.op[0].imms->X_add_number
8140               |= register_number (i.op[reg_slot].regs) << 4;
8141           gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
8142         }
8143
8144       gas_assert (i.tm.operand_types[nds].bitfield.class == RegSIMD);
8145       i.vex.register_specifier = i.op[nds].regs;
8146     }
8147   else
8148     source = dest = 0;
8149
8150   /* i.reg_operands MUST be the number of real register operands;
8151      implicit registers do not count.  If there are 3 register
8152      operands, it must be a instruction with VexNDS.  For a
8153      instruction with VexNDD, the destination register is encoded
8154      in VEX prefix.  If there are 4 register operands, it must be
8155      a instruction with VEX prefix and 3 sources.  */
8156   if (i.mem_operands == 0
8157       && ((i.reg_operands == 2
8158            && i.tm.opcode_modifier.vexvvvv <= VEXXDS)
8159           || (i.reg_operands == 3
8160               && i.tm.opcode_modifier.vexvvvv == VEXXDS)
8161           || (i.reg_operands == 4 && vex_3_sources)))
8162     {
8163       switch (i.operands)
8164         {
8165         case 2:
8166           source = 0;
8167           break;
8168         case 3:
8169           /* When there are 3 operands, one of them may be immediate,
8170              which may be the first or the last operand.  Otherwise,
8171              the first operand must be shift count register (cl) or it
8172              is an instruction with VexNDS. */
8173           gas_assert (i.imm_operands == 1
8174                       || (i.imm_operands == 0
8175                           && (i.tm.opcode_modifier.vexvvvv == VEXXDS
8176                               || (i.types[0].bitfield.instance == RegC
8177                                   && i.types[0].bitfield.byte))));
8178           if (operand_type_check (i.types[0], imm)
8179               || (i.types[0].bitfield.instance == RegC
8180                   && i.types[0].bitfield.byte))
8181             source = 1;
8182           else
8183             source = 0;
8184           break;
8185         case 4:
8186           /* When there are 4 operands, the first two must be 8bit
8187              immediate operands. The source operand will be the 3rd
8188              one.
8189
8190              For instructions with VexNDS, if the first operand
8191              an imm8, the source operand is the 2nd one.  If the last
8192              operand is imm8, the source operand is the first one.  */
8193           gas_assert ((i.imm_operands == 2
8194                        && i.types[0].bitfield.imm8
8195                        && i.types[1].bitfield.imm8)
8196                       || (i.tm.opcode_modifier.vexvvvv == VEXXDS
8197                           && i.imm_operands == 1
8198                           && (i.types[0].bitfield.imm8
8199                               || i.types[i.operands - 1].bitfield.imm8
8200                               || i.rounding.type != rc_none)));
8201           if (i.imm_operands == 2)
8202             source = 2;
8203           else
8204             {
8205               if (i.types[0].bitfield.imm8)
8206                 source = 1;
8207               else
8208                 source = 0;
8209             }
8210           break;
8211         case 5:
8212           if (is_evex_encoding (&i.tm))
8213             {
8214               /* For EVEX instructions, when there are 5 operands, the
8215                  first one must be immediate operand.  If the second one
8216                  is immediate operand, the source operand is the 3th
8217                  one.  If the last one is immediate operand, the source
8218                  operand is the 2nd one.  */
8219               gas_assert (i.imm_operands == 2
8220                           && i.tm.opcode_modifier.sae
8221                           && operand_type_check (i.types[0], imm));
8222               if (operand_type_check (i.types[1], imm))
8223                 source = 2;
8224               else if (operand_type_check (i.types[4], imm))
8225                 source = 1;
8226               else
8227                 abort ();
8228             }
8229           break;
8230         default:
8231           abort ();
8232         }
8233
8234       if (!vex_3_sources)
8235         {
8236           dest = source + 1;
8237
8238           /* RC/SAE operand could be between DEST and SRC.  That happens
8239              when one operand is GPR and the other one is XMM/YMM/ZMM
8240              register.  */
8241           if (i.rounding.type != rc_none && i.rounding.operand == dest)
8242             dest++;
8243
8244           if (i.tm.opcode_modifier.vexvvvv == VEXXDS)
8245             {
8246               /* For instructions with VexNDS, the register-only source
8247                  operand must be a 32/64bit integer, XMM, YMM, ZMM, or mask
8248                  register.  It is encoded in VEX prefix.  */
8249
8250               i386_operand_type op;
8251               unsigned int vvvv;
8252
8253               /* Swap two source operands if needed.  */
8254               if (i.tm.opcode_modifier.swapsources)
8255                 {
8256                   vvvv = source;
8257                   source = dest;
8258                 }
8259               else
8260                 vvvv = dest;
8261
8262               op = i.tm.operand_types[vvvv];
8263               if ((dest + 1) >= i.operands
8264                   || ((op.bitfield.class != Reg
8265                        || (!op.bitfield.dword && !op.bitfield.qword))
8266                       && op.bitfield.class != RegSIMD
8267                       && !operand_type_equal (&op, &regmask)))
8268                 abort ();
8269               i.vex.register_specifier = i.op[vvvv].regs;
8270               dest++;
8271             }
8272         }
8273
8274       i.rm.mode = 3;
8275       /* One of the register operands will be encoded in the i.rm.reg
8276          field, the other in the combined i.rm.mode and i.rm.regmem
8277          fields.  If no form of this instruction supports a memory
8278          destination operand, then we assume the source operand may
8279          sometimes be a memory operand and so we need to store the
8280          destination in the i.rm.reg field.  */
8281       if (!i.tm.opcode_modifier.regmem
8282           && operand_type_check (i.tm.operand_types[dest], anymem) == 0)
8283         {
8284           i.rm.reg = i.op[dest].regs->reg_num;
8285           i.rm.regmem = i.op[source].regs->reg_num;
8286           set_rex_vrex (i.op[dest].regs, REX_R, i.tm.opcode_modifier.sse2avx);
8287           set_rex_vrex (i.op[source].regs, REX_B, false);
8288         }
8289       else
8290         {
8291           i.rm.reg = i.op[source].regs->reg_num;
8292           i.rm.regmem = i.op[dest].regs->reg_num;
8293           set_rex_vrex (i.op[dest].regs, REX_B, i.tm.opcode_modifier.sse2avx);
8294           set_rex_vrex (i.op[source].regs, REX_R, false);
8295         }
8296       if (flag_code != CODE_64BIT && (i.rex & REX_R))
8297         {
8298           if (i.types[!i.tm.opcode_modifier.regmem].bitfield.class != RegCR)
8299             abort ();
8300           i.rex &= ~REX_R;
8301           add_prefix (LOCK_PREFIX_OPCODE);
8302         }
8303     }
8304   else
8305     {                   /* If it's not 2 reg operands...  */
8306       unsigned int mem;
8307
8308       if (i.mem_operands)
8309         {
8310           unsigned int fake_zero_displacement = 0;
8311           unsigned int op;
8312
8313           for (op = 0; op < i.operands; op++)
8314             if (i.flags[op] & Operand_Mem)
8315               break;
8316           gas_assert (op < i.operands);
8317
8318           if (i.tm.opcode_modifier.sib)
8319             {
8320               /* The index register of VSIB shouldn't be RegIZ.  */
8321               if (i.tm.opcode_modifier.sib != SIBMEM
8322                   && i.index_reg->reg_num == RegIZ)
8323                 abort ();
8324
8325               i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8326               if (!i.base_reg)
8327                 {
8328                   i.sib.base = NO_BASE_REGISTER;
8329                   i.sib.scale = i.log2_scale_factor;
8330                   i.types[op] = operand_type_and_not (i.types[op], anydisp);
8331                   if (want_disp32 (&i.tm))
8332                     i.types[op].bitfield.disp32 = 1;
8333                   else
8334                     i.types[op].bitfield.disp32s = 1;
8335                 }
8336
8337               /* Since the mandatory SIB always has index register, so
8338                  the code logic remains unchanged. The non-mandatory SIB
8339                  without index register is allowed and will be handled
8340                  later.  */
8341               if (i.index_reg)
8342                 {
8343                   if (i.index_reg->reg_num == RegIZ)
8344                     i.sib.index = NO_INDEX_REGISTER;
8345                   else
8346                     i.sib.index = i.index_reg->reg_num;
8347                   set_rex_vrex (i.index_reg, REX_X, false);
8348                 }
8349             }
8350
8351           default_seg = reg_ds;
8352
8353           if (i.base_reg == 0)
8354             {
8355               i.rm.mode = 0;
8356               if (!i.disp_operands)
8357                 fake_zero_displacement = 1;
8358               if (i.index_reg == 0)
8359                 {
8360                   /* Both check for VSIB and mandatory non-vector SIB. */
8361                   gas_assert (!i.tm.opcode_modifier.sib
8362                               || i.tm.opcode_modifier.sib == SIBMEM);
8363                   /* Operand is just <disp>  */
8364                   i.types[op] = operand_type_and_not (i.types[op], anydisp);
8365                   if (flag_code == CODE_64BIT)
8366                     {
8367                       /* 64bit mode overwrites the 32bit absolute
8368                          addressing by RIP relative addressing and
8369                          absolute addressing is encoded by one of the
8370                          redundant SIB forms.  */
8371                       i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8372                       i.sib.base = NO_BASE_REGISTER;
8373                       i.sib.index = NO_INDEX_REGISTER;
8374                       if (want_disp32 (&i.tm))
8375                         i.types[op].bitfield.disp32 = 1;
8376                       else
8377                         i.types[op].bitfield.disp32s = 1;
8378                     }
8379                   else if ((flag_code == CODE_16BIT)
8380                            ^ (i.prefix[ADDR_PREFIX] != 0))
8381                     {
8382                       i.rm.regmem = NO_BASE_REGISTER_16;
8383                       i.types[op].bitfield.disp16 = 1;
8384                     }
8385                   else
8386                     {
8387                       i.rm.regmem = NO_BASE_REGISTER;
8388                       i.types[op].bitfield.disp32 = 1;
8389                     }
8390                 }
8391               else if (!i.tm.opcode_modifier.sib)
8392                 {
8393                   /* !i.base_reg && i.index_reg  */
8394                   if (i.index_reg->reg_num == RegIZ)
8395                     i.sib.index = NO_INDEX_REGISTER;
8396                   else
8397                     i.sib.index = i.index_reg->reg_num;
8398                   i.sib.base = NO_BASE_REGISTER;
8399                   i.sib.scale = i.log2_scale_factor;
8400                   i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8401                   i.types[op] = operand_type_and_not (i.types[op], anydisp);
8402                   if (want_disp32 (&i.tm))
8403                     i.types[op].bitfield.disp32 = 1;
8404                   else
8405                     i.types[op].bitfield.disp32s = 1;
8406                   if ((i.index_reg->reg_flags & RegRex) != 0)
8407                     i.rex |= REX_X;
8408                 }
8409             }
8410           /* RIP addressing for 64bit mode.  */
8411           else if (i.base_reg->reg_num == RegIP)
8412             {
8413               gas_assert (!i.tm.opcode_modifier.sib);
8414               i.rm.regmem = NO_BASE_REGISTER;
8415               i.types[op].bitfield.disp8 = 0;
8416               i.types[op].bitfield.disp16 = 0;
8417               i.types[op].bitfield.disp32 = 0;
8418               i.types[op].bitfield.disp32s = 1;
8419               i.types[op].bitfield.disp64 = 0;
8420               i.flags[op] |= Operand_PCrel;
8421               if (! i.disp_operands)
8422                 fake_zero_displacement = 1;
8423             }
8424           else if (i.base_reg->reg_type.bitfield.word)
8425             {
8426               gas_assert (!i.tm.opcode_modifier.sib);
8427               switch (i.base_reg->reg_num)
8428                 {
8429                 case 3: /* (%bx)  */
8430                   if (i.index_reg == 0)
8431                     i.rm.regmem = 7;
8432                   else /* (%bx,%si) -> 0, or (%bx,%di) -> 1  */
8433                     i.rm.regmem = i.index_reg->reg_num - 6;
8434                   break;
8435                 case 5: /* (%bp)  */
8436                   default_seg = reg_ss;
8437                   if (i.index_reg == 0)
8438                     {
8439                       i.rm.regmem = 6;
8440                       if (operand_type_check (i.types[op], disp) == 0)
8441                         {
8442                           /* fake (%bp) into 0(%bp)  */
8443                           if (i.disp_encoding == disp_encoding_16bit)
8444                             i.types[op].bitfield.disp16 = 1;
8445                           else
8446                             i.types[op].bitfield.disp8 = 1;
8447                           fake_zero_displacement = 1;
8448                         }
8449                     }
8450                   else /* (%bp,%si) -> 2, or (%bp,%di) -> 3  */
8451                     i.rm.regmem = i.index_reg->reg_num - 6 + 2;
8452                   break;
8453                 default: /* (%si) -> 4 or (%di) -> 5  */
8454                   i.rm.regmem = i.base_reg->reg_num - 6 + 4;
8455                 }
8456               if (!fake_zero_displacement
8457                   && !i.disp_operands
8458                   && i.disp_encoding)
8459                 {
8460                   fake_zero_displacement = 1;
8461                   if (i.disp_encoding == disp_encoding_8bit)
8462                     i.types[op].bitfield.disp8 = 1;
8463                   else
8464                     i.types[op].bitfield.disp16 = 1;
8465                 }
8466               i.rm.mode = mode_from_disp_size (i.types[op]);
8467             }
8468           else /* i.base_reg and 32/64 bit mode  */
8469             {
8470               if (operand_type_check (i.types[op], disp))
8471                 {
8472                   i.types[op].bitfield.disp16 = 0;
8473                   i.types[op].bitfield.disp64 = 0;
8474                   if (!want_disp32 (&i.tm))
8475                     {
8476                       i.types[op].bitfield.disp32 = 0;
8477                       i.types[op].bitfield.disp32s = 1;
8478                     }
8479                   else
8480                     {
8481                       i.types[op].bitfield.disp32 = 1;
8482                       i.types[op].bitfield.disp32s = 0;
8483                     }
8484                 }
8485
8486               if (!i.tm.opcode_modifier.sib)
8487                 i.rm.regmem = i.base_reg->reg_num;
8488               if ((i.base_reg->reg_flags & RegRex) != 0)
8489                 i.rex |= REX_B;
8490               i.sib.base = i.base_reg->reg_num;
8491               /* x86-64 ignores REX prefix bit here to avoid decoder
8492                  complications.  */
8493               if (!(i.base_reg->reg_flags & RegRex)
8494                   && (i.base_reg->reg_num == EBP_REG_NUM
8495                    || i.base_reg->reg_num == ESP_REG_NUM))
8496                   default_seg = reg_ss;
8497               if (i.base_reg->reg_num == 5 && i.disp_operands == 0)
8498                 {
8499                   fake_zero_displacement = 1;
8500                   if (i.disp_encoding == disp_encoding_32bit)
8501                     i.types[op].bitfield.disp32 = 1;
8502                   else
8503                     i.types[op].bitfield.disp8 = 1;
8504                 }
8505               i.sib.scale = i.log2_scale_factor;
8506               if (i.index_reg == 0)
8507                 {
8508                   /* Only check for VSIB. */
8509                   gas_assert (i.tm.opcode_modifier.sib != VECSIB128
8510                               && i.tm.opcode_modifier.sib != VECSIB256
8511                               && i.tm.opcode_modifier.sib != VECSIB512);
8512
8513                   /* <disp>(%esp) becomes two byte modrm with no index
8514                      register.  We've already stored the code for esp
8515                      in i.rm.regmem ie. ESCAPE_TO_TWO_BYTE_ADDRESSING.
8516                      Any base register besides %esp will not use the
8517                      extra modrm byte.  */
8518                   i.sib.index = NO_INDEX_REGISTER;
8519                 }
8520               else if (!i.tm.opcode_modifier.sib)
8521                 {
8522                   if (i.index_reg->reg_num == RegIZ)
8523                     i.sib.index = NO_INDEX_REGISTER;
8524                   else
8525                     i.sib.index = i.index_reg->reg_num;
8526                   i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8527                   if ((i.index_reg->reg_flags & RegRex) != 0)
8528                     i.rex |= REX_X;
8529                 }
8530
8531               if (i.disp_operands
8532                   && (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
8533                       || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL))
8534                 i.rm.mode = 0;
8535               else
8536                 {
8537                   if (!fake_zero_displacement
8538                       && !i.disp_operands
8539                       && i.disp_encoding)
8540                     {
8541                       fake_zero_displacement = 1;
8542                       if (i.disp_encoding == disp_encoding_8bit)
8543                         i.types[op].bitfield.disp8 = 1;
8544                       else
8545                         i.types[op].bitfield.disp32 = 1;
8546                     }
8547                   i.rm.mode = mode_from_disp_size (i.types[op]);
8548                 }
8549             }
8550
8551           if (fake_zero_displacement)
8552             {
8553               /* Fakes a zero displacement assuming that i.types[op]
8554                  holds the correct displacement size.  */
8555               expressionS *exp;
8556
8557               gas_assert (i.op[op].disps == 0);
8558               exp = &disp_expressions[i.disp_operands++];
8559               i.op[op].disps = exp;
8560               exp->X_op = O_constant;
8561               exp->X_add_number = 0;
8562               exp->X_add_symbol = (symbolS *) 0;
8563               exp->X_op_symbol = (symbolS *) 0;
8564             }
8565
8566           mem = op;
8567         }
8568       else
8569         mem = ~0;
8570
8571       if (i.tm.opcode_modifier.vexsources == XOP2SOURCES)
8572         {
8573           if (operand_type_check (i.types[0], imm))
8574             i.vex.register_specifier = NULL;
8575           else
8576             {
8577               /* VEX.vvvv encodes one of the sources when the first
8578                  operand is not an immediate.  */
8579               if (i.tm.opcode_modifier.vexw == VEXW0)
8580                 i.vex.register_specifier = i.op[0].regs;
8581               else
8582                 i.vex.register_specifier = i.op[1].regs;
8583             }
8584
8585           /* Destination is a XMM register encoded in the ModRM.reg
8586              and VEX.R bit.  */
8587           i.rm.reg = i.op[2].regs->reg_num;
8588           if ((i.op[2].regs->reg_flags & RegRex) != 0)
8589             i.rex |= REX_R;
8590
8591           /* ModRM.rm and VEX.B encodes the other source.  */
8592           if (!i.mem_operands)
8593             {
8594               i.rm.mode = 3;
8595
8596               if (i.tm.opcode_modifier.vexw == VEXW0)
8597                 i.rm.regmem = i.op[1].regs->reg_num;
8598               else
8599                 i.rm.regmem = i.op[0].regs->reg_num;
8600
8601               if ((i.op[1].regs->reg_flags & RegRex) != 0)
8602                 i.rex |= REX_B;
8603             }
8604         }
8605       else if (i.tm.opcode_modifier.vexvvvv == VEXLWP)
8606         {
8607           i.vex.register_specifier = i.op[2].regs;
8608           if (!i.mem_operands)
8609             {
8610               i.rm.mode = 3;
8611               i.rm.regmem = i.op[1].regs->reg_num;
8612               if ((i.op[1].regs->reg_flags & RegRex) != 0)
8613                 i.rex |= REX_B;
8614             }
8615         }
8616       /* Fill in i.rm.reg or i.rm.regmem field with register operand
8617          (if any) based on i.tm.extension_opcode.  Again, we must be
8618          careful to make sure that segment/control/debug/test/MMX
8619          registers are coded into the i.rm.reg field.  */
8620       else if (i.reg_operands)
8621         {
8622           unsigned int op;
8623           unsigned int vex_reg = ~0;
8624
8625           for (op = 0; op < i.operands; op++)
8626             if (i.types[op].bitfield.class == Reg
8627                 || i.types[op].bitfield.class == RegBND
8628                 || i.types[op].bitfield.class == RegMask
8629                 || i.types[op].bitfield.class == SReg
8630                 || i.types[op].bitfield.class == RegCR
8631                 || i.types[op].bitfield.class == RegDR
8632                 || i.types[op].bitfield.class == RegTR
8633                 || i.types[op].bitfield.class == RegSIMD
8634                 || i.types[op].bitfield.class == RegMMX)
8635               break;
8636
8637           if (vex_3_sources)
8638             op = dest;
8639           else if (i.tm.opcode_modifier.vexvvvv == VEXXDS)
8640             {
8641               /* For instructions with VexNDS, the register-only
8642                  source operand is encoded in VEX prefix. */
8643               gas_assert (mem != (unsigned int) ~0);
8644
8645               if (op > mem)
8646                 {
8647                   vex_reg = op++;
8648                   gas_assert (op < i.operands);
8649                 }
8650               else
8651                 {
8652                   /* Check register-only source operand when two source
8653                      operands are swapped.  */
8654                   if (!i.tm.operand_types[op].bitfield.baseindex
8655                       && i.tm.operand_types[op + 1].bitfield.baseindex)
8656                     {
8657                       vex_reg = op;
8658                       op += 2;
8659                       gas_assert (mem == (vex_reg + 1)
8660                                   && op < i.operands);
8661                     }
8662                   else
8663                     {
8664                       vex_reg = op + 1;
8665                       gas_assert (vex_reg < i.operands);
8666                     }
8667                 }
8668             }
8669           else if (i.tm.opcode_modifier.vexvvvv == VEXNDD)
8670             {
8671               /* For instructions with VexNDD, the register destination
8672                  is encoded in VEX prefix.  */
8673               if (i.mem_operands == 0)
8674                 {
8675                   /* There is no memory operand.  */
8676                   gas_assert ((op + 2) == i.operands);
8677                   vex_reg = op + 1;
8678                 }
8679               else
8680                 {
8681                   /* There are only 2 non-immediate operands.  */
8682                   gas_assert (op < i.imm_operands + 2
8683                               && i.operands == i.imm_operands + 2);
8684                   vex_reg = i.imm_operands + 1;
8685                 }
8686             }
8687           else
8688             gas_assert (op < i.operands);
8689
8690           if (vex_reg != (unsigned int) ~0)
8691             {
8692               i386_operand_type *type = &i.tm.operand_types[vex_reg];
8693
8694               if ((type->bitfield.class != Reg
8695                    || (!type->bitfield.dword && !type->bitfield.qword))
8696                   && type->bitfield.class != RegSIMD
8697                   && !operand_type_equal (type, &regmask))
8698                 abort ();
8699
8700               i.vex.register_specifier = i.op[vex_reg].regs;
8701             }
8702
8703           /* Don't set OP operand twice.  */
8704           if (vex_reg != op)
8705             {
8706               /* If there is an extension opcode to put here, the
8707                  register number must be put into the regmem field.  */
8708               if (i.tm.extension_opcode != None)
8709                 {
8710                   i.rm.regmem = i.op[op].regs->reg_num;
8711                   set_rex_vrex (i.op[op].regs, REX_B,
8712                                 i.tm.opcode_modifier.sse2avx);
8713                 }
8714               else
8715                 {
8716                   i.rm.reg = i.op[op].regs->reg_num;
8717                   set_rex_vrex (i.op[op].regs, REX_R,
8718                                 i.tm.opcode_modifier.sse2avx);
8719                 }
8720             }
8721
8722           /* Now, if no memory operand has set i.rm.mode = 0, 1, 2 we
8723              must set it to 3 to indicate this is a register operand
8724              in the regmem field.  */
8725           if (!i.mem_operands)
8726             i.rm.mode = 3;
8727         }
8728
8729       /* Fill in i.rm.reg field with extension opcode (if any).  */
8730       if (i.tm.extension_opcode != None)
8731         i.rm.reg = i.tm.extension_opcode;
8732     }
8733   return default_seg;
8734 }
8735
8736 static INLINE void
8737 frag_opcode_byte (unsigned char byte)
8738 {
8739   if (now_seg != absolute_section)
8740     FRAG_APPEND_1_CHAR (byte);
8741   else
8742     ++abs_section_offset;
8743 }
8744
8745 static unsigned int
8746 flip_code16 (unsigned int code16)
8747 {
8748   gas_assert (i.tm.operands == 1);
8749
8750   return !(i.prefix[REX_PREFIX] & REX_W)
8751          && (code16 ? i.tm.operand_types[0].bitfield.disp32
8752                       || i.tm.operand_types[0].bitfield.disp32s
8753                     : i.tm.operand_types[0].bitfield.disp16)
8754          ? CODE16 : 0;
8755 }
8756
8757 static void
8758 output_branch (void)
8759 {
8760   char *p;
8761   int size;
8762   int code16;
8763   int prefix;
8764   relax_substateT subtype;
8765   symbolS *sym;
8766   offsetT off;
8767
8768   if (now_seg == absolute_section)
8769     {
8770       as_bad (_("relaxable branches not supported in absolute section"));
8771       return;
8772     }
8773
8774   code16 = flag_code == CODE_16BIT ? CODE16 : 0;
8775   size = i.disp_encoding == disp_encoding_32bit ? BIG : SMALL;
8776
8777   prefix = 0;
8778   if (i.prefix[DATA_PREFIX] != 0)
8779     {
8780       prefix = 1;
8781       i.prefixes -= 1;
8782       code16 ^= flip_code16(code16);
8783     }
8784   /* Pentium4 branch hints.  */
8785   if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
8786       || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
8787     {
8788       prefix++;
8789       i.prefixes--;
8790     }
8791   if (i.prefix[REX_PREFIX] != 0)
8792     {
8793       prefix++;
8794       i.prefixes--;
8795     }
8796
8797   /* BND prefixed jump.  */
8798   if (i.prefix[BND_PREFIX] != 0)
8799     {
8800       prefix++;
8801       i.prefixes--;
8802     }
8803
8804   if (i.prefixes != 0)
8805     as_warn (_("skipping prefixes on `%s'"), i.tm.name);
8806
8807   /* It's always a symbol;  End frag & setup for relax.
8808      Make sure there is enough room in this frag for the largest
8809      instruction we may generate in md_convert_frag.  This is 2
8810      bytes for the opcode and room for the prefix and largest
8811      displacement.  */
8812   frag_grow (prefix + 2 + 4);
8813   /* Prefix and 1 opcode byte go in fr_fix.  */
8814   p = frag_more (prefix + 1);
8815   if (i.prefix[DATA_PREFIX] != 0)
8816     *p++ = DATA_PREFIX_OPCODE;
8817   if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE
8818       || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE)
8819     *p++ = i.prefix[SEG_PREFIX];
8820   if (i.prefix[BND_PREFIX] != 0)
8821     *p++ = BND_PREFIX_OPCODE;
8822   if (i.prefix[REX_PREFIX] != 0)
8823     *p++ = i.prefix[REX_PREFIX];
8824   *p = i.tm.base_opcode;
8825
8826   if ((unsigned char) *p == JUMP_PC_RELATIVE)
8827     subtype = ENCODE_RELAX_STATE (UNCOND_JUMP, size);
8828   else if (cpu_arch_flags.bitfield.cpui386)
8829     subtype = ENCODE_RELAX_STATE (COND_JUMP, size);
8830   else
8831     subtype = ENCODE_RELAX_STATE (COND_JUMP86, size);
8832   subtype |= code16;
8833
8834   sym = i.op[0].disps->X_add_symbol;
8835   off = i.op[0].disps->X_add_number;
8836
8837   if (i.op[0].disps->X_op != O_constant
8838       && i.op[0].disps->X_op != O_symbol)
8839     {
8840       /* Handle complex expressions.  */
8841       sym = make_expr_symbol (i.op[0].disps);
8842       off = 0;
8843     }
8844
8845   frag_now->tc_frag_data.code64 = flag_code == CODE_64BIT;
8846
8847   /* 1 possible extra opcode + 4 byte displacement go in var part.
8848      Pass reloc in fr_var.  */
8849   frag_var (rs_machine_dependent, 5, i.reloc[0], subtype, sym, off, p);
8850 }
8851
8852 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
8853 /* Return TRUE iff PLT32 relocation should be used for branching to
8854    symbol S.  */
8855
8856 static bool
8857 need_plt32_p (symbolS *s)
8858 {
8859   /* PLT32 relocation is ELF only.  */
8860   if (!IS_ELF)
8861     return false;
8862
8863 #ifdef TE_SOLARIS
8864   /* Don't emit PLT32 relocation on Solaris: neither native linker nor
8865      krtld support it.  */
8866   return false;
8867 #endif
8868
8869   /* Since there is no need to prepare for PLT branch on x86-64, we
8870      can generate R_X86_64_PLT32, instead of R_X86_64_PC32, which can
8871      be used as a marker for 32-bit PC-relative branches.  */
8872   if (!object_64bit)
8873     return false;
8874
8875   if (s == NULL)
8876     return false;
8877
8878   /* Weak or undefined symbol need PLT32 relocation.  */
8879   if (S_IS_WEAK (s) || !S_IS_DEFINED (s))
8880     return true;
8881
8882   /* Non-global symbol doesn't need PLT32 relocation.  */
8883   if (! S_IS_EXTERNAL (s))
8884     return false;
8885
8886   /* Other global symbols need PLT32 relocation.  NB: Symbol with
8887      non-default visibilities are treated as normal global symbol
8888      so that PLT32 relocation can be used as a marker for 32-bit
8889      PC-relative branches.  It is useful for linker relaxation.  */
8890   return true;
8891 }
8892 #endif
8893
8894 static void
8895 output_jump (void)
8896 {
8897   char *p;
8898   int size;
8899   fixS *fixP;
8900   bfd_reloc_code_real_type jump_reloc = i.reloc[0];
8901
8902   if (i.tm.opcode_modifier.jump == JUMP_BYTE)
8903     {
8904       /* This is a loop or jecxz type instruction.  */
8905       size = 1;
8906       if (i.prefix[ADDR_PREFIX] != 0)
8907         {
8908           frag_opcode_byte (ADDR_PREFIX_OPCODE);
8909           i.prefixes -= 1;
8910         }
8911       /* Pentium4 branch hints.  */
8912       if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
8913           || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
8914         {
8915           frag_opcode_byte (i.prefix[SEG_PREFIX]);
8916           i.prefixes--;
8917         }
8918     }
8919   else
8920     {
8921       int code16;
8922
8923       code16 = 0;
8924       if (flag_code == CODE_16BIT)
8925         code16 = CODE16;
8926
8927       if (i.prefix[DATA_PREFIX] != 0)
8928         {
8929           frag_opcode_byte (DATA_PREFIX_OPCODE);
8930           i.prefixes -= 1;
8931           code16 ^= flip_code16(code16);
8932         }
8933
8934       size = 4;
8935       if (code16)
8936         size = 2;
8937     }
8938
8939   /* BND prefixed jump.  */
8940   if (i.prefix[BND_PREFIX] != 0)
8941     {
8942       frag_opcode_byte (i.prefix[BND_PREFIX]);
8943       i.prefixes -= 1;
8944     }
8945
8946   if (i.prefix[REX_PREFIX] != 0)
8947     {
8948       frag_opcode_byte (i.prefix[REX_PREFIX]);
8949       i.prefixes -= 1;
8950     }
8951
8952   if (i.prefixes != 0)
8953     as_warn (_("skipping prefixes on `%s'"), i.tm.name);
8954
8955   if (now_seg == absolute_section)
8956     {
8957       abs_section_offset += i.opcode_length + size;
8958       return;
8959     }
8960
8961   p = frag_more (i.opcode_length + size);
8962   switch (i.opcode_length)
8963     {
8964     case 2:
8965       *p++ = i.tm.base_opcode >> 8;
8966       /* Fall through.  */
8967     case 1:
8968       *p++ = i.tm.base_opcode;
8969       break;
8970     default:
8971       abort ();
8972     }
8973
8974 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
8975   if (flag_code == CODE_64BIT && size == 4
8976       && jump_reloc == NO_RELOC && i.op[0].disps->X_add_number == 0
8977       && need_plt32_p (i.op[0].disps->X_add_symbol))
8978     jump_reloc = BFD_RELOC_X86_64_PLT32;
8979 #endif
8980
8981   jump_reloc = reloc (size, 1, 1, jump_reloc);
8982
8983   fixP = fix_new_exp (frag_now, p - frag_now->fr_literal, size,
8984                       i.op[0].disps, 1, jump_reloc);
8985
8986   /* All jumps handled here are signed, but don't unconditionally use a
8987      signed limit check for 32 and 16 bit jumps as we want to allow wrap
8988      around at 4G (outside of 64-bit mode) and 64k (except for XBEGIN)
8989      respectively.  */
8990   switch (size)
8991     {
8992     case 1:
8993       fixP->fx_signed = 1;
8994       break;
8995
8996     case 2:
8997       if (i.tm.base_opcode == 0xc7f8)
8998         fixP->fx_signed = 1;
8999       break;
9000
9001     case 4:
9002       if (flag_code == CODE_64BIT)
9003         fixP->fx_signed = 1;
9004       break;
9005     }
9006 }
9007
9008 static void
9009 output_interseg_jump (void)
9010 {
9011   char *p;
9012   int size;
9013   int prefix;
9014   int code16;
9015
9016   code16 = 0;
9017   if (flag_code == CODE_16BIT)
9018     code16 = CODE16;
9019
9020   prefix = 0;
9021   if (i.prefix[DATA_PREFIX] != 0)
9022     {
9023       prefix = 1;
9024       i.prefixes -= 1;
9025       code16 ^= CODE16;
9026     }
9027
9028   gas_assert (!i.prefix[REX_PREFIX]);
9029
9030   size = 4;
9031   if (code16)
9032     size = 2;
9033
9034   if (i.prefixes != 0)
9035     as_warn (_("skipping prefixes on `%s'"), i.tm.name);
9036
9037   if (now_seg == absolute_section)
9038     {
9039       abs_section_offset += prefix + 1 + 2 + size;
9040       return;
9041     }
9042
9043   /* 1 opcode; 2 segment; offset  */
9044   p = frag_more (prefix + 1 + 2 + size);
9045
9046   if (i.prefix[DATA_PREFIX] != 0)
9047     *p++ = DATA_PREFIX_OPCODE;
9048
9049   if (i.prefix[REX_PREFIX] != 0)
9050     *p++ = i.prefix[REX_PREFIX];
9051
9052   *p++ = i.tm.base_opcode;
9053   if (i.op[1].imms->X_op == O_constant)
9054     {
9055       offsetT n = i.op[1].imms->X_add_number;
9056
9057       if (size == 2
9058           && !fits_in_unsigned_word (n)
9059           && !fits_in_signed_word (n))
9060         {
9061           as_bad (_("16-bit jump out of range"));
9062           return;
9063         }
9064       md_number_to_chars (p, n, size);
9065     }
9066   else
9067     fix_new_exp (frag_now, p - frag_now->fr_literal, size,
9068                  i.op[1].imms, 0, reloc (size, 0, 0, i.reloc[1]));
9069
9070   p += size;
9071   if (i.op[0].imms->X_op == O_constant)
9072     md_number_to_chars (p, (valueT) i.op[0].imms->X_add_number, 2);
9073   else
9074     fix_new_exp (frag_now, p - frag_now->fr_literal, 2,
9075                  i.op[0].imms, 0, reloc (2, 0, 0, i.reloc[0]));
9076 }
9077
9078 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9079 void
9080 x86_cleanup (void)
9081 {
9082   char *p;
9083   asection *seg = now_seg;
9084   subsegT subseg = now_subseg;
9085   asection *sec;
9086   unsigned int alignment, align_size_1;
9087   unsigned int isa_1_descsz, feature_2_descsz, descsz;
9088   unsigned int isa_1_descsz_raw, feature_2_descsz_raw;
9089   unsigned int padding;
9090
9091   if (!IS_ELF || !x86_used_note)
9092     return;
9093
9094   x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X86;
9095
9096   /* The .note.gnu.property section layout:
9097
9098      Field      Length          Contents
9099      ----       ----            ----
9100      n_namsz    4               4
9101      n_descsz   4               The note descriptor size
9102      n_type     4               NT_GNU_PROPERTY_TYPE_0
9103      n_name     4               "GNU"
9104      n_desc     n_descsz        The program property array
9105      ....       ....            ....
9106    */
9107
9108   /* Create the .note.gnu.property section.  */
9109   sec = subseg_new (NOTE_GNU_PROPERTY_SECTION_NAME, 0);
9110   bfd_set_section_flags (sec,
9111                          (SEC_ALLOC
9112                           | SEC_LOAD
9113                           | SEC_DATA
9114                           | SEC_HAS_CONTENTS
9115                           | SEC_READONLY));
9116
9117   if (get_elf_backend_data (stdoutput)->s->elfclass == ELFCLASS64)
9118     {
9119       align_size_1 = 7;
9120       alignment = 3;
9121     }
9122   else
9123     {
9124       align_size_1 = 3;
9125       alignment = 2;
9126     }
9127
9128   bfd_set_section_alignment (sec, alignment);
9129   elf_section_type (sec) = SHT_NOTE;
9130
9131   /* GNU_PROPERTY_X86_ISA_1_USED: 4-byte type + 4-byte data size
9132                                   + 4-byte data  */
9133   isa_1_descsz_raw = 4 + 4 + 4;
9134   /* Align GNU_PROPERTY_X86_ISA_1_USED.  */
9135   isa_1_descsz = (isa_1_descsz_raw + align_size_1) & ~align_size_1;
9136
9137   feature_2_descsz_raw = isa_1_descsz;
9138   /* GNU_PROPERTY_X86_FEATURE_2_USED: 4-byte type + 4-byte data size
9139                                       + 4-byte data  */
9140   feature_2_descsz_raw += 4 + 4 + 4;
9141   /* Align GNU_PROPERTY_X86_FEATURE_2_USED.  */
9142   feature_2_descsz = ((feature_2_descsz_raw + align_size_1)
9143                       & ~align_size_1);
9144
9145   descsz = feature_2_descsz;
9146   /* Section size: n_namsz + n_descsz + n_type + n_name + n_descsz.  */
9147   p = frag_more (4 + 4 + 4 + 4 + descsz);
9148
9149   /* Write n_namsz.  */
9150   md_number_to_chars (p, (valueT) 4, 4);
9151
9152   /* Write n_descsz.  */
9153   md_number_to_chars (p + 4, (valueT) descsz, 4);
9154
9155   /* Write n_type.  */
9156   md_number_to_chars (p + 4 * 2, (valueT) NT_GNU_PROPERTY_TYPE_0, 4);
9157
9158   /* Write n_name.  */
9159   memcpy (p + 4 * 3, "GNU", 4);
9160
9161   /* Write 4-byte type.  */
9162   md_number_to_chars (p + 4 * 4,
9163                       (valueT) GNU_PROPERTY_X86_ISA_1_USED, 4);
9164
9165   /* Write 4-byte data size.  */
9166   md_number_to_chars (p + 4 * 5, (valueT) 4, 4);
9167
9168   /* Write 4-byte data.  */
9169   md_number_to_chars (p + 4 * 6, (valueT) x86_isa_1_used, 4);
9170
9171   /* Zero out paddings.  */
9172   padding = isa_1_descsz - isa_1_descsz_raw;
9173   if (padding)
9174     memset (p + 4 * 7, 0, padding);
9175
9176   /* Write 4-byte type.  */
9177   md_number_to_chars (p + isa_1_descsz + 4 * 4,
9178                       (valueT) GNU_PROPERTY_X86_FEATURE_2_USED, 4);
9179
9180   /* Write 4-byte data size.  */
9181   md_number_to_chars (p + isa_1_descsz + 4 * 5, (valueT) 4, 4);
9182
9183   /* Write 4-byte data.  */
9184   md_number_to_chars (p + isa_1_descsz + 4 * 6,
9185                       (valueT) x86_feature_2_used, 4);
9186
9187   /* Zero out paddings.  */
9188   padding = feature_2_descsz - feature_2_descsz_raw;
9189   if (padding)
9190     memset (p + isa_1_descsz + 4 * 7, 0, padding);
9191
9192   /* We probably can't restore the current segment, for there likely
9193      isn't one yet...  */
9194   if (seg && subseg)
9195     subseg_set (seg, subseg);
9196 }
9197 #endif
9198
9199 static unsigned int
9200 encoding_length (const fragS *start_frag, offsetT start_off,
9201                  const char *frag_now_ptr)
9202 {
9203   unsigned int len = 0;
9204
9205   if (start_frag != frag_now)
9206     {
9207       const fragS *fr = start_frag;
9208
9209       do {
9210         len += fr->fr_fix;
9211         fr = fr->fr_next;
9212       } while (fr && fr != frag_now);
9213     }
9214
9215   return len - start_off + (frag_now_ptr - frag_now->fr_literal);
9216 }
9217
9218 /* Return 1 for test, and, cmp, add, sub, inc and dec which may
9219    be macro-fused with conditional jumps.
9220    NB: If TEST/AND/CMP/ADD/SUB/INC/DEC is of RIP relative address,
9221    or is one of the following format:
9222
9223     cmp m, imm
9224     add m, imm
9225     sub m, imm
9226    test m, imm
9227     and m, imm
9228     inc m
9229     dec m
9230
9231    it is unfusible.  */
9232
9233 static int
9234 maybe_fused_with_jcc_p (enum mf_cmp_kind* mf_cmp_p)
9235 {
9236   /* No RIP address.  */
9237   if (i.base_reg && i.base_reg->reg_num == RegIP)
9238     return 0;
9239
9240   /* No opcodes outside of base encoding space.  */
9241   if (i.tm.opcode_modifier.opcodespace != SPACE_BASE)
9242     return 0;
9243
9244   /* add, sub without add/sub m, imm.  */
9245   if (i.tm.base_opcode <= 5
9246       || (i.tm.base_opcode >= 0x28 && i.tm.base_opcode <= 0x2d)
9247       || ((i.tm.base_opcode | 3) == 0x83
9248           && (i.tm.extension_opcode == 0x5
9249               || i.tm.extension_opcode == 0x0)))
9250     {
9251       *mf_cmp_p = mf_cmp_alu_cmp;
9252       return !(i.mem_operands && i.imm_operands);
9253     }
9254
9255   /* and without and m, imm.  */
9256   if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25)
9257       || ((i.tm.base_opcode | 3) == 0x83
9258           && i.tm.extension_opcode == 0x4))
9259     {
9260       *mf_cmp_p = mf_cmp_test_and;
9261       return !(i.mem_operands && i.imm_operands);
9262     }
9263
9264   /* test without test m imm.  */
9265   if ((i.tm.base_opcode | 1) == 0x85
9266       || (i.tm.base_opcode | 1) == 0xa9
9267       || ((i.tm.base_opcode | 1) == 0xf7
9268           && i.tm.extension_opcode == 0))
9269     {
9270       *mf_cmp_p = mf_cmp_test_and;
9271       return !(i.mem_operands && i.imm_operands);
9272     }
9273
9274   /* cmp without cmp m, imm.  */
9275   if ((i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d)
9276       || ((i.tm.base_opcode | 3) == 0x83
9277           && (i.tm.extension_opcode == 0x7)))
9278     {
9279       *mf_cmp_p = mf_cmp_alu_cmp;
9280       return !(i.mem_operands && i.imm_operands);
9281     }
9282
9283   /* inc, dec without inc/dec m.   */
9284   if ((i.tm.cpu_flags.bitfield.cpuno64
9285        && (i.tm.base_opcode | 0xf) == 0x4f)
9286       || ((i.tm.base_opcode | 1) == 0xff
9287           && i.tm.extension_opcode <= 0x1))
9288     {
9289       *mf_cmp_p = mf_cmp_incdec;
9290       return !i.mem_operands;
9291     }
9292
9293   return 0;
9294 }
9295
9296 /* Return 1 if a FUSED_JCC_PADDING frag should be generated.  */
9297
9298 static int
9299 add_fused_jcc_padding_frag_p (enum mf_cmp_kind* mf_cmp_p)
9300 {
9301   /* NB: Don't work with COND_JUMP86 without i386.  */
9302   if (!align_branch_power
9303       || now_seg == absolute_section
9304       || !cpu_arch_flags.bitfield.cpui386
9305       || !(align_branch & align_branch_fused_bit))
9306     return 0;
9307
9308   if (maybe_fused_with_jcc_p (mf_cmp_p))
9309     {
9310       if (last_insn.kind == last_insn_other
9311           || last_insn.seg != now_seg)
9312         return 1;
9313       if (flag_debug)
9314         as_warn_where (last_insn.file, last_insn.line,
9315                        _("`%s` skips -malign-branch-boundary on `%s`"),
9316                        last_insn.name, i.tm.name);
9317     }
9318
9319   return 0;
9320 }
9321
9322 /* Return 1 if a BRANCH_PREFIX frag should be generated.  */
9323
9324 static int
9325 add_branch_prefix_frag_p (void)
9326 {
9327   /* NB: Don't work with COND_JUMP86 without i386.  Don't add prefix
9328      to PadLock instructions since they include prefixes in opcode.  */
9329   if (!align_branch_power
9330       || !align_branch_prefix_size
9331       || now_seg == absolute_section
9332       || i.tm.cpu_flags.bitfield.cpupadlock
9333       || !cpu_arch_flags.bitfield.cpui386)
9334     return 0;
9335
9336   /* Don't add prefix if it is a prefix or there is no operand in case
9337      that segment prefix is special.  */
9338   if (!i.operands || i.tm.opcode_modifier.isprefix)
9339     return 0;
9340
9341   if (last_insn.kind == last_insn_other
9342       || last_insn.seg != now_seg)
9343     return 1;
9344
9345   if (flag_debug)
9346     as_warn_where (last_insn.file, last_insn.line,
9347                    _("`%s` skips -malign-branch-boundary on `%s`"),
9348                    last_insn.name, i.tm.name);
9349
9350   return 0;
9351 }
9352
9353 /* Return 1 if a BRANCH_PADDING frag should be generated.  */
9354
9355 static int
9356 add_branch_padding_frag_p (enum align_branch_kind *branch_p,
9357                            enum mf_jcc_kind *mf_jcc_p)
9358 {
9359   int add_padding;
9360
9361   /* NB: Don't work with COND_JUMP86 without i386.  */
9362   if (!align_branch_power
9363       || now_seg == absolute_section
9364       || !cpu_arch_flags.bitfield.cpui386
9365       || i.tm.opcode_modifier.opcodespace != SPACE_BASE)
9366     return 0;
9367
9368   add_padding = 0;
9369
9370   /* Check for jcc and direct jmp.  */
9371   if (i.tm.opcode_modifier.jump == JUMP)
9372     {
9373       if (i.tm.base_opcode == JUMP_PC_RELATIVE)
9374         {
9375           *branch_p = align_branch_jmp;
9376           add_padding = align_branch & align_branch_jmp_bit;
9377         }
9378       else
9379         {
9380           /* Because J<cc> and JN<cc> share same group in macro-fusible table,
9381              igore the lowest bit.  */
9382           *mf_jcc_p = (i.tm.base_opcode & 0x0e) >> 1;
9383           *branch_p = align_branch_jcc;
9384           if ((align_branch & align_branch_jcc_bit))
9385             add_padding = 1;
9386         }
9387     }
9388   else if ((i.tm.base_opcode | 1) == 0xc3)
9389     {
9390       /* Near ret.  */
9391       *branch_p = align_branch_ret;
9392       if ((align_branch & align_branch_ret_bit))
9393         add_padding = 1;
9394     }
9395   else
9396     {
9397       /* Check for indirect jmp, direct and indirect calls.  */
9398       if (i.tm.base_opcode == 0xe8)
9399         {
9400           /* Direct call.  */
9401           *branch_p = align_branch_call;
9402           if ((align_branch & align_branch_call_bit))
9403             add_padding = 1;
9404         }
9405       else if (i.tm.base_opcode == 0xff
9406                && (i.tm.extension_opcode == 2
9407                    || i.tm.extension_opcode == 4))
9408         {
9409           /* Indirect call and jmp.  */
9410           *branch_p = align_branch_indirect;
9411           if ((align_branch & align_branch_indirect_bit))
9412             add_padding = 1;
9413         }
9414
9415       if (add_padding
9416           && i.disp_operands
9417           && tls_get_addr
9418           && (i.op[0].disps->X_op == O_symbol
9419               || (i.op[0].disps->X_op == O_subtract
9420                   && i.op[0].disps->X_op_symbol == GOT_symbol)))
9421         {
9422           symbolS *s = i.op[0].disps->X_add_symbol;
9423           /* No padding to call to global or undefined tls_get_addr.  */
9424           if ((S_IS_EXTERNAL (s) || !S_IS_DEFINED (s))
9425               && strcmp (S_GET_NAME (s), tls_get_addr) == 0)
9426             return 0;
9427         }
9428     }
9429
9430   if (add_padding
9431       && last_insn.kind != last_insn_other
9432       && last_insn.seg == now_seg)
9433     {
9434       if (flag_debug)
9435         as_warn_where (last_insn.file, last_insn.line,
9436                        _("`%s` skips -malign-branch-boundary on `%s`"),
9437                        last_insn.name, i.tm.name);
9438       return 0;
9439     }
9440
9441   return add_padding;
9442 }
9443
9444 static void
9445 output_insn (void)
9446 {
9447   fragS *insn_start_frag;
9448   offsetT insn_start_off;
9449   fragS *fragP = NULL;
9450   enum align_branch_kind branch = align_branch_none;
9451   /* The initializer is arbitrary just to avoid uninitialized error.
9452      it's actually either assigned in add_branch_padding_frag_p
9453      or never be used.  */
9454   enum mf_jcc_kind mf_jcc = mf_jcc_jo;
9455
9456 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9457   if (IS_ELF && x86_used_note && now_seg != absolute_section)
9458     {
9459       if ((i.xstate & xstate_tmm) == xstate_tmm
9460           || i.tm.cpu_flags.bitfield.cpuamx_tile)
9461         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_TMM;
9462
9463       if (i.tm.cpu_flags.bitfield.cpu8087
9464           || i.tm.cpu_flags.bitfield.cpu287
9465           || i.tm.cpu_flags.bitfield.cpu387
9466           || i.tm.cpu_flags.bitfield.cpu687
9467           || i.tm.cpu_flags.bitfield.cpufisttp)
9468         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X87;
9469
9470       if ((i.xstate & xstate_mmx)
9471           || (i.tm.opcode_modifier.opcodespace == SPACE_0F
9472               && !is_any_vex_encoding (&i.tm)
9473               && (i.tm.base_opcode == 0x77 /* emms */
9474                   || i.tm.base_opcode == 0x0e /* femms */)))
9475         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MMX;
9476
9477       if (i.index_reg)
9478         {
9479           if (i.index_reg->reg_type.bitfield.zmmword)
9480             i.xstate |= xstate_zmm;
9481           else if (i.index_reg->reg_type.bitfield.ymmword)
9482             i.xstate |= xstate_ymm;
9483           else if (i.index_reg->reg_type.bitfield.xmmword)
9484             i.xstate |= xstate_xmm;
9485         }
9486
9487       /* vzeroall / vzeroupper */
9488       if (i.tm.base_opcode == 0x77 && i.tm.cpu_flags.bitfield.cpuavx)
9489         i.xstate |= xstate_ymm;
9490
9491       if ((i.xstate & xstate_xmm)
9492           /* ldmxcsr / stmxcsr / vldmxcsr / vstmxcsr */
9493           || (i.tm.base_opcode == 0xae
9494               && (i.tm.cpu_flags.bitfield.cpusse
9495                   || i.tm.cpu_flags.bitfield.cpuavx))
9496           || i.tm.cpu_flags.bitfield.cpuwidekl
9497           || i.tm.cpu_flags.bitfield.cpukl)
9498         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XMM;
9499
9500       if ((i.xstate & xstate_ymm) == xstate_ymm)
9501         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_YMM;
9502       if ((i.xstate & xstate_zmm) == xstate_zmm)
9503         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_ZMM;
9504       if (i.mask.reg || (i.xstate & xstate_mask) == xstate_mask)
9505         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MASK;
9506       if (i.tm.cpu_flags.bitfield.cpufxsr)
9507         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_FXSR;
9508       if (i.tm.cpu_flags.bitfield.cpuxsave)
9509         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVE;
9510       if (i.tm.cpu_flags.bitfield.cpuxsaveopt)
9511         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT;
9512       if (i.tm.cpu_flags.bitfield.cpuxsavec)
9513         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEC;
9514
9515       if (x86_feature_2_used
9516           || i.tm.cpu_flags.bitfield.cpucmov
9517           || i.tm.cpu_flags.bitfield.cpusyscall
9518           || (i.tm.opcode_modifier.opcodespace == SPACE_0F
9519               && i.tm.base_opcode == 0xc7
9520               && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
9521               && i.tm.extension_opcode == 1) /* cmpxchg8b */)
9522         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_BASELINE;
9523       if (i.tm.cpu_flags.bitfield.cpusse3
9524           || i.tm.cpu_flags.bitfield.cpussse3
9525           || i.tm.cpu_flags.bitfield.cpusse4_1
9526           || i.tm.cpu_flags.bitfield.cpusse4_2
9527           || i.tm.cpu_flags.bitfield.cpucx16
9528           || i.tm.cpu_flags.bitfield.cpupopcnt
9529           /* LAHF-SAHF insns in 64-bit mode.  */
9530           || (flag_code == CODE_64BIT
9531               && (i.tm.base_opcode | 1) == 0x9f
9532               && i.tm.opcode_modifier.opcodespace == SPACE_BASE))
9533         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V2;
9534       if (i.tm.cpu_flags.bitfield.cpuavx
9535           || i.tm.cpu_flags.bitfield.cpuavx2
9536           /* Any VEX encoded insns execpt for CpuAVX512F, CpuAVX512BW,
9537              CpuAVX512DQ, LPW, TBM and AMX.  */
9538           || (i.tm.opcode_modifier.vex
9539               && !i.tm.cpu_flags.bitfield.cpuavx512f
9540               && !i.tm.cpu_flags.bitfield.cpuavx512bw
9541               && !i.tm.cpu_flags.bitfield.cpuavx512dq
9542               && !i.tm.cpu_flags.bitfield.cpulwp
9543               && !i.tm.cpu_flags.bitfield.cputbm
9544               && !(x86_feature_2_used & GNU_PROPERTY_X86_FEATURE_2_TMM))
9545           || i.tm.cpu_flags.bitfield.cpuf16c
9546           || i.tm.cpu_flags.bitfield.cpufma
9547           || i.tm.cpu_flags.bitfield.cpulzcnt
9548           || i.tm.cpu_flags.bitfield.cpumovbe
9549           || i.tm.cpu_flags.bitfield.cpuxsaves
9550           || (x86_feature_2_used
9551               & (GNU_PROPERTY_X86_FEATURE_2_XSAVE
9552                  | GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT
9553                  | GNU_PROPERTY_X86_FEATURE_2_XSAVEC)) != 0)
9554         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V3;
9555       if (i.tm.cpu_flags.bitfield.cpuavx512f
9556           || i.tm.cpu_flags.bitfield.cpuavx512bw
9557           || i.tm.cpu_flags.bitfield.cpuavx512dq
9558           || i.tm.cpu_flags.bitfield.cpuavx512vl
9559           /* Any EVEX encoded insns except for AVX512ER, AVX512PF and
9560              VNNIW.  */
9561           || (i.tm.opcode_modifier.evex
9562               && !i.tm.cpu_flags.bitfield.cpuavx512er
9563               && !i.tm.cpu_flags.bitfield.cpuavx512pf
9564               && !i.tm.cpu_flags.bitfield.cpuavx512_4vnniw))
9565         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V4;
9566     }
9567 #endif
9568
9569   /* Tie dwarf2 debug info to the address at the start of the insn.
9570      We can't do this after the insn has been output as the current
9571      frag may have been closed off.  eg. by frag_var.  */
9572   dwarf2_emit_insn (0);
9573
9574   insn_start_frag = frag_now;
9575   insn_start_off = frag_now_fix ();
9576
9577   if (add_branch_padding_frag_p (&branch, &mf_jcc))
9578     {
9579       char *p;
9580       /* Branch can be 8 bytes.  Leave some room for prefixes.  */
9581       unsigned int max_branch_padding_size = 14;
9582
9583       /* Align section to boundary.  */
9584       record_alignment (now_seg, align_branch_power);
9585
9586       /* Make room for padding.  */
9587       frag_grow (max_branch_padding_size);
9588
9589       /* Start of the padding.  */
9590       p = frag_more (0);
9591
9592       fragP = frag_now;
9593
9594       frag_var (rs_machine_dependent, max_branch_padding_size, 0,
9595                 ENCODE_RELAX_STATE (BRANCH_PADDING, 0),
9596                 NULL, 0, p);
9597
9598       fragP->tc_frag_data.mf_type = mf_jcc;
9599       fragP->tc_frag_data.branch_type = branch;
9600       fragP->tc_frag_data.max_bytes = max_branch_padding_size;
9601     }
9602
9603   /* Output jumps.  */
9604   if (i.tm.opcode_modifier.jump == JUMP)
9605     output_branch ();
9606   else if (i.tm.opcode_modifier.jump == JUMP_BYTE
9607            || i.tm.opcode_modifier.jump == JUMP_DWORD)
9608     output_jump ();
9609   else if (i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT)
9610     output_interseg_jump ();
9611   else
9612     {
9613       /* Output normal instructions here.  */
9614       char *p;
9615       unsigned char *q;
9616       unsigned int j;
9617       enum mf_cmp_kind mf_cmp;
9618
9619       if (avoid_fence
9620           && (i.tm.base_opcode == 0xaee8
9621               || i.tm.base_opcode == 0xaef0
9622               || i.tm.base_opcode == 0xaef8))
9623         {
9624           /* Encode lfence, mfence, and sfence as
9625              f0 83 04 24 00   lock addl $0x0, (%{re}sp).  */
9626           if (flag_code == CODE_16BIT)
9627             as_bad (_("Cannot convert `%s' in 16-bit mode"), i.tm.name);
9628           else if (omit_lock_prefix)
9629             as_bad (_("Cannot convert `%s' with `-momit-lock-prefix=yes' in effect"),
9630                     i.tm.name);
9631           else if (now_seg != absolute_section)
9632             {
9633               offsetT val = 0x240483f0ULL;
9634
9635               p = frag_more (5);
9636               md_number_to_chars (p, val, 5);
9637             }
9638           else
9639             abs_section_offset += 5;
9640           return;
9641         }
9642
9643       /* Some processors fail on LOCK prefix. This options makes
9644          assembler ignore LOCK prefix and serves as a workaround.  */
9645       if (omit_lock_prefix)
9646         {
9647           if (i.tm.base_opcode == LOCK_PREFIX_OPCODE
9648               && i.tm.opcode_modifier.isprefix)
9649             return;
9650           i.prefix[LOCK_PREFIX] = 0;
9651         }
9652
9653       if (branch)
9654         /* Skip if this is a branch.  */
9655         ;
9656       else if (add_fused_jcc_padding_frag_p (&mf_cmp))
9657         {
9658           /* Make room for padding.  */
9659           frag_grow (MAX_FUSED_JCC_PADDING_SIZE);
9660           p = frag_more (0);
9661
9662           fragP = frag_now;
9663
9664           frag_var (rs_machine_dependent, MAX_FUSED_JCC_PADDING_SIZE, 0,
9665                     ENCODE_RELAX_STATE (FUSED_JCC_PADDING, 0),
9666                     NULL, 0, p);
9667
9668           fragP->tc_frag_data.mf_type = mf_cmp;
9669           fragP->tc_frag_data.branch_type = align_branch_fused;
9670           fragP->tc_frag_data.max_bytes = MAX_FUSED_JCC_PADDING_SIZE;
9671         }
9672       else if (add_branch_prefix_frag_p ())
9673         {
9674           unsigned int max_prefix_size = align_branch_prefix_size;
9675
9676           /* Make room for padding.  */
9677           frag_grow (max_prefix_size);
9678           p = frag_more (0);
9679
9680           fragP = frag_now;
9681
9682           frag_var (rs_machine_dependent, max_prefix_size, 0,
9683                     ENCODE_RELAX_STATE (BRANCH_PREFIX, 0),
9684                     NULL, 0, p);
9685
9686           fragP->tc_frag_data.max_bytes = max_prefix_size;
9687         }
9688
9689       /* Since the VEX/EVEX prefix contains the implicit prefix, we
9690          don't need the explicit prefix.  */
9691       if (!i.tm.opcode_modifier.vex && !i.tm.opcode_modifier.evex)
9692         {
9693           switch (i.tm.opcode_modifier.opcodeprefix)
9694             {
9695             case PREFIX_0X66:
9696               add_prefix (0x66);
9697               break;
9698             case PREFIX_0XF2:
9699               add_prefix (0xf2);
9700               break;
9701             case PREFIX_0XF3:
9702               if (!i.tm.cpu_flags.bitfield.cpupadlock
9703                   || (i.prefix[REP_PREFIX] != 0xf3))
9704                 add_prefix (0xf3);
9705               break;
9706             case PREFIX_NONE:
9707               switch (i.opcode_length)
9708                 {
9709                 case 2:
9710                   break;
9711                 case 1:
9712                   /* Check for pseudo prefixes.  */
9713                   if (!i.tm.opcode_modifier.isprefix || i.tm.base_opcode)
9714                     break;
9715                   as_bad_where (insn_start_frag->fr_file,
9716                                 insn_start_frag->fr_line,
9717                                 _("pseudo prefix without instruction"));
9718                   return;
9719                 default:
9720                   abort ();
9721                 }
9722               break;
9723             default:
9724               abort ();
9725             }
9726
9727 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
9728           /* For x32, add a dummy REX_OPCODE prefix for mov/add with
9729              R_X86_64_GOTTPOFF relocation so that linker can safely
9730              perform IE->LE optimization.  A dummy REX_OPCODE prefix
9731              is also needed for lea with R_X86_64_GOTPC32_TLSDESC
9732              relocation for GDesc -> IE/LE optimization.  */
9733           if (x86_elf_abi == X86_64_X32_ABI
9734               && i.operands == 2
9735               && (i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF
9736                   || i.reloc[0] == BFD_RELOC_X86_64_GOTPC32_TLSDESC)
9737               && i.prefix[REX_PREFIX] == 0)
9738             add_prefix (REX_OPCODE);
9739 #endif
9740
9741           /* The prefix bytes.  */
9742           for (j = ARRAY_SIZE (i.prefix), q = i.prefix; j > 0; j--, q++)
9743             if (*q)
9744               frag_opcode_byte (*q);
9745         }
9746       else
9747         {
9748           for (j = 0, q = i.prefix; j < ARRAY_SIZE (i.prefix); j++, q++)
9749             if (*q)
9750               switch (j)
9751                 {
9752                 case SEG_PREFIX:
9753                 case ADDR_PREFIX:
9754                   frag_opcode_byte (*q);
9755                   break;
9756                 default:
9757                   /* There should be no other prefixes for instructions
9758                      with VEX prefix.  */
9759                   abort ();
9760                 }
9761
9762           /* For EVEX instructions i.vrex should become 0 after
9763              build_evex_prefix.  For VEX instructions upper 16 registers
9764              aren't available, so VREX should be 0.  */
9765           if (i.vrex)
9766             abort ();
9767           /* Now the VEX prefix.  */
9768           if (now_seg != absolute_section)
9769             {
9770               p = frag_more (i.vex.length);
9771               for (j = 0; j < i.vex.length; j++)
9772                 p[j] = i.vex.bytes[j];
9773             }
9774           else
9775             abs_section_offset += i.vex.length;
9776         }
9777
9778       /* Now the opcode; be careful about word order here!  */
9779       j = i.opcode_length;
9780       if (!i.vex.length)
9781         switch (i.tm.opcode_modifier.opcodespace)
9782           {
9783           case SPACE_BASE:
9784             break;
9785           case SPACE_0F:
9786             ++j;
9787             break;
9788           case SPACE_0F38:
9789           case SPACE_0F3A:
9790             j += 2;
9791             break;
9792           default:
9793             abort ();
9794           }
9795
9796       if (now_seg == absolute_section)
9797         abs_section_offset += j;
9798       else if (j == 1)
9799         {
9800           FRAG_APPEND_1_CHAR (i.tm.base_opcode);
9801         }
9802       else
9803         {
9804           p = frag_more (j);
9805           if (!i.vex.length
9806               && i.tm.opcode_modifier.opcodespace != SPACE_BASE)
9807             {
9808               *p++ = 0x0f;
9809               if (i.tm.opcode_modifier.opcodespace != SPACE_0F)
9810                 *p++ = i.tm.opcode_modifier.opcodespace == SPACE_0F38
9811                        ? 0x38 : 0x3a;
9812             }
9813
9814           switch (i.opcode_length)
9815             {
9816             case 2:
9817               /* Put out high byte first: can't use md_number_to_chars!  */
9818               *p++ = (i.tm.base_opcode >> 8) & 0xff;
9819               /* Fall through.  */
9820             case 1:
9821               *p = i.tm.base_opcode & 0xff;
9822               break;
9823             default:
9824               abort ();
9825               break;
9826             }
9827
9828         }
9829
9830       /* Now the modrm byte and sib byte (if present).  */
9831       if (i.tm.opcode_modifier.modrm)
9832         {
9833           frag_opcode_byte ((i.rm.regmem << 0)
9834                              | (i.rm.reg << 3)
9835                              | (i.rm.mode << 6));
9836           /* If i.rm.regmem == ESP (4)
9837              && i.rm.mode != (Register mode)
9838              && not 16 bit
9839              ==> need second modrm byte.  */
9840           if (i.rm.regmem == ESCAPE_TO_TWO_BYTE_ADDRESSING
9841               && i.rm.mode != 3
9842               && !(i.base_reg && i.base_reg->reg_type.bitfield.word))
9843             frag_opcode_byte ((i.sib.base << 0)
9844                               | (i.sib.index << 3)
9845                               | (i.sib.scale << 6));
9846         }
9847
9848       if (i.disp_operands)
9849         output_disp (insn_start_frag, insn_start_off);
9850
9851       if (i.imm_operands)
9852         output_imm (insn_start_frag, insn_start_off);
9853
9854       /*
9855        * frag_now_fix () returning plain abs_section_offset when we're in the
9856        * absolute section, and abs_section_offset not getting updated as data
9857        * gets added to the frag breaks the logic below.
9858        */
9859       if (now_seg != absolute_section)
9860         {
9861           j = encoding_length (insn_start_frag, insn_start_off, frag_more (0));
9862           if (j > 15)
9863             as_warn (_("instruction length of %u bytes exceeds the limit of 15"),
9864                      j);
9865           else if (fragP)
9866             {
9867               /* NB: Don't add prefix with GOTPC relocation since
9868                  output_disp() above depends on the fixed encoding
9869                  length.  Can't add prefix with TLS relocation since
9870                  it breaks TLS linker optimization.  */
9871               unsigned int max = i.has_gotpc_tls_reloc ? 0 : 15 - j;
9872               /* Prefix count on the current instruction.  */
9873               unsigned int count = i.vex.length;
9874               unsigned int k;
9875               for (k = 0; k < ARRAY_SIZE (i.prefix); k++)
9876                 /* REX byte is encoded in VEX/EVEX prefix.  */
9877                 if (i.prefix[k] && (k != REX_PREFIX || !i.vex.length))
9878                   count++;
9879
9880               /* Count prefixes for extended opcode maps.  */
9881               if (!i.vex.length)
9882                 switch (i.tm.opcode_modifier.opcodespace)
9883                   {
9884                   case SPACE_BASE:
9885                     break;
9886                   case SPACE_0F:
9887                     count++;
9888                     break;
9889                   case SPACE_0F38:
9890                   case SPACE_0F3A:
9891                     count += 2;
9892                     break;
9893                   default:
9894                     abort ();
9895                   }
9896
9897               if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
9898                   == BRANCH_PREFIX)
9899                 {
9900                   /* Set the maximum prefix size in BRANCH_PREFIX
9901                      frag.  */
9902                   if (fragP->tc_frag_data.max_bytes > max)
9903                     fragP->tc_frag_data.max_bytes = max;
9904                   if (fragP->tc_frag_data.max_bytes > count)
9905                     fragP->tc_frag_data.max_bytes -= count;
9906                   else
9907                     fragP->tc_frag_data.max_bytes = 0;
9908                 }
9909               else
9910                 {
9911                   /* Remember the maximum prefix size in FUSED_JCC_PADDING
9912                      frag.  */
9913                   unsigned int max_prefix_size;
9914                   if (align_branch_prefix_size > max)
9915                     max_prefix_size = max;
9916                   else
9917                     max_prefix_size = align_branch_prefix_size;
9918                   if (max_prefix_size > count)
9919                     fragP->tc_frag_data.max_prefix_length
9920                       = max_prefix_size - count;
9921                 }
9922
9923               /* Use existing segment prefix if possible.  Use CS
9924                  segment prefix in 64-bit mode.  In 32-bit mode, use SS
9925                  segment prefix with ESP/EBP base register and use DS
9926                  segment prefix without ESP/EBP base register.  */
9927               if (i.prefix[SEG_PREFIX])
9928                 fragP->tc_frag_data.default_prefix = i.prefix[SEG_PREFIX];
9929               else if (flag_code == CODE_64BIT)
9930                 fragP->tc_frag_data.default_prefix = CS_PREFIX_OPCODE;
9931               else if (i.base_reg
9932                        && (i.base_reg->reg_num == 4
9933                            || i.base_reg->reg_num == 5))
9934                 fragP->tc_frag_data.default_prefix = SS_PREFIX_OPCODE;
9935               else
9936                 fragP->tc_frag_data.default_prefix = DS_PREFIX_OPCODE;
9937             }
9938         }
9939     }
9940
9941   /* NB: Don't work with COND_JUMP86 without i386.  */
9942   if (align_branch_power
9943       && now_seg != absolute_section
9944       && cpu_arch_flags.bitfield.cpui386)
9945     {
9946       /* Terminate each frag so that we can add prefix and check for
9947          fused jcc.  */
9948       frag_wane (frag_now);
9949       frag_new (0);
9950     }
9951
9952 #ifdef DEBUG386
9953   if (flag_debug)
9954     {
9955       pi ("" /*line*/, &i);
9956     }
9957 #endif /* DEBUG386  */
9958 }
9959
9960 /* Return the size of the displacement operand N.  */
9961
9962 static int
9963 disp_size (unsigned int n)
9964 {
9965   int size = 4;
9966
9967   if (i.types[n].bitfield.disp64)
9968     size = 8;
9969   else if (i.types[n].bitfield.disp8)
9970     size = 1;
9971   else if (i.types[n].bitfield.disp16)
9972     size = 2;
9973   return size;
9974 }
9975
9976 /* Return the size of the immediate operand N.  */
9977
9978 static int
9979 imm_size (unsigned int n)
9980 {
9981   int size = 4;
9982   if (i.types[n].bitfield.imm64)
9983     size = 8;
9984   else if (i.types[n].bitfield.imm8 || i.types[n].bitfield.imm8s)
9985     size = 1;
9986   else if (i.types[n].bitfield.imm16)
9987     size = 2;
9988   return size;
9989 }
9990
9991 static void
9992 output_disp (fragS *insn_start_frag, offsetT insn_start_off)
9993 {
9994   char *p;
9995   unsigned int n;
9996
9997   for (n = 0; n < i.operands; n++)
9998     {
9999       if (operand_type_check (i.types[n], disp))
10000         {
10001           int size = disp_size (n);
10002
10003           if (now_seg == absolute_section)
10004             abs_section_offset += size;
10005           else if (i.op[n].disps->X_op == O_constant)
10006             {
10007               offsetT val = i.op[n].disps->X_add_number;
10008
10009               val = offset_in_range (val >> (size == 1 ? i.memshift : 0),
10010                                      size);
10011               p = frag_more (size);
10012               md_number_to_chars (p, val, size);
10013             }
10014           else
10015             {
10016               enum bfd_reloc_code_real reloc_type;
10017               int sign = i.types[n].bitfield.disp32s;
10018               int pcrel = (i.flags[n] & Operand_PCrel) != 0;
10019               fixS *fixP;
10020
10021               /* We can't have 8 bit displacement here.  */
10022               gas_assert (!i.types[n].bitfield.disp8);
10023
10024               /* The PC relative address is computed relative
10025                  to the instruction boundary, so in case immediate
10026                  fields follows, we need to adjust the value.  */
10027               if (pcrel && i.imm_operands)
10028                 {
10029                   unsigned int n1;
10030                   int sz = 0;
10031
10032                   for (n1 = 0; n1 < i.operands; n1++)
10033                     if (operand_type_check (i.types[n1], imm))
10034                       {
10035                         /* Only one immediate is allowed for PC
10036                            relative address.  */
10037                         gas_assert (sz == 0);
10038                         sz = imm_size (n1);
10039                         i.op[n].disps->X_add_number -= sz;
10040                       }
10041                   /* We should find the immediate.  */
10042                   gas_assert (sz != 0);
10043                 }
10044
10045               p = frag_more (size);
10046               reloc_type = reloc (size, pcrel, sign, i.reloc[n]);
10047               if (GOT_symbol
10048                   && GOT_symbol == i.op[n].disps->X_add_symbol
10049                   && (((reloc_type == BFD_RELOC_32
10050                         || reloc_type == BFD_RELOC_X86_64_32S
10051                         || (reloc_type == BFD_RELOC_64
10052                             && object_64bit))
10053                        && (i.op[n].disps->X_op == O_symbol
10054                            || (i.op[n].disps->X_op == O_add
10055                                && ((symbol_get_value_expression
10056                                     (i.op[n].disps->X_op_symbol)->X_op)
10057                                    == O_subtract))))
10058                       || reloc_type == BFD_RELOC_32_PCREL))
10059                 {
10060                   if (!object_64bit)
10061                     {
10062                       reloc_type = BFD_RELOC_386_GOTPC;
10063                       i.has_gotpc_tls_reloc = true;
10064                       i.op[n].disps->X_add_number +=
10065                         encoding_length (insn_start_frag, insn_start_off, p);
10066                     }
10067                   else if (reloc_type == BFD_RELOC_64)
10068                     reloc_type = BFD_RELOC_X86_64_GOTPC64;
10069                   else
10070                     /* Don't do the adjustment for x86-64, as there
10071                        the pcrel addressing is relative to the _next_
10072                        insn, and that is taken care of in other code.  */
10073                     reloc_type = BFD_RELOC_X86_64_GOTPC32;
10074                 }
10075               else if (align_branch_power)
10076                 {
10077                   switch (reloc_type)
10078                     {
10079                     case BFD_RELOC_386_TLS_GD:
10080                     case BFD_RELOC_386_TLS_LDM:
10081                     case BFD_RELOC_386_TLS_IE:
10082                     case BFD_RELOC_386_TLS_IE_32:
10083                     case BFD_RELOC_386_TLS_GOTIE:
10084                     case BFD_RELOC_386_TLS_GOTDESC:
10085                     case BFD_RELOC_386_TLS_DESC_CALL:
10086                     case BFD_RELOC_X86_64_TLSGD:
10087                     case BFD_RELOC_X86_64_TLSLD:
10088                     case BFD_RELOC_X86_64_GOTTPOFF:
10089                     case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
10090                     case BFD_RELOC_X86_64_TLSDESC_CALL:
10091                       i.has_gotpc_tls_reloc = true;
10092                     default:
10093                       break;
10094                     }
10095                 }
10096               fixP = fix_new_exp (frag_now, p - frag_now->fr_literal,
10097                                   size, i.op[n].disps, pcrel,
10098                                   reloc_type);
10099
10100               if (flag_code == CODE_64BIT && size == 4 && pcrel
10101                   && !i.prefix[ADDR_PREFIX])
10102                 fixP->fx_signed = 1;
10103
10104               /* Check for "call/jmp *mem", "mov mem, %reg",
10105                  "test %reg, mem" and "binop mem, %reg" where binop
10106                  is one of adc, add, and, cmp, or, sbb, sub, xor
10107                  instructions without data prefix.  Always generate
10108                  R_386_GOT32X for "sym*GOT" operand in 32-bit mode.  */
10109               if (i.prefix[DATA_PREFIX] == 0
10110                   && (generate_relax_relocations
10111                       || (!object_64bit
10112                           && i.rm.mode == 0
10113                           && i.rm.regmem == 5))
10114                   && (i.rm.mode == 2
10115                       || (i.rm.mode == 0 && i.rm.regmem == 5))
10116                   && i.tm.opcode_modifier.opcodespace == SPACE_BASE
10117                   && ((i.operands == 1
10118                        && i.tm.base_opcode == 0xff
10119                        && (i.rm.reg == 2 || i.rm.reg == 4))
10120                       || (i.operands == 2
10121                           && (i.tm.base_opcode == 0x8b
10122                               || i.tm.base_opcode == 0x85
10123                               || (i.tm.base_opcode & ~0x38) == 0x03))))
10124                 {
10125                   if (object_64bit)
10126                     {
10127                       fixP->fx_tcbit = i.rex != 0;
10128                       if (i.base_reg
10129                           && (i.base_reg->reg_num == RegIP))
10130                       fixP->fx_tcbit2 = 1;
10131                     }
10132                   else
10133                     fixP->fx_tcbit2 = 1;
10134                 }
10135             }
10136         }
10137     }
10138 }
10139
10140 static void
10141 output_imm (fragS *insn_start_frag, offsetT insn_start_off)
10142 {
10143   char *p;
10144   unsigned int n;
10145
10146   for (n = 0; n < i.operands; n++)
10147     {
10148       /* Skip SAE/RC Imm operand in EVEX.  They are already handled.  */
10149       if (i.rounding.type != rc_none && n == i.rounding.operand)
10150         continue;
10151
10152       if (operand_type_check (i.types[n], imm))
10153         {
10154           int size = imm_size (n);
10155
10156           if (now_seg == absolute_section)
10157             abs_section_offset += size;
10158           else if (i.op[n].imms->X_op == O_constant)
10159             {
10160               offsetT val;
10161
10162               val = offset_in_range (i.op[n].imms->X_add_number,
10163                                      size);
10164               p = frag_more (size);
10165               md_number_to_chars (p, val, size);
10166             }
10167           else
10168             {
10169               /* Not absolute_section.
10170                  Need a 32-bit fixup (don't support 8bit
10171                  non-absolute imms).  Try to support other
10172                  sizes ...  */
10173               enum bfd_reloc_code_real reloc_type;
10174               int sign;
10175
10176               if (i.types[n].bitfield.imm32s
10177                   && (i.suffix == QWORD_MNEM_SUFFIX
10178                       || (!i.suffix && i.tm.opcode_modifier.no_lsuf)))
10179                 sign = 1;
10180               else
10181                 sign = 0;
10182
10183               p = frag_more (size);
10184               reloc_type = reloc (size, 0, sign, i.reloc[n]);
10185
10186               /*   This is tough to explain.  We end up with this one if we
10187                * have operands that look like
10188                * "_GLOBAL_OFFSET_TABLE_+[.-.L284]".  The goal here is to
10189                * obtain the absolute address of the GOT, and it is strongly
10190                * preferable from a performance point of view to avoid using
10191                * a runtime relocation for this.  The actual sequence of
10192                * instructions often look something like:
10193                *
10194                *        call    .L66
10195                * .L66:
10196                *        popl    %ebx
10197                *        addl    $_GLOBAL_OFFSET_TABLE_+[.-.L66],%ebx
10198                *
10199                *   The call and pop essentially return the absolute address
10200                * of the label .L66 and store it in %ebx.  The linker itself
10201                * will ultimately change the first operand of the addl so
10202                * that %ebx points to the GOT, but to keep things simple, the
10203                * .o file must have this operand set so that it generates not
10204                * the absolute address of .L66, but the absolute address of
10205                * itself.  This allows the linker itself simply treat a GOTPC
10206                * relocation as asking for a pcrel offset to the GOT to be
10207                * added in, and the addend of the relocation is stored in the
10208                * operand field for the instruction itself.
10209                *
10210                *   Our job here is to fix the operand so that it would add
10211                * the correct offset so that %ebx would point to itself.  The
10212                * thing that is tricky is that .-.L66 will point to the
10213                * beginning of the instruction, so we need to further modify
10214                * the operand so that it will point to itself.  There are
10215                * other cases where you have something like:
10216                *
10217                *        .long   $_GLOBAL_OFFSET_TABLE_+[.-.L66]
10218                *
10219                * and here no correction would be required.  Internally in
10220                * the assembler we treat operands of this form as not being
10221                * pcrel since the '.' is explicitly mentioned, and I wonder
10222                * whether it would simplify matters to do it this way.  Who
10223                * knows.  In earlier versions of the PIC patches, the
10224                * pcrel_adjust field was used to store the correction, but
10225                * since the expression is not pcrel, I felt it would be
10226                * confusing to do it this way.  */
10227
10228               if ((reloc_type == BFD_RELOC_32
10229                    || reloc_type == BFD_RELOC_X86_64_32S
10230                    || reloc_type == BFD_RELOC_64)
10231                   && GOT_symbol
10232                   && GOT_symbol == i.op[n].imms->X_add_symbol
10233                   && (i.op[n].imms->X_op == O_symbol
10234                       || (i.op[n].imms->X_op == O_add
10235                           && ((symbol_get_value_expression
10236                                (i.op[n].imms->X_op_symbol)->X_op)
10237                               == O_subtract))))
10238                 {
10239                   if (!object_64bit)
10240                     reloc_type = BFD_RELOC_386_GOTPC;
10241                   else if (size == 4)
10242                     reloc_type = BFD_RELOC_X86_64_GOTPC32;
10243                   else if (size == 8)
10244                     reloc_type = BFD_RELOC_X86_64_GOTPC64;
10245                   i.has_gotpc_tls_reloc = true;
10246                   i.op[n].imms->X_add_number +=
10247                     encoding_length (insn_start_frag, insn_start_off, p);
10248                 }
10249               fix_new_exp (frag_now, p - frag_now->fr_literal, size,
10250                            i.op[n].imms, 0, reloc_type);
10251             }
10252         }
10253     }
10254 }
10255 \f
10256 /* x86_cons_fix_new is called via the expression parsing code when a
10257    reloc is needed.  We use this hook to get the correct .got reloc.  */
10258 static int cons_sign = -1;
10259
10260 void
10261 x86_cons_fix_new (fragS *frag, unsigned int off, unsigned int len,
10262                   expressionS *exp, bfd_reloc_code_real_type r)
10263 {
10264   r = reloc (len, 0, cons_sign, r);
10265
10266 #ifdef TE_PE
10267   if (exp->X_op == O_secrel)
10268     {
10269       exp->X_op = O_symbol;
10270       r = BFD_RELOC_32_SECREL;
10271     }
10272   else if (exp->X_op == O_secidx)
10273     r = BFD_RELOC_16_SECIDX;
10274 #endif
10275
10276   fix_new_exp (frag, off, len, exp, 0, r);
10277 }
10278
10279 /* Export the ABI address size for use by TC_ADDRESS_BYTES for the
10280    purpose of the `.dc.a' internal pseudo-op.  */
10281
10282 int
10283 x86_address_bytes (void)
10284 {
10285   if ((stdoutput->arch_info->mach & bfd_mach_x64_32))
10286     return 4;
10287   return stdoutput->arch_info->bits_per_address / 8;
10288 }
10289
10290 #if (!(defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) || defined (OBJ_MACH_O)) \
10291      || defined (LEX_AT)) && !defined (TE_PE)
10292 # define lex_got(reloc, adjust, types) NULL
10293 #else
10294 /* Parse operands of the form
10295    <symbol>@GOTOFF+<nnn>
10296    and similar .plt or .got references.
10297
10298    If we find one, set up the correct relocation in RELOC and copy the
10299    input string, minus the `@GOTOFF' into a malloc'd buffer for
10300    parsing by the calling routine.  Return this buffer, and if ADJUST
10301    is non-null set it to the length of the string we removed from the
10302    input line.  Otherwise return NULL.  */
10303 static char *
10304 lex_got (enum bfd_reloc_code_real *rel,
10305          int *adjust,
10306          i386_operand_type *types)
10307 {
10308   /* Some of the relocations depend on the size of what field is to
10309      be relocated.  But in our callers i386_immediate and i386_displacement
10310      we don't yet know the operand size (this will be set by insn
10311      matching).  Hence we record the word32 relocation here,
10312      and adjust the reloc according to the real size in reloc().  */
10313   static const struct
10314   {
10315     const char *str;
10316     int len;
10317     const enum bfd_reloc_code_real rel[2];
10318     const i386_operand_type types64;
10319     bool need_GOT_symbol;
10320   }
10321     gotrel[] =
10322   {
10323 #ifndef TE_PE
10324 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
10325     { STRING_COMMA_LEN ("SIZE"),      { BFD_RELOC_SIZE32,
10326                                         BFD_RELOC_SIZE32 },
10327       OPERAND_TYPE_IMM32_64, false },
10328 #endif
10329     { STRING_COMMA_LEN ("PLTOFF"),   { _dummy_first_bfd_reloc_code_real,
10330                                        BFD_RELOC_X86_64_PLTOFF64 },
10331       OPERAND_TYPE_IMM64, true },
10332     { STRING_COMMA_LEN ("PLT"),      { BFD_RELOC_386_PLT32,
10333                                        BFD_RELOC_X86_64_PLT32    },
10334       OPERAND_TYPE_IMM32_32S_DISP32, false },
10335     { STRING_COMMA_LEN ("GOTPLT"),   { _dummy_first_bfd_reloc_code_real,
10336                                        BFD_RELOC_X86_64_GOTPLT64 },
10337       OPERAND_TYPE_IMM64_DISP64, true },
10338     { STRING_COMMA_LEN ("GOTOFF"),   { BFD_RELOC_386_GOTOFF,
10339                                        BFD_RELOC_X86_64_GOTOFF64 },
10340       OPERAND_TYPE_IMM64_DISP64, true },
10341     { STRING_COMMA_LEN ("GOTPCREL"), { _dummy_first_bfd_reloc_code_real,
10342                                        BFD_RELOC_X86_64_GOTPCREL },
10343       OPERAND_TYPE_IMM32_32S_DISP32, true },
10344     { STRING_COMMA_LEN ("TLSGD"),    { BFD_RELOC_386_TLS_GD,
10345                                        BFD_RELOC_X86_64_TLSGD    },
10346       OPERAND_TYPE_IMM32_32S_DISP32, true },
10347     { STRING_COMMA_LEN ("TLSLDM"),   { BFD_RELOC_386_TLS_LDM,
10348                                        _dummy_first_bfd_reloc_code_real },
10349       OPERAND_TYPE_NONE, true },
10350     { STRING_COMMA_LEN ("TLSLD"),    { _dummy_first_bfd_reloc_code_real,
10351                                        BFD_RELOC_X86_64_TLSLD    },
10352       OPERAND_TYPE_IMM32_32S_DISP32, true },
10353     { STRING_COMMA_LEN ("GOTTPOFF"), { BFD_RELOC_386_TLS_IE_32,
10354                                        BFD_RELOC_X86_64_GOTTPOFF },
10355       OPERAND_TYPE_IMM32_32S_DISP32, true },
10356     { STRING_COMMA_LEN ("TPOFF"),    { BFD_RELOC_386_TLS_LE_32,
10357                                        BFD_RELOC_X86_64_TPOFF32  },
10358       OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
10359     { STRING_COMMA_LEN ("NTPOFF"),   { BFD_RELOC_386_TLS_LE,
10360                                        _dummy_first_bfd_reloc_code_real },
10361       OPERAND_TYPE_NONE, true },
10362     { STRING_COMMA_LEN ("DTPOFF"),   { BFD_RELOC_386_TLS_LDO_32,
10363                                        BFD_RELOC_X86_64_DTPOFF32 },
10364       OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
10365     { STRING_COMMA_LEN ("GOTNTPOFF"),{ BFD_RELOC_386_TLS_GOTIE,
10366                                        _dummy_first_bfd_reloc_code_real },
10367       OPERAND_TYPE_NONE, true },
10368     { STRING_COMMA_LEN ("INDNTPOFF"),{ BFD_RELOC_386_TLS_IE,
10369                                        _dummy_first_bfd_reloc_code_real },
10370       OPERAND_TYPE_NONE, true },
10371     { STRING_COMMA_LEN ("GOT"),      { BFD_RELOC_386_GOT32,
10372                                        BFD_RELOC_X86_64_GOT32    },
10373       OPERAND_TYPE_IMM32_32S_64_DISP32, true },
10374     { STRING_COMMA_LEN ("TLSDESC"),  { BFD_RELOC_386_TLS_GOTDESC,
10375                                        BFD_RELOC_X86_64_GOTPC32_TLSDESC },
10376       OPERAND_TYPE_IMM32_32S_DISP32, true },
10377     { STRING_COMMA_LEN ("TLSCALL"),  { BFD_RELOC_386_TLS_DESC_CALL,
10378                                        BFD_RELOC_X86_64_TLSDESC_CALL },
10379       OPERAND_TYPE_IMM32_32S_DISP32, true },
10380 #else /* TE_PE */
10381     { STRING_COMMA_LEN ("SECREL32"), { BFD_RELOC_32_SECREL,
10382                                        BFD_RELOC_32_SECREL },
10383       OPERAND_TYPE_IMM32_32S_64_DISP32_64, false },
10384 #endif
10385   };
10386   char *cp;
10387   unsigned int j;
10388
10389 #if defined (OBJ_MAYBE_ELF) && !defined (TE_PE)
10390   if (!IS_ELF)
10391     return NULL;
10392 #endif
10393
10394   for (cp = input_line_pointer; *cp != '@'; cp++)
10395     if (is_end_of_line[(unsigned char) *cp] || *cp == ',')
10396       return NULL;
10397
10398   for (j = 0; j < ARRAY_SIZE (gotrel); j++)
10399     {
10400       int len = gotrel[j].len;
10401       if (strncasecmp (cp + 1, gotrel[j].str, len) == 0)
10402         {
10403           if (gotrel[j].rel[object_64bit] != 0)
10404             {
10405               int first, second;
10406               char *tmpbuf, *past_reloc;
10407
10408               *rel = gotrel[j].rel[object_64bit];
10409
10410               if (types)
10411                 {
10412                   if (flag_code != CODE_64BIT)
10413                     {
10414                       types->bitfield.imm32 = 1;
10415                       types->bitfield.disp32 = 1;
10416                     }
10417                   else
10418                     *types = gotrel[j].types64;
10419                 }
10420
10421               if (gotrel[j].need_GOT_symbol && GOT_symbol == NULL)
10422                 GOT_symbol = symbol_find_or_make (GLOBAL_OFFSET_TABLE_NAME);
10423
10424               /* The length of the first part of our input line.  */
10425               first = cp - input_line_pointer;
10426
10427               /* The second part goes from after the reloc token until
10428                  (and including) an end_of_line char or comma.  */
10429               past_reloc = cp + 1 + len;
10430               cp = past_reloc;
10431               while (!is_end_of_line[(unsigned char) *cp] && *cp != ',')
10432                 ++cp;
10433               second = cp + 1 - past_reloc;
10434
10435               /* Allocate and copy string.  The trailing NUL shouldn't
10436                  be necessary, but be safe.  */
10437               tmpbuf = XNEWVEC (char, first + second + 2);
10438               memcpy (tmpbuf, input_line_pointer, first);
10439               if (second != 0 && *past_reloc != ' ')
10440                 /* Replace the relocation token with ' ', so that
10441                    errors like foo@GOTOFF1 will be detected.  */
10442                 tmpbuf[first++] = ' ';
10443               else
10444                 /* Increment length by 1 if the relocation token is
10445                    removed.  */
10446                 len++;
10447               if (adjust)
10448                 *adjust = len;
10449               memcpy (tmpbuf + first, past_reloc, second);
10450               tmpbuf[first + second] = '\0';
10451               return tmpbuf;
10452             }
10453
10454           as_bad (_("@%s reloc is not supported with %d-bit output format"),
10455                   gotrel[j].str, 1 << (5 + object_64bit));
10456           return NULL;
10457         }
10458     }
10459
10460   /* Might be a symbol version string.  Don't as_bad here.  */
10461   return NULL;
10462 }
10463 #endif
10464
10465 bfd_reloc_code_real_type
10466 x86_cons (expressionS *exp, int size)
10467 {
10468   bfd_reloc_code_real_type got_reloc = NO_RELOC;
10469
10470 #if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)) \
10471       && !defined (LEX_AT)) \
10472     || defined (TE_PE)
10473   intel_syntax = -intel_syntax;
10474
10475   exp->X_md = 0;
10476   if (size == 4 || (object_64bit && size == 8))
10477     {
10478       /* Handle @GOTOFF and the like in an expression.  */
10479       char *save;
10480       char *gotfree_input_line;
10481       int adjust = 0;
10482
10483       save = input_line_pointer;
10484       gotfree_input_line = lex_got (&got_reloc, &adjust, NULL);
10485       if (gotfree_input_line)
10486         input_line_pointer = gotfree_input_line;
10487
10488       expression (exp);
10489
10490       if (gotfree_input_line)
10491         {
10492           /* expression () has merrily parsed up to the end of line,
10493              or a comma - in the wrong buffer.  Transfer how far
10494              input_line_pointer has moved to the right buffer.  */
10495           input_line_pointer = (save
10496                                 + (input_line_pointer - gotfree_input_line)
10497                                 + adjust);
10498           free (gotfree_input_line);
10499           if (exp->X_op == O_constant
10500               || exp->X_op == O_absent
10501               || exp->X_op == O_illegal
10502               || exp->X_op == O_register
10503               || exp->X_op == O_big)
10504             {
10505               char c = *input_line_pointer;
10506               *input_line_pointer = 0;
10507               as_bad (_("missing or invalid expression `%s'"), save);
10508               *input_line_pointer = c;
10509             }
10510           else if ((got_reloc == BFD_RELOC_386_PLT32
10511                     || got_reloc == BFD_RELOC_X86_64_PLT32)
10512                    && exp->X_op != O_symbol)
10513             {
10514               char c = *input_line_pointer;
10515               *input_line_pointer = 0;
10516               as_bad (_("invalid PLT expression `%s'"), save);
10517               *input_line_pointer = c;
10518             }
10519         }
10520     }
10521   else
10522     expression (exp);
10523
10524   intel_syntax = -intel_syntax;
10525
10526   if (intel_syntax)
10527     i386_intel_simplify (exp);
10528 #else
10529   expression (exp);
10530 #endif
10531
10532   /* If not 64bit, massage value, to account for wraparound when !BFD64.  */
10533   if (size == 4 && exp->X_op == O_constant && !object_64bit)
10534     exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
10535
10536   return got_reloc;
10537 }
10538
10539 static void
10540 signed_cons (int size)
10541 {
10542   if (object_64bit)
10543     cons_sign = 1;
10544   cons (size);
10545   cons_sign = -1;
10546 }
10547
10548 #ifdef TE_PE
10549 static void
10550 pe_directive_secrel (int dummy ATTRIBUTE_UNUSED)
10551 {
10552   expressionS exp;
10553
10554   do
10555     {
10556       expression (&exp);
10557       if (exp.X_op == O_symbol)
10558         exp.X_op = O_secrel;
10559
10560       emit_expr (&exp, 4);
10561     }
10562   while (*input_line_pointer++ == ',');
10563
10564   input_line_pointer--;
10565   demand_empty_rest_of_line ();
10566 }
10567
10568 static void
10569 pe_directive_secidx (int dummy ATTRIBUTE_UNUSED)
10570 {
10571   expressionS exp;
10572
10573   do
10574     {
10575       expression (&exp);
10576       if (exp.X_op == O_symbol)
10577         exp.X_op = O_secidx;
10578
10579       emit_expr (&exp, 2);
10580     }
10581   while (*input_line_pointer++ == ',');
10582
10583   input_line_pointer--;
10584   demand_empty_rest_of_line ();
10585 }
10586 #endif
10587
10588 /* Handle Vector operations.  */
10589
10590 static char *
10591 check_VecOperations (char *op_string)
10592 {
10593   const reg_entry *mask;
10594   const char *saved;
10595   char *end_op;
10596
10597   while (*op_string)
10598     {
10599       saved = op_string;
10600       if (*op_string == '{')
10601         {
10602           op_string++;
10603
10604           /* Check broadcasts.  */
10605           if (startswith (op_string, "1to"))
10606             {
10607               unsigned int bcst_type;
10608
10609               if (i.broadcast.type)
10610                 goto duplicated_vec_op;
10611
10612               op_string += 3;
10613               if (*op_string == '8')
10614                 bcst_type = 8;
10615               else if (*op_string == '4')
10616                 bcst_type = 4;
10617               else if (*op_string == '2')
10618                 bcst_type = 2;
10619               else if (*op_string == '1'
10620                        && *(op_string+1) == '6')
10621                 {
10622                   bcst_type = 16;
10623                   op_string++;
10624                 }
10625               else if (*op_string == '3'
10626                        && *(op_string+1) == '2')
10627                 {
10628                   bcst_type = 32;
10629                   op_string++;
10630                 }
10631               else
10632                 {
10633                   as_bad (_("Unsupported broadcast: `%s'"), saved);
10634                   return NULL;
10635                 }
10636               op_string++;
10637
10638               i.broadcast.type = bcst_type;
10639               i.broadcast.operand = this_operand;
10640             }
10641           /* Check masking operation.  */
10642           else if ((mask = parse_register (op_string, &end_op)) != NULL)
10643             {
10644               if (mask == &bad_reg)
10645                 return NULL;
10646
10647               /* k0 can't be used for write mask.  */
10648               if (mask->reg_type.bitfield.class != RegMask || !mask->reg_num)
10649                 {
10650                   as_bad (_("`%s%s' can't be used for write mask"),
10651                           register_prefix, mask->reg_name);
10652                   return NULL;
10653                 }
10654
10655               if (!i.mask.reg)
10656                 {
10657                   i.mask.reg = mask;
10658                   i.mask.operand = this_operand;
10659                 }
10660               else if (i.mask.reg->reg_num)
10661                 goto duplicated_vec_op;
10662               else
10663                 {
10664                   i.mask.reg = mask;
10665
10666                   /* Only "{z}" is allowed here.  No need to check
10667                      zeroing mask explicitly.  */
10668                   if (i.mask.operand != (unsigned int) this_operand)
10669                     {
10670                       as_bad (_("invalid write mask `%s'"), saved);
10671                       return NULL;
10672                     }
10673                 }
10674
10675               op_string = end_op;
10676             }
10677           /* Check zeroing-flag for masking operation.  */
10678           else if (*op_string == 'z')
10679             {
10680               if (!i.mask.reg)
10681                 {
10682                   i.mask.reg = reg_k0;
10683                   i.mask.zeroing = 1;
10684                   i.mask.operand = this_operand;
10685                 }
10686               else
10687                 {
10688                   if (i.mask.zeroing)
10689                     {
10690                     duplicated_vec_op:
10691                       as_bad (_("duplicated `%s'"), saved);
10692                       return NULL;
10693                     }
10694
10695                   i.mask.zeroing = 1;
10696
10697                   /* Only "{%k}" is allowed here.  No need to check mask
10698                      register explicitly.  */
10699                   if (i.mask.operand != (unsigned int) this_operand)
10700                     {
10701                       as_bad (_("invalid zeroing-masking `%s'"),
10702                               saved);
10703                       return NULL;
10704                     }
10705                 }
10706
10707               op_string++;
10708             }
10709           else
10710             goto unknown_vec_op;
10711
10712           if (*op_string != '}')
10713             {
10714               as_bad (_("missing `}' in `%s'"), saved);
10715               return NULL;
10716             }
10717           op_string++;
10718
10719           /* Strip whitespace since the addition of pseudo prefixes
10720              changed how the scrubber treats '{'.  */
10721           if (is_space_char (*op_string))
10722             ++op_string;
10723
10724           continue;
10725         }
10726     unknown_vec_op:
10727       /* We don't know this one.  */
10728       as_bad (_("unknown vector operation: `%s'"), saved);
10729       return NULL;
10730     }
10731
10732   if (i.mask.reg && i.mask.zeroing && !i.mask.reg->reg_num)
10733     {
10734       as_bad (_("zeroing-masking only allowed with write mask"));
10735       return NULL;
10736     }
10737
10738   return op_string;
10739 }
10740
10741 static int
10742 i386_immediate (char *imm_start)
10743 {
10744   char *save_input_line_pointer;
10745   char *gotfree_input_line;
10746   segT exp_seg = 0;
10747   expressionS *exp;
10748   i386_operand_type types;
10749
10750   operand_type_set (&types, ~0);
10751
10752   if (i.imm_operands == MAX_IMMEDIATE_OPERANDS)
10753     {
10754       as_bad (_("at most %d immediate operands are allowed"),
10755               MAX_IMMEDIATE_OPERANDS);
10756       return 0;
10757     }
10758
10759   exp = &im_expressions[i.imm_operands++];
10760   i.op[this_operand].imms = exp;
10761
10762   if (is_space_char (*imm_start))
10763     ++imm_start;
10764
10765   save_input_line_pointer = input_line_pointer;
10766   input_line_pointer = imm_start;
10767
10768   gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
10769   if (gotfree_input_line)
10770     input_line_pointer = gotfree_input_line;
10771
10772   exp_seg = expression (exp);
10773
10774   SKIP_WHITESPACE ();
10775   if (*input_line_pointer)
10776     as_bad (_("junk `%s' after expression"), input_line_pointer);
10777
10778   input_line_pointer = save_input_line_pointer;
10779   if (gotfree_input_line)
10780     {
10781       free (gotfree_input_line);
10782
10783       if (exp->X_op == O_constant)
10784         exp->X_op = O_illegal;
10785     }
10786
10787   if (exp_seg == reg_section)
10788     {
10789       as_bad (_("illegal immediate register operand %s"), imm_start);
10790       return 0;
10791     }
10792
10793   return i386_finalize_immediate (exp_seg, exp, types, imm_start);
10794 }
10795
10796 static int
10797 i386_finalize_immediate (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
10798                          i386_operand_type types, const char *imm_start)
10799 {
10800   if (exp->X_op == O_absent || exp->X_op == O_illegal || exp->X_op == O_big)
10801     {
10802       if (imm_start)
10803         as_bad (_("missing or invalid immediate expression `%s'"),
10804                 imm_start);
10805       return 0;
10806     }
10807   else if (exp->X_op == O_constant)
10808     {
10809       /* Size it properly later.  */
10810       i.types[this_operand].bitfield.imm64 = 1;
10811
10812       /* If not 64bit, sign/zero extend val, to account for wraparound
10813          when !BFD64.  */
10814       if (flag_code != CODE_64BIT)
10815         exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
10816     }
10817 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
10818   else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
10819            && exp_seg != absolute_section
10820            && exp_seg != text_section
10821            && exp_seg != data_section
10822            && exp_seg != bss_section
10823            && exp_seg != undefined_section
10824            && !bfd_is_com_section (exp_seg))
10825     {
10826       as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
10827       return 0;
10828     }
10829 #endif
10830   else
10831     {
10832       /* This is an address.  The size of the address will be
10833          determined later, depending on destination register,
10834          suffix, or the default for the section.  */
10835       i.types[this_operand].bitfield.imm8 = 1;
10836       i.types[this_operand].bitfield.imm16 = 1;
10837       i.types[this_operand].bitfield.imm32 = 1;
10838       i.types[this_operand].bitfield.imm32s = 1;
10839       i.types[this_operand].bitfield.imm64 = 1;
10840       i.types[this_operand] = operand_type_and (i.types[this_operand],
10841                                                 types);
10842     }
10843
10844   return 1;
10845 }
10846
10847 static char *
10848 i386_scale (char *scale)
10849 {
10850   offsetT val;
10851   char *save = input_line_pointer;
10852
10853   input_line_pointer = scale;
10854   val = get_absolute_expression ();
10855
10856   switch (val)
10857     {
10858     case 1:
10859       i.log2_scale_factor = 0;
10860       break;
10861     case 2:
10862       i.log2_scale_factor = 1;
10863       break;
10864     case 4:
10865       i.log2_scale_factor = 2;
10866       break;
10867     case 8:
10868       i.log2_scale_factor = 3;
10869       break;
10870     default:
10871       {
10872         char sep = *input_line_pointer;
10873
10874         *input_line_pointer = '\0';
10875         as_bad (_("expecting scale factor of 1, 2, 4, or 8: got `%s'"),
10876                 scale);
10877         *input_line_pointer = sep;
10878         input_line_pointer = save;
10879         return NULL;
10880       }
10881     }
10882   if (i.log2_scale_factor != 0 && i.index_reg == 0)
10883     {
10884       as_warn (_("scale factor of %d without an index register"),
10885                1 << i.log2_scale_factor);
10886       i.log2_scale_factor = 0;
10887     }
10888   scale = input_line_pointer;
10889   input_line_pointer = save;
10890   return scale;
10891 }
10892
10893 static int
10894 i386_displacement (char *disp_start, char *disp_end)
10895 {
10896   expressionS *exp;
10897   segT exp_seg = 0;
10898   char *save_input_line_pointer;
10899   char *gotfree_input_line;
10900   int override;
10901   i386_operand_type bigdisp, types = anydisp;
10902   int ret;
10903
10904   if (i.disp_operands == MAX_MEMORY_OPERANDS)
10905     {
10906       as_bad (_("at most %d displacement operands are allowed"),
10907               MAX_MEMORY_OPERANDS);
10908       return 0;
10909     }
10910
10911   operand_type_set (&bigdisp, 0);
10912   if (i.jumpabsolute
10913       || i.types[this_operand].bitfield.baseindex
10914       || (current_templates->start->opcode_modifier.jump != JUMP
10915           && current_templates->start->opcode_modifier.jump != JUMP_DWORD))
10916     {
10917       i386_addressing_mode ();
10918       override = (i.prefix[ADDR_PREFIX] != 0);
10919       if (flag_code == CODE_64BIT)
10920         {
10921           if (!override)
10922             {
10923               bigdisp.bitfield.disp32s = 1;
10924               bigdisp.bitfield.disp64 = 1;
10925             }
10926           else
10927             bigdisp.bitfield.disp32 = 1;
10928         }
10929       else if ((flag_code == CODE_16BIT) ^ override)
10930           bigdisp.bitfield.disp16 = 1;
10931       else
10932           bigdisp.bitfield.disp32 = 1;
10933     }
10934   else
10935     {
10936       /* For PC-relative branches, the width of the displacement may be
10937          dependent upon data size, but is never dependent upon address size.
10938          Also make sure to not unintentionally match against a non-PC-relative
10939          branch template.  */
10940       static templates aux_templates;
10941       const insn_template *t = current_templates->start;
10942       bool has_intel64 = false;
10943
10944       aux_templates.start = t;
10945       while (++t < current_templates->end)
10946         {
10947           if (t->opcode_modifier.jump
10948               != current_templates->start->opcode_modifier.jump)
10949             break;
10950           if ((t->opcode_modifier.isa64 >= INTEL64))
10951             has_intel64 = true;
10952         }
10953       if (t < current_templates->end)
10954         {
10955           aux_templates.end = t;
10956           current_templates = &aux_templates;
10957         }
10958
10959       override = (i.prefix[DATA_PREFIX] != 0);
10960       if (flag_code == CODE_64BIT)
10961         {
10962           if ((override || i.suffix == WORD_MNEM_SUFFIX)
10963               && (!intel64 || !has_intel64))
10964             bigdisp.bitfield.disp16 = 1;
10965           else
10966             bigdisp.bitfield.disp32s = 1;
10967         }
10968       else
10969         {
10970           if (!override)
10971             override = (i.suffix == (flag_code != CODE_16BIT
10972                                      ? WORD_MNEM_SUFFIX
10973                                      : LONG_MNEM_SUFFIX));
10974           bigdisp.bitfield.disp32 = 1;
10975           if ((flag_code == CODE_16BIT) ^ override)
10976             {
10977               bigdisp.bitfield.disp32 = 0;
10978               bigdisp.bitfield.disp16 = 1;
10979             }
10980         }
10981     }
10982   i.types[this_operand] = operand_type_or (i.types[this_operand],
10983                                            bigdisp);
10984
10985   exp = &disp_expressions[i.disp_operands];
10986   i.op[this_operand].disps = exp;
10987   i.disp_operands++;
10988   save_input_line_pointer = input_line_pointer;
10989   input_line_pointer = disp_start;
10990   END_STRING_AND_SAVE (disp_end);
10991
10992 #ifndef GCC_ASM_O_HACK
10993 #define GCC_ASM_O_HACK 0
10994 #endif
10995 #if GCC_ASM_O_HACK
10996   END_STRING_AND_SAVE (disp_end + 1);
10997   if (i.types[this_operand].bitfield.baseIndex
10998       && displacement_string_end[-1] == '+')
10999     {
11000       /* This hack is to avoid a warning when using the "o"
11001          constraint within gcc asm statements.
11002          For instance:
11003
11004          #define _set_tssldt_desc(n,addr,limit,type) \
11005          __asm__ __volatile__ ( \
11006          "movw %w2,%0\n\t" \
11007          "movw %w1,2+%0\n\t" \
11008          "rorl $16,%1\n\t" \
11009          "movb %b1,4+%0\n\t" \
11010          "movb %4,5+%0\n\t" \
11011          "movb $0,6+%0\n\t" \
11012          "movb %h1,7+%0\n\t" \
11013          "rorl $16,%1" \
11014          : "=o"(*(n)) : "q" (addr), "ri"(limit), "i"(type))
11015
11016          This works great except that the output assembler ends
11017          up looking a bit weird if it turns out that there is
11018          no offset.  You end up producing code that looks like:
11019
11020          #APP
11021          movw $235,(%eax)
11022          movw %dx,2+(%eax)
11023          rorl $16,%edx
11024          movb %dl,4+(%eax)
11025          movb $137,5+(%eax)
11026          movb $0,6+(%eax)
11027          movb %dh,7+(%eax)
11028          rorl $16,%edx
11029          #NO_APP
11030
11031          So here we provide the missing zero.  */
11032
11033       *displacement_string_end = '0';
11034     }
11035 #endif
11036   gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
11037   if (gotfree_input_line)
11038     input_line_pointer = gotfree_input_line;
11039
11040   exp_seg = expression (exp);
11041
11042   SKIP_WHITESPACE ();
11043   if (*input_line_pointer)
11044     as_bad (_("junk `%s' after expression"), input_line_pointer);
11045 #if GCC_ASM_O_HACK
11046   RESTORE_END_STRING (disp_end + 1);
11047 #endif
11048   input_line_pointer = save_input_line_pointer;
11049   if (gotfree_input_line)
11050     {
11051       free (gotfree_input_line);
11052
11053       if (exp->X_op == O_constant || exp->X_op == O_register)
11054         exp->X_op = O_illegal;
11055     }
11056
11057   ret = i386_finalize_displacement (exp_seg, exp, types, disp_start);
11058
11059   RESTORE_END_STRING (disp_end);
11060
11061   return ret;
11062 }
11063
11064 static int
11065 i386_finalize_displacement (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
11066                             i386_operand_type types, const char *disp_start)
11067 {
11068   i386_operand_type bigdisp;
11069   int ret = 1;
11070
11071   /* We do this to make sure that the section symbol is in
11072      the symbol table.  We will ultimately change the relocation
11073      to be relative to the beginning of the section.  */
11074   if (i.reloc[this_operand] == BFD_RELOC_386_GOTOFF
11075       || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL
11076       || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
11077     {
11078       if (exp->X_op != O_symbol)
11079         goto inv_disp;
11080
11081       if (S_IS_LOCAL (exp->X_add_symbol)
11082           && S_GET_SEGMENT (exp->X_add_symbol) != undefined_section
11083           && S_GET_SEGMENT (exp->X_add_symbol) != expr_section)
11084         section_symbol (S_GET_SEGMENT (exp->X_add_symbol));
11085       exp->X_op = O_subtract;
11086       exp->X_op_symbol = GOT_symbol;
11087       if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL)
11088         i.reloc[this_operand] = BFD_RELOC_32_PCREL;
11089       else if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
11090         i.reloc[this_operand] = BFD_RELOC_64;
11091       else
11092         i.reloc[this_operand] = BFD_RELOC_32;
11093     }
11094
11095   else if (exp->X_op == O_absent
11096            || exp->X_op == O_illegal
11097            || exp->X_op == O_big)
11098     {
11099     inv_disp:
11100       as_bad (_("missing or invalid displacement expression `%s'"),
11101               disp_start);
11102       ret = 0;
11103     }
11104
11105   else if (exp->X_op == O_constant)
11106     {
11107       /* Sizing gets taken care of by optimize_disp().
11108
11109          If not 64bit, sign/zero extend val, to account for wraparound
11110          when !BFD64.  */
11111       if (flag_code != CODE_64BIT)
11112         exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
11113     }
11114
11115 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
11116   else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
11117            && exp_seg != absolute_section
11118            && exp_seg != text_section
11119            && exp_seg != data_section
11120            && exp_seg != bss_section
11121            && exp_seg != undefined_section
11122            && !bfd_is_com_section (exp_seg))
11123     {
11124       as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
11125       ret = 0;
11126     }
11127 #endif
11128
11129   else if (current_templates->start->opcode_modifier.jump == JUMP_BYTE)
11130     i.types[this_operand].bitfield.disp8 = 1;
11131
11132   /* Check if this is a displacement only operand.  */
11133   bigdisp = operand_type_and_not (i.types[this_operand], anydisp);
11134   if (operand_type_all_zero (&bigdisp))
11135     i.types[this_operand] = operand_type_and (i.types[this_operand],
11136                                               types);
11137
11138   return ret;
11139 }
11140
11141 /* Return the active addressing mode, taking address override and
11142    registers forming the address into consideration.  Update the
11143    address override prefix if necessary.  */
11144
11145 static enum flag_code
11146 i386_addressing_mode (void)
11147 {
11148   enum flag_code addr_mode;
11149
11150   if (i.prefix[ADDR_PREFIX])
11151     addr_mode = flag_code == CODE_32BIT ? CODE_16BIT : CODE_32BIT;
11152   else if (flag_code == CODE_16BIT
11153            && current_templates->start->cpu_flags.bitfield.cpumpx
11154            /* Avoid replacing the "16-bit addressing not allowed" diagnostic
11155               from md_assemble() by "is not a valid base/index expression"
11156               when there is a base and/or index.  */
11157            && !i.types[this_operand].bitfield.baseindex)
11158     {
11159       /* MPX insn memory operands with neither base nor index must be forced
11160          to use 32-bit addressing in 16-bit mode.  */
11161       addr_mode = CODE_32BIT;
11162       i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
11163       ++i.prefixes;
11164       gas_assert (!i.types[this_operand].bitfield.disp16);
11165       gas_assert (!i.types[this_operand].bitfield.disp32);
11166     }
11167   else
11168     {
11169       addr_mode = flag_code;
11170
11171 #if INFER_ADDR_PREFIX
11172       if (i.mem_operands == 0)
11173         {
11174           /* Infer address prefix from the first memory operand.  */
11175           const reg_entry *addr_reg = i.base_reg;
11176
11177           if (addr_reg == NULL)
11178             addr_reg = i.index_reg;
11179
11180           if (addr_reg)
11181             {
11182               if (addr_reg->reg_type.bitfield.dword)
11183                 addr_mode = CODE_32BIT;
11184               else if (flag_code != CODE_64BIT
11185                        && addr_reg->reg_type.bitfield.word)
11186                 addr_mode = CODE_16BIT;
11187
11188               if (addr_mode != flag_code)
11189                 {
11190                   i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
11191                   i.prefixes += 1;
11192                   /* Change the size of any displacement too.  At most one
11193                      of Disp16 or Disp32 is set.
11194                      FIXME.  There doesn't seem to be any real need for
11195                      separate Disp16 and Disp32 flags.  The same goes for
11196                      Imm16 and Imm32.  Removing them would probably clean
11197                      up the code quite a lot.  */
11198                   if (flag_code != CODE_64BIT
11199                       && (i.types[this_operand].bitfield.disp16
11200                           || i.types[this_operand].bitfield.disp32))
11201                     i.types[this_operand]
11202                       = operand_type_xor (i.types[this_operand], disp16_32);
11203                 }
11204             }
11205         }
11206 #endif
11207     }
11208
11209   return addr_mode;
11210 }
11211
11212 /* Make sure the memory operand we've been dealt is valid.
11213    Return 1 on success, 0 on a failure.  */
11214
11215 static int
11216 i386_index_check (const char *operand_string)
11217 {
11218   const char *kind = "base/index";
11219   enum flag_code addr_mode = i386_addressing_mode ();
11220   const insn_template *t = current_templates->start;
11221
11222   if (t->opcode_modifier.isstring
11223       && !t->cpu_flags.bitfield.cpupadlock
11224       && (current_templates->end[-1].opcode_modifier.isstring
11225           || i.mem_operands))
11226     {
11227       /* Memory operands of string insns are special in that they only allow
11228          a single register (rDI, rSI, or rBX) as their memory address.  */
11229       const reg_entry *expected_reg;
11230       static const char *di_si[][2] =
11231         {
11232           { "esi", "edi" },
11233           { "si", "di" },
11234           { "rsi", "rdi" }
11235         };
11236       static const char *bx[] = { "ebx", "bx", "rbx" };
11237
11238       kind = "string address";
11239
11240       if (t->opcode_modifier.prefixok == PrefixRep)
11241         {
11242           int es_op = current_templates->end[-1].opcode_modifier.isstring
11243                       - IS_STRING_ES_OP0;
11244           int op = 0;
11245
11246           if (!current_templates->end[-1].operand_types[0].bitfield.baseindex
11247               || ((!i.mem_operands != !intel_syntax)
11248                   && current_templates->end[-1].operand_types[1]
11249                      .bitfield.baseindex))
11250             op = 1;
11251           expected_reg
11252             = (const reg_entry *) str_hash_find (reg_hash,
11253                                                  di_si[addr_mode][op == es_op]);
11254         }
11255       else
11256         expected_reg
11257           = (const reg_entry *)str_hash_find (reg_hash, bx[addr_mode]);
11258
11259       if (i.base_reg != expected_reg
11260           || i.index_reg
11261           || operand_type_check (i.types[this_operand], disp))
11262         {
11263           /* The second memory operand must have the same size as
11264              the first one.  */
11265           if (i.mem_operands
11266               && i.base_reg
11267               && !((addr_mode == CODE_64BIT
11268                     && i.base_reg->reg_type.bitfield.qword)
11269                    || (addr_mode == CODE_32BIT
11270                        ? i.base_reg->reg_type.bitfield.dword
11271                        : i.base_reg->reg_type.bitfield.word)))
11272             goto bad_address;
11273
11274           as_warn (_("`%s' is not valid here (expected `%c%s%s%c')"),
11275                    operand_string,
11276                    intel_syntax ? '[' : '(',
11277                    register_prefix,
11278                    expected_reg->reg_name,
11279                    intel_syntax ? ']' : ')');
11280           return 1;
11281         }
11282       else
11283         return 1;
11284
11285     bad_address:
11286       as_bad (_("`%s' is not a valid %s expression"),
11287               operand_string, kind);
11288       return 0;
11289     }
11290   else
11291     {
11292       if (addr_mode != CODE_16BIT)
11293         {
11294           /* 32-bit/64-bit checks.  */
11295           if (i.disp_encoding == disp_encoding_16bit)
11296             {
11297             bad_disp:
11298               as_bad (_("invalid `%s' prefix"),
11299                       addr_mode == CODE_16BIT ? "{disp32}" : "{disp16}");
11300               return 0;
11301             }
11302
11303           if ((i.base_reg
11304                && ((addr_mode == CODE_64BIT
11305                     ? !i.base_reg->reg_type.bitfield.qword
11306                     : !i.base_reg->reg_type.bitfield.dword)
11307                    || (i.index_reg && i.base_reg->reg_num == RegIP)
11308                    || i.base_reg->reg_num == RegIZ))
11309               || (i.index_reg
11310                   && !i.index_reg->reg_type.bitfield.xmmword
11311                   && !i.index_reg->reg_type.bitfield.ymmword
11312                   && !i.index_reg->reg_type.bitfield.zmmword
11313                   && ((addr_mode == CODE_64BIT
11314                        ? !i.index_reg->reg_type.bitfield.qword
11315                        : !i.index_reg->reg_type.bitfield.dword)
11316                       || !i.index_reg->reg_type.bitfield.baseindex)))
11317             goto bad_address;
11318
11319           /* bndmk, bndldx, bndstx and mandatory non-vector SIB have special restrictions. */
11320           if ((t->opcode_modifier.opcodeprefix == PREFIX_0XF3
11321                && t->opcode_modifier.opcodespace == SPACE_0F
11322                && t->base_opcode == 0x1b)
11323               || (t->opcode_modifier.opcodeprefix == PREFIX_NONE
11324                   && t->opcode_modifier.opcodespace == SPACE_0F
11325                   && (t->base_opcode & ~1) == 0x1a)
11326               || t->opcode_modifier.sib == SIBMEM)
11327             {
11328               /* They cannot use RIP-relative addressing. */
11329               if (i.base_reg && i.base_reg->reg_num == RegIP)
11330                 {
11331                   as_bad (_("`%s' cannot be used here"), operand_string);
11332                   return 0;
11333                 }
11334
11335               /* bndldx and bndstx ignore their scale factor. */
11336               if (t->opcode_modifier.opcodeprefix == PREFIX_NONE
11337                   && t->opcode_modifier.opcodespace == SPACE_0F
11338                   && (t->base_opcode & ~1) == 0x1a
11339                   && i.log2_scale_factor)
11340                 as_warn (_("register scaling is being ignored here"));
11341             }
11342         }
11343       else
11344         {
11345           /* 16-bit checks.  */
11346           if (i.disp_encoding == disp_encoding_32bit)
11347             goto bad_disp;
11348
11349           if ((i.base_reg
11350                && (!i.base_reg->reg_type.bitfield.word
11351                    || !i.base_reg->reg_type.bitfield.baseindex))
11352               || (i.index_reg
11353                   && (!i.index_reg->reg_type.bitfield.word
11354                       || !i.index_reg->reg_type.bitfield.baseindex
11355                       || !(i.base_reg
11356                            && i.base_reg->reg_num < 6
11357                            && i.index_reg->reg_num >= 6
11358                            && i.log2_scale_factor == 0))))
11359             goto bad_address;
11360         }
11361     }
11362   return 1;
11363 }
11364
11365 /* Handle vector immediates.  */
11366
11367 static int
11368 RC_SAE_immediate (const char *imm_start)
11369 {
11370   unsigned int match_found, j;
11371   const char *pstr = imm_start;
11372   expressionS *exp;
11373
11374   if (*pstr != '{')
11375     return 0;
11376
11377   pstr++;
11378   match_found = 0;
11379   for (j = 0; j < ARRAY_SIZE (RC_NamesTable); j++)
11380     {
11381       if (!strncmp (pstr, RC_NamesTable[j].name, RC_NamesTable[j].len))
11382         {
11383           if (i.rounding.type != rc_none)
11384             {
11385               as_bad (_("duplicated `%s'"), imm_start);
11386               return 0;
11387             }
11388
11389           i.rounding.type = RC_NamesTable[j].type;
11390           i.rounding.operand = this_operand;
11391
11392           pstr += RC_NamesTable[j].len;
11393           match_found = 1;
11394           break;
11395         }
11396     }
11397   if (!match_found)
11398     return 0;
11399
11400   if (*pstr++ != '}')
11401     {
11402       as_bad (_("Missing '}': '%s'"), imm_start);
11403       return 0;
11404     }
11405   /* RC/SAE immediate string should contain nothing more.  */;
11406   if (*pstr != 0)
11407     {
11408       as_bad (_("Junk after '}': '%s'"), imm_start);
11409       return 0;
11410     }
11411
11412   exp = &im_expressions[i.imm_operands++];
11413   i.op[this_operand].imms = exp;
11414
11415   exp->X_op = O_constant;
11416   exp->X_add_number = 0;
11417   exp->X_add_symbol = (symbolS *) 0;
11418   exp->X_op_symbol = (symbolS *) 0;
11419
11420   i.types[this_operand].bitfield.imm8 = 1;
11421   return 1;
11422 }
11423
11424 /* Only string instructions can have a second memory operand, so
11425    reduce current_templates to just those if it contains any.  */
11426 static int
11427 maybe_adjust_templates (void)
11428 {
11429   const insn_template *t;
11430
11431   gas_assert (i.mem_operands == 1);
11432
11433   for (t = current_templates->start; t < current_templates->end; ++t)
11434     if (t->opcode_modifier.isstring)
11435       break;
11436
11437   if (t < current_templates->end)
11438     {
11439       static templates aux_templates;
11440       bool recheck;
11441
11442       aux_templates.start = t;
11443       for (; t < current_templates->end; ++t)
11444         if (!t->opcode_modifier.isstring)
11445           break;
11446       aux_templates.end = t;
11447
11448       /* Determine whether to re-check the first memory operand.  */
11449       recheck = (aux_templates.start != current_templates->start
11450                  || t != current_templates->end);
11451
11452       current_templates = &aux_templates;
11453
11454       if (recheck)
11455         {
11456           i.mem_operands = 0;
11457           if (i.memop1_string != NULL
11458               && i386_index_check (i.memop1_string) == 0)
11459             return 0;
11460           i.mem_operands = 1;
11461         }
11462     }
11463
11464   return 1;
11465 }
11466
11467 static INLINE bool starts_memory_operand (char c)
11468 {
11469   return ISDIGIT (c)
11470          || is_identifier_char (c)
11471          || strchr ("([\"+-!~", c);
11472 }
11473
11474 /* Parse OPERAND_STRING into the i386_insn structure I.  Returns zero
11475    on error.  */
11476
11477 static int
11478 i386_att_operand (char *operand_string)
11479 {
11480   const reg_entry *r;
11481   char *end_op;
11482   char *op_string = operand_string;
11483
11484   if (is_space_char (*op_string))
11485     ++op_string;
11486
11487   /* We check for an absolute prefix (differentiating,
11488      for example, 'jmp pc_relative_label' from 'jmp *absolute_label'.  */
11489   if (*op_string == ABSOLUTE_PREFIX)
11490     {
11491       ++op_string;
11492       if (is_space_char (*op_string))
11493         ++op_string;
11494       i.jumpabsolute = true;
11495     }
11496
11497   /* Check if operand is a register.  */
11498   if ((r = parse_register (op_string, &end_op)) != NULL)
11499     {
11500       i386_operand_type temp;
11501
11502       if (r == &bad_reg)
11503         return 0;
11504
11505       /* Check for a segment override by searching for ':' after a
11506          segment register.  */
11507       op_string = end_op;
11508       if (is_space_char (*op_string))
11509         ++op_string;
11510       if (*op_string == ':' && r->reg_type.bitfield.class == SReg)
11511         {
11512           i.seg[i.mem_operands] = r;
11513
11514           /* Skip the ':' and whitespace.  */
11515           ++op_string;
11516           if (is_space_char (*op_string))
11517             ++op_string;
11518
11519           /* Handle case of %es:*foo.  */
11520           if (!i.jumpabsolute && *op_string == ABSOLUTE_PREFIX)
11521             {
11522               ++op_string;
11523               if (is_space_char (*op_string))
11524                 ++op_string;
11525               i.jumpabsolute = true;
11526             }
11527
11528           if (!starts_memory_operand (*op_string))
11529             {
11530               as_bad (_("bad memory operand `%s'"), op_string);
11531               return 0;
11532             }
11533           goto do_memory_reference;
11534         }
11535
11536       /* Handle vector operations.  */
11537       if (*op_string == '{')
11538         {
11539           op_string = check_VecOperations (op_string);
11540           if (op_string == NULL)
11541             return 0;
11542         }
11543
11544       if (*op_string)
11545         {
11546           as_bad (_("junk `%s' after register"), op_string);
11547           return 0;
11548         }
11549       temp = r->reg_type;
11550       temp.bitfield.baseindex = 0;
11551       i.types[this_operand] = operand_type_or (i.types[this_operand],
11552                                                temp);
11553       i.types[this_operand].bitfield.unspecified = 0;
11554       i.op[this_operand].regs = r;
11555       i.reg_operands++;
11556     }
11557   else if (*op_string == REGISTER_PREFIX)
11558     {
11559       as_bad (_("bad register name `%s'"), op_string);
11560       return 0;
11561     }
11562   else if (*op_string == IMMEDIATE_PREFIX)
11563     {
11564       ++op_string;
11565       if (i.jumpabsolute)
11566         {
11567           as_bad (_("immediate operand illegal with absolute jump"));
11568           return 0;
11569         }
11570       if (!i386_immediate (op_string))
11571         return 0;
11572     }
11573   else if (RC_SAE_immediate (operand_string))
11574     {
11575       /* If it is a RC or SAE immediate, do nothing.  */
11576       ;
11577     }
11578   else if (starts_memory_operand (*op_string))
11579     {
11580       /* This is a memory reference of some sort.  */
11581       char *base_string;
11582
11583       /* Start and end of displacement string expression (if found).  */
11584       char *displacement_string_start;
11585       char *displacement_string_end;
11586
11587     do_memory_reference:
11588       if (i.mem_operands == 1 && !maybe_adjust_templates ())
11589         return 0;
11590       if ((i.mem_operands == 1
11591            && !current_templates->start->opcode_modifier.isstring)
11592           || i.mem_operands == 2)
11593         {
11594           as_bad (_("too many memory references for `%s'"),
11595                   current_templates->start->name);
11596           return 0;
11597         }
11598
11599       /* Check for base index form.  We detect the base index form by
11600          looking for an ')' at the end of the operand, searching
11601          for the '(' matching it, and finding a REGISTER_PREFIX or ','
11602          after the '('.  */
11603       base_string = op_string + strlen (op_string);
11604
11605       /* Handle vector operations.  */
11606       --base_string;
11607       if (is_space_char (*base_string))
11608         --base_string;
11609
11610       if (*base_string == '}')
11611         {
11612           char *vop_start = NULL;
11613
11614           while (base_string-- > op_string)
11615             {
11616               if (*base_string == '"')
11617                 break;
11618               if (*base_string != '{')
11619                 continue;
11620
11621               vop_start = base_string;
11622
11623               --base_string;
11624               if (is_space_char (*base_string))
11625                 --base_string;
11626
11627               if (*base_string != '}')
11628                 break;
11629
11630               vop_start = NULL;
11631             }
11632
11633           if (!vop_start)
11634             {
11635               as_bad (_("unbalanced figure braces"));
11636               return 0;
11637             }
11638
11639           if (check_VecOperations (vop_start) == NULL)
11640             return 0;
11641         }
11642
11643       /* If we only have a displacement, set-up for it to be parsed later.  */
11644       displacement_string_start = op_string;
11645       displacement_string_end = base_string + 1;
11646
11647       if (*base_string == ')')
11648         {
11649           char *temp_string;
11650           unsigned int parens_not_balanced = 1;
11651
11652           /* We've already checked that the number of left & right ()'s are
11653              equal, so this loop will not be infinite.  */
11654           do
11655             {
11656               base_string--;
11657               if (*base_string == ')')
11658                 parens_not_balanced++;
11659               if (*base_string == '(')
11660                 parens_not_balanced--;
11661             }
11662           while (parens_not_balanced && *base_string != '"');
11663
11664           temp_string = base_string;
11665
11666           /* Skip past '(' and whitespace.  */
11667           if (*base_string == '(')
11668             ++base_string;
11669           if (is_space_char (*base_string))
11670             ++base_string;
11671
11672           if (*base_string == ','
11673               || ((i.base_reg = parse_register (base_string, &end_op))
11674                   != NULL))
11675             {
11676               displacement_string_end = temp_string;
11677
11678               i.types[this_operand].bitfield.baseindex = 1;
11679
11680               if (i.base_reg)
11681                 {
11682                   if (i.base_reg == &bad_reg)
11683                     return 0;
11684                   base_string = end_op;
11685                   if (is_space_char (*base_string))
11686                     ++base_string;
11687                 }
11688
11689               /* There may be an index reg or scale factor here.  */
11690               if (*base_string == ',')
11691                 {
11692                   ++base_string;
11693                   if (is_space_char (*base_string))
11694                     ++base_string;
11695
11696                   if ((i.index_reg = parse_register (base_string, &end_op))
11697                       != NULL)
11698                     {
11699                       if (i.index_reg == &bad_reg)
11700                         return 0;
11701                       base_string = end_op;
11702                       if (is_space_char (*base_string))
11703                         ++base_string;
11704                       if (*base_string == ',')
11705                         {
11706                           ++base_string;
11707                           if (is_space_char (*base_string))
11708                             ++base_string;
11709                         }
11710                       else if (*base_string != ')')
11711                         {
11712                           as_bad (_("expecting `,' or `)' "
11713                                     "after index register in `%s'"),
11714                                   operand_string);
11715                           return 0;
11716                         }
11717                     }
11718                   else if (*base_string == REGISTER_PREFIX)
11719                     {
11720                       end_op = strchr (base_string, ',');
11721                       if (end_op)
11722                         *end_op = '\0';
11723                       as_bad (_("bad register name `%s'"), base_string);
11724                       return 0;
11725                     }
11726
11727                   /* Check for scale factor.  */
11728                   if (*base_string != ')')
11729                     {
11730                       char *end_scale = i386_scale (base_string);
11731
11732                       if (!end_scale)
11733                         return 0;
11734
11735                       base_string = end_scale;
11736                       if (is_space_char (*base_string))
11737                         ++base_string;
11738                       if (*base_string != ')')
11739                         {
11740                           as_bad (_("expecting `)' "
11741                                     "after scale factor in `%s'"),
11742                                   operand_string);
11743                           return 0;
11744                         }
11745                     }
11746                   else if (!i.index_reg)
11747                     {
11748                       as_bad (_("expecting index register or scale factor "
11749                                 "after `,'; got '%c'"),
11750                               *base_string);
11751                       return 0;
11752                     }
11753                 }
11754               else if (*base_string != ')')
11755                 {
11756                   as_bad (_("expecting `,' or `)' "
11757                             "after base register in `%s'"),
11758                           operand_string);
11759                   return 0;
11760                 }
11761             }
11762           else if (*base_string == REGISTER_PREFIX)
11763             {
11764               end_op = strchr (base_string, ',');
11765               if (end_op)
11766                 *end_op = '\0';
11767               as_bad (_("bad register name `%s'"), base_string);
11768               return 0;
11769             }
11770         }
11771
11772       /* If there's an expression beginning the operand, parse it,
11773          assuming displacement_string_start and
11774          displacement_string_end are meaningful.  */
11775       if (displacement_string_start != displacement_string_end)
11776         {
11777           if (!i386_displacement (displacement_string_start,
11778                                   displacement_string_end))
11779             return 0;
11780         }
11781
11782       /* Special case for (%dx) while doing input/output op.  */
11783       if (i.base_reg
11784           && i.base_reg->reg_type.bitfield.instance == RegD
11785           && i.base_reg->reg_type.bitfield.word
11786           && i.index_reg == 0
11787           && i.log2_scale_factor == 0
11788           && i.seg[i.mem_operands] == 0
11789           && !operand_type_check (i.types[this_operand], disp))
11790         {
11791           i.types[this_operand] = i.base_reg->reg_type;
11792           return 1;
11793         }
11794
11795       if (i386_index_check (operand_string) == 0)
11796         return 0;
11797       i.flags[this_operand] |= Operand_Mem;
11798       if (i.mem_operands == 0)
11799         i.memop1_string = xstrdup (operand_string);
11800       i.mem_operands++;
11801     }
11802   else
11803     {
11804       /* It's not a memory operand; argh!  */
11805       as_bad (_("invalid char %s beginning operand %d `%s'"),
11806               output_invalid (*op_string),
11807               this_operand + 1,
11808               op_string);
11809       return 0;
11810     }
11811   return 1;                     /* Normal return.  */
11812 }
11813 \f
11814 /* Calculate the maximum variable size (i.e., excluding fr_fix)
11815    that an rs_machine_dependent frag may reach.  */
11816
11817 unsigned int
11818 i386_frag_max_var (fragS *frag)
11819 {
11820   /* The only relaxable frags are for jumps.
11821      Unconditional jumps can grow by 4 bytes and others by 5 bytes.  */
11822   gas_assert (frag->fr_type == rs_machine_dependent);
11823   return TYPE_FROM_RELAX_STATE (frag->fr_subtype) == UNCOND_JUMP ? 4 : 5;
11824 }
11825
11826 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
11827 static int
11828 elf_symbol_resolved_in_segment_p (symbolS *fr_symbol, offsetT fr_var)
11829 {
11830   /* STT_GNU_IFUNC symbol must go through PLT.  */
11831   if ((symbol_get_bfdsym (fr_symbol)->flags
11832        & BSF_GNU_INDIRECT_FUNCTION) != 0)
11833     return 0;
11834
11835   if (!S_IS_EXTERNAL (fr_symbol))
11836     /* Symbol may be weak or local.  */
11837     return !S_IS_WEAK (fr_symbol);
11838
11839   /* Global symbols with non-default visibility can't be preempted. */
11840   if (ELF_ST_VISIBILITY (S_GET_OTHER (fr_symbol)) != STV_DEFAULT)
11841     return 1;
11842
11843   if (fr_var != NO_RELOC)
11844     switch ((enum bfd_reloc_code_real) fr_var)
11845       {
11846       case BFD_RELOC_386_PLT32:
11847       case BFD_RELOC_X86_64_PLT32:
11848         /* Symbol with PLT relocation may be preempted. */
11849         return 0;
11850       default:
11851         abort ();
11852       }
11853
11854   /* Global symbols with default visibility in a shared library may be
11855      preempted by another definition.  */
11856   return !shared;
11857 }
11858 #endif
11859
11860 /* Table 3-2. Macro-Fusible Instructions in Haswell Microarchitecture
11861    Note also work for Skylake and Cascadelake.
11862 ---------------------------------------------------------------------
11863 |   JCC   | ADD/SUB/CMP | INC/DEC | TEST/AND |
11864 | ------  | ----------- | ------- | -------- |
11865 |   Jo    |      N      |    N    |     Y    |
11866 |   Jno   |      N      |    N    |     Y    |
11867 |  Jc/Jb  |      Y      |    N    |     Y    |
11868 | Jae/Jnb |      Y      |    N    |     Y    |
11869 |  Je/Jz  |      Y      |    Y    |     Y    |
11870 | Jne/Jnz |      Y      |    Y    |     Y    |
11871 | Jna/Jbe |      Y      |    N    |     Y    |
11872 | Ja/Jnbe |      Y      |    N    |     Y    |
11873 |   Js    |      N      |    N    |     Y    |
11874 |   Jns   |      N      |    N    |     Y    |
11875 |  Jp/Jpe |      N      |    N    |     Y    |
11876 | Jnp/Jpo |      N      |    N    |     Y    |
11877 | Jl/Jnge |      Y      |    Y    |     Y    |
11878 | Jge/Jnl |      Y      |    Y    |     Y    |
11879 | Jle/Jng |      Y      |    Y    |     Y    |
11880 | Jg/Jnle |      Y      |    Y    |     Y    |
11881 ---------------------------------------------------------------------  */
11882 static int
11883 i386_macro_fusible_p (enum mf_cmp_kind mf_cmp, enum mf_jcc_kind mf_jcc)
11884 {
11885   if (mf_cmp == mf_cmp_alu_cmp)
11886     return ((mf_jcc >= mf_jcc_jc && mf_jcc <= mf_jcc_jna)
11887             || mf_jcc == mf_jcc_jl || mf_jcc == mf_jcc_jle);
11888   if (mf_cmp == mf_cmp_incdec)
11889     return (mf_jcc == mf_jcc_je || mf_jcc == mf_jcc_jl
11890             || mf_jcc == mf_jcc_jle);
11891   if (mf_cmp == mf_cmp_test_and)
11892     return 1;
11893   return 0;
11894 }
11895
11896 /* Return the next non-empty frag.  */
11897
11898 static fragS *
11899 i386_next_non_empty_frag (fragS *fragP)
11900 {
11901   /* There may be a frag with a ".fill 0" when there is no room in
11902      the current frag for frag_grow in output_insn.  */
11903   for (fragP = fragP->fr_next;
11904        (fragP != NULL
11905         && fragP->fr_type == rs_fill
11906         && fragP->fr_fix == 0);
11907        fragP = fragP->fr_next)
11908     ;
11909   return fragP;
11910 }
11911
11912 /* Return the next jcc frag after BRANCH_PADDING.  */
11913
11914 static fragS *
11915 i386_next_fusible_jcc_frag (fragS *maybe_cmp_fragP, fragS *pad_fragP)
11916 {
11917   fragS *branch_fragP;
11918   if (!pad_fragP)
11919     return NULL;
11920
11921   if (pad_fragP->fr_type == rs_machine_dependent
11922       && (TYPE_FROM_RELAX_STATE (pad_fragP->fr_subtype)
11923           == BRANCH_PADDING))
11924     {
11925       branch_fragP = i386_next_non_empty_frag (pad_fragP);
11926       if (branch_fragP->fr_type != rs_machine_dependent)
11927         return NULL;
11928       if (TYPE_FROM_RELAX_STATE (branch_fragP->fr_subtype) == COND_JUMP
11929           && i386_macro_fusible_p (maybe_cmp_fragP->tc_frag_data.mf_type,
11930                                    pad_fragP->tc_frag_data.mf_type))
11931         return branch_fragP;
11932     }
11933
11934   return NULL;
11935 }
11936
11937 /* Classify BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags.  */
11938
11939 static void
11940 i386_classify_machine_dependent_frag (fragS *fragP)
11941 {
11942   fragS *cmp_fragP;
11943   fragS *pad_fragP;
11944   fragS *branch_fragP;
11945   fragS *next_fragP;
11946   unsigned int max_prefix_length;
11947
11948   if (fragP->tc_frag_data.classified)
11949     return;
11950
11951   /* First scan for BRANCH_PADDING and FUSED_JCC_PADDING.  Convert
11952      FUSED_JCC_PADDING and merge BRANCH_PADDING.  */
11953   for (next_fragP = fragP;
11954        next_fragP != NULL;
11955        next_fragP = next_fragP->fr_next)
11956     {
11957       next_fragP->tc_frag_data.classified = 1;
11958       if (next_fragP->fr_type == rs_machine_dependent)
11959         switch (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype))
11960           {
11961           case BRANCH_PADDING:
11962             /* The BRANCH_PADDING frag must be followed by a branch
11963                frag.  */
11964             branch_fragP = i386_next_non_empty_frag (next_fragP);
11965             next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
11966             break;
11967           case FUSED_JCC_PADDING:
11968             /* Check if this is a fused jcc:
11969                FUSED_JCC_PADDING
11970                CMP like instruction
11971                BRANCH_PADDING
11972                COND_JUMP
11973                */
11974             cmp_fragP = i386_next_non_empty_frag (next_fragP);
11975             pad_fragP = i386_next_non_empty_frag (cmp_fragP);
11976             branch_fragP = i386_next_fusible_jcc_frag (next_fragP, pad_fragP);
11977             if (branch_fragP)
11978               {
11979                 /* The BRANCH_PADDING frag is merged with the
11980                    FUSED_JCC_PADDING frag.  */
11981                 next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
11982                 /* CMP like instruction size.  */
11983                 next_fragP->tc_frag_data.cmp_size = cmp_fragP->fr_fix;
11984                 frag_wane (pad_fragP);
11985                 /* Skip to branch_fragP.  */
11986                 next_fragP = branch_fragP;
11987               }
11988             else if (next_fragP->tc_frag_data.max_prefix_length)
11989               {
11990                 /* Turn FUSED_JCC_PADDING into BRANCH_PREFIX if it isn't
11991                    a fused jcc.  */
11992                 next_fragP->fr_subtype
11993                   = ENCODE_RELAX_STATE (BRANCH_PREFIX, 0);
11994                 next_fragP->tc_frag_data.max_bytes
11995                   = next_fragP->tc_frag_data.max_prefix_length;
11996                 /* This will be updated in the BRANCH_PREFIX scan.  */
11997                 next_fragP->tc_frag_data.max_prefix_length = 0;
11998               }
11999             else
12000               frag_wane (next_fragP);
12001             break;
12002           }
12003     }
12004
12005   /* Stop if there is no BRANCH_PREFIX.  */
12006   if (!align_branch_prefix_size)
12007     return;
12008
12009   /* Scan for BRANCH_PREFIX.  */
12010   for (; fragP != NULL; fragP = fragP->fr_next)
12011     {
12012       if (fragP->fr_type != rs_machine_dependent
12013           || (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
12014               != BRANCH_PREFIX))
12015         continue;
12016
12017       /* Count all BRANCH_PREFIX frags before BRANCH_PADDING and
12018          COND_JUMP_PREFIX.  */
12019       max_prefix_length = 0;
12020       for (next_fragP = fragP;
12021            next_fragP != NULL;
12022            next_fragP = next_fragP->fr_next)
12023         {
12024           if (next_fragP->fr_type == rs_fill)
12025             /* Skip rs_fill frags.  */
12026             continue;
12027           else if (next_fragP->fr_type != rs_machine_dependent)
12028             /* Stop for all other frags.  */
12029             break;
12030
12031           /* rs_machine_dependent frags.  */
12032           if (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12033               == BRANCH_PREFIX)
12034             {
12035               /* Count BRANCH_PREFIX frags.  */
12036               if (max_prefix_length >= MAX_FUSED_JCC_PADDING_SIZE)
12037                 {
12038                   max_prefix_length = MAX_FUSED_JCC_PADDING_SIZE;
12039                   frag_wane (next_fragP);
12040                 }
12041               else
12042                 max_prefix_length
12043                   += next_fragP->tc_frag_data.max_bytes;
12044             }
12045           else if ((TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12046                     == BRANCH_PADDING)
12047                    || (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12048                        == FUSED_JCC_PADDING))
12049             {
12050               /* Stop at BRANCH_PADDING and FUSED_JCC_PADDING.  */
12051               fragP->tc_frag_data.u.padding_fragP = next_fragP;
12052               break;
12053             }
12054           else
12055             /* Stop for other rs_machine_dependent frags.  */
12056             break;
12057         }
12058
12059       fragP->tc_frag_data.max_prefix_length = max_prefix_length;
12060
12061       /* Skip to the next frag.  */
12062       fragP = next_fragP;
12063     }
12064 }
12065
12066 /* Compute padding size for
12067
12068         FUSED_JCC_PADDING
12069         CMP like instruction
12070         BRANCH_PADDING
12071         COND_JUMP/UNCOND_JUMP
12072
12073    or
12074
12075         BRANCH_PADDING
12076         COND_JUMP/UNCOND_JUMP
12077  */
12078
12079 static int
12080 i386_branch_padding_size (fragS *fragP, offsetT address)
12081 {
12082   unsigned int offset, size, padding_size;
12083   fragS *branch_fragP = fragP->tc_frag_data.u.branch_fragP;
12084
12085   /* The start address of the BRANCH_PADDING or FUSED_JCC_PADDING frag.  */
12086   if (!address)
12087     address = fragP->fr_address;
12088   address += fragP->fr_fix;
12089
12090   /* CMP like instrunction size.  */
12091   size = fragP->tc_frag_data.cmp_size;
12092
12093   /* The base size of the branch frag.  */
12094   size += branch_fragP->fr_fix;
12095
12096   /* Add opcode and displacement bytes for the rs_machine_dependent
12097      branch frag.  */
12098   if (branch_fragP->fr_type == rs_machine_dependent)
12099     size += md_relax_table[branch_fragP->fr_subtype].rlx_length;
12100
12101   /* Check if branch is within boundary and doesn't end at the last
12102      byte.  */
12103   offset = address & ((1U << align_branch_power) - 1);
12104   if ((offset + size) >= (1U << align_branch_power))
12105     /* Padding needed to avoid crossing boundary.  */
12106     padding_size = (1U << align_branch_power) - offset;
12107   else
12108     /* No padding needed.  */
12109     padding_size = 0;
12110
12111   /* The return value may be saved in tc_frag_data.length which is
12112      unsigned byte.  */
12113   if (!fits_in_unsigned_byte (padding_size))
12114     abort ();
12115
12116   return padding_size;
12117 }
12118
12119 /* i386_generic_table_relax_frag()
12120
12121    Handle BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags to
12122    grow/shrink padding to align branch frags.  Hand others to
12123    relax_frag().  */
12124
12125 long
12126 i386_generic_table_relax_frag (segT segment, fragS *fragP, long stretch)
12127 {
12128   if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12129       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
12130     {
12131       long padding_size = i386_branch_padding_size (fragP, 0);
12132       long grow = padding_size - fragP->tc_frag_data.length;
12133
12134       /* When the BRANCH_PREFIX frag is used, the computed address
12135          must match the actual address and there should be no padding.  */
12136       if (fragP->tc_frag_data.padding_address
12137           && (fragP->tc_frag_data.padding_address != fragP->fr_address
12138               || padding_size))
12139         abort ();
12140
12141       /* Update the padding size.  */
12142       if (grow)
12143         fragP->tc_frag_data.length = padding_size;
12144
12145       return grow;
12146     }
12147   else if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12148     {
12149       fragS *padding_fragP, *next_fragP;
12150       long padding_size, left_size, last_size;
12151
12152       padding_fragP = fragP->tc_frag_data.u.padding_fragP;
12153       if (!padding_fragP)
12154         /* Use the padding set by the leading BRANCH_PREFIX frag.  */
12155         return (fragP->tc_frag_data.length
12156                 - fragP->tc_frag_data.last_length);
12157
12158       /* Compute the relative address of the padding frag in the very
12159         first time where the BRANCH_PREFIX frag sizes are zero.  */
12160       if (!fragP->tc_frag_data.padding_address)
12161         fragP->tc_frag_data.padding_address
12162           = padding_fragP->fr_address - (fragP->fr_address - stretch);
12163
12164       /* First update the last length from the previous interation.  */
12165       left_size = fragP->tc_frag_data.prefix_length;
12166       for (next_fragP = fragP;
12167            next_fragP != padding_fragP;
12168            next_fragP = next_fragP->fr_next)
12169         if (next_fragP->fr_type == rs_machine_dependent
12170             && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12171                 == BRANCH_PREFIX))
12172           {
12173             if (left_size)
12174               {
12175                 int max = next_fragP->tc_frag_data.max_bytes;
12176                 if (max)
12177                   {
12178                     int size;
12179                     if (max > left_size)
12180                       size = left_size;
12181                     else
12182                       size = max;
12183                     left_size -= size;
12184                     next_fragP->tc_frag_data.last_length = size;
12185                   }
12186               }
12187             else
12188               next_fragP->tc_frag_data.last_length = 0;
12189           }
12190
12191       /* Check the padding size for the padding frag.  */
12192       padding_size = i386_branch_padding_size
12193         (padding_fragP, (fragP->fr_address
12194                          + fragP->tc_frag_data.padding_address));
12195
12196       last_size = fragP->tc_frag_data.prefix_length;
12197       /* Check if there is change from the last interation.  */
12198       if (padding_size == last_size)
12199         {
12200           /* Update the expected address of the padding frag.  */
12201           padding_fragP->tc_frag_data.padding_address
12202             = (fragP->fr_address + padding_size
12203                + fragP->tc_frag_data.padding_address);
12204           return 0;
12205         }
12206
12207       if (padding_size > fragP->tc_frag_data.max_prefix_length)
12208         {
12209           /* No padding if there is no sufficient room.  Clear the
12210              expected address of the padding frag.  */
12211           padding_fragP->tc_frag_data.padding_address = 0;
12212           padding_size = 0;
12213         }
12214       else
12215         /* Store the expected address of the padding frag.  */
12216         padding_fragP->tc_frag_data.padding_address
12217           = (fragP->fr_address + padding_size
12218              + fragP->tc_frag_data.padding_address);
12219
12220       fragP->tc_frag_data.prefix_length = padding_size;
12221
12222       /* Update the length for the current interation.  */
12223       left_size = padding_size;
12224       for (next_fragP = fragP;
12225            next_fragP != padding_fragP;
12226            next_fragP = next_fragP->fr_next)
12227         if (next_fragP->fr_type == rs_machine_dependent
12228             && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12229                 == BRANCH_PREFIX))
12230           {
12231             if (left_size)
12232               {
12233                 int max = next_fragP->tc_frag_data.max_bytes;
12234                 if (max)
12235                   {
12236                     int size;
12237                     if (max > left_size)
12238                       size = left_size;
12239                     else
12240                       size = max;
12241                     left_size -= size;
12242                     next_fragP->tc_frag_data.length = size;
12243                   }
12244               }
12245             else
12246               next_fragP->tc_frag_data.length = 0;
12247           }
12248
12249       return (fragP->tc_frag_data.length
12250               - fragP->tc_frag_data.last_length);
12251     }
12252   return relax_frag (segment, fragP, stretch);
12253 }
12254
12255 /* md_estimate_size_before_relax()
12256
12257    Called just before relax() for rs_machine_dependent frags.  The x86
12258    assembler uses these frags to handle variable size jump
12259    instructions.
12260
12261    Any symbol that is now undefined will not become defined.
12262    Return the correct fr_subtype in the frag.
12263    Return the initial "guess for variable size of frag" to caller.
12264    The guess is actually the growth beyond the fixed part.  Whatever
12265    we do to grow the fixed or variable part contributes to our
12266    returned value.  */
12267
12268 int
12269 md_estimate_size_before_relax (fragS *fragP, segT segment)
12270 {
12271   if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12272       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX
12273       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
12274     {
12275       i386_classify_machine_dependent_frag (fragP);
12276       return fragP->tc_frag_data.length;
12277     }
12278
12279   /* We've already got fragP->fr_subtype right;  all we have to do is
12280      check for un-relaxable symbols.  On an ELF system, we can't relax
12281      an externally visible symbol, because it may be overridden by a
12282      shared library.  */
12283   if (S_GET_SEGMENT (fragP->fr_symbol) != segment
12284 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12285       || (IS_ELF
12286           && !elf_symbol_resolved_in_segment_p (fragP->fr_symbol,
12287                                                 fragP->fr_var))
12288 #endif
12289 #if defined (OBJ_COFF) && defined (TE_PE)
12290       || (OUTPUT_FLAVOR == bfd_target_coff_flavour
12291           && S_IS_WEAK (fragP->fr_symbol))
12292 #endif
12293       )
12294     {
12295       /* Symbol is undefined in this segment, or we need to keep a
12296          reloc so that weak symbols can be overridden.  */
12297       int size = (fragP->fr_subtype & CODE16) ? 2 : 4;
12298       enum bfd_reloc_code_real reloc_type;
12299       unsigned char *opcode;
12300       int old_fr_fix;
12301       fixS *fixP = NULL;
12302
12303       if (fragP->fr_var != NO_RELOC)
12304         reloc_type = (enum bfd_reloc_code_real) fragP->fr_var;
12305       else if (size == 2)
12306         reloc_type = BFD_RELOC_16_PCREL;
12307 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12308       else if (fragP->tc_frag_data.code64 && fragP->fr_offset == 0
12309                && need_plt32_p (fragP->fr_symbol))
12310         reloc_type = BFD_RELOC_X86_64_PLT32;
12311 #endif
12312       else
12313         reloc_type = BFD_RELOC_32_PCREL;
12314
12315       old_fr_fix = fragP->fr_fix;
12316       opcode = (unsigned char *) fragP->fr_opcode;
12317
12318       switch (TYPE_FROM_RELAX_STATE (fragP->fr_subtype))
12319         {
12320         case UNCOND_JUMP:
12321           /* Make jmp (0xeb) a (d)word displacement jump.  */
12322           opcode[0] = 0xe9;
12323           fragP->fr_fix += size;
12324           fixP = fix_new (fragP, old_fr_fix, size,
12325                           fragP->fr_symbol,
12326                           fragP->fr_offset, 1,
12327                           reloc_type);
12328           break;
12329
12330         case COND_JUMP86:
12331           if (size == 2
12332               && (!no_cond_jump_promotion || fragP->fr_var != NO_RELOC))
12333             {
12334               /* Negate the condition, and branch past an
12335                  unconditional jump.  */
12336               opcode[0] ^= 1;
12337               opcode[1] = 3;
12338               /* Insert an unconditional jump.  */
12339               opcode[2] = 0xe9;
12340               /* We added two extra opcode bytes, and have a two byte
12341                  offset.  */
12342               fragP->fr_fix += 2 + 2;
12343               fix_new (fragP, old_fr_fix + 2, 2,
12344                        fragP->fr_symbol,
12345                        fragP->fr_offset, 1,
12346                        reloc_type);
12347               break;
12348             }
12349           /* Fall through.  */
12350
12351         case COND_JUMP:
12352           if (no_cond_jump_promotion && fragP->fr_var == NO_RELOC)
12353             {
12354               fragP->fr_fix += 1;
12355               fixP = fix_new (fragP, old_fr_fix, 1,
12356                               fragP->fr_symbol,
12357                               fragP->fr_offset, 1,
12358                               BFD_RELOC_8_PCREL);
12359               fixP->fx_signed = 1;
12360               break;
12361             }
12362
12363           /* This changes the byte-displacement jump 0x7N
12364              to the (d)word-displacement jump 0x0f,0x8N.  */
12365           opcode[1] = opcode[0] + 0x10;
12366           opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12367           /* We've added an opcode byte.  */
12368           fragP->fr_fix += 1 + size;
12369           fixP = fix_new (fragP, old_fr_fix + 1, size,
12370                           fragP->fr_symbol,
12371                           fragP->fr_offset, 1,
12372                           reloc_type);
12373           break;
12374
12375         default:
12376           BAD_CASE (fragP->fr_subtype);
12377           break;
12378         }
12379
12380       /* All jumps handled here are signed, but don't unconditionally use a
12381          signed limit check for 32 and 16 bit jumps as we want to allow wrap
12382          around at 4G (outside of 64-bit mode) and 64k.  */
12383       if (size == 4 && flag_code == CODE_64BIT)
12384         fixP->fx_signed = 1;
12385
12386       frag_wane (fragP);
12387       return fragP->fr_fix - old_fr_fix;
12388     }
12389
12390   /* Guess size depending on current relax state.  Initially the relax
12391      state will correspond to a short jump and we return 1, because
12392      the variable part of the frag (the branch offset) is one byte
12393      long.  However, we can relax a section more than once and in that
12394      case we must either set fr_subtype back to the unrelaxed state,
12395      or return the value for the appropriate branch.  */
12396   return md_relax_table[fragP->fr_subtype].rlx_length;
12397 }
12398
12399 /* Called after relax() is finished.
12400
12401    In:  Address of frag.
12402         fr_type == rs_machine_dependent.
12403         fr_subtype is what the address relaxed to.
12404
12405    Out: Any fixSs and constants are set up.
12406         Caller will turn frag into a ".space 0".  */
12407
12408 void
12409 md_convert_frag (bfd *abfd ATTRIBUTE_UNUSED, segT sec ATTRIBUTE_UNUSED,
12410                  fragS *fragP)
12411 {
12412   unsigned char *opcode;
12413   unsigned char *where_to_put_displacement = NULL;
12414   offsetT target_address;
12415   offsetT opcode_address;
12416   unsigned int extension = 0;
12417   offsetT displacement_from_opcode_start;
12418
12419   if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12420       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING
12421       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12422     {
12423       /* Generate nop padding.  */
12424       unsigned int size = fragP->tc_frag_data.length;
12425       if (size)
12426         {
12427           if (size > fragP->tc_frag_data.max_bytes)
12428             abort ();
12429
12430           if (flag_debug)
12431             {
12432               const char *msg;
12433               const char *branch = "branch";
12434               const char *prefix = "";
12435               fragS *padding_fragP;
12436               if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
12437                   == BRANCH_PREFIX)
12438                 {
12439                   padding_fragP = fragP->tc_frag_data.u.padding_fragP;
12440                   switch (fragP->tc_frag_data.default_prefix)
12441                     {
12442                     default:
12443                       abort ();
12444                       break;
12445                     case CS_PREFIX_OPCODE:
12446                       prefix = " cs";
12447                       break;
12448                     case DS_PREFIX_OPCODE:
12449                       prefix = " ds";
12450                       break;
12451                     case ES_PREFIX_OPCODE:
12452                       prefix = " es";
12453                       break;
12454                     case FS_PREFIX_OPCODE:
12455                       prefix = " fs";
12456                       break;
12457                     case GS_PREFIX_OPCODE:
12458                       prefix = " gs";
12459                       break;
12460                     case SS_PREFIX_OPCODE:
12461                       prefix = " ss";
12462                       break;
12463                     }
12464                   if (padding_fragP)
12465                     msg = _("%s:%u: add %d%s at 0x%llx to align "
12466                             "%s within %d-byte boundary\n");
12467                   else
12468                     msg = _("%s:%u: add additional %d%s at 0x%llx to "
12469                             "align %s within %d-byte boundary\n");
12470                 }
12471               else
12472                 {
12473                   padding_fragP = fragP;
12474                   msg = _("%s:%u: add %d%s-byte nop at 0x%llx to align "
12475                           "%s within %d-byte boundary\n");
12476                 }
12477
12478               if (padding_fragP)
12479                 switch (padding_fragP->tc_frag_data.branch_type)
12480                   {
12481                   case align_branch_jcc:
12482                     branch = "jcc";
12483                     break;
12484                   case align_branch_fused:
12485                     branch = "fused jcc";
12486                     break;
12487                   case align_branch_jmp:
12488                     branch = "jmp";
12489                     break;
12490                   case align_branch_call:
12491                     branch = "call";
12492                     break;
12493                   case align_branch_indirect:
12494                     branch = "indiret branch";
12495                     break;
12496                   case align_branch_ret:
12497                     branch = "ret";
12498                     break;
12499                   default:
12500                     break;
12501                   }
12502
12503               fprintf (stdout, msg,
12504                        fragP->fr_file, fragP->fr_line, size, prefix,
12505                        (long long) fragP->fr_address, branch,
12506                        1 << align_branch_power);
12507             }
12508           if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12509             memset (fragP->fr_opcode,
12510                     fragP->tc_frag_data.default_prefix, size);
12511           else
12512             i386_generate_nops (fragP, (char *) fragP->fr_opcode,
12513                                 size, 0);
12514           fragP->fr_fix += size;
12515         }
12516       return;
12517     }
12518
12519   opcode = (unsigned char *) fragP->fr_opcode;
12520
12521   /* Address we want to reach in file space.  */
12522   target_address = S_GET_VALUE (fragP->fr_symbol) + fragP->fr_offset;
12523
12524   /* Address opcode resides at in file space.  */
12525   opcode_address = fragP->fr_address + fragP->fr_fix;
12526
12527   /* Displacement from opcode start to fill into instruction.  */
12528   displacement_from_opcode_start = target_address - opcode_address;
12529
12530   if ((fragP->fr_subtype & BIG) == 0)
12531     {
12532       /* Don't have to change opcode.  */
12533       extension = 1;            /* 1 opcode + 1 displacement  */
12534       where_to_put_displacement = &opcode[1];
12535     }
12536   else
12537     {
12538       if (no_cond_jump_promotion
12539           && TYPE_FROM_RELAX_STATE (fragP->fr_subtype) != UNCOND_JUMP)
12540         as_warn_where (fragP->fr_file, fragP->fr_line,
12541                        _("long jump required"));
12542
12543       switch (fragP->fr_subtype)
12544         {
12545         case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG):
12546           extension = 4;                /* 1 opcode + 4 displacement  */
12547           opcode[0] = 0xe9;
12548           where_to_put_displacement = &opcode[1];
12549           break;
12550
12551         case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16):
12552           extension = 2;                /* 1 opcode + 2 displacement  */
12553           opcode[0] = 0xe9;
12554           where_to_put_displacement = &opcode[1];
12555           break;
12556
12557         case ENCODE_RELAX_STATE (COND_JUMP, BIG):
12558         case ENCODE_RELAX_STATE (COND_JUMP86, BIG):
12559           extension = 5;                /* 2 opcode + 4 displacement  */
12560           opcode[1] = opcode[0] + 0x10;
12561           opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12562           where_to_put_displacement = &opcode[2];
12563           break;
12564
12565         case ENCODE_RELAX_STATE (COND_JUMP, BIG16):
12566           extension = 3;                /* 2 opcode + 2 displacement  */
12567           opcode[1] = opcode[0] + 0x10;
12568           opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12569           where_to_put_displacement = &opcode[2];
12570           break;
12571
12572         case ENCODE_RELAX_STATE (COND_JUMP86, BIG16):
12573           extension = 4;
12574           opcode[0] ^= 1;
12575           opcode[1] = 3;
12576           opcode[2] = 0xe9;
12577           where_to_put_displacement = &opcode[3];
12578           break;
12579
12580         default:
12581           BAD_CASE (fragP->fr_subtype);
12582           break;
12583         }
12584     }
12585
12586   /* If size if less then four we are sure that the operand fits,
12587      but if it's 4, then it could be that the displacement is larger
12588      then -/+ 2GB.  */
12589   if (DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype) == 4
12590       && object_64bit
12591       && ((addressT) (displacement_from_opcode_start - extension
12592                       + ((addressT) 1 << 31))
12593           > (((addressT) 2 << 31) - 1)))
12594     {
12595       as_bad_where (fragP->fr_file, fragP->fr_line,
12596                     _("jump target out of range"));
12597       /* Make us emit 0.  */
12598       displacement_from_opcode_start = extension;
12599     }
12600   /* Now put displacement after opcode.  */
12601   md_number_to_chars ((char *) where_to_put_displacement,
12602                       (valueT) (displacement_from_opcode_start - extension),
12603                       DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype));
12604   fragP->fr_fix += extension;
12605 }
12606 \f
12607 /* Apply a fixup (fixP) to segment data, once it has been determined
12608    by our caller that we have all the info we need to fix it up.
12609
12610    Parameter valP is the pointer to the value of the bits.
12611
12612    On the 386, immediates, displacements, and data pointers are all in
12613    the same (little-endian) format, so we don't need to care about which
12614    we are handling.  */
12615
12616 void
12617 md_apply_fix (fixS *fixP, valueT *valP, segT seg ATTRIBUTE_UNUSED)
12618 {
12619   char *p = fixP->fx_where + fixP->fx_frag->fr_literal;
12620   valueT value = *valP;
12621
12622 #if !defined (TE_Mach)
12623   if (fixP->fx_pcrel)
12624     {
12625       switch (fixP->fx_r_type)
12626         {
12627         default:
12628           break;
12629
12630         case BFD_RELOC_64:
12631           fixP->fx_r_type = BFD_RELOC_64_PCREL;
12632           break;
12633         case BFD_RELOC_32:
12634         case BFD_RELOC_X86_64_32S:
12635           fixP->fx_r_type = BFD_RELOC_32_PCREL;
12636           break;
12637         case BFD_RELOC_16:
12638           fixP->fx_r_type = BFD_RELOC_16_PCREL;
12639           break;
12640         case BFD_RELOC_8:
12641           fixP->fx_r_type = BFD_RELOC_8_PCREL;
12642           break;
12643         }
12644     }
12645
12646   if (fixP->fx_addsy != NULL
12647       && (fixP->fx_r_type == BFD_RELOC_32_PCREL
12648           || fixP->fx_r_type == BFD_RELOC_64_PCREL
12649           || fixP->fx_r_type == BFD_RELOC_16_PCREL
12650           || fixP->fx_r_type == BFD_RELOC_8_PCREL)
12651       && !use_rela_relocations)
12652     {
12653       /* This is a hack.  There should be a better way to handle this.
12654          This covers for the fact that bfd_install_relocation will
12655          subtract the current location (for partial_inplace, PC relative
12656          relocations); see more below.  */
12657 #ifndef OBJ_AOUT
12658       if (IS_ELF
12659 #ifdef TE_PE
12660           || OUTPUT_FLAVOR == bfd_target_coff_flavour
12661 #endif
12662           )
12663         value += fixP->fx_where + fixP->fx_frag->fr_address;
12664 #endif
12665 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12666       if (IS_ELF)
12667         {
12668           segT sym_seg = S_GET_SEGMENT (fixP->fx_addsy);
12669
12670           if ((sym_seg == seg
12671                || (symbol_section_p (fixP->fx_addsy)
12672                    && sym_seg != absolute_section))
12673               && !generic_force_reloc (fixP))
12674             {
12675               /* Yes, we add the values in twice.  This is because
12676                  bfd_install_relocation subtracts them out again.  I think
12677                  bfd_install_relocation is broken, but I don't dare change
12678                  it.  FIXME.  */
12679               value += fixP->fx_where + fixP->fx_frag->fr_address;
12680             }
12681         }
12682 #endif
12683 #if defined (OBJ_COFF) && defined (TE_PE)
12684       /* For some reason, the PE format does not store a
12685          section address offset for a PC relative symbol.  */
12686       if (S_GET_SEGMENT (fixP->fx_addsy) != seg
12687           || S_IS_WEAK (fixP->fx_addsy))
12688         value += md_pcrel_from (fixP);
12689 #endif
12690     }
12691 #if defined (OBJ_COFF) && defined (TE_PE)
12692   if (fixP->fx_addsy != NULL
12693       && S_IS_WEAK (fixP->fx_addsy)
12694       /* PR 16858: Do not modify weak function references.  */
12695       && ! fixP->fx_pcrel)
12696     {
12697 #if !defined (TE_PEP)
12698       /* For x86 PE weak function symbols are neither PC-relative
12699          nor do they set S_IS_FUNCTION.  So the only reliable way
12700          to detect them is to check the flags of their containing
12701          section.  */
12702       if (S_GET_SEGMENT (fixP->fx_addsy) != NULL
12703           && S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_CODE)
12704         ;
12705       else
12706 #endif
12707       value -= S_GET_VALUE (fixP->fx_addsy);
12708     }
12709 #endif
12710
12711   /* Fix a few things - the dynamic linker expects certain values here,
12712      and we must not disappoint it.  */
12713 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12714   if (IS_ELF && fixP->fx_addsy)
12715     switch (fixP->fx_r_type)
12716       {
12717       case BFD_RELOC_386_PLT32:
12718       case BFD_RELOC_X86_64_PLT32:
12719         /* Make the jump instruction point to the address of the operand.
12720            At runtime we merely add the offset to the actual PLT entry.
12721            NB: Subtract the offset size only for jump instructions.  */
12722         if (fixP->fx_pcrel)
12723           value = -4;
12724         break;
12725
12726       case BFD_RELOC_386_TLS_GD:
12727       case BFD_RELOC_386_TLS_LDM:
12728       case BFD_RELOC_386_TLS_IE_32:
12729       case BFD_RELOC_386_TLS_IE:
12730       case BFD_RELOC_386_TLS_GOTIE:
12731       case BFD_RELOC_386_TLS_GOTDESC:
12732       case BFD_RELOC_X86_64_TLSGD:
12733       case BFD_RELOC_X86_64_TLSLD:
12734       case BFD_RELOC_X86_64_GOTTPOFF:
12735       case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
12736         value = 0; /* Fully resolved at runtime.  No addend.  */
12737         /* Fallthrough */
12738       case BFD_RELOC_386_TLS_LE:
12739       case BFD_RELOC_386_TLS_LDO_32:
12740       case BFD_RELOC_386_TLS_LE_32:
12741       case BFD_RELOC_X86_64_DTPOFF32:
12742       case BFD_RELOC_X86_64_DTPOFF64:
12743       case BFD_RELOC_X86_64_TPOFF32:
12744       case BFD_RELOC_X86_64_TPOFF64:
12745         S_SET_THREAD_LOCAL (fixP->fx_addsy);
12746         break;
12747
12748       case BFD_RELOC_386_TLS_DESC_CALL:
12749       case BFD_RELOC_X86_64_TLSDESC_CALL:
12750         value = 0; /* Fully resolved at runtime.  No addend.  */
12751         S_SET_THREAD_LOCAL (fixP->fx_addsy);
12752         fixP->fx_done = 0;
12753         return;
12754
12755       case BFD_RELOC_VTABLE_INHERIT:
12756       case BFD_RELOC_VTABLE_ENTRY:
12757         fixP->fx_done = 0;
12758         return;
12759
12760       default:
12761         break;
12762       }
12763 #endif /* defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)  */
12764
12765   /* If not 64bit, massage value, to account for wraparound when !BFD64.  */
12766   if (!object_64bit)
12767     value = extend_to_32bit_address (value);
12768
12769   *valP = value;
12770 #endif /* !defined (TE_Mach)  */
12771
12772   /* Are we finished with this relocation now?  */
12773   if (fixP->fx_addsy == NULL)
12774     {
12775       fixP->fx_done = 1;
12776       switch (fixP->fx_r_type)
12777         {
12778         case BFD_RELOC_X86_64_32S:
12779           fixP->fx_signed = 1;
12780           break;
12781
12782         default:
12783           break;
12784         }
12785     }
12786 #if defined (OBJ_COFF) && defined (TE_PE)
12787   else if (fixP->fx_addsy != NULL && S_IS_WEAK (fixP->fx_addsy))
12788     {
12789       fixP->fx_done = 0;
12790       /* Remember value for tc_gen_reloc.  */
12791       fixP->fx_addnumber = value;
12792       /* Clear out the frag for now.  */
12793       value = 0;
12794     }
12795 #endif
12796   else if (use_rela_relocations)
12797     {
12798       if (!disallow_64bit_reloc || fixP->fx_r_type == NO_RELOC)
12799         fixP->fx_no_overflow = 1;
12800       /* Remember value for tc_gen_reloc.  */
12801       fixP->fx_addnumber = value;
12802       value = 0;
12803     }
12804
12805   md_number_to_chars (p, value, fixP->fx_size);
12806 }
12807 \f
12808 const char *
12809 md_atof (int type, char *litP, int *sizeP)
12810 {
12811   /* This outputs the LITTLENUMs in REVERSE order;
12812      in accord with the bigendian 386.  */
12813   return ieee_md_atof (type, litP, sizeP, false);
12814 }
12815 \f
12816 static char output_invalid_buf[sizeof (unsigned char) * 2 + 6];
12817
12818 static char *
12819 output_invalid (int c)
12820 {
12821   if (ISPRINT (c))
12822     snprintf (output_invalid_buf, sizeof (output_invalid_buf),
12823               "'%c'", c);
12824   else
12825     snprintf (output_invalid_buf, sizeof (output_invalid_buf),
12826               "(0x%x)", (unsigned char) c);
12827   return output_invalid_buf;
12828 }
12829
12830 /* Verify that @r can be used in the current context.  */
12831
12832 static bool check_register (const reg_entry *r)
12833 {
12834   if (allow_pseudo_reg)
12835     return true;
12836
12837   if (operand_type_all_zero (&r->reg_type))
12838     return false;
12839
12840   if ((r->reg_type.bitfield.dword
12841        || (r->reg_type.bitfield.class == SReg && r->reg_num > 3)
12842        || r->reg_type.bitfield.class == RegCR
12843        || r->reg_type.bitfield.class == RegDR)
12844       && !cpu_arch_flags.bitfield.cpui386)
12845     return false;
12846
12847   if (r->reg_type.bitfield.class == RegTR
12848       && (flag_code == CODE_64BIT
12849           || !cpu_arch_flags.bitfield.cpui386
12850           || cpu_arch_isa_flags.bitfield.cpui586
12851           || cpu_arch_isa_flags.bitfield.cpui686))
12852     return false;
12853
12854   if (r->reg_type.bitfield.class == RegMMX && !cpu_arch_flags.bitfield.cpummx)
12855     return false;
12856
12857   if (!cpu_arch_flags.bitfield.cpuavx512f)
12858     {
12859       if (r->reg_type.bitfield.zmmword
12860           || r->reg_type.bitfield.class == RegMask)
12861         return false;
12862
12863       if (!cpu_arch_flags.bitfield.cpuavx)
12864         {
12865           if (r->reg_type.bitfield.ymmword)
12866             return false;
12867
12868           if (!cpu_arch_flags.bitfield.cpusse && r->reg_type.bitfield.xmmword)
12869             return false;
12870         }
12871     }
12872
12873   if (r->reg_type.bitfield.tmmword
12874       && (!cpu_arch_flags.bitfield.cpuamx_tile
12875           || flag_code != CODE_64BIT))
12876     return false;
12877
12878   if (r->reg_type.bitfield.class == RegBND && !cpu_arch_flags.bitfield.cpumpx)
12879     return false;
12880
12881   /* Don't allow fake index register unless allow_index_reg isn't 0. */
12882   if (!allow_index_reg && r->reg_num == RegIZ)
12883     return false;
12884
12885   /* Upper 16 vector registers are only available with VREX in 64bit
12886      mode, and require EVEX encoding.  */
12887   if (r->reg_flags & RegVRex)
12888     {
12889       if (!cpu_arch_flags.bitfield.cpuavx512f
12890           || flag_code != CODE_64BIT)
12891         return false;
12892
12893       if (i.vec_encoding == vex_encoding_default)
12894         i.vec_encoding = vex_encoding_evex;
12895       else if (i.vec_encoding != vex_encoding_evex)
12896         i.vec_encoding = vex_encoding_error;
12897     }
12898
12899   if (((r->reg_flags & (RegRex64 | RegRex)) || r->reg_type.bitfield.qword)
12900       && (!cpu_arch_flags.bitfield.cpulm || r->reg_type.bitfield.class != RegCR)
12901       && flag_code != CODE_64BIT)
12902     return false;
12903
12904   if (r->reg_type.bitfield.class == SReg && r->reg_num == RegFlat
12905       && !intel_syntax)
12906     return false;
12907
12908   return true;
12909 }
12910
12911 /* REG_STRING starts *before* REGISTER_PREFIX.  */
12912
12913 static const reg_entry *
12914 parse_real_register (char *reg_string, char **end_op)
12915 {
12916   char *s = reg_string;
12917   char *p;
12918   char reg_name_given[MAX_REG_NAME_SIZE + 1];
12919   const reg_entry *r;
12920
12921   /* Skip possible REGISTER_PREFIX and possible whitespace.  */
12922   if (*s == REGISTER_PREFIX)
12923     ++s;
12924
12925   if (is_space_char (*s))
12926     ++s;
12927
12928   p = reg_name_given;
12929   while ((*p++ = register_chars[(unsigned char) *s]) != '\0')
12930     {
12931       if (p >= reg_name_given + MAX_REG_NAME_SIZE)
12932         return (const reg_entry *) NULL;
12933       s++;
12934     }
12935
12936   /* For naked regs, make sure that we are not dealing with an identifier.
12937      This prevents confusing an identifier like `eax_var' with register
12938      `eax'.  */
12939   if (allow_naked_reg && identifier_chars[(unsigned char) *s])
12940     return (const reg_entry *) NULL;
12941
12942   *end_op = s;
12943
12944   r = (const reg_entry *) str_hash_find (reg_hash, reg_name_given);
12945
12946   /* Handle floating point regs, allowing spaces in the (i) part.  */
12947   if (r == reg_st0)
12948     {
12949       if (!cpu_arch_flags.bitfield.cpu8087
12950           && !cpu_arch_flags.bitfield.cpu287
12951           && !cpu_arch_flags.bitfield.cpu387
12952           && !allow_pseudo_reg)
12953         return (const reg_entry *) NULL;
12954
12955       if (is_space_char (*s))
12956         ++s;
12957       if (*s == '(')
12958         {
12959           ++s;
12960           if (is_space_char (*s))
12961             ++s;
12962           if (*s >= '0' && *s <= '7')
12963             {
12964               int fpr = *s - '0';
12965               ++s;
12966               if (is_space_char (*s))
12967                 ++s;
12968               if (*s == ')')
12969                 {
12970                   *end_op = s + 1;
12971                   know (r[fpr].reg_num == fpr);
12972                   return r + fpr;
12973                 }
12974             }
12975           /* We have "%st(" then garbage.  */
12976           return (const reg_entry *) NULL;
12977         }
12978     }
12979
12980   return r && check_register (r) ? r : NULL;
12981 }
12982
12983 /* REG_STRING starts *before* REGISTER_PREFIX.  */
12984
12985 static const reg_entry *
12986 parse_register (char *reg_string, char **end_op)
12987 {
12988   const reg_entry *r;
12989
12990   if (*reg_string == REGISTER_PREFIX || allow_naked_reg)
12991     r = parse_real_register (reg_string, end_op);
12992   else
12993     r = NULL;
12994   if (!r)
12995     {
12996       char *save = input_line_pointer;
12997       char c;
12998       symbolS *symbolP;
12999
13000       input_line_pointer = reg_string;
13001       c = get_symbol_name (&reg_string);
13002       symbolP = symbol_find (reg_string);
13003       while (symbolP && S_GET_SEGMENT (symbolP) != reg_section)
13004         {
13005           const expressionS *e = symbol_get_value_expression(symbolP);
13006
13007           if (e->X_op != O_symbol || e->X_add_number)
13008             break;
13009           symbolP = e->X_add_symbol;
13010         }
13011       if (symbolP && S_GET_SEGMENT (symbolP) == reg_section)
13012         {
13013           const expressionS *e = symbol_get_value_expression (symbolP);
13014
13015           know (e->X_op == O_register);
13016           know (e->X_add_number >= 0
13017                 && (valueT) e->X_add_number < i386_regtab_size);
13018           r = i386_regtab + e->X_add_number;
13019           if (!check_register (r))
13020             {
13021               as_bad (_("register '%s%s' cannot be used here"),
13022                       register_prefix, r->reg_name);
13023               r = &bad_reg;
13024             }
13025           *end_op = input_line_pointer;
13026         }
13027       *input_line_pointer = c;
13028       input_line_pointer = save;
13029     }
13030   return r;
13031 }
13032
13033 int
13034 i386_parse_name (char *name, expressionS *e, char *nextcharP)
13035 {
13036   const reg_entry *r = NULL;
13037   char *end = input_line_pointer;
13038
13039   *end = *nextcharP;
13040   if (*name == REGISTER_PREFIX || allow_naked_reg)
13041     r = parse_real_register (name, &input_line_pointer);
13042   if (r && end <= input_line_pointer)
13043     {
13044       *nextcharP = *input_line_pointer;
13045       *input_line_pointer = 0;
13046       if (r != &bad_reg)
13047         {
13048           e->X_op = O_register;
13049           e->X_add_number = r - i386_regtab;
13050         }
13051       else
13052           e->X_op = O_illegal;
13053       return 1;
13054     }
13055   input_line_pointer = end;
13056   *end = 0;
13057   return intel_syntax ? i386_intel_parse_name (name, e) : 0;
13058 }
13059
13060 void
13061 md_operand (expressionS *e)
13062 {
13063   char *end;
13064   const reg_entry *r;
13065
13066   switch (*input_line_pointer)
13067     {
13068     case REGISTER_PREFIX:
13069       r = parse_real_register (input_line_pointer, &end);
13070       if (r)
13071         {
13072           e->X_op = O_register;
13073           e->X_add_number = r - i386_regtab;
13074           input_line_pointer = end;
13075         }
13076       break;
13077
13078     case '[':
13079       gas_assert (intel_syntax);
13080       end = input_line_pointer++;
13081       expression (e);
13082       if (*input_line_pointer == ']')
13083         {
13084           ++input_line_pointer;
13085           e->X_op_symbol = make_expr_symbol (e);
13086           e->X_add_symbol = NULL;
13087           e->X_add_number = 0;
13088           e->X_op = O_index;
13089         }
13090       else
13091         {
13092           e->X_op = O_absent;
13093           input_line_pointer = end;
13094         }
13095       break;
13096     }
13097 }
13098
13099 \f
13100 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13101 const char *md_shortopts = "kVQ:sqnO::";
13102 #else
13103 const char *md_shortopts = "qnO::";
13104 #endif
13105
13106 #define OPTION_32 (OPTION_MD_BASE + 0)
13107 #define OPTION_64 (OPTION_MD_BASE + 1)
13108 #define OPTION_DIVIDE (OPTION_MD_BASE + 2)
13109 #define OPTION_MARCH (OPTION_MD_BASE + 3)
13110 #define OPTION_MTUNE (OPTION_MD_BASE + 4)
13111 #define OPTION_MMNEMONIC (OPTION_MD_BASE + 5)
13112 #define OPTION_MSYNTAX (OPTION_MD_BASE + 6)
13113 #define OPTION_MINDEX_REG (OPTION_MD_BASE + 7)
13114 #define OPTION_MNAKED_REG (OPTION_MD_BASE + 8)
13115 #define OPTION_MRELAX_RELOCATIONS (OPTION_MD_BASE + 9)
13116 #define OPTION_MSSE2AVX (OPTION_MD_BASE + 10)
13117 #define OPTION_MSSE_CHECK (OPTION_MD_BASE + 11)
13118 #define OPTION_MOPERAND_CHECK (OPTION_MD_BASE + 12)
13119 #define OPTION_MAVXSCALAR (OPTION_MD_BASE + 13)
13120 #define OPTION_X32 (OPTION_MD_BASE + 14)
13121 #define OPTION_MADD_BND_PREFIX (OPTION_MD_BASE + 15)
13122 #define OPTION_MEVEXLIG (OPTION_MD_BASE + 16)
13123 #define OPTION_MEVEXWIG (OPTION_MD_BASE + 17)
13124 #define OPTION_MBIG_OBJ (OPTION_MD_BASE + 18)
13125 #define OPTION_MOMIT_LOCK_PREFIX (OPTION_MD_BASE + 19)
13126 #define OPTION_MEVEXRCIG (OPTION_MD_BASE + 20)
13127 #define OPTION_MSHARED (OPTION_MD_BASE + 21)
13128 #define OPTION_MAMD64 (OPTION_MD_BASE + 22)
13129 #define OPTION_MINTEL64 (OPTION_MD_BASE + 23)
13130 #define OPTION_MFENCE_AS_LOCK_ADD (OPTION_MD_BASE + 24)
13131 #define OPTION_X86_USED_NOTE (OPTION_MD_BASE + 25)
13132 #define OPTION_MVEXWIG (OPTION_MD_BASE + 26)
13133 #define OPTION_MALIGN_BRANCH_BOUNDARY (OPTION_MD_BASE + 27)
13134 #define OPTION_MALIGN_BRANCH_PREFIX_SIZE (OPTION_MD_BASE + 28)
13135 #define OPTION_MALIGN_BRANCH (OPTION_MD_BASE + 29)
13136 #define OPTION_MBRANCHES_WITH_32B_BOUNDARIES (OPTION_MD_BASE + 30)
13137 #define OPTION_MLFENCE_AFTER_LOAD (OPTION_MD_BASE + 31)
13138 #define OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH (OPTION_MD_BASE + 32)
13139 #define OPTION_MLFENCE_BEFORE_RET (OPTION_MD_BASE + 33)
13140 #define OPTION_MUSE_UNALIGNED_VECTOR_MOVE (OPTION_MD_BASE + 34)
13141
13142 struct option md_longopts[] =
13143 {
13144   {"32", no_argument, NULL, OPTION_32},
13145 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
13146      || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
13147   {"64", no_argument, NULL, OPTION_64},
13148 #endif
13149 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13150   {"x32", no_argument, NULL, OPTION_X32},
13151   {"mshared", no_argument, NULL, OPTION_MSHARED},
13152   {"mx86-used-note", required_argument, NULL, OPTION_X86_USED_NOTE},
13153 #endif
13154   {"divide", no_argument, NULL, OPTION_DIVIDE},
13155   {"march", required_argument, NULL, OPTION_MARCH},
13156   {"mtune", required_argument, NULL, OPTION_MTUNE},
13157   {"mmnemonic", required_argument, NULL, OPTION_MMNEMONIC},
13158   {"msyntax", required_argument, NULL, OPTION_MSYNTAX},
13159   {"mindex-reg", no_argument, NULL, OPTION_MINDEX_REG},
13160   {"mnaked-reg", no_argument, NULL, OPTION_MNAKED_REG},
13161   {"msse2avx", no_argument, NULL, OPTION_MSSE2AVX},
13162   {"muse-unaligned-vector-move", no_argument, NULL, OPTION_MUSE_UNALIGNED_VECTOR_MOVE},
13163   {"msse-check", required_argument, NULL, OPTION_MSSE_CHECK},
13164   {"moperand-check", required_argument, NULL, OPTION_MOPERAND_CHECK},
13165   {"mavxscalar", required_argument, NULL, OPTION_MAVXSCALAR},
13166   {"mvexwig", required_argument, NULL, OPTION_MVEXWIG},
13167   {"madd-bnd-prefix", no_argument, NULL, OPTION_MADD_BND_PREFIX},
13168   {"mevexlig", required_argument, NULL, OPTION_MEVEXLIG},
13169   {"mevexwig", required_argument, NULL, OPTION_MEVEXWIG},
13170 # if defined (TE_PE) || defined (TE_PEP)
13171   {"mbig-obj", no_argument, NULL, OPTION_MBIG_OBJ},
13172 #endif
13173   {"momit-lock-prefix", required_argument, NULL, OPTION_MOMIT_LOCK_PREFIX},
13174   {"mfence-as-lock-add", required_argument, NULL, OPTION_MFENCE_AS_LOCK_ADD},
13175   {"mrelax-relocations", required_argument, NULL, OPTION_MRELAX_RELOCATIONS},
13176   {"mevexrcig", required_argument, NULL, OPTION_MEVEXRCIG},
13177   {"malign-branch-boundary", required_argument, NULL, OPTION_MALIGN_BRANCH_BOUNDARY},
13178   {"malign-branch-prefix-size", required_argument, NULL, OPTION_MALIGN_BRANCH_PREFIX_SIZE},
13179   {"malign-branch", required_argument, NULL, OPTION_MALIGN_BRANCH},
13180   {"mbranches-within-32B-boundaries", no_argument, NULL, OPTION_MBRANCHES_WITH_32B_BOUNDARIES},
13181   {"mlfence-after-load", required_argument, NULL, OPTION_MLFENCE_AFTER_LOAD},
13182   {"mlfence-before-indirect-branch", required_argument, NULL,
13183    OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH},
13184   {"mlfence-before-ret", required_argument, NULL, OPTION_MLFENCE_BEFORE_RET},
13185   {"mamd64", no_argument, NULL, OPTION_MAMD64},
13186   {"mintel64", no_argument, NULL, OPTION_MINTEL64},
13187   {NULL, no_argument, NULL, 0}
13188 };
13189 size_t md_longopts_size = sizeof (md_longopts);
13190
13191 int
13192 md_parse_option (int c, const char *arg)
13193 {
13194   unsigned int j;
13195   char *arch, *next, *saved, *type;
13196
13197   switch (c)
13198     {
13199     case 'n':
13200       optimize_align_code = 0;
13201       break;
13202
13203     case 'q':
13204       quiet_warnings = 1;
13205       break;
13206
13207 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13208       /* -Qy, -Qn: SVR4 arguments controlling whether a .comment section
13209          should be emitted or not.  FIXME: Not implemented.  */
13210     case 'Q':
13211       if ((arg[0] != 'y' && arg[0] != 'n') || arg[1])
13212         return 0;
13213       break;
13214
13215       /* -V: SVR4 argument to print version ID.  */
13216     case 'V':
13217       print_version_id ();
13218       break;
13219
13220       /* -k: Ignore for FreeBSD compatibility.  */
13221     case 'k':
13222       break;
13223
13224     case 's':
13225       /* -s: On i386 Solaris, this tells the native assembler to use
13226          .stab instead of .stab.excl.  We always use .stab anyhow.  */
13227       break;
13228
13229     case OPTION_MSHARED:
13230       shared = 1;
13231       break;
13232
13233     case OPTION_X86_USED_NOTE:
13234       if (strcasecmp (arg, "yes") == 0)
13235         x86_used_note = 1;
13236       else if (strcasecmp (arg, "no") == 0)
13237         x86_used_note = 0;
13238       else
13239         as_fatal (_("invalid -mx86-used-note= option: `%s'"), arg);
13240       break;
13241
13242
13243 #endif
13244 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
13245      || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
13246     case OPTION_64:
13247       {
13248         const char **list, **l;
13249
13250         list = bfd_target_list ();
13251         for (l = list; *l != NULL; l++)
13252           if (startswith (*l, "elf64-x86-64")
13253               || strcmp (*l, "coff-x86-64") == 0
13254               || strcmp (*l, "pe-x86-64") == 0
13255               || strcmp (*l, "pei-x86-64") == 0
13256               || strcmp (*l, "mach-o-x86-64") == 0)
13257             {
13258               default_arch = "x86_64";
13259               break;
13260             }
13261         if (*l == NULL)
13262           as_fatal (_("no compiled in support for x86_64"));
13263         free (list);
13264       }
13265       break;
13266 #endif
13267
13268 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13269     case OPTION_X32:
13270       if (IS_ELF)
13271         {
13272           const char **list, **l;
13273
13274           list = bfd_target_list ();
13275           for (l = list; *l != NULL; l++)
13276             if (startswith (*l, "elf32-x86-64"))
13277               {
13278                 default_arch = "x86_64:32";
13279                 break;
13280               }
13281           if (*l == NULL)
13282             as_fatal (_("no compiled in support for 32bit x86_64"));
13283           free (list);
13284         }
13285       else
13286         as_fatal (_("32bit x86_64 is only supported for ELF"));
13287       break;
13288 #endif
13289
13290     case OPTION_32:
13291       default_arch = "i386";
13292       break;
13293
13294     case OPTION_DIVIDE:
13295 #ifdef SVR4_COMMENT_CHARS
13296       {
13297         char *n, *t;
13298         const char *s;
13299
13300         n = XNEWVEC (char, strlen (i386_comment_chars) + 1);
13301         t = n;
13302         for (s = i386_comment_chars; *s != '\0'; s++)
13303           if (*s != '/')
13304             *t++ = *s;
13305         *t = '\0';
13306         i386_comment_chars = n;
13307       }
13308 #endif
13309       break;
13310
13311     case OPTION_MARCH:
13312       saved = xstrdup (arg);
13313       arch = saved;
13314       /* Allow -march=+nosse.  */
13315       if (*arch == '+')
13316         arch++;
13317       do
13318         {
13319           if (*arch == '.')
13320             as_fatal (_("invalid -march= option: `%s'"), arg);
13321           next = strchr (arch, '+');
13322           if (next)
13323             *next++ = '\0';
13324           for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13325             {
13326               if (arch == saved && strcmp (arch, cpu_arch [j].name) == 0)
13327                 {
13328                   /* Processor.  */
13329                   if (! cpu_arch[j].flags.bitfield.cpui386)
13330                     continue;
13331
13332                   cpu_arch_name = cpu_arch[j].name;
13333                   cpu_sub_arch_name = NULL;
13334                   cpu_arch_flags = cpu_arch[j].flags;
13335                   cpu_arch_isa = cpu_arch[j].type;
13336                   cpu_arch_isa_flags = cpu_arch[j].flags;
13337                   if (!cpu_arch_tune_set)
13338                     {
13339                       cpu_arch_tune = cpu_arch_isa;
13340                       cpu_arch_tune_flags = cpu_arch_isa_flags;
13341                     }
13342                   break;
13343                 }
13344               else if (*cpu_arch [j].name == '.'
13345                        && strcmp (arch, cpu_arch [j].name + 1) == 0)
13346                 {
13347                   /* ISA extension.  */
13348                   i386_cpu_flags flags;
13349
13350                   flags = cpu_flags_or (cpu_arch_flags,
13351                                         cpu_arch[j].flags);
13352
13353                   if (!cpu_flags_equal (&flags, &cpu_arch_flags))
13354                     {
13355                       if (cpu_sub_arch_name)
13356                         {
13357                           char *name = cpu_sub_arch_name;
13358                           cpu_sub_arch_name = concat (name,
13359                                                       cpu_arch[j].name,
13360                                                       (const char *) NULL);
13361                           free (name);
13362                         }
13363                       else
13364                         cpu_sub_arch_name = xstrdup (cpu_arch[j].name);
13365                       cpu_arch_flags = flags;
13366                       cpu_arch_isa_flags = flags;
13367                     }
13368                   else
13369                     cpu_arch_isa_flags
13370                       = cpu_flags_or (cpu_arch_isa_flags,
13371                                       cpu_arch[j].flags);
13372                   break;
13373                 }
13374             }
13375
13376           if (j >= ARRAY_SIZE (cpu_arch))
13377             {
13378               /* Disable an ISA extension.  */
13379               for (j = 0; j < ARRAY_SIZE (cpu_noarch); j++)
13380                 if (strcmp (arch, cpu_noarch [j].name) == 0)
13381                   {
13382                     i386_cpu_flags flags;
13383
13384                     flags = cpu_flags_and_not (cpu_arch_flags,
13385                                                cpu_noarch[j].flags);
13386                     if (!cpu_flags_equal (&flags, &cpu_arch_flags))
13387                       {
13388                         if (cpu_sub_arch_name)
13389                           {
13390                             char *name = cpu_sub_arch_name;
13391                             cpu_sub_arch_name = concat (arch,
13392                                                         (const char *) NULL);
13393                             free (name);
13394                           }
13395                         else
13396                           cpu_sub_arch_name = xstrdup (arch);
13397                         cpu_arch_flags = flags;
13398                         cpu_arch_isa_flags = flags;
13399                       }
13400                     break;
13401                   }
13402
13403               if (j >= ARRAY_SIZE (cpu_noarch))
13404                 j = ARRAY_SIZE (cpu_arch);
13405             }
13406
13407           if (j >= ARRAY_SIZE (cpu_arch))
13408             as_fatal (_("invalid -march= option: `%s'"), arg);
13409
13410           arch = next;
13411         }
13412       while (next != NULL);
13413       free (saved);
13414       break;
13415
13416     case OPTION_MTUNE:
13417       if (*arg == '.')
13418         as_fatal (_("invalid -mtune= option: `%s'"), arg);
13419       for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13420         {
13421           if (strcmp (arg, cpu_arch [j].name) == 0)
13422             {
13423               cpu_arch_tune_set = 1;
13424               cpu_arch_tune = cpu_arch [j].type;
13425               cpu_arch_tune_flags = cpu_arch[j].flags;
13426               break;
13427             }
13428         }
13429       if (j >= ARRAY_SIZE (cpu_arch))
13430         as_fatal (_("invalid -mtune= option: `%s'"), arg);
13431       break;
13432
13433     case OPTION_MMNEMONIC:
13434       if (strcasecmp (arg, "att") == 0)
13435         intel_mnemonic = 0;
13436       else if (strcasecmp (arg, "intel") == 0)
13437         intel_mnemonic = 1;
13438       else
13439         as_fatal (_("invalid -mmnemonic= option: `%s'"), arg);
13440       break;
13441
13442     case OPTION_MSYNTAX:
13443       if (strcasecmp (arg, "att") == 0)
13444         intel_syntax = 0;
13445       else if (strcasecmp (arg, "intel") == 0)
13446         intel_syntax = 1;
13447       else
13448         as_fatal (_("invalid -msyntax= option: `%s'"), arg);
13449       break;
13450
13451     case OPTION_MINDEX_REG:
13452       allow_index_reg = 1;
13453       break;
13454
13455     case OPTION_MNAKED_REG:
13456       allow_naked_reg = 1;
13457       break;
13458
13459     case OPTION_MSSE2AVX:
13460       sse2avx = 1;
13461       break;
13462
13463     case OPTION_MUSE_UNALIGNED_VECTOR_MOVE:
13464       use_unaligned_vector_move = 1;
13465       break;
13466
13467     case OPTION_MSSE_CHECK:
13468       if (strcasecmp (arg, "error") == 0)
13469         sse_check = check_error;
13470       else if (strcasecmp (arg, "warning") == 0)
13471         sse_check = check_warning;
13472       else if (strcasecmp (arg, "none") == 0)
13473         sse_check = check_none;
13474       else
13475         as_fatal (_("invalid -msse-check= option: `%s'"), arg);
13476       break;
13477
13478     case OPTION_MOPERAND_CHECK:
13479       if (strcasecmp (arg, "error") == 0)
13480         operand_check = check_error;
13481       else if (strcasecmp (arg, "warning") == 0)
13482         operand_check = check_warning;
13483       else if (strcasecmp (arg, "none") == 0)
13484         operand_check = check_none;
13485       else
13486         as_fatal (_("invalid -moperand-check= option: `%s'"), arg);
13487       break;
13488
13489     case OPTION_MAVXSCALAR:
13490       if (strcasecmp (arg, "128") == 0)
13491         avxscalar = vex128;
13492       else if (strcasecmp (arg, "256") == 0)
13493         avxscalar = vex256;
13494       else
13495         as_fatal (_("invalid -mavxscalar= option: `%s'"), arg);
13496       break;
13497
13498     case OPTION_MVEXWIG:
13499       if (strcmp (arg, "0") == 0)
13500         vexwig = vexw0;
13501       else if (strcmp (arg, "1") == 0)
13502         vexwig = vexw1;
13503       else
13504         as_fatal (_("invalid -mvexwig= option: `%s'"), arg);
13505       break;
13506
13507     case OPTION_MADD_BND_PREFIX:
13508       add_bnd_prefix = 1;
13509       break;
13510
13511     case OPTION_MEVEXLIG:
13512       if (strcmp (arg, "128") == 0)
13513         evexlig = evexl128;
13514       else if (strcmp (arg, "256") == 0)
13515         evexlig = evexl256;
13516       else  if (strcmp (arg, "512") == 0)
13517         evexlig = evexl512;
13518       else
13519         as_fatal (_("invalid -mevexlig= option: `%s'"), arg);
13520       break;
13521
13522     case OPTION_MEVEXRCIG:
13523       if (strcmp (arg, "rne") == 0)
13524         evexrcig = rne;
13525       else if (strcmp (arg, "rd") == 0)
13526         evexrcig = rd;
13527       else if (strcmp (arg, "ru") == 0)
13528         evexrcig = ru;
13529       else if (strcmp (arg, "rz") == 0)
13530         evexrcig = rz;
13531       else
13532         as_fatal (_("invalid -mevexrcig= option: `%s'"), arg);
13533       break;
13534
13535     case OPTION_MEVEXWIG:
13536       if (strcmp (arg, "0") == 0)
13537         evexwig = evexw0;
13538       else if (strcmp (arg, "1") == 0)
13539         evexwig = evexw1;
13540       else
13541         as_fatal (_("invalid -mevexwig= option: `%s'"), arg);
13542       break;
13543
13544 # if defined (TE_PE) || defined (TE_PEP)
13545     case OPTION_MBIG_OBJ:
13546       use_big_obj = 1;
13547       break;
13548 #endif
13549
13550     case OPTION_MOMIT_LOCK_PREFIX:
13551       if (strcasecmp (arg, "yes") == 0)
13552         omit_lock_prefix = 1;
13553       else if (strcasecmp (arg, "no") == 0)
13554         omit_lock_prefix = 0;
13555       else
13556         as_fatal (_("invalid -momit-lock-prefix= option: `%s'"), arg);
13557       break;
13558
13559     case OPTION_MFENCE_AS_LOCK_ADD:
13560       if (strcasecmp (arg, "yes") == 0)
13561         avoid_fence = 1;
13562       else if (strcasecmp (arg, "no") == 0)
13563         avoid_fence = 0;
13564       else
13565         as_fatal (_("invalid -mfence-as-lock-add= option: `%s'"), arg);
13566       break;
13567
13568     case OPTION_MLFENCE_AFTER_LOAD:
13569       if (strcasecmp (arg, "yes") == 0)
13570         lfence_after_load = 1;
13571       else if (strcasecmp (arg, "no") == 0)
13572         lfence_after_load = 0;
13573       else
13574         as_fatal (_("invalid -mlfence-after-load= option: `%s'"), arg);
13575       break;
13576
13577     case OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH:
13578       if (strcasecmp (arg, "all") == 0)
13579         {
13580           lfence_before_indirect_branch = lfence_branch_all;
13581           if (lfence_before_ret == lfence_before_ret_none)
13582             lfence_before_ret = lfence_before_ret_shl;
13583         }
13584       else if (strcasecmp (arg, "memory") == 0)
13585         lfence_before_indirect_branch = lfence_branch_memory;
13586       else if (strcasecmp (arg, "register") == 0)
13587         lfence_before_indirect_branch = lfence_branch_register;
13588       else if (strcasecmp (arg, "none") == 0)
13589         lfence_before_indirect_branch = lfence_branch_none;
13590       else
13591         as_fatal (_("invalid -mlfence-before-indirect-branch= option: `%s'"),
13592                   arg);
13593       break;
13594
13595     case OPTION_MLFENCE_BEFORE_RET:
13596       if (strcasecmp (arg, "or") == 0)
13597         lfence_before_ret = lfence_before_ret_or;
13598       else if (strcasecmp (arg, "not") == 0)
13599         lfence_before_ret = lfence_before_ret_not;
13600       else if (strcasecmp (arg, "shl") == 0 || strcasecmp (arg, "yes") == 0)
13601         lfence_before_ret = lfence_before_ret_shl;
13602       else if (strcasecmp (arg, "none") == 0)
13603         lfence_before_ret = lfence_before_ret_none;
13604       else
13605         as_fatal (_("invalid -mlfence-before-ret= option: `%s'"),
13606                   arg);
13607       break;
13608
13609     case OPTION_MRELAX_RELOCATIONS:
13610       if (strcasecmp (arg, "yes") == 0)
13611         generate_relax_relocations = 1;
13612       else if (strcasecmp (arg, "no") == 0)
13613         generate_relax_relocations = 0;
13614       else
13615         as_fatal (_("invalid -mrelax-relocations= option: `%s'"), arg);
13616       break;
13617
13618     case OPTION_MALIGN_BRANCH_BOUNDARY:
13619       {
13620         char *end;
13621         long int align = strtoul (arg, &end, 0);
13622         if (*end == '\0')
13623           {
13624             if (align == 0)
13625               {
13626                 align_branch_power = 0;
13627                 break;
13628               }
13629             else if (align >= 16)
13630               {
13631                 int align_power;
13632                 for (align_power = 0;
13633                      (align & 1) == 0;
13634                      align >>= 1, align_power++)
13635                   continue;
13636                 /* Limit alignment power to 31.  */
13637                 if (align == 1 && align_power < 32)
13638                   {
13639                     align_branch_power = align_power;
13640                     break;
13641                   }
13642               }
13643           }
13644         as_fatal (_("invalid -malign-branch-boundary= value: %s"), arg);
13645       }
13646       break;
13647
13648     case OPTION_MALIGN_BRANCH_PREFIX_SIZE:
13649       {
13650         char *end;
13651         int align = strtoul (arg, &end, 0);
13652         /* Some processors only support 5 prefixes.  */
13653         if (*end == '\0' && align >= 0 && align < 6)
13654           {
13655             align_branch_prefix_size = align;
13656             break;
13657           }
13658         as_fatal (_("invalid -malign-branch-prefix-size= value: %s"),
13659                   arg);
13660       }
13661       break;
13662
13663     case OPTION_MALIGN_BRANCH:
13664       align_branch = 0;
13665       saved = xstrdup (arg);
13666       type = saved;
13667       do
13668         {
13669           next = strchr (type, '+');
13670           if (next)
13671             *next++ = '\0';
13672           if (strcasecmp (type, "jcc") == 0)
13673             align_branch |= align_branch_jcc_bit;
13674           else if (strcasecmp (type, "fused") == 0)
13675             align_branch |= align_branch_fused_bit;
13676           else if (strcasecmp (type, "jmp") == 0)
13677             align_branch |= align_branch_jmp_bit;
13678           else if (strcasecmp (type, "call") == 0)
13679             align_branch |= align_branch_call_bit;
13680           else if (strcasecmp (type, "ret") == 0)
13681             align_branch |= align_branch_ret_bit;
13682           else if (strcasecmp (type, "indirect") == 0)
13683             align_branch |= align_branch_indirect_bit;
13684           else
13685             as_fatal (_("invalid -malign-branch= option: `%s'"), arg);
13686           type = next;
13687         }
13688       while (next != NULL);
13689       free (saved);
13690       break;
13691
13692     case OPTION_MBRANCHES_WITH_32B_BOUNDARIES:
13693       align_branch_power = 5;
13694       align_branch_prefix_size = 5;
13695       align_branch = (align_branch_jcc_bit
13696                       | align_branch_fused_bit
13697                       | align_branch_jmp_bit);
13698       break;
13699
13700     case OPTION_MAMD64:
13701       isa64 = amd64;
13702       break;
13703
13704     case OPTION_MINTEL64:
13705       isa64 = intel64;
13706       break;
13707
13708     case 'O':
13709       if (arg == NULL)
13710         {
13711           optimize = 1;
13712           /* Turn off -Os.  */
13713           optimize_for_space = 0;
13714         }
13715       else if (*arg == 's')
13716         {
13717           optimize_for_space = 1;
13718           /* Turn on all encoding optimizations.  */
13719           optimize = INT_MAX;
13720         }
13721       else
13722         {
13723           optimize = atoi (arg);
13724           /* Turn off -Os.  */
13725           optimize_for_space = 0;
13726         }
13727       break;
13728
13729     default:
13730       return 0;
13731     }
13732   return 1;
13733 }
13734
13735 #define MESSAGE_TEMPLATE \
13736 "                                                                                "
13737
13738 static char *
13739 output_message (FILE *stream, char *p, char *message, char *start,
13740                 int *left_p, const char *name, int len)
13741 {
13742   int size = sizeof (MESSAGE_TEMPLATE);
13743   int left = *left_p;
13744
13745   /* Reserve 2 spaces for ", " or ",\0" */
13746   left -= len + 2;
13747
13748   /* Check if there is any room.  */
13749   if (left >= 0)
13750     {
13751       if (p != start)
13752         {
13753           *p++ = ',';
13754           *p++ = ' ';
13755         }
13756       p = mempcpy (p, name, len);
13757     }
13758   else
13759     {
13760       /* Output the current message now and start a new one.  */
13761       *p++ = ',';
13762       *p = '\0';
13763       fprintf (stream, "%s\n", message);
13764       p = start;
13765       left = size - (start - message) - len - 2;
13766
13767       gas_assert (left >= 0);
13768
13769       p = mempcpy (p, name, len);
13770     }
13771
13772   *left_p = left;
13773   return p;
13774 }
13775
13776 static void
13777 show_arch (FILE *stream, int ext, int check)
13778 {
13779   static char message[] = MESSAGE_TEMPLATE;
13780   char *start = message + 27;
13781   char *p;
13782   int size = sizeof (MESSAGE_TEMPLATE);
13783   int left;
13784   const char *name;
13785   int len;
13786   unsigned int j;
13787
13788   p = start;
13789   left = size - (start - message);
13790   for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13791     {
13792       /* Should it be skipped?  */
13793       if (cpu_arch [j].skip)
13794         continue;
13795
13796       name = cpu_arch [j].name;
13797       len = cpu_arch [j].len;
13798       if (*name == '.')
13799         {
13800           /* It is an extension.  Skip if we aren't asked to show it.  */
13801           if (ext)
13802             {
13803               name++;
13804               len--;
13805             }
13806           else
13807             continue;
13808         }
13809       else if (ext)
13810         {
13811           /* It is an processor.  Skip if we show only extension.  */
13812           continue;
13813         }
13814       else if (check && ! cpu_arch[j].flags.bitfield.cpui386)
13815         {
13816           /* It is an impossible processor - skip.  */
13817           continue;
13818         }
13819
13820       p = output_message (stream, p, message, start, &left, name, len);
13821     }
13822
13823   /* Display disabled extensions.  */
13824   if (ext)
13825     for (j = 0; j < ARRAY_SIZE (cpu_noarch); j++)
13826       {
13827         name = cpu_noarch [j].name;
13828         len = cpu_noarch [j].len;
13829         p = output_message (stream, p, message, start, &left, name,
13830                             len);
13831       }
13832
13833   *p = '\0';
13834   fprintf (stream, "%s\n", message);
13835 }
13836
13837 void
13838 md_show_usage (FILE *stream)
13839 {
13840 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13841   fprintf (stream, _("\
13842   -Qy, -Qn                ignored\n\
13843   -V                      print assembler version number\n\
13844   -k                      ignored\n"));
13845 #endif
13846   fprintf (stream, _("\
13847   -n                      Do not optimize code alignment\n\
13848   -q                      quieten some warnings\n"));
13849 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13850   fprintf (stream, _("\
13851   -s                      ignored\n"));
13852 #endif
13853 #ifdef BFD64
13854 # if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13855   fprintf (stream, _("\
13856   --32/--64/--x32         generate 32bit/64bit/x32 object\n"));
13857 # elif defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O)
13858   fprintf (stream, _("\
13859   --32/--64               generate 32bit/64bit object\n"));
13860 # endif
13861 #endif
13862 #ifdef SVR4_COMMENT_CHARS
13863   fprintf (stream, _("\
13864   --divide                do not treat `/' as a comment character\n"));
13865 #else
13866   fprintf (stream, _("\
13867   --divide                ignored\n"));
13868 #endif
13869   fprintf (stream, _("\
13870   -march=CPU[,+EXTENSION...]\n\
13871                           generate code for CPU and EXTENSION, CPU is one of:\n"));
13872   show_arch (stream, 0, 1);
13873   fprintf (stream, _("\
13874                           EXTENSION is combination of:\n"));
13875   show_arch (stream, 1, 0);
13876   fprintf (stream, _("\
13877   -mtune=CPU              optimize for CPU, CPU is one of:\n"));
13878   show_arch (stream, 0, 0);
13879   fprintf (stream, _("\
13880   -msse2avx               encode SSE instructions with VEX prefix\n"));
13881   fprintf (stream, _("\
13882   -muse-unaligned-vector-move\n\
13883                           encode aligned vector move as unaligned vector move\n"));
13884   fprintf (stream, _("\
13885   -msse-check=[none|error|warning] (default: warning)\n\
13886                           check SSE instructions\n"));
13887   fprintf (stream, _("\
13888   -moperand-check=[none|error|warning] (default: warning)\n\
13889                           check operand combinations for validity\n"));
13890   fprintf (stream, _("\
13891   -mavxscalar=[128|256] (default: 128)\n\
13892                           encode scalar AVX instructions with specific vector\n\
13893                            length\n"));
13894   fprintf (stream, _("\
13895   -mvexwig=[0|1] (default: 0)\n\
13896                           encode VEX instructions with specific VEX.W value\n\
13897                            for VEX.W bit ignored instructions\n"));
13898   fprintf (stream, _("\
13899   -mevexlig=[128|256|512] (default: 128)\n\
13900                           encode scalar EVEX instructions with specific vector\n\
13901                            length\n"));
13902   fprintf (stream, _("\
13903   -mevexwig=[0|1] (default: 0)\n\
13904                           encode EVEX instructions with specific EVEX.W value\n\
13905                            for EVEX.W bit ignored instructions\n"));
13906   fprintf (stream, _("\
13907   -mevexrcig=[rne|rd|ru|rz] (default: rne)\n\
13908                           encode EVEX instructions with specific EVEX.RC value\n\
13909                            for SAE-only ignored instructions\n"));
13910   fprintf (stream, _("\
13911   -mmnemonic=[att|intel] "));
13912   if (SYSV386_COMPAT)
13913     fprintf (stream, _("(default: att)\n"));
13914   else
13915     fprintf (stream, _("(default: intel)\n"));
13916   fprintf (stream, _("\
13917                           use AT&T/Intel mnemonic\n"));
13918   fprintf (stream, _("\
13919   -msyntax=[att|intel] (default: att)\n\
13920                           use AT&T/Intel syntax\n"));
13921   fprintf (stream, _("\
13922   -mindex-reg             support pseudo index registers\n"));
13923   fprintf (stream, _("\
13924   -mnaked-reg             don't require `%%' prefix for registers\n"));
13925   fprintf (stream, _("\
13926   -madd-bnd-prefix        add BND prefix for all valid branches\n"));
13927 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13928   fprintf (stream, _("\
13929   -mshared                disable branch optimization for shared code\n"));
13930   fprintf (stream, _("\
13931   -mx86-used-note=[no|yes] "));
13932   if (DEFAULT_X86_USED_NOTE)
13933     fprintf (stream, _("(default: yes)\n"));
13934   else
13935     fprintf (stream, _("(default: no)\n"));
13936   fprintf (stream, _("\
13937                           generate x86 used ISA and feature properties\n"));
13938 #endif
13939 #if defined (TE_PE) || defined (TE_PEP)
13940   fprintf (stream, _("\
13941   -mbig-obj               generate big object files\n"));
13942 #endif
13943   fprintf (stream, _("\
13944   -momit-lock-prefix=[no|yes] (default: no)\n\
13945                           strip all lock prefixes\n"));
13946   fprintf (stream, _("\
13947   -mfence-as-lock-add=[no|yes] (default: no)\n\
13948                           encode lfence, mfence and sfence as\n\
13949                            lock addl $0x0, (%%{re}sp)\n"));
13950   fprintf (stream, _("\
13951   -mrelax-relocations=[no|yes] "));
13952   if (DEFAULT_GENERATE_X86_RELAX_RELOCATIONS)
13953     fprintf (stream, _("(default: yes)\n"));
13954   else
13955     fprintf (stream, _("(default: no)\n"));
13956   fprintf (stream, _("\
13957                           generate relax relocations\n"));
13958   fprintf (stream, _("\
13959   -malign-branch-boundary=NUM (default: 0)\n\
13960                           align branches within NUM byte boundary\n"));
13961   fprintf (stream, _("\
13962   -malign-branch=TYPE[+TYPE...] (default: jcc+fused+jmp)\n\
13963                           TYPE is combination of jcc, fused, jmp, call, ret,\n\
13964                            indirect\n\
13965                           specify types of branches to align\n"));
13966   fprintf (stream, _("\
13967   -malign-branch-prefix-size=NUM (default: 5)\n\
13968                           align branches with NUM prefixes per instruction\n"));
13969   fprintf (stream, _("\
13970   -mbranches-within-32B-boundaries\n\
13971                           align branches within 32 byte boundary\n"));
13972   fprintf (stream, _("\
13973   -mlfence-after-load=[no|yes] (default: no)\n\
13974                           generate lfence after load\n"));
13975   fprintf (stream, _("\
13976   -mlfence-before-indirect-branch=[none|all|register|memory] (default: none)\n\
13977                           generate lfence before indirect near branch\n"));
13978   fprintf (stream, _("\
13979   -mlfence-before-ret=[none|or|not|shl|yes] (default: none)\n\
13980                           generate lfence before ret\n"));
13981   fprintf (stream, _("\
13982   -mamd64                 accept only AMD64 ISA [default]\n"));
13983   fprintf (stream, _("\
13984   -mintel64               accept only Intel64 ISA\n"));
13985 }
13986
13987 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
13988      || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
13989      || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
13990
13991 /* Pick the target format to use.  */
13992
13993 const char *
13994 i386_target_format (void)
13995 {
13996   if (startswith (default_arch, "x86_64"))
13997     {
13998       update_code_flag (CODE_64BIT, 1);
13999       if (default_arch[6] == '\0')
14000         x86_elf_abi = X86_64_ABI;
14001       else
14002         x86_elf_abi = X86_64_X32_ABI;
14003     }
14004   else if (!strcmp (default_arch, "i386"))
14005     update_code_flag (CODE_32BIT, 1);
14006   else if (!strcmp (default_arch, "iamcu"))
14007     {
14008       update_code_flag (CODE_32BIT, 1);
14009       if (cpu_arch_isa == PROCESSOR_UNKNOWN)
14010         {
14011           static const i386_cpu_flags iamcu_flags = CPU_IAMCU_FLAGS;
14012           cpu_arch_name = "iamcu";
14013           cpu_sub_arch_name = NULL;
14014           cpu_arch_flags = iamcu_flags;
14015           cpu_arch_isa = PROCESSOR_IAMCU;
14016           cpu_arch_isa_flags = iamcu_flags;
14017           if (!cpu_arch_tune_set)
14018             {
14019               cpu_arch_tune = cpu_arch_isa;
14020               cpu_arch_tune_flags = cpu_arch_isa_flags;
14021             }
14022         }
14023       else if (cpu_arch_isa != PROCESSOR_IAMCU)
14024         as_fatal (_("Intel MCU doesn't support `%s' architecture"),
14025                   cpu_arch_name);
14026     }
14027   else
14028     as_fatal (_("unknown architecture"));
14029
14030   if (cpu_flags_all_zero (&cpu_arch_isa_flags))
14031     cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].flags;
14032   if (cpu_flags_all_zero (&cpu_arch_tune_flags))
14033     cpu_arch_tune_flags = cpu_arch[flag_code == CODE_64BIT].flags;
14034
14035   switch (OUTPUT_FLAVOR)
14036     {
14037 #if defined (OBJ_MAYBE_AOUT) || defined (OBJ_AOUT)
14038     case bfd_target_aout_flavour:
14039       return AOUT_TARGET_FORMAT;
14040 #endif
14041 #if defined (OBJ_MAYBE_COFF) || defined (OBJ_COFF)
14042 # if defined (TE_PE) || defined (TE_PEP)
14043     case bfd_target_coff_flavour:
14044       if (flag_code == CODE_64BIT)
14045         {
14046           object_64bit = 1;
14047           return use_big_obj ? "pe-bigobj-x86-64" : "pe-x86-64";
14048         }
14049       return use_big_obj ? "pe-bigobj-i386" : "pe-i386";
14050 # elif defined (TE_GO32)
14051     case bfd_target_coff_flavour:
14052       return "coff-go32";
14053 # else
14054     case bfd_target_coff_flavour:
14055       return "coff-i386";
14056 # endif
14057 #endif
14058 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
14059     case bfd_target_elf_flavour:
14060       {
14061         const char *format;
14062
14063         switch (x86_elf_abi)
14064           {
14065           default:
14066             format = ELF_TARGET_FORMAT;
14067 #ifndef TE_SOLARIS
14068             tls_get_addr = "___tls_get_addr";
14069 #endif
14070             break;
14071           case X86_64_ABI:
14072             use_rela_relocations = 1;
14073             object_64bit = 1;
14074 #ifndef TE_SOLARIS
14075             tls_get_addr = "__tls_get_addr";
14076 #endif
14077             format = ELF_TARGET_FORMAT64;
14078             break;
14079           case X86_64_X32_ABI:
14080             use_rela_relocations = 1;
14081             object_64bit = 1;
14082 #ifndef TE_SOLARIS
14083             tls_get_addr = "__tls_get_addr";
14084 #endif
14085             disallow_64bit_reloc = 1;
14086             format = ELF_TARGET_FORMAT32;
14087             break;
14088           }
14089         if (cpu_arch_isa == PROCESSOR_IAMCU)
14090           {
14091             if (x86_elf_abi != I386_ABI)
14092               as_fatal (_("Intel MCU is 32bit only"));
14093             return ELF_TARGET_IAMCU_FORMAT;
14094           }
14095         else
14096           return format;
14097       }
14098 #endif
14099 #if defined (OBJ_MACH_O)
14100     case bfd_target_mach_o_flavour:
14101       if (flag_code == CODE_64BIT)
14102         {
14103           use_rela_relocations = 1;
14104           object_64bit = 1;
14105           return "mach-o-x86-64";
14106         }
14107       else
14108         return "mach-o-i386";
14109 #endif
14110     default:
14111       abort ();
14112       return NULL;
14113     }
14114 }
14115
14116 #endif /* OBJ_MAYBE_ more than one  */
14117 \f
14118 symbolS *
14119 md_undefined_symbol (char *name)
14120 {
14121   if (name[0] == GLOBAL_OFFSET_TABLE_NAME[0]
14122       && name[1] == GLOBAL_OFFSET_TABLE_NAME[1]
14123       && name[2] == GLOBAL_OFFSET_TABLE_NAME[2]
14124       && strcmp (name, GLOBAL_OFFSET_TABLE_NAME) == 0)
14125     {
14126       if (!GOT_symbol)
14127         {
14128           if (symbol_find (name))
14129             as_bad (_("GOT already in symbol table"));
14130           GOT_symbol = symbol_new (name, undefined_section,
14131                                    &zero_address_frag, 0);
14132         };
14133       return GOT_symbol;
14134     }
14135   return 0;
14136 }
14137
14138 /* Round up a section size to the appropriate boundary.  */
14139
14140 valueT
14141 md_section_align (segT segment ATTRIBUTE_UNUSED, valueT size)
14142 {
14143 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
14144   if (OUTPUT_FLAVOR == bfd_target_aout_flavour)
14145     {
14146       /* For a.out, force the section size to be aligned.  If we don't do
14147          this, BFD will align it for us, but it will not write out the
14148          final bytes of the section.  This may be a bug in BFD, but it is
14149          easier to fix it here since that is how the other a.out targets
14150          work.  */
14151       int align;
14152
14153       align = bfd_section_alignment (segment);
14154       size = ((size + (1 << align) - 1) & (-((valueT) 1 << align)));
14155     }
14156 #endif
14157
14158   return size;
14159 }
14160
14161 /* On the i386, PC-relative offsets are relative to the start of the
14162    next instruction.  That is, the address of the offset, plus its
14163    size, since the offset is always the last part of the insn.  */
14164
14165 long
14166 md_pcrel_from (fixS *fixP)
14167 {
14168   return fixP->fx_size + fixP->fx_where + fixP->fx_frag->fr_address;
14169 }
14170
14171 #ifndef I386COFF
14172
14173 static void
14174 s_bss (int ignore ATTRIBUTE_UNUSED)
14175 {
14176   int temp;
14177
14178 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14179   if (IS_ELF)
14180     obj_elf_section_change_hook ();
14181 #endif
14182   temp = get_absolute_expression ();
14183   subseg_set (bss_section, (subsegT) temp);
14184   demand_empty_rest_of_line ();
14185 }
14186
14187 #endif
14188
14189 /* Remember constant directive.  */
14190
14191 void
14192 i386_cons_align (int ignore ATTRIBUTE_UNUSED)
14193 {
14194   if (last_insn.kind != last_insn_directive
14195       && (bfd_section_flags (now_seg) & SEC_CODE))
14196     {
14197       last_insn.seg = now_seg;
14198       last_insn.kind = last_insn_directive;
14199       last_insn.name = "constant directive";
14200       last_insn.file = as_where (&last_insn.line);
14201       if (lfence_before_ret != lfence_before_ret_none)
14202         {
14203           if (lfence_before_indirect_branch != lfence_branch_none)
14204             as_warn (_("constant directive skips -mlfence-before-ret "
14205                        "and -mlfence-before-indirect-branch"));
14206           else
14207             as_warn (_("constant directive skips -mlfence-before-ret"));
14208         }
14209       else if (lfence_before_indirect_branch != lfence_branch_none)
14210         as_warn (_("constant directive skips -mlfence-before-indirect-branch"));
14211     }
14212 }
14213
14214 int
14215 i386_validate_fix (fixS *fixp)
14216 {
14217   if (fixp->fx_addsy && S_GET_SEGMENT(fixp->fx_addsy) == reg_section)
14218     {
14219       reloc_howto_type *howto;
14220
14221       howto = bfd_reloc_type_lookup (stdoutput, fixp->fx_r_type);
14222       as_bad_where (fixp->fx_file, fixp->fx_line,
14223                     _("invalid %s relocation against register"),
14224                     howto ? howto->name : "<unknown>");
14225       return 0;
14226     }
14227
14228 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14229   if (fixp->fx_r_type == BFD_RELOC_SIZE32
14230       || fixp->fx_r_type == BFD_RELOC_SIZE64)
14231     return IS_ELF && fixp->fx_addsy
14232            && (!S_IS_DEFINED (fixp->fx_addsy)
14233                || S_IS_EXTERNAL (fixp->fx_addsy));
14234 #endif
14235
14236   if (fixp->fx_subsy)
14237     {
14238       if (fixp->fx_subsy == GOT_symbol)
14239         {
14240           if (fixp->fx_r_type == BFD_RELOC_32_PCREL)
14241             {
14242               if (!object_64bit)
14243                 abort ();
14244 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14245               if (fixp->fx_tcbit2)
14246                 fixp->fx_r_type = (fixp->fx_tcbit
14247                                    ? BFD_RELOC_X86_64_REX_GOTPCRELX
14248                                    : BFD_RELOC_X86_64_GOTPCRELX);
14249               else
14250 #endif
14251                 fixp->fx_r_type = BFD_RELOC_X86_64_GOTPCREL;
14252             }
14253           else
14254             {
14255               if (!object_64bit)
14256                 fixp->fx_r_type = BFD_RELOC_386_GOTOFF;
14257               else
14258                 fixp->fx_r_type = BFD_RELOC_X86_64_GOTOFF64;
14259             }
14260           fixp->fx_subsy = 0;
14261         }
14262     }
14263 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14264   else
14265     {
14266       /* NB: Commit 292676c1 resolved PLT32 reloc aganst local symbol
14267          to section.  Since PLT32 relocation must be against symbols,
14268          turn such PLT32 relocation into PC32 relocation.  */
14269       if (fixp->fx_addsy
14270           && (fixp->fx_r_type == BFD_RELOC_386_PLT32
14271               || fixp->fx_r_type == BFD_RELOC_X86_64_PLT32)
14272           && symbol_section_p (fixp->fx_addsy))
14273         fixp->fx_r_type = BFD_RELOC_32_PCREL;
14274       if (!object_64bit)
14275         {
14276           if (fixp->fx_r_type == BFD_RELOC_386_GOT32
14277               && fixp->fx_tcbit2)
14278             fixp->fx_r_type = BFD_RELOC_386_GOT32X;
14279         }
14280     }
14281 #endif
14282
14283   return 1;
14284 }
14285
14286 arelent *
14287 tc_gen_reloc (asection *section ATTRIBUTE_UNUSED, fixS *fixp)
14288 {
14289   arelent *rel;
14290   bfd_reloc_code_real_type code;
14291
14292   switch (fixp->fx_r_type)
14293     {
14294 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14295       symbolS *sym;
14296
14297     case BFD_RELOC_SIZE32:
14298     case BFD_RELOC_SIZE64:
14299       if (fixp->fx_addsy
14300           && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))
14301           && (!fixp->fx_subsy
14302               || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))))
14303         sym = fixp->fx_addsy;
14304       else if (fixp->fx_subsy
14305                && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))
14306                && (!fixp->fx_addsy
14307                    || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))))
14308         sym = fixp->fx_subsy;
14309       else
14310         sym = NULL;
14311       if (IS_ELF && sym && S_IS_DEFINED (sym) && !S_IS_EXTERNAL (sym))
14312         {
14313           /* Resolve size relocation against local symbol to size of
14314              the symbol plus addend.  */
14315           valueT value = S_GET_SIZE (sym);
14316
14317           if (symbol_get_bfdsym (sym)->flags & BSF_SECTION_SYM)
14318             value = bfd_section_size (S_GET_SEGMENT (sym));
14319           if (sym == fixp->fx_subsy)
14320             {
14321               value = -value;
14322               if (fixp->fx_addsy)
14323                 value += S_GET_VALUE (fixp->fx_addsy);
14324             }
14325           else if (fixp->fx_subsy)
14326             value -= S_GET_VALUE (fixp->fx_subsy);
14327           value += fixp->fx_offset;
14328           if (fixp->fx_r_type == BFD_RELOC_SIZE32
14329               && object_64bit
14330               && !fits_in_unsigned_long (value))
14331             as_bad_where (fixp->fx_file, fixp->fx_line,
14332                           _("symbol size computation overflow"));
14333           fixp->fx_addsy = NULL;
14334           fixp->fx_subsy = NULL;
14335           md_apply_fix (fixp, (valueT *) &value, NULL);
14336           return NULL;
14337         }
14338       if (!fixp->fx_addsy || fixp->fx_subsy)
14339         {
14340           as_bad_where (fixp->fx_file, fixp->fx_line,
14341                         "unsupported expression involving @size");
14342           return NULL;
14343         }
14344 #endif
14345       /* Fall through.  */
14346
14347     case BFD_RELOC_X86_64_PLT32:
14348     case BFD_RELOC_X86_64_GOT32:
14349     case BFD_RELOC_X86_64_GOTPCREL:
14350     case BFD_RELOC_X86_64_GOTPCRELX:
14351     case BFD_RELOC_X86_64_REX_GOTPCRELX:
14352     case BFD_RELOC_386_PLT32:
14353     case BFD_RELOC_386_GOT32:
14354     case BFD_RELOC_386_GOT32X:
14355     case BFD_RELOC_386_GOTOFF:
14356     case BFD_RELOC_386_GOTPC:
14357     case BFD_RELOC_386_TLS_GD:
14358     case BFD_RELOC_386_TLS_LDM:
14359     case BFD_RELOC_386_TLS_LDO_32:
14360     case BFD_RELOC_386_TLS_IE_32:
14361     case BFD_RELOC_386_TLS_IE:
14362     case BFD_RELOC_386_TLS_GOTIE:
14363     case BFD_RELOC_386_TLS_LE_32:
14364     case BFD_RELOC_386_TLS_LE:
14365     case BFD_RELOC_386_TLS_GOTDESC:
14366     case BFD_RELOC_386_TLS_DESC_CALL:
14367     case BFD_RELOC_X86_64_TLSGD:
14368     case BFD_RELOC_X86_64_TLSLD:
14369     case BFD_RELOC_X86_64_DTPOFF32:
14370     case BFD_RELOC_X86_64_DTPOFF64:
14371     case BFD_RELOC_X86_64_GOTTPOFF:
14372     case BFD_RELOC_X86_64_TPOFF32:
14373     case BFD_RELOC_X86_64_TPOFF64:
14374     case BFD_RELOC_X86_64_GOTOFF64:
14375     case BFD_RELOC_X86_64_GOTPC32:
14376     case BFD_RELOC_X86_64_GOT64:
14377     case BFD_RELOC_X86_64_GOTPCREL64:
14378     case BFD_RELOC_X86_64_GOTPC64:
14379     case BFD_RELOC_X86_64_GOTPLT64:
14380     case BFD_RELOC_X86_64_PLTOFF64:
14381     case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
14382     case BFD_RELOC_X86_64_TLSDESC_CALL:
14383     case BFD_RELOC_RVA:
14384     case BFD_RELOC_VTABLE_ENTRY:
14385     case BFD_RELOC_VTABLE_INHERIT:
14386 #ifdef TE_PE
14387     case BFD_RELOC_32_SECREL:
14388     case BFD_RELOC_16_SECIDX:
14389 #endif
14390       code = fixp->fx_r_type;
14391       break;
14392     case BFD_RELOC_X86_64_32S:
14393       if (!fixp->fx_pcrel)
14394         {
14395           /* Don't turn BFD_RELOC_X86_64_32S into BFD_RELOC_32.  */
14396           code = fixp->fx_r_type;
14397           break;
14398         }
14399       /* Fall through.  */
14400     default:
14401       if (fixp->fx_pcrel)
14402         {
14403           switch (fixp->fx_size)
14404             {
14405             default:
14406               as_bad_where (fixp->fx_file, fixp->fx_line,
14407                             _("can not do %d byte pc-relative relocation"),
14408                             fixp->fx_size);
14409               code = BFD_RELOC_32_PCREL;
14410               break;
14411             case 1: code = BFD_RELOC_8_PCREL;  break;
14412             case 2: code = BFD_RELOC_16_PCREL; break;
14413             case 4: code = BFD_RELOC_32_PCREL; break;
14414 #ifdef BFD64
14415             case 8: code = BFD_RELOC_64_PCREL; break;
14416 #endif
14417             }
14418         }
14419       else
14420         {
14421           switch (fixp->fx_size)
14422             {
14423             default:
14424               as_bad_where (fixp->fx_file, fixp->fx_line,
14425                             _("can not do %d byte relocation"),
14426                             fixp->fx_size);
14427               code = BFD_RELOC_32;
14428               break;
14429             case 1: code = BFD_RELOC_8;  break;
14430             case 2: code = BFD_RELOC_16; break;
14431             case 4: code = BFD_RELOC_32; break;
14432 #ifdef BFD64
14433             case 8: code = BFD_RELOC_64; break;
14434 #endif
14435             }
14436         }
14437       break;
14438     }
14439
14440   if ((code == BFD_RELOC_32
14441        || code == BFD_RELOC_32_PCREL
14442        || code == BFD_RELOC_X86_64_32S)
14443       && GOT_symbol
14444       && fixp->fx_addsy == GOT_symbol)
14445     {
14446       if (!object_64bit)
14447         code = BFD_RELOC_386_GOTPC;
14448       else
14449         code = BFD_RELOC_X86_64_GOTPC32;
14450     }
14451   if ((code == BFD_RELOC_64 || code == BFD_RELOC_64_PCREL)
14452       && GOT_symbol
14453       && fixp->fx_addsy == GOT_symbol)
14454     {
14455       code = BFD_RELOC_X86_64_GOTPC64;
14456     }
14457
14458   rel = XNEW (arelent);
14459   rel->sym_ptr_ptr = XNEW (asymbol *);
14460   *rel->sym_ptr_ptr = symbol_get_bfdsym (fixp->fx_addsy);
14461
14462   rel->address = fixp->fx_frag->fr_address + fixp->fx_where;
14463
14464   if (!use_rela_relocations)
14465     {
14466       /* HACK: Since i386 ELF uses Rel instead of Rela, encode the
14467          vtable entry to be used in the relocation's section offset.  */
14468       if (fixp->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
14469         rel->address = fixp->fx_offset;
14470 #if defined (OBJ_COFF) && defined (TE_PE)
14471       else if (fixp->fx_addsy && S_IS_WEAK (fixp->fx_addsy))
14472         rel->addend = fixp->fx_addnumber - (S_GET_VALUE (fixp->fx_addsy) * 2);
14473       else
14474 #endif
14475       rel->addend = 0;
14476     }
14477   /* Use the rela in 64bit mode.  */
14478   else
14479     {
14480       if (disallow_64bit_reloc)
14481         switch (code)
14482           {
14483           case BFD_RELOC_X86_64_DTPOFF64:
14484           case BFD_RELOC_X86_64_TPOFF64:
14485           case BFD_RELOC_64_PCREL:
14486           case BFD_RELOC_X86_64_GOTOFF64:
14487           case BFD_RELOC_X86_64_GOT64:
14488           case BFD_RELOC_X86_64_GOTPCREL64:
14489           case BFD_RELOC_X86_64_GOTPC64:
14490           case BFD_RELOC_X86_64_GOTPLT64:
14491           case BFD_RELOC_X86_64_PLTOFF64:
14492             as_bad_where (fixp->fx_file, fixp->fx_line,
14493                           _("cannot represent relocation type %s in x32 mode"),
14494                           bfd_get_reloc_code_name (code));
14495             break;
14496           default:
14497             break;
14498           }
14499
14500       if (!fixp->fx_pcrel)
14501         rel->addend = fixp->fx_offset;
14502       else
14503         switch (code)
14504           {
14505           case BFD_RELOC_X86_64_PLT32:
14506           case BFD_RELOC_X86_64_GOT32:
14507           case BFD_RELOC_X86_64_GOTPCREL:
14508           case BFD_RELOC_X86_64_GOTPCRELX:
14509           case BFD_RELOC_X86_64_REX_GOTPCRELX:
14510           case BFD_RELOC_X86_64_TLSGD:
14511           case BFD_RELOC_X86_64_TLSLD:
14512           case BFD_RELOC_X86_64_GOTTPOFF:
14513           case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
14514           case BFD_RELOC_X86_64_TLSDESC_CALL:
14515             rel->addend = fixp->fx_offset - fixp->fx_size;
14516             break;
14517           default:
14518             rel->addend = (section->vma
14519                            - fixp->fx_size
14520                            + fixp->fx_addnumber
14521                            + md_pcrel_from (fixp));
14522             break;
14523           }
14524     }
14525
14526   rel->howto = bfd_reloc_type_lookup (stdoutput, code);
14527   if (rel->howto == NULL)
14528     {
14529       as_bad_where (fixp->fx_file, fixp->fx_line,
14530                     _("cannot represent relocation type %s"),
14531                     bfd_get_reloc_code_name (code));
14532       /* Set howto to a garbage value so that we can keep going.  */
14533       rel->howto = bfd_reloc_type_lookup (stdoutput, BFD_RELOC_32);
14534       gas_assert (rel->howto != NULL);
14535     }
14536
14537   return rel;
14538 }
14539
14540 #include "tc-i386-intel.c"
14541
14542 void
14543 tc_x86_parse_to_dw2regnum (expressionS *exp)
14544 {
14545   int saved_naked_reg;
14546   char saved_register_dot;
14547
14548   saved_naked_reg = allow_naked_reg;
14549   allow_naked_reg = 1;
14550   saved_register_dot = register_chars['.'];
14551   register_chars['.'] = '.';
14552   allow_pseudo_reg = 1;
14553   expression_and_evaluate (exp);
14554   allow_pseudo_reg = 0;
14555   register_chars['.'] = saved_register_dot;
14556   allow_naked_reg = saved_naked_reg;
14557
14558   if (exp->X_op == O_register && exp->X_add_number >= 0)
14559     {
14560       if ((addressT) exp->X_add_number < i386_regtab_size)
14561         {
14562           exp->X_op = O_constant;
14563           exp->X_add_number = i386_regtab[exp->X_add_number]
14564                               .dw2_regnum[flag_code >> 1];
14565         }
14566       else
14567         exp->X_op = O_illegal;
14568     }
14569 }
14570
14571 void
14572 tc_x86_frame_initial_instructions (void)
14573 {
14574   static unsigned int sp_regno[2];
14575
14576   if (!sp_regno[flag_code >> 1])
14577     {
14578       char *saved_input = input_line_pointer;
14579       char sp[][4] = {"esp", "rsp"};
14580       expressionS exp;
14581
14582       input_line_pointer = sp[flag_code >> 1];
14583       tc_x86_parse_to_dw2regnum (&exp);
14584       gas_assert (exp.X_op == O_constant);
14585       sp_regno[flag_code >> 1] = exp.X_add_number;
14586       input_line_pointer = saved_input;
14587     }
14588
14589   cfi_add_CFA_def_cfa (sp_regno[flag_code >> 1], -x86_cie_data_alignment);
14590   cfi_add_CFA_offset (x86_dwarf2_return_column, x86_cie_data_alignment);
14591 }
14592
14593 int
14594 x86_dwarf2_addr_size (void)
14595 {
14596 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
14597   if (x86_elf_abi == X86_64_X32_ABI)
14598     return 4;
14599 #endif
14600   return bfd_arch_bits_per_address (stdoutput) / 8;
14601 }
14602
14603 int
14604 i386_elf_section_type (const char *str, size_t len)
14605 {
14606   if (flag_code == CODE_64BIT
14607       && len == sizeof ("unwind") - 1
14608       && startswith (str, "unwind"))
14609     return SHT_X86_64_UNWIND;
14610
14611   return -1;
14612 }
14613
14614 #ifdef TE_SOLARIS
14615 void
14616 i386_solaris_fix_up_eh_frame (segT sec)
14617 {
14618   if (flag_code == CODE_64BIT)
14619     elf_section_type (sec) = SHT_X86_64_UNWIND;
14620 }
14621 #endif
14622
14623 #ifdef TE_PE
14624 void
14625 tc_pe_dwarf2_emit_offset (symbolS *symbol, unsigned int size)
14626 {
14627   expressionS exp;
14628
14629   exp.X_op = O_secrel;
14630   exp.X_add_symbol = symbol;
14631   exp.X_add_number = 0;
14632   emit_expr (&exp, size);
14633 }
14634 #endif
14635
14636 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14637 /* For ELF on x86-64, add support for SHF_X86_64_LARGE.  */
14638
14639 bfd_vma
14640 x86_64_section_letter (int letter, const char **ptr_msg)
14641 {
14642   if (flag_code == CODE_64BIT)
14643     {
14644       if (letter == 'l')
14645         return SHF_X86_64_LARGE;
14646
14647       *ptr_msg = _("bad .section directive: want a,l,w,x,M,S,G,T in string");
14648     }
14649   else
14650     *ptr_msg = _("bad .section directive: want a,w,x,M,S,G,T in string");
14651   return -1;
14652 }
14653
14654 bfd_vma
14655 x86_64_section_word (char *str, size_t len)
14656 {
14657   if (len == 5 && flag_code == CODE_64BIT && startswith (str, "large"))
14658     return SHF_X86_64_LARGE;
14659
14660   return -1;
14661 }
14662
14663 static void
14664 handle_large_common (int small ATTRIBUTE_UNUSED)
14665 {
14666   if (flag_code != CODE_64BIT)
14667     {
14668       s_comm_internal (0, elf_common_parse);
14669       as_warn (_(".largecomm supported only in 64bit mode, producing .comm"));
14670     }
14671   else
14672     {
14673       static segT lbss_section;
14674       asection *saved_com_section_ptr = elf_com_section_ptr;
14675       asection *saved_bss_section = bss_section;
14676
14677       if (lbss_section == NULL)
14678         {
14679           flagword applicable;
14680           segT seg = now_seg;
14681           subsegT subseg = now_subseg;
14682
14683           /* The .lbss section is for local .largecomm symbols.  */
14684           lbss_section = subseg_new (".lbss", 0);
14685           applicable = bfd_applicable_section_flags (stdoutput);
14686           bfd_set_section_flags (lbss_section, applicable & SEC_ALLOC);
14687           seg_info (lbss_section)->bss = 1;
14688
14689           subseg_set (seg, subseg);
14690         }
14691
14692       elf_com_section_ptr = &_bfd_elf_large_com_section;
14693       bss_section = lbss_section;
14694
14695       s_comm_internal (0, elf_common_parse);
14696
14697       elf_com_section_ptr = saved_com_section_ptr;
14698       bss_section = saved_bss_section;
14699     }
14700 }
14701 #endif /* OBJ_ELF || OBJ_MAYBE_ELF */